2022
Wehr, Matthias M.; Sarang, Satinder S.; Rooseboom, Martijn; Boogaard, Peter J.; Karwath, Andreas; Escher, Sylvia E.
RespiraTox – Development of a QSAR model to predict human respiratory irritants Journal Article
In: Regulatory Toxicology and Pharmacology, vol. 128, pp. 105089, 2022.
Links | BibTeX | Tags: cheminformatics, machine learning, QSAR
@article{Wehr_2022,
title = {{RespiraTox} – Development of a {QSAR} model to predict human respiratory irritants},
author = {Matthias M. Wehr and Satinder S. Sarang and Martijn Rooseboom and Peter J. Boogaard and Andreas Karwath and Sylvia E. Escher},
url = {https://doi.org/10.1016/j.yrtph.2021.105089},
doi = {10.1016/j.yrtph.2021.105089},
year = {2022},
date = {2022-02-01},
urldate = {2022-02-01},
journal = {Regulatory Toxicology and Pharmacology},
volume = {128},
pages = {105089},
publisher = {Elsevier BV},
keywords = {cheminformatics, machine learning, QSAR},
pubstate = {published},
tppubtype = {article}
}
2020
Escher, S E; Mangelsdorf, I; Hoffmann-Doerr, S; Partosch, F; Karwath, Andreas; Schroeder, K; Zapf, A; Batke, M
Time extrapolation in regulatory risk assessment: The impact of study differences on the extrapolation factors Journal Article
In: Regul Toxicol Pharmacol, vol. 112, pp. 104584, 2020, ISSN: 0273-2300.
Links | BibTeX | Tags: cheminformatics, QSAR
@article{RN17,
title = {Time extrapolation in regulatory risk assessment: The impact of study differences on the extrapolation factors},
author = {S E Escher and I Mangelsdorf and S Hoffmann-Doerr and F Partosch and Andreas Karwath and K Schroeder and A Zapf and M Batke},
doi = {10.1016/j.yrtph.2020.104584},
issn = {0273-2300},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Regulatory Toxicology and Pharmacology},
volume = {112},
pages = {104584},
keywords = {cheminformatics, QSAR},
pubstate = {published},
tppubtype = {article}
}
2013
Gütlein, Martin; Helma, Christoph; Karwath, Andreas; Kramer, Stefan
A Large-Scale Empirical Evaluation of Cross-Validation and External Test Set Validation in (Q)SAR Journal Article
In: Molecular Informatics, vol. 32, no. 5-6, pp. 516-528, 2013.
Abstract | Links | BibTeX | Tags: cheminformatics, crossvalidation, external validation, QSAR, validation
@article{guetlein2013,
title = {A Large-Scale Empirical Evaluation of Cross-Validation and External Test Set Validation in {(Q)SAR}},
author = {Martin Gütlein and Christoph Helma and Andreas Karwath and Stefan Kramer},
url = {http://onlinelibrary.wiley.com/doi/10.1002/minf.201200134/abstract},
doi = {10.1002/minf.201200134},
year = {2013},
date = {2013-10-14},
urldate = {2013-10-14},
journal = {Molecular Informatics},
volume = {32},
number = {5--6},
pages = {516--528},
abstract = {(Q)SAR model validation is essential to ensure the quality of inferred models and to indicate future model predictivity on unseen compounds. Proper validation is also one of the requirements of regulatory authorities in order to accept the (Q)SAR model, and to approve its use in real world scenarios as alternative testing method. However, at the same time, the question of how to validate a (Q)SAR model, in particular whether to employ variants of cross-validation or external test set validation, is still under discussion. In this paper, we empirically compare a k-fold cross-validation with external test set validation. To this end we introduce a workflow allowing to realistically simulate the common problem setting of building predictive models for relatively small datasets. The workflow allows to apply the built and validated models on large amounts of unseen data, and to compare the performance of the different validation approaches. The experimental results indicate that cross-validation produces higher performant (Q)SAR models than external test set validation, reduces the variance of the results, while at the same time underestimates the performance on unseen compounds. The experimental results reported in this paper suggest that, contrary to current conception in the community, cross-validation may play a significant role in evaluating the predictivity of (Q)SAR models.},
keywords = {cheminformatics, crossvalidation, external validation, QSAR, validation},
pubstate = {published},
tppubtype = {article}
}
2012
Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan
A structural cluster kernel for learning on graphs Conference
The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012, ACM, New York, NY, USA, 2012, ISBN: 978-1-4503-1462-6.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, data mining, kernels, QSAR, support vector machines
@conference{seeland2012,
title = {A structural cluster kernel for learning on graphs},
author = {Madeleine Seeland and Andreas Karwath and Stefan Kramer},
url = {http://doi.acm.org/10.1145/2339530.2339614},
doi = {10.1145/2339530.2339614},
isbn = {978-1-4503-1462-6},
year = {2012},
date = {2012-08-12},
booktitle = {The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012},
pages = {516--524},
publisher = {ACM},
address = {New York, NY, USA},
organization = {ACM},
crossref = {DBLP:conf/kdd/2012},
abstract = {In recent years, graph kernels have received considerable interest within the machine learning and data mining community. Here, we introduce a novel approach enabling kernel methods to utilize additional information hidden in the structural neighborhood of the graphs under consideration. Our novel structural cluster kernel (SCK) incorporates similarities induced by a structural clustering algorithm to improve state-of-the-art graph kernels. The approach taken is based on the idea that graph similarity can not only be described by the similarity between the graphs themselves, but also by the similarity they possess with respect to their structural neighborhood. We applied our novel kernel in a supervised and a semi-supervised setting to regression and classification problems on a number of real-world datasets of molecular graphs.
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.},
keywords = {cheminformatics, clustering, data mining, kernels, QSAR, support vector machines},
pubstate = {published},
tppubtype = {conference}
}
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.
Gütlein, Martin; Karwath, Andreas; Kramer, Stefan
CheS-Mapper - Chemical Space Mapping and Visualization in 3D Journal Article
In: J. Cheminformatics, vol. 4, pp. 7, 2012.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, dimensionality reduction, QSAR, visualization
@article{gutlein2012,
title = {{CheS-Mapper} - Chemical Space Mapping and Visualization in {3D}},
author = {Martin Gütlein and Andreas Karwath and Stefan Kramer},
url = {http://dx.doi.org/10.1186/1758-2946-4-7},
doi = {10.1186/1758-2946-4-7},
year = {2012},
date = {2012-03-17},
journal = {Journal of Cheminformatics},
volume = {4},
pages = {7},
abstract = {Analyzing chemical datasets is a challenging task for scientific researchers in the field of chemoinformatics. It is important, yet difficult to understand the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. To that respect, visualization tools can help to better comprehend the underlying correlations. Our recently developed 3D molecular viewer CheS-Mapper (Chemical Space Mapper) divides large datasets into clusters of similar compounds and consequently arranges them in 3D space, such that their spatial proximity reflects their similarity. The user can indirectly determine similarity, by selecting which features to employ in the process. The tool can use and calculate different kind of features, like structural fragments as well as quantitative chemical descriptors. These features can be highlighted within CheS-Mapper, which aids the chemist to better understand patterns and regularities and relate the observations to established scientific knowledge. As a final function, the tool can also be used to select and export specific subsets of a given dataset for further analysis.},
keywords = {cheminformatics, clustering, dimensionality reduction, QSAR, visualization},
pubstate = {published},
tppubtype = {article}
}
2010
Hardy, Barry J.; Douglas, Nicki; Helma, Christoph; Rautenberg, Micha; Jeliazkova, Nina; Jeliazkov, Vedrin; Nikolova, Ivelina; Benigni, Romualdo; Tcheremenskaia, Olga; Kramer, Stefan; Girschick, Tobias; Buchwald, Fabian; Wicker, Jörg; Karwath, Andreas; Gütlein, Martin; Maunz, Andreas; Sarimveis, Haralambos; Melagraki, Georgia; Afantitis, Antreas; Sopasakis, Pantelis; Gallagher, David; Poroikov, Vladimir; Filimonov, Dmitry; Zakharov, Alexey V.; Lagunin, Alexey; Gloriozova, Tatyana; Novikov, Sergey; Skvortsova, Natalia; Druzhilovsky, Dmitry; Chawla, Sunil; Ghosh, Indira; Ray, Surajit; Patel, Hitesh; Escher, Sylvia
Collaborative development of predictive toxicology applications Journal Article
In: J. Cheminformatics, vol. 2, pp. 7, 2010.
Abstract | Links | BibTeX | Tags: crossvalidation, data mining, QSAR, scientific knowledge, validation
@article{hardy2010,
title = {Collaborative development of predictive toxicology applications},
author = {Barry J. Hardy and Nicki Douglas and Christoph Helma and Micha Rautenberg and Nina Jeliazkova and Vedrin Jeliazkov and Ivelina Nikolova and Romualdo Benigni and Olga Tcheremenskaia and Stefan Kramer and Tobias Girschick and Fabian Buchwald and Jörg Wicker and Andreas Karwath and Martin Gütlein and Andreas Maunz and Haralambos Sarimveis and Georgia Melagraki and Antreas Afantitis and Pantelis Sopasakis and David Gallagher and Vladimir Poroikov and Dmitry Filimonov and Alexey V. Zakharov and Alexey Lagunin and Tatyana Gloriozova and Sergey Novikov and Natalia Skvortsova and Dmitry Druzhilovsky and Sunil Chawla and Indira Ghosh and Surajit Ray and Hitesh Patel and Sylvia Escher},
url = {http://dx.doi.org/10.1186/1758-2946-2-7},
doi = {10.1186/1758-2946-2-7},
year = {2010},
date = {2010-08-31},
urldate = {2010-08-31},
journal = {Journal of Cheminformatics},
volume = {2},
pages = {7},
abstract = {OpenTox provides an interoperable, standards-based Framework for the support of predictive toxicology data management, algorithms, modelling, validation and reporting. It is relevant to satisfying the chemical safety assessment requirements of the REACH legislation as it supports access to experimental data, (Quantitative) Structure-Activity Relationship models, and toxicological information through an integrating platform that adheres to regulatory requirements and OECD validation principles. Initial research defined the essential components of the Framework including the approach to data access, schema and management, use of controlled vocabularies and ontologies, architecture, web service and communications protocols, and selection and integration of algorithms for predictive modelling. OpenTox provides end-user oriented tools to non-computational specialists, risk assessors, and toxicological experts in addition to Application Programming Interfaces (APIs) for developers of new applications. OpenTox actively supports public standards for data representation, interfaces, vocabularies and ontologies, Open Source approaches to core platform components, and community-based collaboration approaches, so as to progress system interoperability goals.
The OpenTox Framework includes APIs and services for compounds, datasets, features, algorithms, models, ontologies, tasks, validation, and reporting which may be combined into multiple applications satisfying a variety of different user needs. OpenTox applications are based on a set of distributed, interoperable OpenTox API-compliant REST web services. The OpenTox approach to ontology allows for efficient mapping of complementary data coming from different datasets into a unifying structure having a shared terminology and representation.
Two initial OpenTox applications are presented as an illustration of the potential impact of OpenTox for high-quality and consistent structure-activity relationship modelling of REACH-relevant endpoints: ToxPredict which predicts and reports on toxicities for endpoints for an input chemical structure, and ToxCreate which builds and validates a predictive toxicity model based on an input toxicology dataset. Because of the extensible nature of the standardised Framework design, barriers of interoperability between applications and content are removed, as the user may combine data, models and validation from multiple sources in a dependable and time-effective way.},
keywords = {crossvalidation, data mining, QSAR, scientific knowledge, validation},
pubstate = {published},
tppubtype = {article}
}
The OpenTox Framework includes APIs and services for compounds, datasets, features, algorithms, models, ontologies, tasks, validation, and reporting which may be combined into multiple applications satisfying a variety of different user needs. OpenTox applications are based on a set of distributed, interoperable OpenTox API-compliant REST web services. The OpenTox approach to ontology allows for efficient mapping of complementary data coming from different datasets into a unifying structure having a shared terminology and representation.
Two initial OpenTox applications are presented as an illustration of the potential impact of OpenTox for high-quality and consistent structure-activity relationship modelling of REACH-relevant endpoints: ToxPredict which predicts and reports on toxicities for endpoints for an input chemical structure, and ToxCreate which builds and validates a predictive toxicity model based on an input toxicology dataset. Because of the extensible nature of the standardised Framework design, barriers of interoperability between applications and content are removed, as the user may combine data, models and validation from multiple sources in a dependable and time-effective way.
2006
Karwath, Andreas; De Raedt, Luc
SMIREP: Predicting Chemical Activity from SMILES Journal Article
In: Journal of Chemical Information and Modeling, vol. 46, no. 6, pp. 2432–2444, 2006.
Abstract | Links | BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge
@article{karwath06c,
title = {{SMIREP}: Predicting Chemical Activity from {SMILES}},
author = {Karwath, Andreas and De Raedt, Luc},
url = {http://pubs.acs.org/doi/abs/10.1021/ci060159g},
doi = {10.1021/ci060159g},
year = {2006},
date = {2006-10-12},
journal = {Journal of Chemical Information and Modeling},
volume = {46},
number = {6},
pages = {2432--2444},
abstract = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps. In the first step, a typically large set of fingerprints, or fragments of interest, is constructed (either by hand or by some recent data mining techniques). In the second step, machine learning techniques are applied to obtain a predictive model. The result is often not only a highly accurate but also hard to interpret model. In this paper, we demonstrate the capabilities of a novel SAR algorithm, SMIREP, which tightly integrates the fragment and model generation steps and which yields simple models in the form of a small set of IF-THEN rules. These rules contain SMILES fragments, which are easy to understand to the computational chemist. SMIREP combines ideas from the well-known IREP rule learner with a novel fragmentation algorithm for SMILES strings. SMIREP has been evaluated on three problems: the prediction of binding activities for the estrogen receptor (Environmental Protection Agency's (EPA's) Distributed Structure-Searchable Toxicity (DSSTox) National Center for Toxicological Research estrogen receptor (NCTRER) Database), the prediction of mutagenicity using the carcinogenic potency database (CPDB), and the prediction of biodegradability on a subset of the Environmental Fate Database (EFDB). In these applications, SMIREP has the advantage of producing easily interpretable rules while having predictive accuracies that are comparable to those of alternative state-of-the-art techniques.},
keywords = {cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {article}
}
2004
Karwath, Andreas; De Raedt, Luc
Predictive Graph Mining Conference
The International Workshop on Mining Graphs, Trees and Sequences, MGTS 2004, 2004, (workshop).
BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR
@conference{karwath04b,
title = {Predictive Graph Mining},
author = {Karwath, Andreas and De Raedt, Luc},
year = {2004},
date = {2004-09-01},
booktitle = {The International Workshop on Mining Graphs, Trees and Sequences, MGTS 2004},
pages = {25--36},
note = {workshop},
keywords = {cheminformatics, graph mining, machine learning, QSAR},
pubstate = {published},
tppubtype = {conference}
}
Karwath, Andreas; De Raedt, Luc
Predictive Graph Mining Conference
The 7th International Conference of Discovery Science, DS 2004, vol. 3245, Lecture Notes in Artificial Intelligence, Springer Verlag, Berlin Heidelberg, Germany, 2004, ISBN: 978-3-540-23357-2.
Abstract | Links | BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR
@conference{karwath04a,
title = {Predictive Graph Mining},
author = {Karwath, Andreas and De Raedt, Luc},
url = {http://link.springer.com/chapter/10.1007/978-3-540-30214-8_1},
doi = {10.1007/978-3-540-30214-8_1},
isbn = {978-3-540-23357-2},
year = {2004},
date = {2004-01-01},
booktitle = {The 7th International Conference of Discovery Science, DS 2004},
volume = {3245},
pages = {1--15},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
organization = {Springer-Verlag Berlin Heidelberg},
series = {Lecture Notes in Artificial Intelligence},
abstract = {Graph mining approaches are extremely popular and effective in molecular databases. The vast majority of these approaches first derive interesting, i.e. frequent, patterns and then use these as features to build predictive models. Rather than building these models in a two step indirect way, the SMIREP system introduced in this paper, derives predictive rule models from molecular data directly. SMIREP combines the SMILES and SMARTS representation languages that are popular in computational chemistry with the IREP rule-learning algorithm by Fürnkranz. Even though SMIREP is focused on SMILES, its principles are also applicable to graph mining problems in other domains. SMIREP is experimentally evaluated on two benchmark databases.},
keywords = {cheminformatics, graph mining, machine learning, QSAR},
pubstate = {published},
tppubtype = {conference}
}