2022
Wehr, Matthias M.; Sarang, Satinder S.; Rooseboom, Martijn; Boogaard, Peter J.; Karwath, Andreas; Escher, Sylvia E.
RespiraTox – Development of a QSAR model to predict human respiratory irritants Journal Article
In: Regulatory Toxicology and Pharmacology, vol. 128, pp. 105089, 2022.
Links | BibTeX | Tags: cheminformatics, machine learning, QSAR
@article{Wehr_2022,
  title      = {{RespiraTox} – Development of a {QSAR} model to predict human respiratory irritants},
  author     = {Matthias M. Wehr and Satinder S. Sarang and Martijn Rooseboom and Peter J. Boogaard and Andreas Karwath and Sylvia E. Escher},
  url        = {https://doi.org/10.1016/j.yrtph.2021.105089},
  doi        = {10.1016/j.yrtph.2021.105089},
  year       = {2022},
  date       = {2022-02-01},
  urldate    = {2022-02-01},
  journal    = {Regulatory Toxicology and Pharmacology},
  volume     = {128},
  pages      = {105089},
  publisher  = {Elsevier BV},
  keywords   = {cheminformatics, machine learning, QSAR},
  pubstate   = {published},
  tppubtype  = {article}
}
2021
Carr, E; Bendayan, R; Bean, D; Stammers, M; Wang, W; Zhang, H; Searle, T; Kraljevic, Z; Shek, A; Phan, H T T; Muruet, W; Gupta, R K; Shinton, A J; Wyatt, M; Shi, T; Zhang, X; Pickles, A; Stahl, D; Zakeri, R; Noursadeghi, M; O'Gallagher, K; Rogers, M; Folarin, A; Karwath, Andreas; Wickstrøm, K E; Köhn-Luque, A; Slater, L; Cardoso, V R; Bourdeaux, C; Holten, A R; Ball, S; McWilliams, C; Roguski, L; Borca, F; Batchelor, J; Amundsen, E K; Wu, X; Gkoutos, G V; Sun, J; Pinto, A; Guthrie, B; Breen, C; Douiri, A; Wu, H; Curcin, V; Teo, J T; Shah, A M; Dobson, R J B
Evaluation and improvement of the National Early Warning Score (NEWS2) for COVID-19: a multi-hospital study Journal Article
In: BMC Medicine, vol. 19, no. 1, pp. 23, 2021, ISSN: 1741-7015.
Links | BibTeX | Tags: artificial intelligence, COVID-19, early warning score, health data science, machine learning
@article{RN19,
  title      = {Evaluation and improvement of the {National Early Warning Score} ({NEWS2}) for {COVID-19}: a multi-hospital study},
  author     = {E Carr and R Bendayan and D Bean and M Stammers and W Wang and H Zhang and T Searle and Z Kraljevic and A Shek and H T T Phan and W Muruet and R K Gupta and A J Shinton and M Wyatt and T Shi and X Zhang and A Pickles and D Stahl and R Zakeri and M Noursadeghi and K O'Gallagher and M Rogers and A Folarin and Andreas Karwath and K E Wickstrøm and A Köhn-Luque and L Slater and V R Cardoso and C Bourdeaux and A R Holten and S Ball and C McWilliams and L Roguski and F Borca and J Batchelor and E K Amundsen and X Wu and G V Gkoutos and J Sun and A Pinto and B Guthrie and C Breen and A Douiri and H Wu and V Curcin and J T Teo and A M Shah and R J B Dobson},
  doi        = {10.1186/s12916-020-01893-3},
  issn       = {1741-7015},
  year       = {2021},
  date       = {2021-01-01},
  urldate    = {2021-01-01},
  journal    = {BMC Medicine},
  volume     = {19},
  number     = {1},
  pages      = {23},
  keywords   = {artificial intelligence, COVID-19, early warning score, health data science, machine learning},
  pubstate   = {published},
  tppubtype  = {article}
}
2020
Wu, H; Zhang, H; Karwath, Andreas; Ibrahim, Z; Shi, T; Zhang, X; Wang, K; Sun, J; Dhaliwal, K; Bean, D; Cardoso, V R; Li, K; Teo, J T; Banerjee, A; Gao-Smith, F; Whitehouse, T; Veenith, T; Gkoutos, G V; Wu, X; Dobson, R; Guthrie, B
Ensemble learning for poor prognosis predictions: a case study on SARS-CoV2 Journal Article
In: Journal of the American Medical Informatics Association, 2020, ISSN: 1067-5027.
Links | BibTeX | Tags: artificial intelligence, COVID-19, health data science, machine learning
@article{RN18,
  title      = {Ensemble learning for poor prognosis predictions: a case study on {SARS-CoV2}},
  author     = {H Wu and H Zhang and Andreas Karwath and Z Ibrahim and T Shi and X Zhang and K Wang and J Sun and K Dhaliwal and D Bean and V R Cardoso and K Li and J T Teo and A Banerjee and F Gao-Smith and T Whitehouse and T Veenith and G V Gkoutos and X Wu and R Dobson and B Guthrie},
  doi        = {10.1093/jamia/ocaa295},
  issn       = {1067-5027},
  year       = {2020},
  date       = {2020-01-01},
  urldate    = {2020-01-01},
  journal    = {Journal of the American Medical Informatics Association},
  keywords   = {artificial intelligence, COVID-19, health data science, machine learning},
  pubstate   = {published},
  tppubtype  = {article}
}
2018
Geilke, Michael; Karwath, Andreas; Frank, Eibe; Kramer, Stefan
Online estimation of discrete, continuous, and conditional joint densities using classifier chains Journal Article
In: Data Mining and Knowledge Discovery, vol. 32, no. 3, pp. 561-603, 2018, ISSN: 1384-5810.
Abstract | Links | BibTeX | Tags: artificial intelligence, data mining, density estimation, machine learning, stream mining
@article{geilke2018a,
  title      = {Online estimation of discrete, continuous, and conditional joint densities using classifier chains},
  author     = {Michael Geilke and Andreas Karwath and Eibe Frank and Stefan Kramer},
  url        = {https://doi.org/10.1007/s10618-017-0546-6},
  doi        = {10.1007/s10618-017-0546-6},
  issn       = {1384-5810},
  year       = {2018},
  date       = {2018-05-01},
  urldate    = {2018-05-01},
  journal    = {Data Mining and Knowledge Discovery},
  volume     = {32},
  number     = {3},
  pages      = {561--603},
  publisher  = {Springer US},
  abstract   = {We address the problem of estimating discrete, continuous, and conditional joint densities online, i.e., the algorithm is only provided the current example and its current estimate for its update. The family of proposed online density estimators, estimation of densities online (EDO), uses classifier chains to model dependencies among features, where each classifier in the chain estimates the probability of one particular feature. Because a single chain may not provide a reliable estimate, we also consider ensembles of classifier chains and ensembles of weighted classifier chains. For all density estimators, we provide consistency proofs and propose algorithms to perform certain inference tasks. The empirical evaluation of the estimators is conducted in several experiments and on datasets of up to several millions of instances. In the discrete case, we compare our estimators to density estimates computed by Bayesian structure learners. In the continuous case, we compare them to a state-of-the-art online density estimator. Our experiments demonstrate that, even though designed to work online, EDO delivers estimators of competitive accuracy compared to other density estimators (batch Bayesian structure learners on discrete datasets and the state-of-the-art online density estimator on continuous datasets). Besides achieving similar performance in these cases, EDO is also able to estimate densities with mixed types of variables, i.e., discrete and continuous random variables.},
  keywords   = {artificial intelligence, data mining, density estimation, machine learning, stream mining},
  pubstate   = {published},
  tppubtype  = {article}
}
2017
Karwath, Andreas; Hubrich, Markus; Kramer, Stefan
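Convolutional Neural Networks for the Identification of Regions of Interests in PET Scans: A Study of Representation Learning for Diagnosing Alzheimer's Disease Conference
Artificial Intelligence in Medicine: 16th Conference on Artificial Intelligence in Medicine, AIME 2017, Vienna, Austria, June 21-24, 2017, Proceedings, Springer Springer International Publishing, Cham, 2017, ISBN: 978-3-319-59758-4.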
Abstract | Links | BibTeX | Tags: alzheimer, artificial intelligence, deep learning, health data science, machine learning, medicine, visualization
@conference{karwath2017a,
  title        = {Convolutional Neural Networks for the Identification of Regions of Interests in {PET} Scans: A Study of Representation Learning for Diagnosing {Alzheimer's} Disease},
  author       = {Andreas Karwath and Markus Hubrich and Stefan Kramer},
  editor       = {ten Teije, Annette and Popow, Christian and Holmes, John H. and Sacchi, Lucia},
  url          = {http://dx.doi.org/10.1007/978-3-319-59758-4_36},
  doi          = {10.1007/978-3-319-59758-4_36},
  isbn         = {978-3-319-59758-4},
  year         = {2017},
  date         = {2017-06-21},
  urldate      = {2017-06-21},
  booktitle    = {Artificial Intelligence in Medicine: 16th Conference on Artificial Intelligence in Medicine, AIME 2017, Vienna, Austria, June 21-24, 2017, Proceedings},
  pages        = {316--321},
  publisher    = {Springer International Publishing},
  address      = {Cham},
  organization = {Springer},
  abstract     = {When diagnosing patients suffering from dementia based on imaging data like PET scans, the identification of suitable predictive regions of interest (ROIs) is of great importance. We present a case study of 3-D Convolutional Neural Networks (CNNs) for the detection of ROIs in this context, just using voxel data, without any knowledge given a priori. Our results on data from the Alzheimer’s Disease Neuroimaging Initiative (ADNI) suggest that the predictive performance of the method is on par with that of state-of-the-art methods, with the additional benefit of potential insights into affected brain regions.},
  keywords     = {alzheimer, artificial intelligence, deep learning, health data science, machine learning, medicine, visualization},
  pubstate     = {published},
  tppubtype    = {conference}
}
2015
Geilke, Michael; Karwath, Andreas; Kramer, Stefan
Modeling recurrent distributions in streams using possible worlds Conference
2015 IEEE International Conference on Data Science and Advanced Analytics, DSAA 2015, IEEE, 2015, ISBN: 978-1-4673-8272-4.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, possible worlds, stream mining
@conference{geilke2015,
  title      = {Modeling recurrent distributions in streams using possible worlds},
  author     = {Michael Geilke and Andreas Karwath and Stefan Kramer},
  url        = {http://dx.doi.org/10.1109/DSAA.2015.7344814},
  doi        = {10.1109/DSAA.2015.7344814},
  isbn       = {978-1-4673-8272-4},
  year       = {2015},
  date       = {2015-10-19},
  booktitle  = {2015 IEEE International Conference on Data Science and Advanced Analytics, DSAA 2015},
  pages      = {1--9},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/dsaa/2015},
  abstract   = {Discovering changes in the data distribution of streams and discovering recurrent data distributions are challenging problems in data mining and machine learning. Both have received a lot of attention in the context of classification. With the ever increasing growth of data, however, there is a high demand of compact and universal representations of data streams that enable the user to analyze current as well as historic data without having access to the raw data. To make a first step towards this direction, we propose a condensed representation that captures the various - possibly recurrent - data distributions of the stream by extending the notion of possible worlds. The representation enables queries concerning the whole stream and can, hence, serve as a tool for supporting decision-making processes or serve as a basis for implementing data mining and machine learning algorithms on top of it. We evaluate this condensed representation on synthetic and real-world data.},
  keywords   = {density estimation, machine learning, possible worlds, stream mining},
  pubstate   = {published},
  tppubtype  = {conference}
}
2014
Geilke, Michael; Karwath, Andreas; Kramer, Stefan
A probabilistic condensed representation of data for stream mining Conference
International Conference on Data Science and Advanced Analytics, DSAA 2014, IEEE, 2014.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, stream mining
@conference{geilke2014,
  title      = {A probabilistic condensed representation of data for stream mining},
  author     = {Michael Geilke and Andreas Karwath and Stefan Kramer},
  url        = {http://dx.doi.org/10.1109/DSAA.2014.7058088},
  doi        = {10.1109/DSAA.2014.7058088},
  year       = {2014},
  date       = {2014-10-30},
  booktitle  = {International Conference on Data Science and Advanced Analytics, DSAA 2014},
  pages      = {297--303},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/dsaa/2014},
  abstract   = {Data mining and machine learning algorithms usually operate directly on the data. However, if the data is not available at once or consists of billions of instances, these algorithms easily become infeasible with respect to memory and run-time concerns. As a solution to this problem, we propose a framework, called MiDEO (Mining Density Estimates inferred Online), in which algorithms are designed to operate on a condensed representation of the data. In particular, we propose to use density estimates, which are able to represent billions of instances in a compact form and can be updated when new instances arrive. As an example for an algorithm that operates on density estimates, we consider the task of mining association rules, which we consider as a form of simple statements about the data. The algorithm, called POEt (Pattern mining on Online density esTimates), is evaluated on synthetic and real-world data and is compared to state-of-the-art algorithms.},
  keywords   = {density estimation, machine learning, stream mining},
  pubstate   = {published},
  tppubtype  = {conference}
}
Seeland, Madeleine; Maunz, Andreas; Karwath, Andreas; Kramer, Stefan
Extracting information from support vector machines for pattern-based classification Conference
Symposium on Applied Computing, SAC 2014, ACM ACM, New York, NY, USA, 2014.
Abstract | Links | BibTeX | Tags: machine learning, support vector machines
@conference{seeland2014b,
  title        = {Extracting information from support vector machines for pattern-based classification},
  author       = {Madeleine Seeland and Andreas Maunz and Andreas Karwath and Stefan Kramer},
  url          = {http://dl.acm.org/citation.cfm?doid=2554850.2555065},
  doi          = {10.1145/2554850.2555065},
  year         = {2014},
  date         = {2014-03-24},
  urldate      = {2014-03-24},
  booktitle    = {Symposium on Applied Computing, SAC 2014},
  pages        = {129--136},
  publisher    = {ACM},
  address      = {New York, NY, USA},
  organization = {ACM},
  abstract     = {Statistical machine learning algorithms building on patterns found by pattern mining algorithms have to cope with large solution sets and thus the high dimensionality of the feature space. Vice versa, pattern mining algorithms are frequently applied to irrelevant instances, thus causing noise in the output. Solution sets of pattern mining algorithms also typically grow with increasing input datasets. The paper proposes an approach to overcome these limitations. The approach extracts information from trained support vector machines, in particular their support vectors and their relevance according to their coefficients. It uses the support vectors along with their coefficients as input to pattern mining algorithms able to handle weighted instances. Our experiments in the domain of graph mining and molecular graphs show that the resulting models are not significantly less accurate than models trained on the full datasets, yet require only a fraction of the time using much smaller sets of patterns.},
  keywords     = {machine learning, support vector machines},
  pubstate     = {published},
  tppubtype    = {conference}
}
2013
Geilke, Michael; Frank, Eibe; Karwath, Andreas; Kramer, Stefan
Online Estimation of Discrete Densities Conference
IEEE 13th International Conference on Data Mining, ICDM 2013, IEEE, 2013, ISSN: 1550-4786.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, stream mining
@conference{geilke2013,
  title      = {Online Estimation of Discrete Densities},
  author     = {Michael Geilke and Eibe Frank and Andreas Karwath and Stefan Kramer},
  url        = {http://dx.doi.org/10.1109/ICDM.2013.91},
  doi        = {10.1109/ICDM.2013.91},
  issn       = {1550-4786},
  year       = {2013},
  date       = {2013-12-07},
  booktitle  = {IEEE 13th International Conference on Data Mining, ICDM 2013},
  pages      = {191--200},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/icdm/2013},
  abstract   = {We address the problem of estimating a discrete joint density online, that is, the algorithm is only provided the current example and its current estimate. The proposed online estimator of discrete densities, EDDO (Estimation of Discrete Densities Online), uses classifier chains to model dependencies among features. Each classifier in the chain estimates the probability of one particular feature. Because a single chain may not provide a reliable estimate, we also consider ensembles of classifier chains and ensembles of weighted classifier chains. For all density estimators, we provide consistency proofs and propose algorithms to perform certain inference tasks. The empirical evaluation of the estimators is conducted in several experiments and on data sets of up to several million instances: We compare them to density estimates computed from Bayesian structure learners, evaluate them under the influence of noise, measure their ability to deal with concept drift, and measure the run-time performance. Our experiments demonstrate that, even though designed to work online, EDDO delivers estimators of competitive accuracy compared to batch Bayesian structure learners and batch variants of EDDO.},
  keywords   = {density estimation, machine learning, stream mining},
  pubstate   = {published},
  tppubtype  = {conference}
}
2012
Grzonka, Slawomir; Karwath, Andreas; Dijoux, Frederic; Burgard, Wolfram
Activity-Based Estimation of Human Trajectories Journal Article
In: IEEE Transactions on Robotics, vol. 28, no. 1, pp. 234-245, 2012.
Abstract | Links | BibTeX | Tags: activity recognition, artificial intelligence, machine learning, simultaneous localization and mapping
@article{grzonka2012,
  title      = {Activity-Based Estimation of Human Trajectories},
  author     = {Slawomir Grzonka and Andreas Karwath and Frederic Dijoux and Wolfram Burgard},
  url        = {http://dx.doi.org/10.1109/TRO.2011.2165372},
  doi        = {10.1109/TRO.2011.2165372},
  year       = {2012},
  date       = {2012-02-02},
  urldate    = {2012-02-02},
  journal    = {IEEE Transactions on Robotics},
  volume     = {28},
  number     = {1},
  pages      = {234--245},
  abstract   = {We present a novel approach to incrementally determine the trajectory of a person in 3-D based on its motions and activities in real time. In our algorithm, we estimate the motions and activities of the user given the data that are obtained from a motion capture suit equipped with several inertial measurement units. These activities include walking up and down staircases, as well as opening and closing doors. We interpret the first two types of activities as motion constraints and door-handling events as landmark detections in a graph-based simultaneous localization and mapping (SLAM) framework. Since we cannot distinguish between individual doors, we employ a multihypothesis tracking approach on top of the SLAM procedure to deal with the high data-association uncertainty. As a result, we are able to accurately and robustly recover the trajectory of the person. Additionally, we present an algorithm to build approximate geometrical and topological maps based on the estimated trajectory and detected activities. We evaluate our approach in practical experiments that are carried out with different subjects and in various environments.},
  keywords   = {activity recognition, artificial intelligence, machine learning, simultaneous localization and mapping},
  pubstate   = {published},
  tppubtype  = {article}
}
2010
Grzonka, Slawomir; Dijoux, Frederic; Karwath, Andreas; Burgard, Wolfram
Mapping indoor environments based on human activity Conference
IEEE International Conference on Robotics and Automation, ICRA 2010, IEEE, 2010, ISBN: 978-1-4244-5038-1.
Abstract | Links | BibTeX | Tags: activity recognition, machine learning, simultaneous localization and mapping
@conference{grzonka2010b,
  title      = {Mapping indoor environments based on human activity},
  author     = {Slawomir Grzonka and Frederic Dijoux and Andreas Karwath and Wolfram Burgard},
  url        = {http://dx.doi.org/10.1109/ROBOT.2010.5509976},
  doi        = {10.1109/ROBOT.2010.5509976},
  isbn       = {978-1-4244-5038-1},
  year       = {2010},
  date       = {2010-05-03},
  booktitle  = {IEEE International Conference on Robotics and Automation, ICRA 2010},
  pages      = {476--481},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/icra/2010},
  abstract   = {We present a novel approach to build approximate maps of structured environments utilizing human motion and activity. Our approach uses data recorded with a data suit which is equipped with several IMUs to detect movements of a person and door opening and closing events. In our approach we interpret the movements as motion constraints and door handling events as landmark detections in a graph-based SLAM framework. As we cannot distinguish between individual doors, we employ a multi-hypothesis approach on top of the SLAM system to deal with the high data-association uncertainty. As a result, our approach is able to accurately and robustly recover the trajectory of the person. We additionally take advantage of the fact that people traverse free space and that doors separate rooms to recover the geometric structure of the environment after the graph optimization. We evaluate our approach in several experiments carried out with different users and in environments of different types.},
  keywords   = {activity recognition, machine learning, simultaneous localization and mapping},
  pubstate   = {published},
  tppubtype  = {conference}
}
Grzonka, Slawomir; Dijoux, Frederic; Karwath, Andreas; Burgard, Wolfram
Learning Maps of Indoor Environments Based on Human Activity Conference
Embedded Reasoning, Papers from the 2010 AAAI Spring Symposium, 2010.
Abstract | Links | BibTeX | Tags: activity recognition, localization, machine learning, mobile systems and mobility, simultaneous localization and mapping, social robotics
@conference{grzonka2010a,
  author     = {Slawomir Grzonka and Frederic Dijoux and Andreas Karwath and Wolfram Burgard},
  title      = {Learning Maps of Indoor Environments Based on Human Activity},
  booktitle  = {Embedded Reasoning, Papers from the 2010 AAAI Spring Symposium},
  year       = {2010},
  date       = {2010-03-23},
  url        = {http://www.aaai.org/ocs/index.php/SSS/SSS10/paper/view/1172},
  crossref   = {DBLP:conf/aaaiss/2010-4},
  keywords   = {activity recognition, localization, machine learning, mobile systems and mobility, simultaneous localization and mapping, social robotics},
  abstract   = {We present a novel approach to build approximate maps of structured environments utilizing human motion and activity. Our approach uses data recorded with a data suit which is equipped with several IMUs to detect movements of a person and door opening and closing events. In our approach we interpret the movements as motion constraints and door handling events as landmark detections in a graph-based SLAM framework. As we cannot distinguish between individual doors, we employ a multi-hypothesis approach on top of the SLAM system to deal with the high data-association uncertainty. As a result, our approach is able to accurately and robustly recover the trajectory of the person. We additionally take advantage of the fact that people traverse free space and that doors separate rooms to recover the geometric structure of the environment after the graph optimization. We evaluate our approach in several experiments carried out with different users and in environments of different types.},
  pubstate   = {published},
  tppubtype  = {conference}
}
2009
Gütlein, Martin; Frank, Eibe; Hall, Mark A.; Karwath, Andreas
Large-scale attribute selection using wrappers Conference
The IEEE Symposium on Computational Intelligence and Data Mining, CIDM 2009, IEEE, 2009, ISBN: 978-1-4244-2765-9.
Abstract | Links | BibTeX | Tags: crossvalidation, machine learning
@conference{gutlein2009,
  title      = {Large-scale attribute selection using wrappers},
  author     = {Martin Gütlein and Eibe Frank and Mark A. Hall and Andreas Karwath},
  url        = {http://dx.doi.org/10.1109/CIDM.2009.4938668},
  doi        = {10.1109/CIDM.2009.4938668},
  isbn       = {978-1-4244-2765-9},
  year       = {2009},
  date       = {2009-01-01},
  booktitle  = {The IEEE Symposium on Computational Intelligence and Data Mining, CIDM 2009},
  pages      = {332--339},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/cidm/2009},
  abstract   = {Scheme-specific attribute selection with the wrapper and variants of forward selection is a popular attribute selection technique for classification that yields good results. However, it can run the risk of overfitting because of the extent of the search and the extensive use of internal cross-validation. Moreover, although wrapper evaluators tend to achieve superior accuracy compared to filters, they face a high computational cost. The problems of overfitting and high runtime occur in particular on high-dimensional datasets, like microarray data. We investigate Linear Forward Selection, a technique to reduce the number of attributes expansions in each forward selection step. Our experiments demonstrate that this approach is faster, finds smaller subsets and can even increase the accuracy compared to standard forward selection. We also investigate a variant that applies explicit subset size determination in forward selection to combat overfitting, where the search is forced to stop at a precomputed “optimal” subset size. We show that this technique reduces subset size while maintaining comparable accuracy.},
  keywords   = {crossvalidation, machine learning},
  pubstate   = {published},
  tppubtype  = {conference}
}
2008
Karwath, Andreas; Kersting, Kristian; Landwehr, Niels
Boosting Relational Sequence Alignments Conference
The 8th IEEE International Conference on Data Mining, ICDM 2008, IEEE, 2008, ISBN: 978-0-7695-3502-9.
Abstract | Links | BibTeX | Tags: inductive logic programming, machine learning, relational learning, scientific knowledge
@conference{karwath2008,
  title      = {Boosting Relational Sequence Alignments},
  author     = {Andreas Karwath and Kristian Kersting and Niels Landwehr},
  url        = {http://dx.doi.org/10.1109/ICDM.2008.127},
  doi        = {10.1109/ICDM.2008.127},
  isbn       = {978-0-7695-3502-9},
  year       = {2008},
  date       = {2008-12-15},
  booktitle  = {The 8th IEEE International Conference on Data Mining, ICDM 2008},
  pages      = {857--862},
  publisher  = {IEEE},
  crossref   = {DBLP:conf/icdm/2008},
  abstract   = {The task of aligning sequences arises in many applications. Classical dynamic programming approaches require the explicit state enumeration in the reward model. This is often impractical: the number of states grows very quickly with the number of domain objects and relations among these objects. Relational sequence alignment aims at exploiting symbolic structure to avoid the full enumeration. This comes at the expense of a more complex reward model selection problem: virtually infinitely many abstraction levels have to be explored. In this paper, we apply gradient-based boosting to leverage this problem. Specifically, we show how to reduce the learning problem to a series of relational regressions problems. The main benefit of this is that interactions between states variables are introduced only as needed, so that the potentially infinite search space is not explicitly considered. As our experimental results show, this boosting approach can significantly improve upon established results in challenging applications.},
  keywords   = {inductive logic programming, machine learning, relational learning, scientific knowledge},
  pubstate   = {published},
  tppubtype  = {conference}
}
Kersting, Kristian; De Raedt, Luc; Gutmann, Bernd; Karwath, Andreas; Landwehr, Niels
Relational Sequence Learning Book Chapter
In: Probabilistic Inductive Logic Programming - Theory and Applications, vol. 4911, pp. 28-55, Springer Verlag, Berlin Heidelberg, Germany, 2008, ISBN: 978-3-540-78651-1.
Abstract | Links | BibTeX | Tags: inductive logic programming, machine learning, relational learning, scientific knowledge
@inbook{kersting2008,
  title        = {Relational Sequence Learning},
  author       = {Kristian Kersting and De Raedt, Luc and Bernd Gutmann and Andreas Karwath and Niels Landwehr},
  url          = {http://dx.doi.org/10.1007/978-3-540-78652-8_2},
  doi          = {10.1007/978-3-540-78652-8_2},
  isbn         = {978-3-540-78651-1},
  year         = {2008},
  date         = {2008-01-01},
  booktitle    = {Probabilistic Inductive Logic Programming - Theory and Applications},
  volume       = {4911},
  pages        = {28--55},
  publisher    = {Springer Verlag},
  address      = {Berlin Heidelberg, Germany},
  organization = {Springer-Verlag Berlin Heidelberg},
  series       = {Lecture Notes in Computer Science},
  crossref     = {DBLP:conf/ilp/2008p},
  abstract     = {Sequential behavior and sequence learning are essential to intelligence. Often the elements of sequences exhibit an internal structure that can elegantly be represented using relational atoms. Applying traditional sequential learning techniques to such relational sequences requires one either to ignore the internal structure or to live with a combinatorial explosion of the model complexity. This chapter briefly reviews relational sequence learning and describes several techniques tailored towards realizing this, such as local pattern mining techniques, (hidden) Markov models, conditional random fields, dynamic programming and reinforcement learning.},
  keywords     = {inductive logic programming, machine learning, relational learning, scientific knowledge},
  pubstate     = {published},
  tppubtype    = {inbook}
}
2007
King, Ross D.; Karwath, Andreas; Clare, Amanda; Dehaspe, Luc
Logic and the Automatic Acquisition of Scientific Knowledge: An Application to Functional Genomics Conference
Computational Discovery of Scientific Knowledge, Introduction, Techniques, and Applications in Environmental and Life Sciences, vol. 4660, Lecture Notes in Computer Science Springer-Verlag Berlin Heidelberg Springer Verlag, Berlin Heidelberg, Germany, 2007, ISBN: 978-3-540-73919-7.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge
@conference{king2007,
  title        = {Logic and the Automatic Acquisition of Scientific Knowledge: An Application to Functional Genomics},
  author       = {Ross D. King and Andreas Karwath and Amanda Clare and Luc Dehaspe},
  url          = {http://dx.doi.org/10.1007/978-3-540-73920-3_13},
  doi          = {10.1007/978-3-540-73920-3_13},
  isbn         = {978-3-540-73919-7},
  year         = {2007},
  date         = {2007-01-01},
  booktitle    = {Computational Discovery of Scientific Knowledge, Introduction, Techniques, and Applications in Environmental and Life Sciences},
  volume       = {4660},
  pages        = {273--289},
  publisher    = {Springer Verlag},
  address      = {Berlin Heidelberg, Germany},
  organization = {Springer-Verlag Berlin Heidelberg},
  series       = {Lecture Notes in Computer Science},
  crossref     = {DBLP:conf/dis/2007book},
  abstract     = {This paper is a manifesto aimed at computer scientists interested in developing and applying scientific discovery methods. It argues that: science is experiencing an unprecedented “explosion” in the amount of available data; traditional data analysis methods cannot deal with this increased quantity of data; there is an urgent need to automate the process of refining scientific data into scientific knowledge; inductive logic programming (ILP) is a data analysis framework well suited for this task; and exciting new scientific discoveries can be achieved using ILP scientific discovery methods. We describe an example of using ILP to analyse a large and complex bioinformatic database that has produced unexpected and interesting scientific results in functional genomics. We then point a possible way forward to integrating machine learning with scientific databases to form intelligent databases.},
  keywords     = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge},
  pubstate     = {published},
  tppubtype    = {conference}
}
2006
Karwath, Andreas; De Raedt, Luc
SMIREP: Predicting Chemical Activity from SMILES Journal Article
In: Journal of Chemical Information and Modeling, vol. 46, no. 6, pp. 2432-2444, 2006.
Abstract | Links | BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge
@article{karwath06c,
  title      = {{SMIREP}: Predicting Chemical Activity from {SMILES}},
  author     = {Andreas Karwath and De Raedt, Luc},
  url        = {http://pubs.acs.org/doi/abs/10.1021/ci060159g},
  doi        = {10.1021/ci060159g},
  year       = {2006},
  date       = {2006-10-12},
  journal    = {Journal of Chemical Information and Modeling},
  volume     = {46},
  number     = {6},
  pages      = {2432--2444},
  abstract   = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps. In the first step, a typically large set of fingerprints, or fragments of interest, is constructed (either by hand or by some recent data mining techniques). In the second step, machine learning techniques are applied to obtain a predictive model. The result is often not only a highly accurate but also hard to interpret model. In this paper, we demonstrate the capabilities of a novel SAR algorithm, SMIREP, which tightly integrates the fragment and model generation steps and which yields simple models in the form of a small set of IF-THEN rules. These rules contain SMILES fragments, which are easy to understand to the computational chemist. SMIREP combines ideas from the well-known IREP rule learner with a novel fragmentation algorithm for SMILES strings. SMIREP has been evaluated on three problems: the prediction of binding activities for the estrogen receptor (Environmental Protection Agency's (EPA's) Distributed Structure-Searchable Toxicity (DSSTox) National Center for Toxicological Research estrogen receptor (NCTRER) Database), the prediction of mutagenicity using the carcinogenic potency database (CPDB), and the prediction of biodegradability on a subset of the Environmental Fate Database (EFDB). In these applications, SMIREP has the advantage of producing easily interpretable rules while having predictive accuracies that are comparable to those of alternative state-of-the-art techniques.},
  keywords   = {cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge},
  pubstate   = {published},
  tppubtype  = {article}
}
Clare, Amanda; Karwath, Andreas; Ougham, Helen; King, Ross D.
Functional bioinformatics for Arabidopsis thaliana Journal Article
In: Bioinformatics, vol. 22, no. 9, pp. 1130-1136, 2006.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge
@article{karwath06a,
title = {Functional bioinformatics for Arabidopsis thaliana},
author = {Amanda Clare and Andreas Karwath and Helen Ougham and Ross D. King},
url = {https://bioinformatics.oxfordjournals.org/content/22/9/1130.full.pdf+html},
doi = {10.1093/bioinformatics/btl051},
year = {2006},
date = {2006-01-01},
journal = {Bioinformatics},
volume = {22},
number = {9},
pages = {1130--1136},
abstract = {Motivation: The genome of Arabidopsis thaliana, which has the best understood plant genome, still has approximately one-third of its genes with no functional annotation at all from either MIPS or TAIR. We have applied our Data Mining Prediction (DMP) method to the problem of predicting the functional classes of these protein sequences. This method is based on using a hybrid machine-learning/data-mining method to identify patterns in the bioinformatic data about sequences that are predictive of function. We use data about sequence, predicted secondary structure, predicted structural domain, InterPro patterns, sequence similarity profile and expressions data.
Results: We predicted the functional class of a high percentage of the Arabidopsis genes with currently unknown function. These predictions are interpretable and have good test accuracies. We describe in detail seven of the rules produced.
Availability: Rulesets are available at http://www.aber.ac.uk/compsci/Research/bio/dss/arabpreds/ and predictions are available at http://www.genepredictions.org
Contact: afc@aber.ac.uk},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {article}
}
Results: We predicted the functional class of a high percentage of the Arabidopsis genes with currently unknown function. These predictions are interpretable and have good test accuracies. We describe in detail seven of the rules produced.
Availability: Rulesets are available at http://www.aber.ac.uk/compsci/Research/bio/dss/arabpreds/ and predictions are available at http://www.genepredictions.org
Contact: afc@aber.ac.uk
2005
Stolle, Christian; Karwath, Andreas; De Raedt, Luc
CLASSIC'CL: an integrated ILP system Conference
Proc. 8th International Conference of Discovery Science, DS 2005, vol. 3735, Lecture Notes in Artificial Intelligence Springer, 2005, ISBN: 978-3-540-29230-2, (Conference).
Abstract | Links | BibTeX | Tags: data mining, machine learning, relational learning
@conference{karwath05a,
title = {CLASSIC'CL: an integrated ILP system},
author = {Stolle, Christian and Karwath, Andreas and De Raedt, Luc},
url = {http://link.springer.com/chapter/10.1007%2F11563983_31},
doi = {10.1007/11563983_31},
isbn = {978-3-540-29230-2},
year = {2005},
date = {2005-10-08},
booktitle = {Proc. 8th International Conference of Discovery Science, DS 2005},
volume = {3735},
pages = {354--362},
publisher = {Springer},
series = {Lecture Notes in Artificial Intelligence},
abstract = {A novel inductive logic programming system, called Classic’cl is presented. Classic’cl integrates several settings for learning, in particular learning from interpretations and learning from satisfiability. Within these settings, it addresses descriptive and probabilistic modeling tasks. As such, Classic’cl (C-armr, cLAudien, icl-S(S)at, ICl, and CLlpad) integrates several well-known inductive logic programming systems such as Claudien, Warmr (and its extension C-armr), ICL, ICL-SAT, and LLPAD. We report on the implementation, the integration issues as well as on some experiments that compare Classic’cl with some of its predecessors.},
note = {Conference},
keywords = {data mining, machine learning, relational learning},
pubstate = {published},
tppubtype = {conference}
}
2004
Bringmann, Björn; Karwath, Andreas
Frequent SMILES Miscellaneous
Lernen, Wissensentdeckung und Adaptivität, Workshop GI Fachgruppe Maschinelles Lernen, part of LWA, 2004, (Berlin, Germany).
Abstract | BibTeX | Tags: cheminformatics, graph mining, machine learning
@misc{wshp-fgml-BringmannK04,
  author       = {Bringmann, Björn and Karwath, Andreas},
  title        = {Frequent SMILES},
  howpublished = {Lernen, Wissensentdeckung und Adaptivität, Workshop GI Fachgruppe Maschinelles Lernen, part of LWA},
  note         = {Berlin, Germany},
  year         = {2004},
  date         = {2004-10-01},
  abstract     = {Predictive graph mining approaches in chemical databases are extremely popular and effective. Most of these approaches first extract frequent sub-graphs and then use them as features to build predictive models. In the work presented here, the approach taken is similar. However, instead of frequent sub-graphs, frequent trees, based on SMILES strings are derived. For this, the SMILES strings of chemical compounds are decomposed into fragment trees, which in turn are mined for interesting sub-trees. These tree based patterns are then used as features by a classifier to build predictive models. The approach is experimentally evaluated on a real world chemical data set.},
  keywords     = {cheminformatics, graph mining, machine learning},
  pubstate     = {published},
  tppubtype    = {misc}
}
Karwath, Andreas; De Raedt, Luc
Predictive Graph Mining Conference
The International Workshop on Mining Graphs, Trees and Sequences, MGTS 2004, 2004, (workshop).
BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR
@conference{karwath04b,
title = {Predictive Graph Mining},
author = {Karwath, Andreas and De Raedt, Luc},
year = {2004},
date = {2004-09-01},
booktitle = {The International Workshop on Mining Graphs, Trees and Sequences, MGTS 2004},
pages = {25--36},
note = {workshop},
keywords = {cheminformatics, graph mining, machine learning, QSAR},
pubstate = {published},
tppubtype = {conference}
}
Karwath, Andreas; De Raedt, Luc
Predictive Graph Mining Conference
The 7th International Conference of Discovery Science, DS 2004, vol. 3245, Lecture Notes in Artificial Intelligence Springer-Verlag Berlin Heidelberg Springer Verlag, Berlin Heidelberg, Germany, 2004, ISBN: 978-3-540-23357-2.
Abstract | Links | BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR
@conference{karwath04a,
title = {Predictive Graph Mining},
author = {Karwath, Andreas and De Raedt, Luc},
url = {http://link.springer.com/chapter/10.1007%2F978-3-540-30214-8_1},
doi = {10.1007/978-3-540-30214-8_1},
isbn = {978-3-540-23357-2},
year = {2004},
date = {2004-01-01},
booktitle = {The 7th International Conference of Discovery Science, DS 2004},
volume = {3245},
pages = {1--15},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
organization = {Springer-Verlag Berlin Heidelberg},
series = {Lecture Notes in Artificial Intelligence},
abstract = {Graph mining approaches are extremely popular and effective in molecular databases. The vast majority of these approaches first derive interesting, i.e. frequent, patterns and then use these as features to build predictive models. Rather than building these models in a two step indirect way, the SMIREP system introduced in this paper, derives predictive rule models from molecular data directly. SMIREP combines the SMILES and SMARTS representation languages that are popular in computational chemistry with the IREP rule-learning algorithm by Fürnkranz. Even though SMIREP is focused on SMILES, its principles are also applicable to graph mining problems in other domains. SMIREP is experimentally evaluated on two benchmark databases.},
keywords = {cheminformatics, graph mining, machine learning, QSAR},
pubstate = {published},
tppubtype = {conference}
}
2002
Karwath, Andreas; King, Ross D.
Homology Induction: the use of machine learning to improve sequence similarity searches Journal Article
In: BMC Bioinformatics, vol. 3, no. 1, 2002.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning
@article{karwath02a,
title = {Homology Induction: the use of machine learning to improve sequence similarity searches},
author = {Andreas Karwath and Ross D. King},
url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-3-11},
doi = {10.1186/1471-2105-3-11},
year = {2002},
date = {2002-04-23},
journal = {BMC Bioinformatics},
volume = {3},
number = {1},
abstract = {Background
The inference of homology between proteins is a key problem in molecular biology. The current best approaches only identify ~50% of homologies (with a false positive rate set at 1/1000).
Results
We present Homology Induction (HI), a new approach to inferring homology. HI uses machine learning to bootstrap from standard sequence similarity search methods. First a standard method is run, then HI learns rules which are true for sequences of high similarity to the target (assumed homologues) and not true for general sequences, these rules are then used to discriminate sequences in the twilight zone. To learn the rules HI describes the sequences in a novel way based on a bioinformatic knowledge base, and the machine learning method of inductive logic programming. To evaluate HI we used the PDB40D benchmark which lists sequences of known homology but low sequence similarity. We compared the HI methodology with PSI-BLAST alone and found HI performed significantly better. In addition, Receiver Operating Characteristic (ROC) curve analysis showed that these improvements were robust for all reasonable error costs. The predictive homology rules learnt by HI can be interpreted biologically to provide insight into conserved features of homologous protein families.
Conclusions
HI is a new technique for the detection of remote protein homology – a central bioinformatic problem. HI with PSI-BLAST is shown to outperform PSI-BLAST for all error costs. It is expected that similar improvements would be obtained using HI with any sequence similarity method.},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning},
pubstate = {published},
tppubtype = {article}
}
The inference of homology between proteins is a key problem in molecular biology. The current best approaches only identify ~50% of homologies (with a false positive rate set at 1/1000).
Results
We present Homology Induction (HI), a new approach to inferring homology. HI uses machine learning to bootstrap from standard sequence similarity search methods. First a standard method is run, then HI learns rules which are true for sequences of high similarity to the target (assumed homologues) and not true for general sequences, these rules are then used to discriminate sequences in the twilight zone. To learn the rules HI describes the sequences in a novel way based on a bioinformatic knowledge base, and the machine learning method of inductive logic programming. To evaluate HI we used the PDB40D benchmark which lists sequences of known homology but low sequence similarity. We compared the HI methodology with PSI-BLAST alone and found HI performed significantly better. In addition, Receiver Operating Characteristic (ROC) curve analysis showed that these improvements were robust for all reasonable error costs. The predictive homology rules learnt by HI can be interpreted biologically to provide insight into conserved features of homologous protein families.
Conclusions
HI is a new technique for the detection of remote protein homology – a central bioinformatic problem. HI with PSI-BLAST is shown to outperform PSI-BLAST for all error costs. It is expected that similar improvements would be obtained using HI with any sequence similarity method.
Karwath, Andreas
Large Logical Databases and their Applications to Molecular Biology PhD Thesis
University of Wales, Aberystwyth, 2002.
BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge
@phdthesis{karwath02b,
title = {Large Logical Databases and their Applications to Molecular Biology},
author = {Andreas Karwath},
year = {2002},
date = {2002-01-01},
school = {University of Wales, Aberystwyth},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {phdthesis}
}
2001
Karwath, Andreas; King, Ross D.
An automated ILP server in the field of bioinformatics Conference
The Eleventh International Conference on Inductive Logic Programming, ILP 2001, vol. 2157, Lecture Notes in Computer Science Springer-Verlag Berlin Heidelberg Springer Verlag, Berlin Heidelberg, Germany, 2001, ISBN: 978-3-540-42538-0.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning
@conference{Karwath2001,
title = {An automated ILP server in the field of bioinformatics},
author = {Andreas Karwath and Ross D. King},
editor = {Raghu Ramakrishnan and Michele Sebag},
url = {http://link.springer.com/chapter/10.1007%2F3-540-44797-0_8},
doi = {10.1007/3-540-44797-0_8},
isbn = {978-3-540-42538-0},
year = {2001},
date = {2001-09-09},
booktitle = {The Eleventh International Conference on Inductive Logic Programming, ILP 2001},
volume = {2157},
pages = {91--103},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
organization = {Springer-Verlag Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {The identification of evolutionary related (homologous) proteins is a key problem in molecular biology. Here we present a inductive logic programming based method, Homology Induction (HI), which acts as a filter for existing sequence similarity searches to improve their performance in the detection of remote protein homologies. HI performs a PSI-BLAST search to generate positive, negative, and uncertain examples, and collects descriptions of these examples. It then learns rules to discriminate the positive and negative examples. The rules are used to filter the uncertain examples in the “twilight zone”. HI uses a multitable database of 51,430,710 pre-fabricated facts from a variety of biological sources, and the inductive logic programming system Aleph to induce rules. Hi was tested on an independent set of protein sequences with equal or less than 40 per cent sequence similarity (PDB40D). ROC analysis is performed showing that HI can significantly improve existing similarity searches. The method is automated and can be used via a web/mail interface.},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning},
pubstate = {published},
tppubtype = {conference}
}