2023
Dretzke, Janine; Lorenc, Ava; Adriano, Ada; Herd, Clare; Mehanna, Hisham; Nankivell, Paul; Moore, David J.; Karwath, Andreas; Main, Barry; Firth, Charlotte; Gaunt, Claire; Greaves, Colin; Watson, Eila; Gkoutos, Georgios; Ozakinci, Gozde; Wolstenholme, Jane; Brett, Jo; Duda, Joan; Matheson, Lauren; Cherrill, Louise-Rae; Calvert, Melanie; Kiely, Philip; Gaunt, Piers; Chernbumroong, Saisakul; Mittal, Saloni; Thomas, Steve; Winter, Stuart; Wong, Wai Lup; PETNECK2 Research Team
Systematic review of patients’ and healthcare professionals’ views on patient‐initiated follow‐up in treated cancer patients Journal Article
In: Cancer Medicine, 2023, ISSN: 2045-7634.
Abstract | Links | BibTeX | Tags: cancer, health data science, medicine, systematic review
@article{9b13f3c6d25842a9bd3efe7343c24a41,
  author     = {Dretzke, Janine and Lorenc, Ava and Adriano, Ada and Herd, Clare and Mehanna, Hisham and Nankivell, Paul and Moore, David J. and Karwath, Andreas and Main, Barry and Firth, Charlotte and Gaunt, Claire and Greaves, Colin and Watson, Eila and Gkoutos, Georgios and Ozakinci, Gozde and Wolstenholme, Jane and Brett, Jo and Duda, Joan and Matheson, Lauren and Cherrill, Louise-Rae and Calvert, Melanie and Kiely, Philip and Gaunt, Piers and Chernbumroong, Saisakul and Mittal, Saloni and Thomas, Steve and Winter, Stuart and Wong, Wai Lup and {PETNECK2 Research Team}},
  title      = {Systematic review of patients' and healthcare professionals' views on patient-initiated follow-up in treated cancer patients},
  journal    = {Cancer Medicine},
  publisher  = {John Wiley \& Sons},
  issn       = {2045-7634},
  year       = {2023},
  date       = {2023-06-16},
  doi        = {10.1002/cam4.6243},
  urldate    = {2023-06-16},
  abstract   = {Background: Current follow-up models in cancer are seen to be unsustainable and inflexible, and there is growing interest in alternative models, such as patient-initiated follow-up (PIFU). It is therefore important to understand whether PIFU is acceptable to patients and healthcare professionals (HCPs). Methods: Standard systematic review methodology aimed at limiting bias was used for study identification (to January 2022), selection and data extraction. Thematic synthesis was undertaken for qualitative data, and survey findings were tabulated and described. Results: Nine qualitative studies and 22 surveys were included, mainly in breast and endometrial cancer. Women treated for breast or endometrial cancer and HCPs were mostly supportive of PIFU. Facilitators for PIFU included convenience, control over own health and avoidance of anxiety-inducing clinic appointments. Barriers included loss of reassurance from scheduled visits and lack of confidence in self-management. HCPs were supportive of PIFU but concerned about resistance to change, unsuitability of PIFU for some patients and costs. Conclusion: PIFU is viewed mostly positively by women treated for breast or endometrial cancer, and by HCPs, but further evidence is needed from a wider range of cancers, men, and more representative samples. A protocol was registered with PROSPERO (CRD42020181412).},
  keywords   = {cancer, health data science, medicine, systematic review},
  pubstate   = {published},
  tppubtype  = {article}
}
Taib, Bilal Gani; Karwath, Andreas; Wensley, K.; Minku, L.; Gkoutos, G. V.; Moiemen, N.
Artificial intelligence in the management and treatment of burns: A systematic review and meta-analyses Journal Article
In: Journal of Plastic, Reconstructive & Aesthetic Surgery, vol. 77, pp. 133–161, 2023.
Links | BibTeX | Tags: burns, health data science, medicine, systematic review
@article{Taib_2023,
  author     = {Taib, Bilal Gani and Karwath, Andreas and Wensley, K. and Minku, L. and Gkoutos, G. V. and Moiemen, N.},
  title      = {Artificial intelligence in the management and treatment of burns: A systematic review and meta-analyses},
  journal    = {Journal of Plastic, Reconstructive \& Aesthetic Surgery},
  volume     = {77},
  pages      = {133--161},
  publisher  = {Elsevier BV},
  year       = {2023},
  date       = {2023-02-01},
  doi        = {10.1016/j.bjps.2022.11.049},
  url        = {https://doi.org/10.1016/j.bjps.2022.11.049},
  urldate    = {2023-02-01},
  keywords   = {burns, health data science, medicine, systematic review},
  pubstate   = {published},
  tppubtype  = {article}
}
Slater, Luke T.; Williams, John A.; Schofield, Paul N.; Russell, Sophie; Pendleton, Samantha C.; Karwath, Andreas; Fanning, Hilary; Ball, Simon; Hoehndorf, Robert; Gkoutos, Georgios V.
Klarigi: Characteristic explanations for semantic biomedical data Journal Article
In: Computers in Biology and Medicine, vol. 153, pp. 106425, 2023.
Links | BibTeX | Tags: artificial intelligence, health data science, NLP
@article{Slater_2023,
  author     = {Slater, Luke T. and Williams, John A. and Schofield, Paul N. and Russell, Sophie and Pendleton, Samantha C. and Karwath, Andreas and Fanning, Hilary and Ball, Simon and Hoehndorf, Robert and Gkoutos, Georgios V.},
  title      = {{Klarigi}: Characteristic explanations for semantic biomedical data},
  journal    = {Computers in Biology and Medicine},
  volume     = {153},
  pages      = {106425},
  publisher  = {Elsevier BV},
  year       = {2023},
  date       = {2023-02-01},
  doi        = {10.1016/j.compbiomed.2022.106425},
  url        = {https://doi.org/10.1016/j.compbiomed.2022.106425},
  urldate    = {2023-02-01},
  keywords   = {artificial intelligence, health data science, NLP},
  pubstate   = {published},
  tppubtype  = {article}
}
Gill, Simrat; Karwath, Andreas; Uh, Hae-Won; Cardoso, Victor Roth; Gu, Zhujie; Barsky, Andrey; Slater, Luke; Acharjee, Animesh; Duan, Jinming; Dall'Olio, Lorenzo; el Bouhaddani, Said; Chernbumroong, Saisakul; Stanbury, Mary; Haynes, Sandra; Asselbergs, Folkert W; Grobbee, Diederick; Eijkemans, Marinus; Gkoutos, Georgios; Kotecha, Dipak; BigData@Heart Consortium group
Artificial intelligence to enhance clinical value across the spectrum of cardiovascular healthcare: artificial intelligence framework Journal Article
In: European Heart Journal, 2023, ISSN: 0195-668X, (© The Author(s) 2023. Published by Oxford University Press on behalf of the European Society of Cardiology.).
Abstract | Links | BibTeX | Tags: artificial intelligence, cardiology, EHR, health data science
@article{9b9767f517a040f4822591145f8c61a8,
  author     = {Gill, Simrat and Karwath, Andreas and Uh, Hae-Won and Cardoso, Victor Roth and Gu, Zhujie and Barsky, Andrey and Slater, Luke and Acharjee, Animesh and Duan, Jinming and Dall'Olio, Lorenzo and el Bouhaddani, Said and Chernbumroong, Saisakul and Stanbury, Mary and Haynes, Sandra and Asselbergs, Folkert W. and Grobbee, Diederick and Eijkemans, Marinus and Gkoutos, Georgios and Kotecha, Dipak and {BigData@Heart Consortium group}},
  title      = {Artificial intelligence to enhance clinical value across the spectrum of cardiovascular healthcare: artificial intelligence framework},
  journal    = {European Heart Journal},
  publisher  = {Oxford University Press},
  issn       = {0195-668X},
  year       = {2023},
  date       = {2023-01-11},
  doi        = {10.1093/eurheartj/ehac758},
  urldate    = {2023-01-11},
  abstract   = {Artificial intelligence (AI) is increasingly being utilized in healthcare. This article provides clinicians and researchers with a step-wise foundation for high-value AI that can be applied to a variety of different data modalities. The aim is to improve the transparency and application of AI methods, with the potential to benefit patients in routine cardiovascular care. Following a clear research hypothesis, an AI-based workflow begins with data selection and pre-processing prior to analysis, with the type of data (structured, semi-structured, or unstructured) determining what type of pre-processing steps and machine-learning algorithms are required. Algorithmic and data validation should be performed to ensure the robustness of the chosen methodology, followed by an objective evaluation of performance. Seven case studies are provided to highlight the wide variety of data modalities and clinical questions that can benefit from modern AI techniques, with a focus on applying them to cardiovascular disease management. Despite the growing use of AI, further education for healthcare workers, researchers, and the public are needed to aid understanding of how AI works and to close the existing gap in knowledge. In addition, issues regarding data access, sharing, and security must be addressed to ensure full engagement by patients and the public. The application of AI within healthcare provides an opportunity for clinicians to deliver a more personalized approach to medical care by accounting for confounders, interactions, and the rising prevalence of multi-morbidity.},
  note       = {{\textcopyright} The Author(s) 2023. Published by Oxford University Press on behalf of the European Society of Cardiology.},
  keywords   = {artificial intelligence, cardiology, EHR, health data science},
  pubstate   = {published},
  tppubtype  = {article}
}
2022
Wu, Honghan; Wang, Minhong; Wu, Jinge; Francis, Farah; Chang, Yun-Hsuan; Shavick, Alex; Dong, Hang; Poon, Michael T. C.; Fitzpatrick, Natalie; Levine, Adam P.; Slater, Luke T.; Handy, Alex; Karwath, Andreas; Gkoutos, Georgios V.; Chelala, Claude; Shah, Anoop Dinesh; Stewart, Robert; Collier, Nigel; Alex, Beatrice; Whiteley, William; Sudlow, Cathie; Roberts, Angus; Dobson, Richard J. B.
A survey on clinical natural language processing in the United Kingdom from 2007 to 2022 Journal Article
In: npj Digital Medicine, vol. 5, no. 1, 2022.
Links | BibTeX | Tags: artificial intelligence, health data science, NLP
@article{Wu_2022,
  author     = {Wu, Honghan and Wang, Minhong and Wu, Jinge and Francis, Farah and Chang, Yun-Hsuan and Shavick, Alex and Dong, Hang and Poon, Michael T. C. and Fitzpatrick, Natalie and Levine, Adam P. and Slater, Luke T. and Handy, Alex and Karwath, Andreas and Gkoutos, Georgios V. and Chelala, Claude and Shah, Anoop Dinesh and Stewart, Robert and Collier, Nigel and Alex, Beatrice and Whiteley, William and Sudlow, Cathie and Roberts, Angus and Dobson, Richard J. B.},
  title      = {A survey on clinical natural language processing in the {United Kingdom} from 2007 to 2022},
  journal    = {npj Digital Medicine},
  volume     = {5},
  number     = {1},
  publisher  = {Springer Science and Business Media LLC},
  year       = {2022},
  date       = {2022-12-01},
  doi        = {10.1038/s41746-022-00730-6},
  url        = {https://doi.org/10.1038/s41746-022-00730-6},
  urldate    = {2022-12-01},
  keywords   = {artificial intelligence, health data science, NLP},
  pubstate   = {published},
  tppubtype  = {article}
}
Manley, Susan Elisabeth; Karwath, Andreas; Williams, John; Nightingale, Peter; Webber, Jonathan; Raghavan, Rajeev; Barratt, Alison; Webster, Craig; Round, Rachel; Stratton, Irene; Gkoutos, Georgios; Roberts, Graham; Mostafa, Samiul; Ghosh, Sandip
Use of HbA1c for new diagnosis of diabetes in those with hyperglycaemia on admission to or attendance at hospital urgently requires research Journal Article
In: British Journal of Diabetes, vol. 22, no. 2, pp. 95–104, 2022.
Links | BibTeX | Tags: diabetes, EHR, health data science, medicine
@article{Manley_2022,
  author     = {Manley, Susan Elisabeth and Karwath, Andreas and Williams, John and Nightingale, Peter and Webber, Jonathan and Raghavan, Rajeev and Barratt, Alison and Webster, Craig and Round, Rachel and Stratton, Irene and Gkoutos, Georgios and Roberts, Graham and Mostafa, Samiul and Ghosh, Sandip},
  title      = {Use of {HbA1c} for new diagnosis of diabetes in those with hyperglycaemia on admission to or attendance at hospital urgently requires research},
  journal    = {British Journal of Diabetes},
  volume     = {22},
  number     = {2},
  pages      = {95--104},
  publisher  = {ABCD Diabetes Care, Ltd.},
  year       = {2022},
  date       = {2022-12-01},
  doi        = {10.15277/bjd.2022.386},
  url        = {https://doi.org/10.15277/bjd.2022.386},
  urldate    = {2022-12-01},
  keywords   = {diabetes, EHR, health data science, medicine},
  pubstate   = {published},
  tppubtype  = {article}
}
Köppel, Marius; Segner, Alexander; Wagener, Martin; Pensel, Lukas; Karwath, Andreas; Schmitt, Christian; Kramer, Stefan
Learning to rank Higgs boson candidates Journal Article
In: Scientific Reports, vol. 12, no. 1, 2022.
Links | BibTeX | Tags: learning to rank
@article{K_ppel_2022,
  author     = {Köppel, Marius and Segner, Alexander and Wagener, Martin and Pensel, Lukas and Karwath, Andreas and Schmitt, Christian and Kramer, Stefan},
  title      = {Learning to rank {Higgs} boson candidates},
  journal    = {Scientific Reports},
  volume     = {12},
  number     = {1},
  publisher  = {Springer Science and Business Media LLC},
  year       = {2022},
  date       = {2022-07-01},
  doi        = {10.1038/s41598-022-10383-w},
  url        = {https://doi.org/10.1038/s41598-022-10383-w},
  urldate    = {2022-07-01},
  keywords   = {learning to rank},
  pubstate   = {published},
  tppubtype  = {article}
}
Williams, John A.; Karwath, Andreas; Round, Rachel A.; Stratton, Irene M.; Ghosh, Sandip; Mostafa, Samiul; Roberts, Graham; Webber, Jonathan; Gkoutos, Georgios; Manley, Susan E.
133-LB: Relationship of HbA1c and Glucose by Ethnicity in UK Biobank Journal Article
In: Diabetes, vol. 71, no. Supplement_1, 2022.
Links | BibTeX | Tags: diabetes, EHR, health data science, medicine, UKBiobank
@article{WILLIAMS_2022,
  author     = {Williams, John A. and Karwath, Andreas and Round, Rachel A. and Stratton, Irene M. and Ghosh, Sandip and Mostafa, Samiul and Roberts, Graham and Webber, Jonathan and Gkoutos, Georgios and Manley, Susan E.},
  title      = {133-LB: Relationship of {HbA1c} and Glucose by Ethnicity in {UK Biobank}},
  journal    = {Diabetes},
  volume     = {71},
  number     = {Supplement\_1},
  publisher  = {American Diabetes Association},
  year       = {2022},
  date       = {2022-06-01},
  doi        = {10.2337/db22-133-lb},
  url        = {https://doi.org/10.2337/db22-133-lb},
  urldate    = {2022-06-01},
  keywords   = {diabetes, EHR, health data science, medicine, UKBiobank},
  pubstate   = {published},
  tppubtype  = {article}
}
Karwath, Andreas; Williams, John A.; Round, Rachel A.; Stratton, Irene M.; Gkoutos, Georgios; Mostafa, Samiul; Roberts, Graham; Webber, Jonathan; Manley, Susan E.
973-P: By How Much Does Red Blood Cell Status Affect the Accuracy of HbA1c? Journal Article
In: Diabetes, vol. 71, no. Supplement_1, 2022.
Links | BibTeX | Tags: diabetes, EHR, health data science, medicine
@article{KARWATH_2022,
  author     = {Karwath, Andreas and Williams, John A. and Round, Rachel A. and Stratton, Irene M. and Gkoutos, Georgios and Mostafa, Samiul and Roberts, Graham and Webber, Jonathan and Manley, Susan E.},
  title      = {973-P: By How Much Does Red Blood Cell Status Affect the Accuracy of {HbA1c}?},
  journal    = {Diabetes},
  volume     = {71},
  number     = {Supplement\_1},
  publisher  = {American Diabetes Association},
  year       = {2022},
  date       = {2022-06-01},
  doi        = {10.2337/db22-973-p},
  url        = {https://doi.org/10.2337/db22-973-p},
  urldate    = {2022-06-01},
  keywords   = {diabetes, EHR, health data science, medicine},
  pubstate   = {published},
  tppubtype  = {article}
}
Lorenc, A.; Wells, M.; Fulton-Lieuw, T.; Nankivell, P.; Mehanna, H.; Jepson, M.; Karwath, Andreas; Main, B.; Firth, C.; Gaunt, C.; Greaves, C.; Moore, D.; Watson, E.; Gkoutos, G.; Ozakinci, G.; Wolstenholme, J.; Dretzke, J.; Brett, J.; Duda, J.; Matheson, L.; Cherrill, L.-R.; Calvert, M.; Kiely, P.; Gaunt, P.; Chernbumroong, S.; Mittal, S.; Thomas, S.; Winter, S.; Wong, W.
Clinicians' Views of Patient-initiated Follow-up in Head and Neck Cancer: a Qualitative Study to Inform the PETNECK2 Trial Journal Article
In: Clinical Oncology, vol. 34, no. 4, pp. 230–240, 2022.
Links | BibTeX | Tags: cancer, medicine
@article{Lorenc_2022,
  author     = {Lorenc, A. and Wells, M. and Fulton-Lieuw, T. and Nankivell, P. and Mehanna, H. and Jepson, M. and Karwath, Andreas and Main, B. and Firth, C. and Gaunt, C. and Greaves, C. and Moore, D. and Watson, E. and Gkoutos, G. and Ozakinci, G. and Wolstenholme, J. and Dretzke, J. and Brett, J. and Duda, J. and Matheson, L. and Cherrill, L.-R. and Calvert, M. and Kiely, P. and Gaunt, P. and Chernbumroong, S. and Mittal, S. and Thomas, S. and Winter, S. and Wong, W.},
  title      = {Clinicians' Views of Patient-initiated Follow-up in Head and Neck Cancer: a Qualitative Study to Inform the {PETNECK2} Trial},
  journal    = {Clinical Oncology},
  volume     = {34},
  number     = {4},
  pages      = {230--240},
  publisher  = {Elsevier BV},
  year       = {2022},
  date       = {2022-04-01},
  doi        = {10.1016/j.clon.2021.11.010},
  url        = {https://doi.org/10.1016/j.clon.2021.11.010},
  urldate    = {2022-04-01},
  keywords   = {cancer, medicine},
  pubstate   = {published},
  tppubtype  = {article}
}
Williams, John; Burgess, Stephen; Suckling, John; Lalousis, Paris Alexandros; Batool, Fatima; Griffiths, Lowri; Palmer, Edward; Karwath, Andreas; Barsky, Andrey; Gkoutos, Georgios; Wood, Stephen; Barnes, Nicholas; David, Anthony S; Donohoe, Gary; Neill, Joanna; Deakin, Bill; Khandaker, Golam; Upthegrove, Rachel; PIMS Collaboration
Inflammation and brain structure in schizophrenia and other neuropsychiatric disorders: A Mendelian randomization study Journal Article
In: JAMA Psychiatry, vol. 2022, pp. 1–11, 2022, ISSN: 2168-622X.
Abstract | Links | BibTeX | Tags: artificial intelligence, health data science, medicine
@article{44ac2137c0fa4666839a213d0fc6175c,
  author        = {Williams, John and Burgess, Stephen and Suckling, John and Lalousis, Paris Alexandros and Batool, Fatima and Griffiths, Lowri and Palmer, Edward and Karwath, Andreas and Barsky, Andrey and Gkoutos, Georgios and Wood, Stephen and Barnes, Nicholas and David, Anthony S. and Donohoe, Gary and Neill, Joanna and Deakin, Bill and Khandaker, Golam and Upthegrove, Rachel and {PIMS Collaboration}},
  title         = {Inflammation and brain structure in schizophrenia and other neuropsychiatric disorders: A {Mendelian} randomization study},
  journal       = {JAMA Psychiatry},
  volume        = {2022},
  pages         = {1--11},
  publisher     = {American Medical Association},
  issn          = {2168-622X},
  year          = {2022},
  date          = {2022-03-30},
  doi           = {10.1001/jamapsychiatry.2022.0407},
  urldate       = {2022-03-30},
  abstract      = {Importance: Previous in vitro and postmortem research suggests that inflammation may lead to structural brain changes via activation of microglia and/or astrocytic dysfunction in a range of neuropsychiatric disorders. Objective: To investigate the relationship between inflammation and changes in brain structures in vivo and to explore a transcriptome-driven functional basis with relevance to mental illness. Design, Setting, and Participants: This study used multistage linked analyses, including mendelian randomization (MR), gene expression correlation, and connectivity analyses. A total of 20688 participants in the UK Biobank, which includes clinical, genomic, and neuroimaging data, and 6 postmortem brains from neurotypical individuals in the Allen Human Brain Atlas (AHBA), including RNA microarray data. Data were extracted in February 2021 and analyzed between March and October 2021. Exposures: Genetic variants regulating levels and activity of circulating interleukin 1 (IL-1), IL-2, IL-6, C-reactive protein (CRP), and brain-derived neurotrophic factor (BDNF) were used as exposures in MR analyses. Main Outcomes and Measures: Brain imaging measures, including gray matter volume (GMV) and cortical thickness (CT), were used as outcomes. Associations were considered significant at a multiple testing-corrected threshold of $P < 1.1 \times 10^{-4}$. Differential gene expression in AHBA data was modeled in brain regions mapped to areas significant in MR analyses; genes were tested for biological and disease overrepresentation in annotation databases and for connectivity in protein-protein interaction networks. Results: Of 20688 participants in the UK Biobank sample, 10828 (52.3\%) were female, and the mean (SD) age was 55.5 (7.5) years. In the UK Biobank sample, genetically predicted levels of IL-6 were associated with GMV in the middle temporal cortex (z score, 5.76; $P = 8.39 \times 10^{-9}$), inferior temporal (z score, 3.38; $P = 7.20 \times 10^{-5}$), fusiform (z score, 4.70; $P = 2.60 \times 10^{-7}$), and frontal (z score, -3.59; $P = 3.30 \times 10^{-5}$) cortex together with CT in the superior frontal region (z score, -5.11; $P = 3.22 \times 10^{-7}$). No significant associations were found for IL-1, IL-2, CRP, or BDNF after correction for multiple comparison. In the AHBA sample, 5 of 6 participants (83\%) were male, and the mean (SD) age was 42.5 (13.4) years. Brain-wide coexpression analysis showed a highly interconnected network of genes preferentially expressed in the middle temporal gyrus (MTG), which further formed a highly connected protein-protein interaction network with IL-6 (enrichment test of expected vs observed network given the prevalence and degree of interactions in the STRING database: 43 nodes/30 edges observed vs 8 edges expected; mean node degree, 1.4; genome-wide significanc},
  keywords      = {artificial intelligence, health data science, medicine},
  pubstate      = {published},
  tppubtype     = {article},
  internal-note = {Abstract is truncated mid-word in the source record. volume/pages look like online-first placeholders (volume equals the year) -- verify both against the publisher record.}
}
Slater, Luke T; Russell, Sophie; Makepeace, Silver; Carberry, Alexander; Karwath, Andreas; Williams, John A; Fanning, Hilary; Ball, Simon; Hoehndorf, Robert; Gkoutos, Georgios V
Evaluating semantic similarity methods for comparison of text-derived phenotype profiles Journal Article
In: BMC Medical Informatics and Decision Making, vol. 22, no. 1, 2022, ISSN: 1472-6947.
Abstract | Links | BibTeX | Tags: differential diagnosis, health data science, MIMIC-III, ontology, semantic similarity, semantic web
@article{6b64a2f714094b7abb9373ccb6d527e0,
  author     = {Slater, Luke T. and Russell, Sophie and Makepeace, Silver and Carberry, Alexander and Karwath, Andreas and Williams, John A. and Fanning, Hilary and Ball, Simon and Hoehndorf, Robert and Gkoutos, Georgios V.},
  title      = {Evaluating semantic similarity methods for comparison of text-derived phenotype profiles},
  journal    = {BMC Medical Informatics and Decision Making},
  volume     = {22},
  number     = {1},
  publisher  = {Springer},
  issn       = {1472-6947},
  year       = {2022},
  date       = {2022-02-05},
  doi        = {10.1186/s12911-022-01770-4},
  urldate    = {2022-02-05},
  abstract   = {BACKGROUND: Semantic similarity is a valuable tool for analysis in biomedicine. When applied to phenotype profiles derived from clinical text, they have the capacity to enable and enhance 'patient-like me' analyses, automated coding, differential diagnosis, and outcome prediction. While a large body of work exists exploring the use of semantic similarity for multiple tasks, including protein interaction prediction, and rare disease differential diagnosis, there is less work exploring comparison of patient phenotype profiles for clinical tasks. Moreover, there are no experimental explorations of optimal parameters or better methods in the area. METHODS: We develop a platform for reproducible benchmarking and comparison of experimental conditions for patient phenotype similarity. Using the platform, we evaluate the task of ranking shared primary diagnosis from uncurated phenotype profiles derived from all text narrative associated with admissions in the medical information mart for intensive care (MIMIC-III). RESULTS: 300 semantic similarity configurations were evaluated, as well as one embedding-based approach. On average, measures that did not make use of an external information content measure performed slightly better, however the best-performing configurations when measured by area under receiver operating characteristic curve and Top Ten Accuracy used term-specificity and annotation-frequency measures. CONCLUSION: We identified and interpreted the performance of a large number of semantic similarity configurations for the task of classifying diagnosis from text-derived phenotype profiles in one setting. We also provided a basis for further research on other settings and related tasks in the area.},
  keywords   = {differential diagnosis, health data science, MIMIC-III, ontology, semantic similarity, semantic web},
  pubstate   = {published},
  tppubtype  = {article}
}
Wehr, Matthias M.; Sarang, Satinder S.; Rooseboom, Martijn; Boogaard, Peter J.; Karwath, Andreas; Escher, Sylvia E.
RespiraTox – Development of a QSAR model to predict human respiratory irritants Journal Article
In: Regulatory Toxicology and Pharmacology, vol. 128, pp. 105089, 2022.
Links | BibTeX | Tags: cheminformatics, machine learning, QSAR
@article{Wehr_2022,
  author     = {Wehr, Matthias M. and Sarang, Satinder S. and Rooseboom, Martijn and Boogaard, Peter J. and Karwath, Andreas and Escher, Sylvia E.},
  title      = {{RespiraTox} -- Development of a {QSAR} model to predict human respiratory irritants},
  journal    = {Regulatory Toxicology and Pharmacology},
  volume     = {128},
  pages      = {105089},
  publisher  = {Elsevier BV},
  year       = {2022},
  date       = {2022-02-01},
  doi        = {10.1016/j.yrtph.2021.105089},
  url        = {https://doi.org/10.1016/j.yrtph.2021.105089},
  urldate    = {2022-02-01},
  keywords   = {cheminformatics, machine learning, QSAR},
  pubstate   = {published},
  tppubtype  = {article}
}
2021
Slater, Luke T.; Williams, John A.; Karwath, Andreas; Fanning, Hilary; Ball, Simon; Schofield, Paul N.; Hoehndorf, Robert; Gkoutos, Georgios V.
Multi-faceted semantic clustering with text-derived phenotypes Journal Article
In: Computers in Biology and Medicine, 2021, ISSN: 0010-4825.
Abstract | Links | BibTeX | Tags: cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity
@article{14598610a01b4af99802a4b22e67a119,
  author     = {Slater, Luke T. and Williams, John A. and Karwath, Andreas and Fanning, Hilary and Ball, Simon and Schofield, Paul N. and Hoehndorf, Robert and Gkoutos, Georgios V.},
  title      = {Multi-faceted semantic clustering with text-derived phenotypes},
  journal    = {Computers in Biology and Medicine},
  publisher  = {Elsevier},
  issn       = {0010-4825},
  year       = {2021},
  date       = {2021-11-01},
  doi        = {10.1016/j.compbiomed.2021.104904},
  urldate    = {2021-11-01},
  abstract   = {Identification of ontology concepts in clinical narrative text enables the creation of phenotype profiles that can be associated with clinical entities, such as patients or drugs. Constructing patient phenotype profiles using formal ontologies enables their analysis via semantic similarity, in turn enabling the use of background knowledge in clustering or classification analyses. However, traditional semantic similarity approaches collapse complex relationships between patient phenotypes into a unitary similarity scores for each pair of patients. Moreover, single scores may be based only on matching terms with the greatest information content (IC), ignoring other dimensions of patient similarity. This process necessarily leads to a loss of information in the resulting representation of patient similarity, and is especially apparent when using very large text-derived and highly multi-morbid phenotype profiles. Moreover, it renders finding a biological explanation for similarity very difficult; the black box problem. In this article, we explore the generation of multiple semantic similarity scores for patients based on different facets of their phenotypic manifestation, which we define through different sub-graphs in the Human Phenotype Ontology. We further present a new methodology for deriving sets of qualitative class descriptions for groups of entities described by ontology terms. Leveraging this strategy to obtain meaningful explanations for our semantic clusters alongside other evaluation techniques, we show that semantic clustering with ontology-derived facets enables the representation, and thus identification of, clinically relevant phenotype relationships not easily recoverable using overall clustering alone. In this way, we demonstrate the potential of faceted semantic clustering for gaining a deeper and more nuanced understanding of text-derived patient phenotypes.},
  keywords   = {cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity},
  pubstate   = {published},
  tppubtype  = {article}
}
Chapman, Martin; Mumtaz, Shahzad; Rasmussen, Luke V; Karwath, Andreas; Gkoutos, Georgios V; Gao, Chuang; Thayer, Dan; Pacheco, Jennifer A; Parkinson, Helen; Richesson, Rachel L; Jefferson, Emily; Denaxas, Spiros; Curcin, Vasa
Desiderata for the development of next-generation electronic health record phenotype libraries Journal Article
In: GigaScience, vol. 10, no. 9, 2021.
Links | BibTeX | Tags: EHR, health data science, phenotypes, validation
@article{Chapman_2021,
  author     = {Chapman, Martin and Mumtaz, Shahzad and Rasmussen, Luke V. and Karwath, Andreas and Gkoutos, Georgios V. and Gao, Chuang and Thayer, Dan and Pacheco, Jennifer A. and Parkinson, Helen and Richesson, Rachel L. and Jefferson, Emily and Denaxas, Spiros and Curcin, Vasa},
  title      = {Desiderata for the development of next-generation electronic health record phenotype libraries},
  journal    = {GigaScience},
  volume     = {10},
  number     = {9},
  publisher  = {Oxford University Press (OUP)},
  year       = {2021},
  date       = {2021-09-01},
  doi        = {10.1093/gigascience/giab059},
  url        = {https://doi.org/10.1093/gigascience/giab059},
  urldate    = {2021-09-01},
  keywords   = {EHR, health data science, phenotypes, validation},
  pubstate   = {published},
  tppubtype  = {article}
}
Karwath, Andreas; Bunting, Karina V; Gill, Simrat K; Tica, Otilia; Pendleton, Samantha; Aziz, Furqan; Barsky, Andrey D; Chernbumroong, Saisakul; Duan, Jinming; Mobley, Alastair R; Cardoso, Victor Roth; Slater, Luke; Williams, John A; Bruce, Emma-Jane; Wang, Xiaoxia; Flather, Marcus D; Coats, Andrew J S; Gkoutos, Georgios V; Kotecha, Dipak
Redefining beta-blocker response in heart failure patients with sinus rhythm and atrial fibrillation: a machine learning cluster analysis Journal Article
In: The Lancet, 2021.
Abstract | Links | BibTeX | Tags: artificial intelligence, clustering, crossvalidation, deep learning, EHR, health data science, phenotypes, validation
@article{Karwath_2021,
  title = {Redefining beta-blocker response in heart failure patients with sinus rhythm and atrial fibrillation: a machine learning cluster analysis},
  author = {Andreas Karwath and Karina V Bunting and Simrat K Gill and Otilia Tica and Samantha Pendleton and Furqan Aziz and Andrey D Barsky and Saisakul Chernbumroong and Jinming Duan and Alastair R Mobley and Victor Roth Cardoso and Luke Slater and John A Williams and Emma-Jane Bruce and Xiaoxia Wang and Marcus D Flather and Andrew J S Coats and Georgios V Gkoutos and Dipak Kotecha},
  url = {https://doi.org/10.1016/s0140-6736(21)01638-x},
  doi = {10.1016/s0140-6736(21)01638-x},
  year = {2021},
  date = {2021-08-01},
  urldate = {2021-08-01},
  journal = {The Lancet},
  publisher = {Elsevier BV},
  abstract = {Background
Mortality remains unacceptably high in patients with heart failure and reduced left ventricular ejection fraction (LVEF) despite advances in therapeutics. We hypothesised that a novel artificial intelligence approach could better assess multiple and higher-dimension interactions of comorbidities, and define clusters of β-blocker efficacy in patients with sinus rhythm and atrial fibrillation.
Methods
Neural network-based variational autoencoders and hierarchical clustering were applied to pooled individual patient data from nine double-blind, randomised, placebo-controlled trials of β blockers. All-cause mortality during median 1·3 years of follow-up was assessed by intention to treat, stratified by electrocardiographic heart rhythm. The number of clusters and dimensions was determined objectively, with results validated using a leave-one-trial-out approach. This study was prospectively registered with ClinicalTrials.gov (NCT00832442) and the PROSPERO database of systematic reviews (CRD42014010012).
Findings
15 659 patients with heart failure and LVEF of less than 50% were included, with median age 65 years (IQR 56–72) and LVEF 27% (IQR 21–33). 3708 (24%) patients were women. In sinus rhythm (n=12 822), most clusters demonstrated a consistent overall mortality benefit from β blockers, with odds ratios (ORs) ranging from 0·54 to 0·74. One cluster in sinus rhythm of older patients with less severe symptoms showed no significant efficacy (OR 0·86, 95% CI 0·67–1·10; p=0·22). In atrial fibrillation (n=2837), four of five clusters were consistent with the overall neutral effect of β blockers versus placebo (OR 0·92, 0·77–1·10; p=0·37). One cluster of younger atrial fibrillation patients at lower mortality risk but similar LVEF to average had a statistically significant reduction in mortality with β blockers (OR 0·57, 0·35–0·93; p=0·023). The robustness and consistency of clustering was confirmed for all models (p<0·0001 vs random), and cluster membership was externally validated across the nine independent trials.
Interpretation
An artificial intelligence-based clustering approach was able to distinguish prognostic response from β blockers in patients with heart failure and reduced LVEF. This included patients in sinus rhythm with suboptimal efficacy, as well as a cluster of patients with atrial fibrillation where β blockers did reduce mortality.
Funding
Medical Research Council, UK, and EU/EFPIA Innovative Medicines Initiative BigData@Heart.},
  keywords = {artificial intelligence, clustering, crossvalidation, deep learning, EHR, health data science, phenotypes, validation},
  pubstate = {published},
  tppubtype = {article}
}
Mortality remains unacceptably high in patients with heart failure and reduced left ventricular ejection fraction (LVEF) despite advances in therapeutics. We hypothesised that a novel artificial intelligence approach could better assess multiple and higher-dimension interactions of comorbidities, and define clusters of β-blocker efficacy in patients with sinus rhythm and atrial fibrillation.
Methods
Neural network-based variational autoencoders and hierarchical clustering were applied to pooled individual patient data from nine double-blind, randomised, placebo-controlled trials of β blockers. All-cause mortality during median 1·3 years of follow-up was assessed by intention to treat, stratified by electrocardiographic heart rhythm. The number of clusters and dimensions was determined objectively, with results validated using a leave-one-trial-out approach. This study was prospectively registered with ClinicalTrials.gov (NCT00832442) and the PROSPERO database of systematic reviews (CRD42014010012).
Findings
15 659 patients with heart failure and LVEF of less than 50% were included, with median age 65 years (IQR 56–72) and LVEF 27% (IQR 21–33). 3708 (24%) patients were women. In sinus rhythm (n=12 822), most clusters demonstrated a consistent overall mortality benefit from β blockers, with odds ratios (ORs) ranging from 0·54 to 0·74. One cluster in sinus rhythm of older patients with less severe symptoms showed no significant efficacy (OR 0·86, 95% CI 0·67–1·10; p=0·22). In atrial fibrillation (n=2837), four of five clusters were consistent with the overall neutral effect of β blockers versus placebo (OR 0·92, 0·77–1·10; p=0·37). One cluster of younger atrial fibrillation patients at lower mortality risk but similar LVEF to average had a statistically significant reduction in mortality with β blockers (OR 0·57, 0·35–0·93; p=0·023). The robustness and consistency of clustering was confirmed for all models (p<0·0001 vs random), and cluster membership was externally validated across the nine independent trials.
Interpretation
An artificial intelligence-based clustering approach was able to distinguish prognostic response from β blockers in patients with heart failure and reduced LVEF. This included patients in sinus rhythm with suboptimal efficacy, as well as a cluster of patients with atrial fibrillation where β blockers did reduce mortality.
Funding
Medical Research Council, UK, and EU/EFPIA Innovative Medicines Initiative BigData@Heart.
Pendleton, Samantha C.; Slater, Luke T.; Karwath, Andreas; Gilbert, Rose M.; Davis, Nicola; Pesudovs, Konrad; Liu, Xiaoxuan; Denniston, Alastair K.; Gkoutos, Georgios V.; Braithwaite, Tasanee
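Development and application of the ocular immune-mediated inflammatory diseases ontology enhanced with synonyms from online patient support forum conversation Journal Article
In: Computers in Biology and Medicine, vol. 135, pp. 104542, 2021.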
Links | BibTeX | Tags: health data science, NLP, semantic similarity
@article{Pendleton_2021,
  title = {Development and application of the ocular immune-mediated inflammatory diseases ontology enhanced with synonyms from online patient support forum conversation},
  author = {Samantha C. Pendleton and Luke T. Slater and Andreas Karwath and Rose M. Gilbert and Nicola Davis and Konrad Pesudovs and Xiaoxuan Liu and Alastair K. Denniston and Georgios V. Gkoutos and Tasanee Braithwaite},
  url = {https://doi.org/10.1016/j.compbiomed.2021.104542},
  doi = {10.1016/j.compbiomed.2021.104542},
  year = {2021},
  date = {2021-08-01},
  urldate = {2021-08-01},
  journal = {Computers in Biology and Medicine},
  volume = {135},
  pages = {104542},
  publisher = {Elsevier BV},
  keywords = {health data science, NLP, semantic similarity},
  pubstate = {published},
  tppubtype = {article}
}
Slater, Luke T; Karwath, Andreas; Williams, John A; Russell, Sophie; Makepeace, Silver; Carberry, Alexander; Hoehndorf, Robert; Gkoutos, Georgios V
Towards similarity-based differential diagnostics for common diseases Journal Article
In: Computers in Biology and Medicine, vol. 133, pp. 104360, 2021.
Links | BibTeX | Tags: artificial intelligence, health data science, NLP, semantic similarity
@article{Slater_2021,
  title = {Towards similarity-based differential diagnostics for common diseases},
  author = {Luke T Slater and Andreas Karwath and John A Williams and Sophie Russell and Silver Makepeace and Alexander Carberry and Robert Hoehndorf and Georgios V Gkoutos},
  url = {https://doi.org/10.1016/j.compbiomed.2021.104360},
  doi = {10.1016/j.compbiomed.2021.104360},
  year = {2021},
  date = {2021-06-01},
  urldate = {2021-06-01},
  journal = {Computers in Biology and Medicine},
  volume = {133},
  pages = {104360},
  publisher = {Elsevier BV},
  keywords = {artificial intelligence, health data science, NLP, semantic similarity},
  pubstate = {published},
  tppubtype = {article}
}
Carr, E; Bendayan, R; Bean, D; Stammers, M; Wang, W; Zhang, H; Searle, T; Kraljevic, Z; Shek, A; Phan, H T T; Muruet, W; Gupta, R K; Shinton, A J; Wyatt, M; Shi, T; Zhang, X; Pickles, A; Stahl, D; Zakeri, R; Noursadeghi, M; O'Gallagher, K; Rogers, M; Folarin, A; Karwath, Andreas; Wickstrøm, K E; Köhn-Luque, A; Slater, L; Cardoso, V R; Bourdeaux, C; Holten, A R; Ball, S; McWilliams, C; Roguski, L; Borca, F; Batchelor, J; Amundsen, E K; Wu, X; Gkoutos, G V; Sun, J; Pinto, A; Guthrie, B; Breen, C; Douiri, A; Wu, H; Curcin, V; Teo, J T; Shah, A M; Dobson, R J B
Evaluation and improvement of the National Early Warning Score (NEWS2) for COVID-19: a multi-hospital study Journal Article
In: BMC Med, vol. 19, no. 1, pp. 23, 2021, ISSN: 1741-7015.
Links | BibTeX | Tags: artificial intelligence, COVID-19, early warning score, health data science, machine learning
@article{RN19,
  title = {Evaluation and improvement of the National Early Warning Score (NEWS2) for COVID-19: a multi-hospital study},
  author = {E Carr and R Bendayan and D Bean and M Stammers and W Wang and H Zhang and T Searle and Z Kraljevic and A Shek and H T T Phan and W Muruet and R K Gupta and A J Shinton and M Wyatt and T Shi and X Zhang and A Pickles and D Stahl and R Zakeri and M Noursadeghi and K O'Gallagher and M Rogers and A Folarin and Andreas Karwath and K E Wickstrøm and A Köhn-Luque and L Slater and V R Cardoso and C Bourdeaux and A R Holten and S Ball and C McWilliams and L Roguski and F Borca and J Batchelor and E K Amundsen and X Wu and G V Gkoutos and J Sun and A Pinto and B Guthrie and C Breen and A Douiri and H Wu and V Curcin and J T Teo and A M Shah and R J B Dobson},
  doi = {10.1186/s12916-020-01893-3},
  issn = {1741-7015},
  year = {2021},
  date = {2021-01-01},
  urldate = {2021-01-01},
  journal = {BMC Medicine},
  volume = {19},
  number = {1},
  pages = {23},
  keywords = {artificial intelligence, COVID-19, early warning score, health data science, machine learning},
  pubstate = {published},
  tppubtype = {article}
}
Bunting, KV; Gill, SK; Sitch, A; Mehta, S; O'Connor, K; Lip, GY; Kirchhof, P; Strauss, VY; Rahimi, K; Camm, AJ; Stanbury, M; Griffith, M; Townend, JN; Gkoutos, GV; RAte control Therapy Evaluation in permanent Atrial Fibrillation (RATE-AF) trial group
Improving the diagnosis of heart failure in patients with atrial fibrillation. Journal Article
In: Heart (British Cardiac Society), 2021.
Links | BibTeX | Tags: artificial intelligence, cardiology, health data science
@article{PMID:33692093,
  title = {Improving the diagnosis of heart failure in patients with atrial fibrillation},
  author = {K V Bunting and S K Gill and A Sitch and S Mehta and K O'Connor and G Y Lip and P Kirchhof and V Y Strauss and K Rahimi and A J Camm and M Stanbury and M Griffith and J N Townend and G V Gkoutos and {RAte control Therapy Evaluation in permanent Atrial Fibrillation (RATE-AF) trial group}},
  doi = {10.1136/heartjnl-2020-318557},
  year = {2021},
  date = {2021-01-01},
  urldate = {2021-01-01},
  journal = {Heart (British Cardiac Society)},
  keywords = {artificial intelligence, cardiology, health data science},
  pubstate = {published},
  tppubtype = {article}
}
2020
Escher, S E; Mangelsdorf, I; Hoffmann-Doerr, S; Partosch, F; Karwath, Andreas; Schroeder, K; Zapf, A; Batke, M
Time extrapolation in regulatory risk assessment: The impact of study differences on the extrapolation factors Journal Article
In: Regul Toxicol Pharmacol, vol. 112, pp. 104584, 2020, ISSN: 0273-2300.
Links | BibTeX | Tags: cheminformatics, QSAR
@article{RN17,
  title = {Time extrapolation in regulatory risk assessment: The impact of study differences on the extrapolation factors},
  author = {S E Escher and I Mangelsdorf and S Hoffmann-Doerr and F Partosch and Andreas Karwath and K Schroeder and A Zapf and M Batke},
  doi = {10.1016/j.yrtph.2020.104584},
  issn = {0273-2300},
  year = {2020},
  date = {2020-01-01},
  urldate = {2020-01-01},
  journal = {Regulatory Toxicology and Pharmacology},
  volume = {112},
  pages = {104584},
  keywords = {cheminformatics, QSAR},
  pubstate = {published},
  tppubtype = {article}
}
Wu, H; Zhang, H; Karwath, Andreas; Ibrahim, Z; Shi, T; Zhang, X; Wang, K; Sun, J; Dhaliwal, K; Bean, D; Cardoso, V R; Li, K; Teo, J T; Banerjee, A; Gao-Smith, F; Whitehouse, T; Veenith, T; Gkoutos, G V; Wu, X; Dobson, R; Guthrie, B
Ensemble learning for poor prognosis predictions: a case study on SARS-CoV2 Journal Article
In: J Am Med Inform Assoc, 2020, ISSN: 1067-5027.
Links | BibTeX | Tags: artificial intelligence, COVID-19, health data science, machine learning
@article{RN18,
  title = {Ensemble learning for poor prognosis predictions: a case study on SARS-CoV2},
  author = {H Wu and H Zhang and Andreas Karwath and Z Ibrahim and T Shi and X Zhang and K Wang and J Sun and K Dhaliwal and D Bean and V R Cardoso and K Li and J T Teo and A Banerjee and F Gao-Smith and T Whitehouse and T Veenith and G V Gkoutos and X Wu and R Dobson and B Guthrie},
  doi = {10.1093/jamia/ocaa295},
  issn = {1067-5027},
  year = {2020},
  date = {2020-01-01},
  urldate = {2020-01-01},
  journal = {Journal of the American Medical Informatics Association},
  keywords = {artificial intelligence, COVID-19, health data science, machine learning},
  pubstate = {published},
  tppubtype = {article}
}
2019
Köppel, Marius; Segner, Alexander; Wagener, Martin; Pensel, Lukas; Karwath, Andreas; Kramer, Stefan
Pairwise Learning to Rank by Neural Networks Revisited: Reconstruction, Theoretical Analysis and Practical Performance Journal Article
In: CoRR, 2019.
Links | BibTeX | Tags: artificial intelligence, learning to rank
@article{koeppel2019,
  title = {Pairwise Learning to Rank by Neural Networks Revisited: Reconstruction, Theoretical Analysis and Practical Performance},
  author = {Marius Köppel and Alexander Segner and Martin Wagener and Lukas Pensel and Andreas Karwath and Stefan Kramer},
  url = {https://arxiv.org/abs/1909.02768},
  eprint = {1909.02768},
  eprinttype = {arXiv},
  year = {2019},
  date = {2019-01-01},
  urldate = {2019-01-01},
  journal = {CoRR},
  keywords = {artificial intelligence, learning to rank},
  pubstate = {published},
  tppubtype = {article}
}
Althubaiti, Sara; Karwath, Andreas; Dallol, Ashraf; Noor, Adeeb; Alkhayyat, Shadi Salem; Alwassia, Rolina; Mineta, Katsuhiko; Gojobori, Takashi; Beggs, Andrew D; Schofield, Paul N; Gkoutos, Georgios V; Hoehndorf, Robert
Ontology-based prediction of cancer driver genes Journal Article
In: Scientific Reports, vol. 9, no. 1, pp. 17405, 2019, ISSN: 2045-2322.
Links | BibTeX | Tags: bioinformatics, cancer, health data science
@article{RN16,
  author    = {Sara Althubaiti and Andreas Karwath and Ashraf Dallol and Adeeb Noor and Shadi Salem Alkhayyat and Rolina Alwassia and Katsuhiko Mineta and Takashi Gojobori and Andrew D Beggs and Paul N Schofield and Georgios V Gkoutos and Robert Hoehndorf},
  title     = {Ontology-based prediction of cancer driver genes},
  journal   = {Scientific Reports},
  volume    = {9},
  number    = {1},
  pages     = {17405},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-019-53454-1},
  url       = {https://doi.org/10.1038/s41598-019-53454-1},
  keywords  = {bioinformatics, cancer, health data science},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Geilke, Michael; Karwath, Andreas; Frank, Eibe; Kramer, Stefan
Online estimation of discrete, continuous, and conditional joint densities using classifier chains Journal Article
In: Data Mining and Knowledge Discovery, vol. 32, no. 3, pp. 561-603, 2018, ISSN: 1384-5810.
Abstract | Links | BibTeX | Tags: artificial intelligence, data mining, density estimation, machine learning, stream mining
@article{geilke2018a,
  title = {Online estimation of discrete, continuous, and conditional joint densities using classifier chains},
  author = {Michael Geilke and Andreas Karwath and Eibe Frank and Stefan Kramer},
  url = {https://doi.org/10.1007/s10618-017-0546-6},
  doi = {10.1007/s10618-017-0546-6},
  issn = {1384-5810},
  year = {2018},
  date = {2018-05-01},
  urldate = {2018-05-01},
  journal = {Data Mining and Knowledge Discovery},
  volume = {32},
  number = {3},
  pages = {561--603},
  publisher = {Springer US},
  abstract = {We address the problem of estimating discrete, continuous, and conditional joint densities online, i.e., the algorithm is only provided the current example and its current estimate for its update. The family of proposed online density estimators, estimation of densities online (EDO), uses classifier chains to model dependencies among features, where each classifier in the chain estimates the probability of one particular feature. Because a single chain may not provide a reliable estimate, we also consider ensembles of classifier chains and ensembles of weighted classifier chains. For all density estimators, we provide consistency proofs and propose algorithms to perform certain inference tasks. The empirical evaluation of the estimators is conducted in several experiments and on datasets of up to several millions of instances. In the discrete case, we compare our estimators to density estimates computed by Bayesian structure learners. In the continuous case, we compare them to a state-of-the-art online density estimator. Our experiments demonstrate that, even though designed to work online, EDO delivers estimators of competitive accuracy compared to other density estimators (batch Bayesian structure learners on discrete datasets and the state-of-the-art online density estimator on continuous datasets). Besides achieving similar performance in these cases, EDO is also able to estimate densities with mixed types of variables, i.e., discrete and continuous random variables.},
  keywords = {artificial intelligence, data mining, density estimation, machine learning, stream mining},
  pubstate = {published},
  tppubtype = {article}
}
2017
Karwath, Andreas; Hubrich, Markus; Kramer, Stefan
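Convolutional Neural Networks for the Identification of Regions of Interest in PET Scans: A Study of Representation Learning for Diagnosing Alzheimer's Disease Conference
Artificial Intelligence in Medicine: 16th Conference on Artificial Intelligence in Medicine, AIME 2017, Vienna, Austria, June 21-24, 2017, Proceedings, Springer Springer International Publishing, Cham, 2017, ISBN: 978-3-319-59758-4.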
Abstract | Links | BibTeX | Tags: alzheimer, artificial intelligence, deep learning, health data science, machine learning, medicine, visualization
@conference{karwath2017a,
  title = {Convolutional Neural Networks for the Identification of Regions of Interest in PET Scans: A Study of Representation Learning for Diagnosing Alzheimer's Disease},
  author = {Andreas Karwath and Markus Hubrich and Stefan Kramer},
  editor = {ten Teije, Annette and Popow, Christian and Holmes, John H. and Sacchi, Lucia},
  url = {https://doi.org/10.1007/978-3-319-59758-4_36},
  doi = {10.1007/978-3-319-59758-4_36},
  isbn = {978-3-319-59758-4},
  year = {2017},
  date = {2017-06-21},
  urldate = {2017-06-21},
  booktitle = {Artificial Intelligence in Medicine: 16th Conference on Artificial Intelligence in Medicine, AIME 2017, Vienna, Austria, June 21-24, 2017, Proceedings},
  pages = {316--321},
  publisher = {Springer International Publishing},
  address = {Cham},
  organization = {Springer},
  abstract = {When diagnosing patients suffering from dementia based on imaging data like PET scans, the identification of suitable predictive regions of interest (ROIs) is of great importance. We present a case study of 3-D Convolutional Neural Networks (CNNs) for the detection of ROIs in this context, just using voxel data, without any knowledge given a priori. Our results on data from the Alzheimer’s Disease Neuroimaging Initiative (ADNI) suggest that the predictive performance of the method is on par with that of state-of-the-art methods, with the additional benefit of potential insights into affected brain regions.},
  keywords = {alzheimer, artificial intelligence, deep learning, health data science, machine learning, medicine, visualization},
  pubstate = {published},
  tppubtype = {conference}
}
2016
Geilke, Michael; Karwath, Andreas; Kramer, Stefan
Online density estimation of heterogeneous data streams in higher dimensions Conference
Machine learning and knowledge discovery in databases : European Conference, ECML PKDD 2016, Riva del Garda, Italy, September 19-23, 2016 : Proceedings Part 1, 2016.
Abstract | Links | BibTeX | Tags: data mining, density estimation, stream mining
@conference{geilke2016,
  title = {Online density estimation of heterogeneous data streams in higher dimensions},
  author = {Michael Geilke and Andreas Karwath and Stefan Kramer},
  url = {http://link.springer.com/chapter/10.1007/978-3-319-46128-1_5},
  doi = {10.1007/978-3-319-46128-1_5},
  year = {2016},
  date = {2016-01-01},
  urldate = {2016-01-01},
  booktitle = {Machine learning and knowledge discovery in databases : European Conference, ECML PKDD 2016, Riva del Garda, Italy, September 19-23, 2016 : Proceedings Part 1},
  pages = {65--80},
  abstract = {The joint density of a data stream is suitable for performing data mining tasks without having access to the original data. However, the methods proposed so far only target a small to medium number of variables, since their estimates rely on representing all the interdependencies between the variables of the data. High-dimensional data streams, which are becoming more and more frequent due to increasing numbers of interconnected devices, are, therefore, pushing these methods to their limits. To mitigate these limitations, we present an approach that projects the original data stream into a vector space and uses a set of representatives to provide an estimate. Due to the structure of the estimates, it enables the density estimation of higher-dimensional data and approaches the true density with increasing dimensionality of the vector space. Moreover, it is not only designed to estimate homogeneous data, i.e., where all variables are nominal or all variables are numeric, but it can also estimate heterogeneous data. The evaluation is conducted on synthetic and real-world data. The software related to this paper is available at https://github.com/geilke/mideo.},
  howpublished = {https://publications.UB.Uni-Mainz.DE/opus/frontdoor.php?source_opus=54808},
  keywords = {data mining, density estimation, stream mining},
  pubstate = {published},
  tppubtype = {conference}
}
2015
Geilke, Michael; Karwath, Andreas; Kramer, Stefan
Modeling recurrent distributions in streams using possible worlds Conference
2015 IEEE International Conference on Data Science and Advanced Analytics, DSAA 2015, IEEE, 2015, ISBN: 978-1-4673-8272-4.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, possible worlds, stream mining
@conference{geilke2015,
  title = {Modeling recurrent distributions in streams using possible worlds},
  author = {Michael Geilke and Andreas Karwath and Stefan Kramer},
  url = {https://doi.org/10.1109/DSAA.2015.7344814},
  doi = {10.1109/DSAA.2015.7344814},
  isbn = {978-1-4673-8272-4},
  year = {2015},
  date = {2015-10-19},
  booktitle = {2015 IEEE International Conference on Data Science and Advanced Analytics, DSAA 2015},
  pages = {1--9},
  publisher = {IEEE},
  crossref = {DBLP:conf/dsaa/2015},
  abstract = {Discovering changes in the data distribution of streams and discovering recurrent data distributions are challenging problems in data mining and machine learning. Both have received a lot of attention in the context of classification. With the ever increasing growth of data, however, there is a high demand of compact and universal representations of data streams that enable the user to analyze current as well as historic data without having access to the raw data. To make a first step towards this direction, we propose a condensed representation that captures the various - possibly recurrent - data distributions of the stream by extending the notion of possible worlds. The representation enables queries concerning the whole stream and can, hence, serve as a tool for supporting decision-making processes or serve as a basis for implementing data mining and machine learning algorithms on top of it. We evaluate this condensed representation on synthetic and real-world data.},
  keywords = {density estimation, machine learning, possible worlds, stream mining},
  pubstate = {published},
  tppubtype = {conference}
}
2014
Geilke, Michael; Karwath, Andreas; Kramer, Stefan
A probabilistic condensed representation of data for stream mining Conference
International Conference on Data Science and Advanced Analytics, DSAA 2014, IEEE, 2014.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, stream mining
@conference{geilke2014,
  title = {A probabilistic condensed representation of data for stream mining},
  author = {Michael Geilke and Andreas Karwath and Stefan Kramer},
  url = {https://doi.org/10.1109/DSAA.2014.7058088},
  doi = {10.1109/DSAA.2014.7058088},
  year = {2014},
  date = {2014-10-30},
  booktitle = {International Conference on Data Science and Advanced Analytics, DSAA 2014},
  pages = {297--303},
  publisher = {IEEE},
  crossref = {DBLP:conf/dsaa/2014},
  abstract = {Data mining and machine learning algorithms usually operate directly on the data. However, if the data is not available at once or consists of billions of instances, these algorithms easily become infeasible with respect to memory and run-time concerns. As a solution to this problem, we propose a framework, called MiDEO (Mining Density Estimates inferred Online), in which algorithms are designed to operate on a condensed representation of the data. In particular, we propose to use density estimates, which are able to represent billions of instances in a compact form and can be updated when new instances arrive. As an example for an algorithm that operates on density estimates, we consider the task of mining association rules, which we consider as a form of simple statements about the data. The algorithm, called POEt (Pattern mining on Online density esTimates), is evaluated on synthetic and real-world data and is compared to state-of-the-art algorithms.},
  keywords = {density estimation, machine learning, stream mining},
  pubstate = {published},
  tppubtype = {conference}
}
Gütlein, Martin; Karwath, Andreas; Kramer, Stefan
CheS-Mapper 2.0 for visual validation of (Q)SAR models Journal Article
In: J. Cheminformatics, vol. 6, no. 1, pp. 41, 2014.
Abstract | Links | BibTeX | Tags: cheminformatics, data mining, graph mining, validation, visualization
@article{gutlein2014,
  title = {CheS-Mapper 2.0 for visual validation of (Q)SAR models},
  author = {Martin Gütlein and Andreas Karwath and Stefan Kramer},
  url = {https://doi.org/10.1186/s13321-014-0041-7},
  doi = {10.1186/s13321-014-0041-7},
  year = {2014},
  date = {2014-09-23},
  journal = {Journal of Cheminformatics},
  volume = {6},
  number = {1},
  pages = {41},
  abstract = {Background
Sound statistical validation is important to evaluate and compare the overall performance of (Q)SAR models. However, classical validation does not support the user in better understanding the properties of the model or the underlying data. Even though, a number of visualization tools for analyzing (Q)SAR information in small molecule datasets exist, integrated visualization methods that allow the investigation of model validation results are still lacking.
Results
We propose visual validation, as an approach for the graphical inspection of (Q)SAR model validation results. The approach applies the 3D viewer CheS-Mapper, an open-source application for the exploration of small molecules in virtual 3D space. The present work describes the new functionalities in CheS-Mapper 2.0, that facilitate the analysis of (Q)SAR information and allows the visual validation of (Q)SAR models. The tool enables the comparison of model predictions to the actual activity in feature space. The approach is generic: It is model-independent and can handle physico-chemical and structural input features as well as quantitative and qualitative endpoints.
Conclusions
Visual validation with CheS-Mapper enables analyzing (Q)SAR information in the data and indicates how this information is employed by the (Q)SAR model. It reveals, if the endpoint is modeled too specific or too generic and highlights common properties of misclassified compounds. Moreover, the researcher can use CheS-Mapper to inspect how the (Q)SAR model predicts activity cliffs. The CheS-Mapper software is freely available at http://ches-mapper.org.
Graphical abstract
Comparing actual and predicted activity values with CheS-Mapper.},
  keywords = {cheminformatics, data mining, graph mining, validation, visualization},
  pubstate = {published},
  tppubtype = {article}
}
Sound statistical validation is important to evaluate and compare the overall performance of (Q)SAR models. However, classical validation does not support the user in better understanding the properties of the model or the underlying data. Even though, a number of visualization tools for analyzing (Q)SAR information in small molecule datasets exist, integrated visualization methods that allow the investigation of model validation results are still lacking.
Results
We propose visual validation, as an approach for the graphical inspection of (Q)SAR model validation results. The approach applies the 3D viewer CheS-Mapper, an open-source application for the exploration of small molecules in virtual 3D space. The present work describes the new functionalities in CheS-Mapper 2.0, that facilitate the analysis of (Q)SAR information and allows the visual validation of (Q)SAR models. The tool enables the comparison of model predictions to the actual activity in feature space. The approach is generic: It is model-independent and can handle physico-chemical and structural input features as well as quantitative and qualitative endpoints.
Conclusions
Visual validation with CheS-Mapper enables analyzing (Q)SAR information in the data and indicates how this information is employed by the (Q)SAR model. It reveals, if the endpoint is modeled too specific or too generic and highlights common properties of misclassified compounds. Moreover, the researcher can use CheS-Mapper to inspect how the (Q)SAR model predicts activity cliffs. The CheS-Mapper software is freely available at http://ches-mapper.org.
Graphical abstract
Comparing actual and predicted activity values with CheS-Mapper.
Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan
Structural clustering of millions of molecular graphs Conference
Symposium on Applied Computing, SAC 2014, ACM ACM, New York, NY, USA, 2014.
Abstract | Links | BibTeX | Tags: cluster explanation, clustering, graph mining
@conference{seeland2014a,
  title = {Structural clustering of millions of molecular graphs},
  author = {Madeleine Seeland and Andreas Karwath and Stefan Kramer},
  url = {https://doi.org/10.1145/2554850.2555063},
  doi = {10.1145/2554850.2555063},
  year = {2014},
  date = {2014-03-24},
  urldate = {2014-03-24},
  booktitle = {Symposium on Applied Computing, SAC 2014},
  pages = {121--128},
  publisher = {ACM},
  address = {New York, NY, USA},
  organization = {ACM},
  internal-note = {NOTE(review): the previous abstract was a verbatim copy of the abstract of seeland2014b (support vector machines paper) and did not describe this work; it was removed. Add this paper's own abstract.},
  keywords = {cluster explanation, clustering, graph mining},
  pubstate = {published},
  tppubtype = {conference}
}
Seeland, Madeleine; Maunz, Andreas; Karwath, Andreas; Kramer, Stefan
Extracting information from support vector machines for pattern-based classification Conference
Symposium on Applied Computing, SAC 2014, ACM ACM, New York, NY, USA, 2014.
Abstract | Links | BibTeX | Tags: machine learning, support vector machines
@inproceedings{seeland2014b,
title = {Extracting information from support vector machines for pattern-based classification},
author = {Madeleine Seeland and Andreas Maunz and Andreas Karwath and Stefan Kramer},
url = {http://dl.acm.org/citation.cfm?doid=2554850.2555065},
doi = {10.1145/2554850.2555065},
year = {2014},
date = {2014-03-24},
urldate = {2014-03-24},
booktitle = {Symposium on Applied Computing, SAC 2014},
pages = {129--136},
publisher = {ACM},
address = {New York, NY, USA},
abstract = {Statistical machine learning algorithms building on patterns found by pattern mining algorithms have to cope with large solution sets and thus the high dimensionality of the feature space. Vice versa, pattern mining algorithms are frequently applied to irrelevant instances, thus causing noise in the output. Solution sets of pattern mining algorithms also typically grow with increasing input datasets. The paper proposes an approach to overcome these limitations. The approach extracts information from trained support vector machines, in particular their support vectors and their relevance according to their coefficients. It uses the support vectors along with their coefficients as input to pattern mining algorithms able to handle weighted instances. Our experiments in the domain of graph mining and molecular graphs show that the resulting models are not significantly less accurate than models trained on the full datasets, yet require only a fraction of the time using much smaller sets of patterns.},
keywords = {machine learning, support vector machines},
pubstate = {published},
tppubtype = {conference}
}
2013
Geilke, Michael; Frank, Eibe; Karwath, Andreas; Kramer, Stefan
Online Estimation of Discrete Densities Conference
IEEE 13th International Conference on Data Mining, ICDM 2013, IEEE, 2013, ISSN: 1550-4786.
Abstract | Links | BibTeX | Tags: density estimation, machine learning, stream mining
@inproceedings{geilke2013,
title = {Online Estimation of Discrete Densities},
author = {Michael Geilke and Eibe Frank and Andreas Karwath and Stefan Kramer},
url = {http://dx.doi.org/10.1109/ICDM.2013.91},
doi = {10.1109/ICDM.2013.91},
issn = {1550-4786},
year = {2013},
date = {2013-12-07},
booktitle = {IEEE 13th International Conference on Data Mining, ICDM 2013},
pages = {191--200},
publisher = {IEEE},
crossref = {DBLP:conf/icdm/2013},
abstract = {We address the problem of estimating a discrete joint density online, that is, the algorithm is only provided the current example and its current estimate. The proposed online estimator of discrete densities, EDDO (Estimation of Discrete Densities Online), uses classifier chains to model dependencies among features. Each classifier in the chain estimates the probability of one particular feature. Because a single chain may not provide a reliable estimate, we also consider ensembles of classifier chains and ensembles of weighted classifier chains. For all density estimators, we provide consistency proofs and propose algorithms to perform certain inference tasks. The empirical evaluation of the estimators is conducted in several experiments and on data sets of up to several million instances: We compare them to density estimates computed from Bayesian structure learners, evaluate them under the influence of noise, measure their ability to deal with concept drift, and measure the run-time performance. Our experiments demonstrate that, even though designed to work online, EDDO delivers estimators of competitive accuracy compared to batch Bayesian structure learners and batch variants of EDDO.},
keywords = {density estimation, machine learning, stream mining},
pubstate = {published},
tppubtype = {conference}
}
Gütlein, Martin; Helma, Christoph; Karwath, Andreas; Kramer, Stefan
A Large-Scale Empirical Evaluation of Cross-Validation and External Test Set Validation in (Q)SAR Journal Article
In: Molecular Informatics, vol. 32, no. 5-6, pp. 516-528, 2013.
Abstract | Links | BibTeX | Tags: cheminformatics, crossvalidation, external validation, QSAR, validation
@article{guetlein2013,
title = {A Large-Scale Empirical Evaluation of Cross-Validation and External Test Set Validation in {(Q)SAR}},
author = {Martin Gütlein and Christoph Helma and Andreas Karwath and Stefan Kramer},
url = {http://onlinelibrary.wiley.com/doi/10.1002/minf.201200134/abstract},
doi = {10.1002/minf.201200134},
year = {2013},
date = {2013-10-14},
urldate = {2013-10-14},
journal = {Molecular Informatics},
volume = {32},
number = {5--6},
pages = {516--528},
abstract = {(Q)SAR model validation is essential to ensure the quality of inferred models and to indicate future model predictivity on unseen compounds. Proper validation is also one of the requirements of regulatory authorities in order to accept the (Q)SAR model, and to approve its use in real world scenarios as alternative testing method. However, at the same time, the question of how to validate a (Q)SAR model, in particular whether to employ variants of cross-validation or external test set validation, is still under discussion. In this paper, we empirically compare a k-fold cross-validation with external test set validation. To this end we introduce a workflow allowing to realistically simulate the common problem setting of building predictive models for relatively small datasets. The workflow allows to apply the built and validated models on large amounts of unseen data, and to compare the performance of the different validation approaches. The experimental results indicate that cross-validation produces higher performant (Q)SAR models than external test set validation, reduces the variance of the results, while at the same time underestimates the performance on unseen compounds. The experimental results reported in this paper suggest that, contrary to current conception in the community, cross-validation may play a significant role in evaluating the predictivity of (Q)SAR models.},
keywords = {cheminformatics, crossvalidation, external validation, QSAR, validation},
pubstate = {published},
tppubtype = {article}
}
2012
Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan
A structural cluster kernel for learning on graphs Conference
The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012, ACM, New York, NY, USA, 2012, ISBN: 978-1-4503-1462-6.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, data mining, kernels, QSAR, support vector machines
@inproceedings{seeland2012,
title = {A structural cluster kernel for learning on graphs},
author = {Madeleine Seeland and Andreas Karwath and Stefan Kramer},
url = {http://doi.acm.org/10.1145/2339530.2339614},
doi = {10.1145/2339530.2339614},
isbn = {978-1-4503-1462-6},
year = {2012},
date = {2012-08-12},
booktitle = {The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012},
pages = {516--524},
publisher = {ACM},
address = {New York, NY, USA},
crossref = {DBLP:conf/kdd/2012},
abstract = {In recent years, graph kernels have received considerable interest within the machine learning and data mining community. Here, we introduce a novel approach enabling kernel methods to utilize additional information hidden in the structural neighborhood of the graphs under consideration. Our novel structural cluster kernel (SCK) incorporates similarities induced by a structural clustering algorithm to improve state-of-the-art graph kernels. The approach taken is based on the idea that graph similarity can not only be described by the similarity between the graphs themselves, but also by the similarity they possess with respect to their structural neighborhood. We applied our novel kernel in a supervised and a semi-supervised setting to regression and classification problems on a number of real-world datasets of molecular graphs.
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.},
keywords = {cheminformatics, clustering, data mining, kernels, QSAR, support vector machines},
pubstate = {published},
tppubtype = {conference}
}
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.
Gütlein, Martin; Karwath, Andreas; Kramer, Stefan
CheS-Mapper - Chemical Space Mapping and Visualization in 3D Journal Article
In: J. Cheminformatics, vol. 4, pp. 7, 2012.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, dimensionality reduction, QSAR, visualization
@article{gutlein2012,
title = {{CheS-Mapper} - Chemical Space Mapping and Visualization in {3D}},
author = {Martin Gütlein and Andreas Karwath and Stefan Kramer},
url = {http://dx.doi.org/10.1186/1758-2946-4-7},
doi = {10.1186/1758-2946-4-7},
year = {2012},
date = {2012-03-17},
journal = {Journal of Cheminformatics},
volume = {4},
pages = {7},
abstract = {Analyzing chemical datasets is a challenging task for scientific researchers in the field of chemoinformatics. It is important, yet difficult to understand the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. To that respect, visualization tools can help to better comprehend the underlying correlations. Our recently developed 3D molecular viewer CheS-Mapper (Chemical Space Mapper) divides large datasets into clusters of similar compounds and consequently arranges them in 3D space, such that their spatial proximity reflects their similarity. The user can indirectly determine similarity, by selecting which features to employ in the process. The tool can use and calculate different kind of features, like structural fragments as well as quantitative chemical descriptors. These features can be highlighted within CheS-Mapper, which aids the chemist to better understand patterns and regularities and relate the observations to established scientific knowledge. As a final function, the tool can also be used to select and export specific subsets of a given dataset for further analysis.},
keywords = {cheminformatics, clustering, dimensionality reduction, QSAR, visualization},
pubstate = {published},
tppubtype = {article}
}
Grzonka, Slawomir; Karwath, Andreas; Dijoux, Frederic; Burgard, Wolfram
Activity-Based Estimation of Human Trajectories Journal Article
In: IEEE Transactions on Robotics, vol. 28, no. 1, pp. 234-245, 2012.
Abstract | Links | BibTeX | Tags: activity recognition, artificial intelligence, machine learning, simultaneous localization and mapping
@article{grzonka2012,
title = {Activity-Based Estimation of Human Trajectories},
author = {Slawomir Grzonka and Andreas Karwath and Frederic Dijoux and Wolfram Burgard},
url = {http://dx.doi.org/10.1109/TRO.2011.2165372},
doi = {10.1109/TRO.2011.2165372},
year = {2012},
date = {2012-02-02},
urldate = {2012-02-02},
journal = {IEEE Transactions on Robotics},
volume = {28},
number = {1},
pages = {234--245},
abstract = {We present a novel approach to incrementally determine the trajectory of a person in 3-D based on its motions and activities in real time. In our algorithm, we estimate the motions and activities of the user given the data that are obtained from a motion capture suit equipped with several inertial measurement units. These activities include walking up and down staircases, as well as opening and closing doors. We interpret the first two types of activities as motion constraints and door-handling events as landmark detections in a graph-based simultaneous localization and mapping (SLAM) framework. Since we cannot distinguish between individual doors, we employ a multihypothesis tracking approach on top of the SLAM procedure to deal with the high data-association uncertainty. As a result, we are able to accurately and robustly recover the trajectory of the person. Additionally, we present an algorithm to build approximate geometrical and topological maps based on the estimated trajectory and detected activities. We evaluate our approach in practical experiments that are carried out with different subjects and in various environments.},
keywords = {activity recognition, artificial intelligence, machine learning, simultaneous localization and mapping},
pubstate = {published},
tppubtype = {article}
}
2010
Hardy, Barry J.; Douglas, Nicki; Helma, Christoph; Rautenberg, Micha; Jeliazkova, Nina; Jeliazkov, Vedrin; Nikolova, Ivelina; Benigni, Romualdo; Tcheremenskaia, Olga; Kramer, Stefan; Girschick, Tobias; Buchwald, Fabian; Wicker, Jörg; Karwath, Andreas; Gütlein, Martin; Maunz, Andreas; Sarimveis, Haralambos; Melagraki, Georgia; Afantitis, Antreas; Sopasakis, Pantelis; Gallagher, David; Poroikov, Vladimir; Filimonov, Dmitry; Zakharov, Alexey V.; Lagunin, Alexey; Gloriozova, Tatyana; Novikov, Sergey; Skvortsova, Natalia; Druzhilovsky, Dmitry; Chawla, Sunil; Ghosh, Indira; Ray, Surajit; Patel, Hitesh; Escher, Sylvia
Collaborative development of predictive toxicology applications Journal Article
In: J. Cheminformatics, vol. 2, pp. 7, 2010.
Abstract | Links | BibTeX | Tags: crossvalidation, data mining, QSAR, scientific knowledge, validation
@article{hardy2010,
title = {Collaborative development of predictive toxicology applications},
author = {Barry J. Hardy and Nicki Douglas and Christoph Helma and Micha Rautenberg and Nina Jeliazkova and Vedrin Jeliazkov and Ivelina Nikolova and Romualdo Benigni and Olga Tcheremenskaia and Stefan Kramer and Tobias Girschick and Fabian Buchwald and Jörg Wicker and Andreas Karwath and Martin Gütlein and Andreas Maunz and Haralambos Sarimveis and Georgia Melagraki and Antreas Afantitis and Pantelis Sopasakis and David Gallagher and Vladimir Poroikov and Dmitry Filimonov and Alexey V. Zakharov and Alexey Lagunin and Tatyana Gloriozova and Sergey Novikov and Natalia Skvortsova and Dmitry Druzhilovsky and Sunil Chawla and Indira Ghosh and Surajit Ray and Hitesh Patel and Sylvia Escher},
url = {http://dx.doi.org/10.1186/1758-2946-2-7},
doi = {10.1186/1758-2946-2-7},
year = {2010},
date = {2010-08-31},
urldate = {2010-08-31},
journal = {Journal of Cheminformatics},
volume = {2},
pages = {7},
abstract = {OpenTox provides an interoperable, standards-based Framework for the support of predictive toxicology data management, algorithms, modelling, validation and reporting. It is relevant to satisfying the chemical safety assessment requirements of the REACH legislation as it supports access to experimental data, (Quantitative) Structure-Activity Relationship models, and toxicological information through an integrating platform that adheres to regulatory requirements and OECD validation principles. Initial research defined the essential components of the Framework including the approach to data access, schema and management, use of controlled vocabularies and ontologies, architecture, web service and communications protocols, and selection and integration of algorithms for predictive modelling. OpenTox provides end-user oriented tools to non-computational specialists, risk assessors, and toxicological experts in addition to Application Programming Interfaces (APIs) for developers of new applications. OpenTox actively supports public standards for data representation, interfaces, vocabularies and ontologies, Open Source approaches to core platform components, and community-based collaboration approaches, so as to progress system interoperability goals.
The OpenTox Framework includes APIs and services for compounds, datasets, features, algorithms, models, ontologies, tasks, validation, and reporting which may be combined into multiple applications satisfying a variety of different user needs. OpenTox applications are based on a set of distributed, interoperable OpenTox API-compliant REST web services. The OpenTox approach to ontology allows for efficient mapping of complementary data coming from different datasets into a unifying structure having a shared terminology and representation.
Two initial OpenTox applications are presented as an illustration of the potential impact of OpenTox for high-quality and consistent structure-activity relationship modelling of REACH-relevant endpoints: ToxPredict which predicts and reports on toxicities for endpoints for an input chemical structure, and ToxCreate which builds and validates a predictive toxicity model based on an input toxicology dataset. Because of the extensible nature of the standardised Framework design, barriers of interoperability between applications and content are removed, as the user may combine data, models and validation from multiple sources in a dependable and time-effective way.},
keywords = {crossvalidation, data mining, QSAR, scientific knowledge, validation},
pubstate = {published},
tppubtype = {article}
}
The OpenTox Framework includes APIs and services for compounds, datasets, features, algorithms, models, ontologies, tasks, validation, and reporting which may be combined into multiple applications satisfying a variety of different user needs. OpenTox applications are based on a set of distributed, interoperable OpenTox API-compliant REST web services. The OpenTox approach to ontology allows for efficient mapping of complementary data coming from different datasets into a unifying structure having a shared terminology and representation.
Two initial OpenTox applications are presented as an illustration of the potential impact of OpenTox for high-quality and consistent structure-activity relationship modelling of REACH-relevant endpoints: ToxPredict which predicts and reports on toxicities for endpoints for an input chemical structure, and ToxCreate which builds and validates a predictive toxicity model based on an input toxicology dataset. Because of the extensible nature of the standardised Framework design, barriers of interoperability between applications and content are removed, as the user may combine data, models and validation from multiple sources in a dependable and time-effective way.
Grzonka, Slawomir; Dijoux, Frederic; Karwath, Andreas; Burgard, Wolfram
Mapping indoor environments based on human activity Conference
IEEE International Conference on Robotics and Automation, ICRA 2010, IEEE, 2010, ISBN: 978-1-4244-5038-1.
Abstract | Links | BibTeX | Tags: activity recognition, machine learning, simultaneous localization and mapping
@inproceedings{grzonka2010b,
title = {Mapping indoor environments based on human activity},
author = {Slawomir Grzonka and Frederic Dijoux and Andreas Karwath and Wolfram Burgard},
url = {http://dx.doi.org/10.1109/ROBOT.2010.5509976},
doi = {10.1109/ROBOT.2010.5509976},
isbn = {978-1-4244-5038-1},
year = {2010},
date = {2010-05-03},
booktitle = {IEEE International Conference on Robotics and Automation, ICRA 2010},
pages = {476--481},
publisher = {IEEE},
crossref = {DBLP:conf/icra/2010},
abstract = {We present a novel approach to build approximate maps of structured environments utilizing human motion and activity. Our approach uses data recorded with a data suit which is equipped with several IMUs to detect movements of a person and door opening and closing events. In our approach we interpret the movements as motion constraints and door handling events as landmark detections in a graph-based SLAM framework. As we cannot distinguish between individual doors, we employ a multi-hypothesis approach on top of the SLAM system to deal with the high data-association uncertainty. As a result, our approach is able to accurately and robustly recover the trajectory of the person. We additionally take advantage of the fact that people traverse free space and that doors separate rooms to recover the geometric structure of the environment after the graph optimization. We evaluate our approach in several experiments carried out with different users and in environments of different types.},
keywords = {activity recognition, machine learning, simultaneous localization and mapping},
pubstate = {published},
tppubtype = {conference}
}
Grzonka, Slawomir; Dijoux, Frederic; Karwath, Andreas; Burgard, Wolfram
Learning Maps of Indoor Environments Based on Human Activity Conference
Embedded Reasoning, Papers from the 2010 AAAI Spring Symposium, 2010.
Abstract | Links | BibTeX | Tags: activity recognition, localization, machine learning, mobile systems and mobility, simultaneous localization and mapping, social robotics
@inproceedings{grzonka2010a,
title = {Learning Maps of Indoor Environments Based on Human Activity},
author = {Slawomir Grzonka and Frederic Dijoux and Andreas Karwath and Wolfram Burgard},
url = {http://www.aaai.org/ocs/index.php/SSS/SSS10/paper/view/1172},
year = {2010},
date = {2010-03-23},
booktitle = {Embedded Reasoning, Papers from the 2010 AAAI Spring Symposium},
crossref = {DBLP:conf/aaaiss/2010-4},
abstract = {We present a novel approach to build approximate maps of structured environments utilizing human motion and activity. Our approach uses data recorded with a data suit which is equipped with several IMUs to detect movements of a person and door opening and closing events. In our approach we interpret the movements as motion constraints and door handling events as landmark detections in a graph-based SLAM framework. As we cannot distinguish between individual doors, we employ a multi-hypothesis approach on top of the SLAM system to deal with the high data-association uncertainty. As a result, our approach is able to accurately and robustly recover the trajectory of the person. We additionally take advantage of the fact that people traverse free space and that doors separate rooms to recover the geometric structure of the environment after the graph optimization. We evaluate our approach in several experiments carried out with different users and in environments of different types.},
keywords = {activity recognition, localization, machine learning, mobile systems and mobility, simultaneous localization and mapping, social robotics},
pubstate = {published},
tppubtype = {conference}
}
2009
Gütlein, Martin; Frank, Eibe; Hall, Mark A.; Karwath, Andreas
Large-scale attribute selection using wrappers Conference
The IEEE Symposium on Computational Intelligence and Data Mining, CIDM 2009, IEEE, 2009, ISBN: 978-1-4244-2765-9.
Abstract | Links | BibTeX | Tags: crossvalidation, machine learning
@inproceedings{gutlein2009,
title = {Large-scale attribute selection using wrappers},
author = {Martin Gütlein and Eibe Frank and Mark A. Hall and Andreas Karwath},
url = {http://dx.doi.org/10.1109/CIDM.2009.4938668},
doi = {10.1109/CIDM.2009.4938668},
isbn = {978-1-4244-2765-9},
year = {2009},
date = {2009-01-01},
booktitle = {The IEEE Symposium on Computational Intelligence and Data Mining, CIDM 2009},
pages = {332--339},
publisher = {IEEE},
crossref = {DBLP:conf/cidm/2009},
abstract = {Scheme-specific attribute selection with the wrapper and variants of forward selection is a popular attribute selection technique for classification that yields good results. However, it can run the risk of overfitting because of the extent of the search and the extensive use of internal cross-validation. Moreover, although wrapper evaluators tend to achieve superior accuracy compared to filters, they face a high computational cost. The problems of overfitting and high runtime occur in particular on high-dimensional datasets, like microarray data. We investigate Linear Forward Selection, a technique to reduce the number of attributes expansions in each forward selection step. Our experiments demonstrate that this approach is faster, finds smaller subsets and can even increase the accuracy compared to standard forward selection. We also investigate a variant that applies explicit subset size determination in forward selection to combat overfitting, where the search is forced to stop at a precomputed “optimal” subset size. We show that this technique reduces subset size while maintaining comparable accuracy.},
keywords = {crossvalidation, machine learning},
pubstate = {published},
tppubtype = {conference}
}
Schulz, Hannes; Kersting, Kristian; Karwath, Andreas
ILP, the Blind, and the Elephant: Euclidean Embedding of Co-proven Queries Conference
Inductive Logic Programming, 19th International Conference, ILP 2009, Springer Verlag, Berlin Heidelberg, Germany, 2009, ISBN: 978-3-642-13839-3.
Abstract | Links | BibTeX | Tags: cheminformatics, dimensionality reduction, inductive logic programming, relational learning, scientific knowledge, visualization
@inproceedings{schulz2009,
title = {{ILP}, the Blind, and the Elephant: {Euclidean} Embedding of Co-proven Queries},
author = {Hannes Schulz and Kristian Kersting and Andreas Karwath},
url = {http://dx.doi.org/10.1007/978-3-642-13840-9_20},
doi = {10.1007/978-3-642-13840-9_20},
isbn = {978-3-642-13839-3},
year = {2009},
date = {2009-01-01},
booktitle = {Inductive Logic Programming, 19th International Conference, ILP 2009},
pages = {209--216},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
crossref = {DBLP:conf/ilp/2009},
abstract = {Relational data is complex. This complexity makes one of the basic steps of ILP difficult: understanding the data and results. If the user cannot easily understand it, he draws incomplete conclusions. The situation is very much as in the parable of the blind men and the elephant that appears in many cultures. In this tale the blind work independently and with quite different pieces of information, thereby drawing very different conclusions about the nature of the beast. In contrast, visual representations make it easy to shift from one perspective to another while exploring and analyzing data. This paper describes a method for embedding interpretations and queries into a single, common Euclidean space based on their co-proven statistics. We demonstrate our method on real-world datasets showing that ILP results can indeed be captured at a glance.},
keywords = {cheminformatics, dimensionality reduction, inductive logic programming, relational learning, scientific knowledge, visualization},
pubstate = {published},
tppubtype = {conference}
}
2008
Karwath, Andreas; Kersting, Kristian; Landwehr, Niels
Boosting Relational Sequence Alignments Conference
The 8th IEEE International Conference on Data Mining, ICDM 2008, IEEE, 2008, ISBN: 978-0-7695-3502-9.
Abstract | Links | BibTeX | Tags: inductive logic programming, machine learning, relational learning, scientific knowledge
@inproceedings{karwath2008,
title = {Boosting Relational Sequence Alignments},
author = {Andreas Karwath and Kristian Kersting and Niels Landwehr},
url = {http://dx.doi.org/10.1109/ICDM.2008.127},
doi = {10.1109/ICDM.2008.127},
isbn = {978-0-7695-3502-9},
year = {2008},
date = {2008-12-15},
booktitle = {The 8th IEEE International Conference on Data Mining, ICDM 2008},
pages = {857--862},
publisher = {IEEE},
crossref = {DBLP:conf/icdm/2008},
abstract = {The task of aligning sequences arises in many applications. Classical dynamic programming approaches require the explicit state enumeration in the reward model. This is often impractical: the number of states grows very quickly with the number of domain objects and relations among these objects. Relational sequence alignment aims at exploiting symbolic structure to avoid the full enumeration. This comes at the expense of a more complex reward model selection problem: virtually infinitely many abstraction levels have to be explored. In this paper, we apply gradient-based boosting to leverage this problem. Specifically, we show how to reduce the learning problem to a series of relational regressions problems. The main benefit of this is that interactions between states variables are introduced only as needed, so that the potentially infinite search space is not explicitly considered. As our experimental results show, this boosting approach can significantly improve upon established results in challenging applications.},
keywords = {inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {conference}
}
Kersting, Kristian; De Raedt, Luc; Gutmann, Bernd; Karwath, Andreas; Landwehr, Niels
Relational Sequence Learning Book Chapter
In: Probabilistic Inductive Logic Programming - Theory and Applications, vol. 4911, pp. 28-55, Springer Verlag, Berlin Heidelberg, Germany, 2008, ISBN: 978-3-540-78651-1.
Abstract | Links | BibTeX | Tags: inductive logic programming, machine learning, relational learning, scientific knowledge
@incollection{kersting2008,
title = {Relational Sequence Learning},
author = {Kristian Kersting and De Raedt, Luc and Bernd Gutmann and Andreas Karwath and Niels Landwehr},
url = {http://dx.doi.org/10.1007/978-3-540-78652-8_2},
doi = {10.1007/978-3-540-78652-8_2},
isbn = {978-3-540-78651-1},
year = {2008},
date = {2008-01-01},
booktitle = {Probabilistic Inductive Logic Programming - Theory and Applications},
volume = {4911},
pages = {28--55},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
series = {Lecture Notes in Computer Science},
crossref = {DBLP:conf/ilp/2008p},
abstract = {Sequential behavior and sequence learning are essential to intelligence. Often the elements of sequences exhibit an internal structure that can elegantly be represented using relational atoms. Applying traditional sequential learning techniques to such relational sequences requires one either to ignore the internal structure or to live with a combinatorial explosion of the model complexity. This chapter briefly reviews relational sequence learning and describes several techniques tailored towards realizing this, such as local pattern mining techniques, (hidden) Markov models, conditional random fields, dynamic programming and reinforcement learning.},
keywords = {inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {inbook}
}
2007
King, Ross D.; Karwath, Andreas; Clare, Amanda; Dehaspe, Luc
Logic and the Automatic Acquisition of Scientific Knowledge: An Application to Functional Genomics Conference
Computational Discovery of Scientific Knowledge, Introduction, Techniques, and Applications in Environmental and Life Sciences, vol. 4660, Lecture Notes in Computer Science, Springer Verlag, Berlin Heidelberg, Germany, 2007, ISBN: 978-3-540-73919-7.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge
@inproceedings{king2007,
title = {Logic and the Automatic Acquisition of Scientific Knowledge: An Application to Functional Genomics},
author = {Ross D. King and Andreas Karwath and Amanda Clare and Luc Dehaspe},
url = {http://dx.doi.org/10.1007/978-3-540-73920-3_13},
doi = {10.1007/978-3-540-73920-3_13},
isbn = {978-3-540-73919-7},
year = {2007},
date = {2007-01-01},
booktitle = {Computational Discovery of Scientific Knowledge, Introduction, Techniques, and Applications in Environmental and Life Sciences},
volume = {4660},
pages = {273--289},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
series = {Lecture Notes in Computer Science},
crossref = {DBLP:conf/dis/2007book},
abstract = {This paper is a manifesto aimed at computer scientists interested in developing and applying scientific discovery methods. It argues that: science is experiencing an unprecedented “explosion” in the amount of available data; traditional data analysis methods cannot deal with this increased quantity of data; there is an urgent need to automate the process of refining scientific data into scientific knowledge; inductive logic programming (ILP) is a data analysis framework well suited for this task; and exciting new scientific discoveries can be achieved using ILP scientific discovery methods. We describe an example of using ILP to analyse a large and complex bioinformatic database that has produced unexpected and interesting scientific results in functional genomics. We then point a possible way forward to integrating machine learning with scientific databases to form intelligent databases.},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {conference}
}
Karwath, Andreas; Kersting, Kristian
Relational Sequence Alignments and Logos Conference
Inductive Logic Programming, 16th International Conference, ILP 2006, vol. 4455, Lecture Notes in Computer Science, Springer Verlag, Berlin Heidelberg, Germany, 2007, ISBN: 978-3-540-73846-6.
Abstract | Links | BibTeX | Tags: bioinformatics, inductive logic programming, relational learning, scientific knowledge
% Conference paper in the ILP 2006 proceedings (LNCS vol. 4455) introducing Real, an approach for aligning sequences of structured symbols.
@conference{karwath2007,
title = {Relational Sequence Alignments and Logos},
author = {Karwath, Andreas and Kersting, Kristian},
url = {http://dx.doi.org/10.1007/978-3-540-73847-3_29},
doi = {10.1007/978-3-540-73847-3_29},
isbn = {978-3-540-73846-6},
year = {2007},
date = {2007-01-01},
booktitle = {Inductive Logic Programming, 16th International Conference, ILP 2006},
volume = {4455},
pages = {290--304},
publisher = {Springer Verlag},
address = {Berlin Heidelberg, Germany},
organization = {Springer-Verlag Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
crossref = {DBLP:conf/ilp/2006},
abstract = {The need to measure sequence similarity arises in many application domains and often coincides with sequence alignment: the more similar two sequences are, the better they can be aligned. Aligning sequences not only shows how similar sequences are, it also shows where there are differences and correspondences between the sequences.
Traditionally, the alignment has been considered for sequences of flat symbols only. Many real world sequences such as natural language sentences and protein secondary structures, however, exhibit rich internal structures. This is akin to the problem of dealing with structured examples studied in the field of inductive logic programming (ILP). In this paper, we introduce Real, which is a powerful, yet simple approach to align sequence of structured symbols using well-established ILP distance measures within traditional alignment methods. Although straight-forward, experiments on protein data and Medline abstracts show that this approach works well in practice, that the resulting alignments can indeed provide more information than flat ones, and that they are meaningful to experts when represented graphically.},
keywords = {bioinformatics, inductive logic programming, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {conference}
}
Traditionally, the alignment has been considered for sequences of flat symbols only. Many real world sequences such as natural language sentences and protein secondary structures, however, exhibit rich internal structures. This is akin to the problem of dealing with structured examples studied in the field of inductive logic programming (ILP). In this paper, we introduce Real, which is a powerful, yet simple approach to align sequence of structured symbols using well-established ILP distance measures within traditional alignment methods. Although straight-forward, experiments on protein data and Medline abstracts show that this approach works well in practice, that the resulting alignments can indeed provide more information than flat ones, and that they are meaningful to experts when represented graphically.
2006
Karwath, Andreas; De Raedt, Luc
SMIREP: Predicting Chemical Activity from SMILES Journal Article
In: Journal of Chemical Information and Modeling, vol. 46, no. 6, pp. 2432-2444, 2006.
Abstract | Links | BibTeX | Tags: cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge
% Journal article (Journal of Chemical Information and Modeling 46(6), 2006) on SMIREP, a rule learner for SAR prediction from SMILES strings.
% The acronyms in the title are brace-protected so sentence-casing styles keep them upper-case.
@article{karwath06c,
title = {{SMIREP}: Predicting Chemical Activity from {SMILES}},
author = {Karwath, Andreas and De Raedt, Luc},
url = {http://pubs.acs.org/doi/abs/10.1021/ci060159g},
doi = {10.1021/ci060159g},
year = {2006},
date = {2006-10-12},
journal = {Journal of Chemical Information and Modeling},
volume = {46},
number = {6},
pages = {2432--2444},
abstract = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps. In the first step, a typically large set of fingerprints, or fragments of interest, is constructed (either by hand or by some recent data mining techniques). In the second step, machine learning techniques are applied to obtain a predictive model. The result is often not only a highly accurate but also hard to interpret model. In this paper, we demonstrate the capabilities of a novel SAR algorithm, SMIREP, which tightly integrates the fragment and model generation steps and which yields simple models in the form of a small set of IF-THEN rules. These rules contain SMILES fragments, which are easy to understand to the computational chemist. SMIREP combines ideas from the well-known IREP rule learner with a novel fragmentation algorithm for SMILES strings. SMIREP has been evaluated on three problems: the prediction of binding activities for the estrogen receptor (Environmental Protection Agency's (EPA's) Distributed Structure-Searchable Toxicity (DSSTox) National Center for Toxicological Research estrogen receptor (NCTRER) Database), the prediction of mutagenicity using the carcinogenic potency database (CPDB), and the prediction of biodegradability on a subset of the Environmental Fate Database (EFDB). In these applications, SMIREP has the advantage of producing easily interpretable rules while having predictive accuracies that are comparable to those of alternative state-of-the-art techniques.},
keywords = {cheminformatics, graph mining, machine learning, QSAR, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {article}
}
Karwath, Andreas; Kersting, Kristian
Relational Sequence Alignments Conference
Proc. The 4th International Workshop on Mining and Learning with Graphs, MLG 2006, 2006, (workshop).
BibTeX | Tags: bioinformatics, cheminformatics, relational learning, scientific knowledge
% Workshop paper presented at MLG 2006.
% Editors and month are stored as separate fields: a percent sign does not start a comment inside a BibTeX entry, so such text would otherwise become part of the booktitle.
@conference{karwath06b,
title = {Relational Sequence Alignments},
author = {Karwath, Andreas and Kersting, Kristian},
editor = {Gärtner, Thomas and Garriga, Gemma C. and Meinl, Thorsten},
year = {2006},
month = sep,
date = {2006-01-01},
booktitle = {Proc. The 4th International Workshop on Mining and Learning with Graphs, MLG 2006},
pages = {149--156},
note = {workshop},
keywords = {bioinformatics, cheminformatics, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {conference}
}
Clare, Amanda; Karwath, Andreas; Ougham, Helen; King, Ross D.
Functional bioinformatics for Arabidopsis thaliana Journal Article
In: Bioinformatics, vol. 22, no. 9, pp. 1130-1136, 2006.
Abstract | Links | BibTeX | Tags: bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge
% Journal article (Bioinformatics 22(9), 2006) applying the Data Mining Prediction (DMP) method to predict functional classes of Arabidopsis thaliana genes.
% The genus name in the title is brace-protected so sentence-casing styles keep its capital.
@article{karwath06a,
title = {Functional bioinformatics for {Arabidopsis} thaliana},
author = {Clare, Amanda and Karwath, Andreas and Ougham, Helen and King, Ross D.},
url = {https://bioinformatics.oxfordjournals.org/content/22/9/1130.full.pdf+html},
doi = {10.1093/bioinformatics/btl051},
year = {2006},
date = {2006-01-01},
journal = {Bioinformatics},
volume = {22},
number = {9},
pages = {1130--1136},
abstract = {Motivation: The genome of Arabidopsis thaliana, which has the best understood plant genome, still has approximately one-third of its genes with no functional annotation at all from either MIPS or TAIR. We have applied our Data Mining Prediction (DMP) method to the problem of predicting the functional classes of these protein sequences. This method is based on using a hybrid machine-learning/data-mining method to identify patterns in the bioinformatic data about sequences that are predictive of function. We use data about sequence, predicted secondary structure, predicted structural domain, InterPro patterns, sequence similarity profile and expressions data.
Results: We predicted the functional class of a high percentage of the Arabidopsis genes with currently unknown function. These predictions are interpretable and have good test accuracies. We describe in detail seven of the rules produced.
Availability: Rulesets are available at http://www.aber.ac.uk/compsci/Research/bio/dss/arabpreds/ and predictions are available at http://www.genepredictions.org
Contact: afc@aber.ac.uk},
keywords = {bioinformatics, data mining, inductive logic programming, machine learning, relational learning, scientific knowledge},
pubstate = {published},
tppubtype = {article}
}
Results: We predicted the functional class of a high percentage of the Arabidopsis genes with currently unknown function. These predictions are interpretable and have good test accuracies. We describe in detail seven of the rules produced.
Availability: Rulesets are available at http://www.aber.ac.uk/compsci/Research/bio/dss/arabpreds/ and predictions are available at http://www.genepredictions.org
Contact: afc@aber.ac.uk
Backofen, Rolf; Borrmann, Hans-Gunther; Deck, Werner; Dedner, Andreas; De Raedt, Luc; Desch, Klaus; Diesmann, Markus; Geier, Martin; Greiner, Andreas; Hess, Wolfgang R.; Honerkamp, Josef; Jankowski, Stefan; Krossing, Ingo; Liehr, Andreas W.; Karwath, Andreas; Klöfkorn, Robert; Pesché, Raphaël; Potjans, Tobias C.; Röttger, Michael C.; Schmidt-Thieme, Lars; Schneider, Gerhard; Voß, Björn; Wiebelt, Bernd; Wienemann, Peter; Winterer, Volker-Henning
A Bottom-up approach to Grid-Computing at a University: the Black-Forest-Grid Initiative Journal Article
In: Praxis der Informationsverarbeitung und Kommunikation, vol. 29, no. 2, pp. 81-87, 2006.
Abstract | Links | BibTeX | Tags: data mining, HPC
% Journal article (Praxis der Informationsverarbeitung und Kommunikation 29(2), 2006) describing the Black Forest Grid Initiative at the University of Freiburg.
% All authors use the "Last, First" form; the initiative's name in the title is brace-protected against style recasing.
@article{backofen2006,
title = {A Bottom-up approach to Grid-Computing at a University: the {Black-Forest-Grid Initiative}},
author = {Backofen, Rolf and Borrmann, Hans-Gunther and Deck, Werner and Dedner, Andreas and De Raedt, Luc and Desch, Klaus and Diesmann, Markus and Geier, Martin and Greiner, Andreas and Hess, Wolfgang R. and Honerkamp, Josef and Jankowski, Stefan and Krossing, Ingo and Liehr, Andreas W. and Karwath, Andreas and Klöfkorn, Robert and Pesché, Raphaël and Potjans, Tobias C. and Röttger, Michael C. and Schmidt-Thieme, Lars and Schneider, Gerhard and Voß, Björn and Wiebelt, Bernd and Wienemann, Peter and Winterer, Volker-Henning},
url = {http://dx.doi.org/10.1515/PIKO.2006.81},
doi = {10.1515/PIKO.2006.81},
year = {2006},
date = {2006-01-01},
journal = {Praxis der Informationsverarbeitung und Kommunikation},
volume = {29},
number = {2},
pages = {81--87},
abstract = {Recent years have seen a rapid increase in the need for high-performance computing. These demands come from disciplines such as particle physics traditionally relying on High Performance Computing (HPC) but lately also from the various branches of life science that have matured into quantitative disciplines. The classical infrastructure of university computer centres results to be unsuited to cope with the new requirements for a multitude of reasons. Here we discuss the causes of this failure and present a solution developed at the University of Freiburg in a collaborative effort of several faculties. We demonstrate that using state of the art grid computing technology the problem can now be addressed in a bottom-up approach. The organizational, technical, and financial components of our framework, the Black Forest Grid Initiative (BFG) are described and results of its implementation are presented. In the process, a number of new questions have emerged which the next phase of our project needs to address.},
keywords = {data mining, HPC},
pubstate = {published},
tppubtype = {article}
}