@comment{
  Entry 1210: Isoviita et al. (2019), JCO Clin Cancer Inform 3, pp. 1--16.
  Describes CLOBNET, an open-source infrastructure for EHR data integration
  and machine learning analyses.
  Conventions used in this entry:
    - month uses the predefined three-letter macro (unquoted), without the year;
    - pages uses a double hyphen for the range;
    - title carries no trailing period (the bibliography style adds punctuation);
    - accented letters in author names are written as braced LaTeX escapes.
}
@article{1210,
  author   = {Isoviita, Veli-Matti and Salminen, Liina and Azar, Jimmy and Lehtonen, Rainer and Roering, Pia and Carp{\'e}n, Olli and Hietanen, Sakari and Gr{\'e}nman, Seija and Hynninen, Johanna and F{\"a}rkkil{\"a}, Anniina and Hautaniemi, Sampsa},
  title    = {Open Source Infrastructure for Health Care Data Integration and Machine Learning Analyses},
  journal  = {JCO Clin Cancer Inform},
  volume   = {3},
  year     = {2019},
  month    = aug,
  pages    = {1--16},
  issn     = {2473-4276},
  doi      = {10.1200/CCI.18.00132},
  abstract = {

PURPOSE: We have created a cloud-based machine learning system (CLOBNET) that is an open-source, lean infrastructure for electronic health record (EHR) data integration and is capable of extract, transform, and load (ETL) processing. CLOBNET enables comprehensive analysis and visualization of structured EHR data. We demonstrate the utility of CLOBNET by predicting primary therapy outcomes of patients with high-grade serous ovarian cancer (HGSOC) on the basis of EHR data.

MATERIALS AND METHODS: CLOBNET is built using open-source software to make data preprocessing, analysis, and model training user friendly. The source code of CLOBNET is available in GitHub. The HGSOC data set was based on a prospective cohort of 208 patients with HGSOC who were treated at Turku University Hospital, Finland, from 2009 to 2019 for whom comprehensive clinical and EHR data were available.

RESULTS: We trained machine learning (ML) models using clinical data, including a herein developed dissemination score that quantifies the disease burden at the time of diagnosis, to identify patients with progressive disease (PD) or a complete response (CR) on the basis of RECIST (version 1.1). The best performance was achieved with a logistic regression model, which resulted in an area under receiver operating characteristic curve (AUROC) of 0.86, with a specificity of 73\% and a sensitivity of 89\%, when it classified between patients who experienced PD and CR.

CONCLUSION: We have developed an open-source computational infrastructure, CLOBNET, that enables effective and rapid analysis of EHR and other clinical data. Our results demonstrate that CLOBNET allows predictions to be made on the basis of EHR data to address clinically relevant questions.

},
}