Journal article describing an eight-step data harmonization process, applied
to the Lung Cancer Phenotype Database of the German Center for Lung Research.
The month field is intentionally omitted: the export carried the year there
and the actual publication month is not recorded in this file.

@article{1145,
  author   = {Firnkorn, D and Ganzinger, M and Muley, T and Thomas, M and Knaup, P},
  title    = {A Generic Data Harmonization Process for Cross-linked Research and Network Interaction. Construction and Application for the {Lung Cancer Phenotype Database} of the {German Center for Lung Research}},
  journal  = {Methods Inf Med},
  volume   = {54},
  pages    = {455--460},
  year     = {2015},
  issn     = {0026-1270},
  doi      = {10.3414/ME14-02-0030},
  abstract = {

OBJECTIVE: Joint data analysis is a key requirement in medical research networks. Data are available in heterogeneous formats at each network partner and their harmonization is often rather complex. The objective of our paper is to provide a generic approach for the harmonization process in research networks. We applied the process when harmonizing data from three sites for the Lung Cancer Phenotype Database within the German Center for Lung Research.

METHODS: We developed a spreadsheet-based solution as tool to support the harmonization process for lung cancer data and a data integration procedure based on Talend Open Studio.

RESULTS: The harmonization process consists of eight steps describing a systematic approach for defining and reviewing source data elements and standardizing common data elements. The steps for defining common data elements and harmonizing them with local data definitions are repeated until consensus is reached. Application of this process for building the phenotype database led to a common basic data set on lung cancer with 285 structured parameters. The Lung Cancer Phenotype Database was realized as an i2b2 research data warehouse.

CONCLUSION: Data harmonization is a challenging task requiring informatics skills as well as domain knowledge. Our approach facilitates data harmonization by providing guidance through a uniform process that can be applied in a wide range of projects.

},
}