xml2csv.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #!/usr/bin/env python3
  2. # call with xml2csv <basename> <path/to/subject>
  3. #
  4. # writes header only for new files
  5. import sys
  6. import shutil
  7. from xml.etree.ElementTree import parse as xmlparse
  8. import csv
  9. from pathlib import Path
  10. from tempfile import (
  11. TemporaryDirectory,
  12. TemporaryFile,
  13. )
  14. import re
  15. # define CSV columns (changing here will re-order)
  16. csv_fieldnames = [
  17. 'SubjectID',
  18. 'NCR', 'ICR', 'IQR', 'TIV', 'GM', 'WM', 'CSF', 'WMH', 'TSA'
  19. ]
  20. # NCR: noise to contrast ratio
  21. # ICR: inhomogeneity to contrast ratio
  22. # IQR: image quality rating
  23. # TIV: total intracranial volume (GM+WM+CSF)
  24. # GM: total gray matter volume
  25. # WM: total white matter volume
  26. # CSF: total cerebral spinal fluid volume
  27. # WMH: total white matter hyperintensities volume
  28. # TSA: total surface area
  29. def val2out(str_float):
  30. """Uniform formating of floating point values for output.
  31. The input does not have to be a float, but can also be a str that
  32. is convertable to float.
  33. """
  34. return '{:.4f}'.format(float(str_float))
  35. def get_basic_catlog(report_dir, sub):
  36. # load report XML
  37. catrep_file = report_dir / 'cat_{}_T1w.xml'.format(sub)
  38. with TemporaryFile() as tf:
  39. tf.write(re.sub(b'item\.\.\.', b'item>...', catrep_file.read_bytes()))
  40. tf.seek(0)
  41. catrep = xmlparse(tf)
  42. catreport = catrep.getroot()
  43. # build CSV record
  44. catlog = {
  45. 'SubjectID': sub,
  46. 'IQR': val2out(catreport.find('qualityratings/IQR').text),
  47. 'NCR': val2out(catreport.find('qualityratings/NCR').text),
  48. 'ICR': val2out(catreport.find('qualityratings/ICR').text),
  49. 'TIV': val2out(catreport.find('subjectmeasures/vol_TIV').text),
  50. 'TSA': val2out(catreport.find('subjectmeasures/surf_TSA').text),
  51. }
  52. # get total and tissue volumes
  53. absTV = catreport.find('subjectmeasures/vol_abs_CGW').text.strip('[]')
  54. for t, tv in zip(('CSF', 'GM', 'WM', 'WMH'), absTV.split()):
  55. if float(tv) > 0:
  56. catlog[t] = val2out(tv)
  57. return catlog
  58. def xml2csv(infile, outfilebase, catlog_templ, data_tag,
  59. additional_extractor=None):
  60. # load surface XML
  61. root_node = xmlparse(infile).getroot()
  62. # iterate over surface atlas found in XML
  63. for child in root_node:
  64. destfile = Path('{}_{}.csv'.format(
  65. outfilebase,
  66. child.tag,
  67. ))
  68. # get ROI names
  69. rois = sorted([
  70. name.text
  71. for name in root_node.findall(child.tag + '/names/item')
  72. ])
  73. # this list will define the output columns
  74. roi_names = list(rois)
  75. need_header = not destfile.is_file()
  76. # use context manager to get automatic cleanup
  77. with destfile.open('a') as catlog_data:
  78. # build CSV record
  79. catlog = catlog_templ.copy()
  80. # get ROI thickness matching succession
  81. ROIvol = root_node.find(
  82. child.tag + '/data/' + data_tag).text.strip('[]')
  83. for id, vol in zip(roi_names, ROIvol.split(';')):
  84. catlog[id] = val2out(vol)
  85. if additional_extractor:
  86. additional_extractor(
  87. root_node, child.tag, rois, catlog, roi_names)
  88. writer = csv.DictWriter(
  89. catlog_data,
  90. fieldnames=csv_fieldnames + roi_names
  91. )
  92. # if there was no CSV, write the header
  93. if need_header:
  94. writer.writeheader()
  95. # write CSV row
  96. writer.writerow(catlog)
  97. def add_WM_CSF(root_node, tag, rois, catlog, roi_names):
  98. # if atlas has WM volume, add at the end
  99. if root_node.findtext(tag + '/data/Vwm'):
  100. roi_namesWM = [name + '_WM' for name in rois]
  101. ROIwm = root_node.find(tag + '/data/Vwm').text.strip('[]')
  102. for id, vol in zip(roi_namesWM, ROIwm.split(';')):
  103. catlog[id] = val2out(vol)
  104. roi_names.extend(roi_namesWM)
  105. # if atlas has CSF volume, add at the end
  106. if root_node.findtext(tag + '/data/Vcsf'):
  107. roi_namesCSF = [name + '_CSF' for name in rois]
  108. ROIcsf = root_node.find(tag + '/data/Vcsf').text.strip('[]')
  109. for id, vol in zip(roi_namesCSF, ROIcsf.split(';')):
  110. catlog[id] = val2out(vol)
  111. roi_names.extend(roi_namesCSF)
  112. # output base name
  113. base_name = sys.argv[1]
  114. # path to the report
  115. path2data= Path(sys.argv[2])
  116. # extract subject identifier from path
  117. sub = path2data.parts[0]
  118. # load report XML
  119. catlog = get_basic_catlog(path2data / 'report', sub)
  120. # load atlas ROIs volume
  121. xml2csv(
  122. path2data / 'label' / 'catROI_{}_T1w.xml'.format(sub),
  123. '{}_rois'.format(base_name),
  124. catlog,
  125. 'Vgm',
  126. add_WM_CSF,
  127. )
  128. # load surface XML
  129. xml2csv(
  130. path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  131. '{}_thickness'.format(base_name),
  132. catlog,
  133. 'thickness',
  134. )
  135. # xml2csv(
  136. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  137. # '{}_gyrification'.format(base_name),
  138. # catlog,
  139. # 'gyrification',
  140. # )
  141. #
  142. # xml2csv(
  143. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  144. # '{}_toroGI20mm'.format(base_name),
  145. # catlog,
  146. # 'toroGI20mm',
  147. # )
  148. #
  149. # xml2csv(
  150. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  151. # '{}_surfarea'.format(base_name),
  152. # catlog,
  153. # 'area',
  154. # )
  155. #
  156. # xml2csv(
  157. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  158. # '{}_surfgmvol'.format(base_name),
  159. # catlog,
  160. # 'gmv',
  161. # )
  162. #
  163. # xml2csv(
  164. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  165. # '{}_sulcusdepth'.format(base_name),
  166. # catlog,
  167. # 'depth',
  168. # )
  169. #
  170. # xml2csv(
  171. # path2data / 'label' / 'catROIs_{}_T1w.xml'.format(sub),
  172. # '{}_fractaldim'.format(base_name),
  173. # catlog,
  174. # 'fractaldimension',
  175. # )