"""Image cohort
The Cohort class is a collection of Patient objects.
Typical example usage:
cohort = Cohort('input')
"""
from datetime import datetime, date, time
from sortedcontainers import SortedDict
import logging
import argparse
from pathlib import Path
from .series import Series
from .readdata import read as r_read
logger = logging.getLogger(__name__)
def _get_attribute(_data, _attr):
# Get attribute from first instance in _data
if len(_data) < 1:
raise IndexError('No instance in present {}.'.format(type(_data)))
_instance = list(_data.values())[0]
if issubclass(type(_instance), Series):
return _instance.getDicomAttribute(_attr)
# Recurse to lower level, until finding a Series instance
return _get_attribute(_instance, _attr)
def _sort_in_series(_data, _opts):
"""Sort series into a dict of Series
Called by Cohort for url data
Called by Patient, Study ?
Args:
_data: url (str) or dict of Series instances
_opts
Returns:
dict of Series instances, key is SeriesInstanceUID
"""
_series_dict = {}
if issubclass(type(_data), list):
if len(_data):
if issubclass(type(_data[0]), Series):
# _data is list of Series
for _series in _data:
_series_dict[_series.seriesInstanceUID] = _series
else:
raise ValueError(
'Unexpected series data type {}'.format(
type(_data[0])))
elif issubclass(type(_data), dict):
if len(_data):
if issubclass(type(list(_data.values())[0]), Series):
# _data is list of Series
for _seriesInstanceUID in _data:
_series_dict[_seriesInstanceUID] = _data[_seriesInstanceUID]
else:
raise ValueError(
'Unexpected series data type {}'.format(
type(list(_data.values())[0])))
elif issubclass(type(_data), Study):
raise ValueError('Why here')
# return {_data.seriesInstanceUID: _data}
elif issubclass(type(_data), str) or issubclass(type(_data), Path):
# _data is URL
# Read input, hdr is dict of attributes
_opts['separate_series'] = True
_hdr, _si = r_read(_data, order='auto', opts=_opts)
for _uid in _hdr:
_series = Series(_si[_uid])
_series.header = _hdr[_uid]
_series_dict[_series.seriesInstanceUID] = _series
else:
raise ValueError('Unexpected series type {}'.format(type(_data)))
return _series_dict
def _sort_in_studies(_series_dict, _opts):
"""Sort series in studies
Called by Cohort for series dict
Args:
_series_dict: dict of Series instances
_opts:
Returns:
dict of Study instances, key is StudyInstanceUID
"""
_studies = {}
for _seriesInstanceUID in _series_dict:
_series = _series_dict[_seriesInstanceUID]
_studyInstanceUID = _series.studyInstanceUID
if _studyInstanceUID not in _studies:
_studies[_studyInstanceUID] = {}
_studies[_studyInstanceUID][_seriesInstanceUID] = _series
# Make found _studies into Study instances
_study_dict = {}
for _studyInstanceUID in _studies:
_study_dict[_studyInstanceUID] = Study(_studies[_studyInstanceUID], opts=_opts)
return _study_dict
# if issubclass(type(_data), dict):
# # _data is list of Study
# for _seriesInstanceUID in _data:
# _series = _data[_seriesInstanceUID]
# if _series.studyInstanceUID not in _studies:
# _studies[_series.studyInstanceUID] = {}
# _studies[_series.studyInstanceUID][_seriesInstanceUID] = _series
# return _studies
# raise ValueError('Why here?')
def _sort_in_patients(_study_dict, _opts):
"""Sort studies in patients
Called by Cohort for study dict
Args:
_study_dict: dict of Study instances
_opts:
Returns:
dict of Patient instances
"""
_patients = {}
for _studyInstanceUID in _study_dict:
_study = _study_dict[_studyInstanceUID]
_series = list(_study.values())[0]
_patientID = _series.patientID
if _patientID not in _patients:
_patients[_patientID] = {}
_study = Study(_study_dict[_studyInstanceUID])
_patients[_patientID][_studyInstanceUID] = _study
# Make found _patients into Patient instances
_patient_dict = {}
for _patientID in _patients:
# _patient_dict[_patientID] = _patients[_patientID]
_patient_dict[_patientID] = Patient(_patients[_patientID], opts=_opts)
return _patient_dict
# return _patients
[docs]
class UnknownOptionType(Exception):
pass
[docs]
class GeneralEquipment(object):
_equipment_attributes = [
'manufacturer', 'manufacturersModelName', 'stationName', 'deviceSerialNumber',
'softwareVersions'
]
def __init__(self, obj):
super(GeneralEquipment, self).__init__()
for _attr in self._equipment_attributes:
_dicom_attribute = _attr[0].upper() + _attr[1:]
setattr(self, _attr, obj.getDicomAttribute(_dicom_attribute))
[docs]
class ClinicalTrialSubject(object):
_clinical_trial_attributes = [
'sponsorName',
'protocolID',
'protocolName',
'siteID',
'siteName',
'subjectID',
'subjectReadingID',
'protocolEthicsCommitteeName',
'protocolEthicsCommitteeApprovalNumber'
]
def __init__(self, obj):
super(ClinicalTrialSubject, self).__init__()
for _attr in self._clinical_trial_attributes:
_dicom_attribute = 'ClinicalTrial' + _attr[0].upper() + _attr[1:]
setattr(self, _attr, _get_attribute(obj, _dicom_attribute))
[docs]
class Study(SortedDict):
"""Study -- Read and sort images into a collection of Series objects.
Study(data, opts=None)
Examples:
>>> from imagedata import Study
>>> study = Study('directory/')
>>> for uid in study:
>>> series = study[uid]
Args:
data: URL to input data, or list of Series instances
opts: Dict of input options, mostly for format specific plugins
(argparse.Namespace or dict)
* 'strict_values': Whether study attributes should match in each series (bool)
Returns:
Study instance
"""
name = "Study"
description = "Image study"
authors = "Erling Andersen"
version = "1.0.0"
url = "www.helse-bergen.no"
_attributes = [
'studyDate', 'studyTime', 'studyDescription', 'studyID', 'studyInstanceUID',
'referringPhysiciansName'
]
def __init__(self, data, opts=None):
super(Study, self).__init__()
for _attr in self._attributes:
setattr(self, _attr, None)
if opts is None:
_in_opts = {}
elif issubclass(type(opts), dict):
_in_opts = opts
elif issubclass(type(opts), argparse.Namespace):
_in_opts = vars(opts)
else:
raise UnknownOptionType('Unknown opts type ({}): {}'.format(type(opts),
opts))
_strict_values = True if 'strict_values' not in _in_opts \
else _in_opts['strict_values']
if issubclass(type(data), dict):
_series_dict = data
elif issubclass(type(data), Study):
raise ValueError("Why here?")
else:
# Assume data is URL
try:
_series_dict = _sort_in_series(data, _in_opts)
except Exception as e:
print(e)
print('data: {}'.format(type(data)))
raise
for _seriesInstanceUID in _series_dict:
_series = _series_dict[_seriesInstanceUID]
self[_series.seriesInstanceUID] = _series
for _attr in self._attributes:
_dicom_attribute = _attr[0].upper() + _attr[1:]
_value = self[_seriesInstanceUID].getDicomAttribute(_dicom_attribute)
if _value is not None and _attr == 'studyDate':
try:
_value = datetime.strptime(_value, "%Y%m%d")
except ValueError:
_value = None
elif _value is not None and _attr == 'studyTime':
try:
if '.' in _value:
_value = datetime.strptime(_value, "%H%M%S.%f").time()
else:
_value = datetime.strptime(_value, "%H%M%S").time()
except ValueError:
_value = datetime.time(_value)
# Update self property if None from series
if getattr(self, _attr, None) is None:
# _series = self[_seriesInstanceUID]
setattr(self, _attr, _value)
elif _strict_values and \
getattr(self, _attr, None) != _value:
# Study attributes differ, should be considered an exception.
raise ValueError('Study attribute "{}" differ ("{}" vs. "{}")'.format(
_attr,
getattr(self, _attr, None),
_value
))
setattr(self, 'generalEquipment', GeneralEquipment(self[_seriesInstanceUID]))
def __str__(self):
_date = self.studyDate if self.studyDate is not None else date.min
_time = self.studyTime if self.studyTime is not None else time.min
_descr = self.studyDescription if self.studyDescription is not None else ''
return \
"Study: {} {}".format(datetime.combine(_date, _time), _descr)
[docs]
def write(self, url, opts=None, formats=None):
"""Write image data, calling appropriate format plugins
Args:
self: Study instance
url: output destination url
opts: Output options (argparse.Namespace or dict)
formats: list of output formats, overriding opts.output_format (list or str)
Raises:
imagedata.collections.UnknownOptionType: When opts cannot be made into a dict.
TypeError: List of output format is not list().
ValueError: Wrong number of destinations given,
or no way to write multidimensional image.
imagedata.formats.WriteNotImplemented: Cannot write this image format.
"""
_used_urls = []
for _seriesInstanceUID in self.keys():
_series = self[_seriesInstanceUID]
try:
series_str = '{}-{}'.format(_series.seriesNumber, _series.seriesDescription)
except (TypeError, ValueError):
series_str = _seriesInstanceUID
_url = "{}/{}".format(
url,
series_str
)
while _url in _used_urls:
_url = _url + "0" # Make unique output file
_used_urls.append(_url)
try:
_series.write(_url, opts=opts, formats=formats)
except Exception as e:
raise Exception(_url) from e
[docs]
class Patient(SortedDict):
"""Patient -- Read and sort images into a collection of Study objects.
Patient(data, opts=None)
Examples:
>>> from imagedata import Patient
>>> patient = Patient('directory/')
>>> for uid in patient:
>>> study = patient[uid]
Args:
data: URL to input data, or list of Study instances
opts: Dict of input options, mostly for format specific plugins
(argparse.Namespace or dict)
* 'strict_values': Whether attributes should match in each study (bool)
Returns:
Patient instance
"""
name = "Patient"
description = "Image patient"
authors = "Erling Andersen"
version = "1.0.0"
url = "www.helse-bergen.no"
_attributes = ['patientName', 'patientID', 'patientBirthDate', 'patientSex',
'patientAge', 'patientSize', 'patientWeight', 'qualityControlSubject',
'patientIdentityRemoved', 'deidentificationMethod'
]
def __init__(self, data, opts=None):
super(Patient, self).__init__()
for _attr in self._attributes:
setattr(self, _attr, None)
if opts is None:
_in_opts = {}
elif issubclass(type(opts), dict):
_in_opts = opts
elif issubclass(type(opts), argparse.Namespace):
_in_opts = vars(opts)
else:
raise UnknownOptionType('Unknown opts type ({}): {}'.format(type(opts),
opts))
_strict_values = True if 'strict_values' not in _in_opts \
else _in_opts['strict_values']
if issubclass(type(data), list) and len(data) and issubclass(type(data[0]), Study):
# Add each study to self patient
self._add_studies(data)
elif issubclass(type(data), dict) and issubclass(type(list(data.values())[0]), Study):
self._add_studies(data)
# _study_dict = data
# for _studyInstanceUID in data:
# self[_studyInstanceUID] = _study_dict[_studyInstanceUID]
else:
_series_dict = _sort_in_series(data, _in_opts)
_study_dict = _sort_in_studies(_series_dict, _in_opts)
# Add each study to self patient
for _studyInstanceUID in _study_dict:
# self[_studyInstanceUID] = Study(_study_dict[_studyInstanceUID])
self[_studyInstanceUID] = _study_dict[_studyInstanceUID]
for _studyInstanceUID in self.keys():
for _attr in self._attributes:
_dicom_attribute = _attr[0].upper() + _attr[1:]
_value = _get_attribute(self[_studyInstanceUID], _dicom_attribute)
if _value is not None and (_attr == 'patientSize' or _attr == 'patientWeight'):
try:
_value = float(_value)
except ValueError:
_value = None
# Update self property if None from study
if getattr(self, _attr, None) is None:
setattr(self, _attr, _value)
elif _strict_values and \
getattr(self, _attr, None) != _value:
# Patient attributes differ, should be considered an exception.
raise ValueError('Patient attribute "{}" differ ("{}" vs. "{}")'.format(
_attr,
getattr(self, _attr, None),
_value
))
setattr(self, 'clinicalTrialSubject', ClinicalTrialSubject(self[_studyInstanceUID]))
def _add_studies(self, data):
"""Add each study to self patient
Args:
data: dict of Study instances
Returns:
self: added Study instances to self
"""
if issubclass(type(data), dict):
for _studyInstanceUID in data:
if issubclass(type(data[_studyInstanceUID]), Study):
self[_studyInstanceUID] = data[_studyInstanceUID]
else:
raise ValueError(
'Unexpected patient data type {}'.format(
type(data[_studyInstanceUID])
))
elif issubclass(type(data), list):
for _study in data:
if issubclass(type(_study), Study):
self[_study.studyInstanceUID] = _study
else:
raise ValueError(
'Unexpected study data type {}'.format(
type(_study)
))
else:
raise ValueError('Unexpected data type {} (expected: dict or list)'.format(type(data)))
def __str__(self):
try:
_id = self.patientID
except ValueError:
_id = ''
try:
_patientName = self.patientName
except ValueError:
_patientName = ''
return \
"Patient: [{}] {}".format(_id, _patientName)
[docs]
def write(self, url, opts=None, formats=None):
"""Write image data, calling appropriate format plugins
Args:
self: Patient instance
url: output destination url
opts: Output options (argparse.Namespace or dict)
formats: list of output formats, overriding opts.output_format (list or str)
Raises:
imagedata.collections.UnknownOptionType: When opts cannot be made into a dict.
TypeError: List of output format is not list().
ValueError: Wrong number of destinations given,
or no way to write multidimensional image.
imagedata.formats.WriteNotImplemented: Cannot write this image format.
"""
_used_urls = []
for _studyInstanceUID in self.keys():
_study = self[_studyInstanceUID]
try:
study_str = datetime.combine(
_study.studyDate, _study.studyTime
).strftime('%Y%m%d-%H%M%S')
except (TypeError, ValueError):
study_str = _studyInstanceUID
for _seriesInstanceUID in _study:
_series = _study[_seriesInstanceUID]
try:
series_str = '{}-{}'.format(_series.seriesNumber, _series.seriesDescription)
except (TypeError, ValueError):
series_str = _seriesInstanceUID
_url = "{}/{}/{}".format(
url,
study_str,
series_str
)
while _url in _used_urls:
_url = _url + "0" # Make unique output file
_used_urls.append(_url)
try:
_series.write(_url, opts=opts, formats=formats)
except Exception as e:
raise Exception(_url) from e
[docs]
class Cohort(SortedDict):
"""Cohort -- Read and sort images into a collection of Patient objects.
Cohort(data, opts=None)
Examples:
>>> from imagedata import Cohort
>>> cohort = Cohort('directory/')
>>> for id in cohort:
>>> patient = cohort[id]
Args:
data: URL to input data, or list of Patient instances
opts: Dict of input options, mostly for format specific plugins
(argparse.Namespace or dict)
* 'strict_values': Whether attributes should match in each study (bool)
Returns:
Cohort instance
"""
name = "Cohort"
description = "Image cohort"
authors = "Erling Andersen"
version = "1.0.0"
url = "www.helse-bergen.no"
_attributes = []
def __init__(self, data, opts=None):
super(Cohort, self).__init__()
self.data = data
for _attr in self._attributes:
setattr(self, _attr, None)
if opts is None:
_in_opts = {}
elif issubclass(type(opts), dict):
_in_opts = opts
elif issubclass(type(opts), argparse.Namespace):
_in_opts = vars(opts)
else:
raise UnknownOptionType('Unknown opts type ({}): {}'.format(type(opts),
opts))
_strict_values = True if 'strict_values' not in _in_opts \
else _in_opts['strict_values']
if issubclass(type(data), list) and len(data) and issubclass(type(data[0]), Patient):
# Add each patient to self cohort
self._add_patients(data, _in_opts)
elif issubclass(type(data), str) or issubclass(type(data), Path):
# Read input data from url
_series_dict = _sort_in_series(data, _in_opts)
_study_dict = _sort_in_studies(_series_dict, _in_opts)
_patient_dict = _sort_in_patients(_study_dict, _in_opts)
# Add each patient to self cohort
for _patientID in _patient_dict:
# self[_patientID] = Patient(_patient_dict[_patientID])
self[_patientID] = _patient_dict[_patientID]
else:
raise ValueError('Unexpected cohort data type {}'.format(type(data)))
for _patientID in self.keys():
for _attr in self._attributes:
_dicom_attribute = _attr[0].upper() + _attr[1:]
# Update self property if None from study
if getattr(self, _attr, None) is None:
setattr(self, _attr,
_get_attribute(self[_patientID], _dicom_attribute))
elif _strict_values and\
getattr(self, _attr, None) != \
_get_attribute(self[_patientID], _dicom_attribute):
# Patient attributes differ, should be considered an exception.
raise ValueError('Cohort attribute "{}" differ ("{}" vs. "{}")'.format(
_attr,
getattr(self, _attr, None),
_get_attribute(self[_patientID], _dicom_attribute)
))
def __str__(self):
return \
"Cohort: {}".format(self.data)
def _add_patients(self, data, opts=None):
"""Add each patient to self cohort
Args:
data: dict of Patient instances
opts:
Returns:
self: added Patient instances to self
"""
if issubclass(type(data), dict):
for _patientID in data:
if issubclass(type(data[_patientID], Patient)):
self[_patientID] = data[_patientID]
else:
raise ValueError(
'Unexpected patient data type {}'.format(
type(data[_patientID])
))
else:
raise ValueError('Unexpected data type {} (expected: dict)'.format(type(data)))
[docs]
def write(self, url, opts=None, formats=None):
"""Write image data, calling appropriate format plugins
Args:
self: Cohort instance
url: output destination url
opts: Output options (argparse.Namespace or dict)
formats: list of output formats, overriding opts.output_format (list or str)
Raises:
imagedata.collections.UnknownOptionType: When opts cannot be made into a dict.
TypeError: List of output format is not list().
ValueError: Wrong number of destinations given,
or no way to write multidimensional image.
imagedata.formats.WriteNotImplemented: Cannot write this image format.
"""
_used_urls = []
for _patientID in self.keys():
_patient = self[_patientID]
for _studyInstanceUID in _patient:
_study = _patient[_studyInstanceUID]
try:
study_str = datetime.combine(
_study.studyDate, _study.studyTime
).strftime('%Y%m%d-%H%M%S')
except (TypeError, ValueError):
study_str = _studyInstanceUID
for _seriesInstanceUID in _study:
_series = _study[_seriesInstanceUID]
try:
series_str = '{}-{}'.format(
_series.seriesNumber, _series.seriesDescription
)
except (TypeError, ValueError):
series_str = _seriesInstanceUID
_url = "{}/{}/{}/{}".format(
url,
_patientID,
study_str,
series_str
)
while _url in _used_urls:
_url = _url + "0" # Make unique output file
_used_urls.append(_url)
try:
_series.write(_url, opts=opts, formats=formats)
except Exception as e:
raise Exception(_url) from e