forked from AIM-Harvard/pyradiomics
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDatasetHierarchyReader.py
156 lines (123 loc) · 6.2 KB
/
DatasetHierarchyReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding: utf-8 -*-
from __future__ import print_function
import collections
import glob
import os
import six
class DatasetHierarchyReader(object):
    """Index a ``<dataset>/<patient>/<study>/<category>`` directory tree.

    Inside every study directory three kinds of subfolder are looked up by a
    case-insensitive substring match on the folder name: ``reconstructions``,
    ``segmentations`` and ``resources``. The files found are collected in
    ``DatabaseHierarchyDict`` as nested ordered dictionaries keyed by patient
    directory path and then study directory path.
    """

    def __init__(self, inputDatasetDirectory, filetype='.nrrd'):
        """
        :param inputDatasetDirectory: root directory; each subdirectory is
            treated as one patient.
        :param filetype: substring that a file name must contain to be listed
            as a reconstruction or segmentation.
        """
        self.inputDatasetDirectory = inputDatasetDirectory
        self.filetype = filetype
        self.DatabaseHierarchyDict = collections.OrderedDict()

    def setInputDatasetDirectory(self, inputDatasetDirectory):
        """Replace the root directory used by ``ReadDatasetHierarchy``."""
        self.inputDatasetDirectory = inputDatasetDirectory

    def setFiletype(self, filetype):
        """Replace the file name substring used to select images and labels."""
        self.filetype = filetype

    def ReadDatasetHierarchy(self, create=False):
        """
        Walk the dataset directory and (re)fill ``DatabaseHierarchyDict``.

        Only directories are treated as patients and studies, so stray files
        lying next to them are ignored instead of producing empty entries (or,
        with ``create=True``, a failing ``os.mkdir`` underneath a file).
        Entries are visited in sorted order because ``glob`` does not
        guarantee any ordering.

        :param create: when True, a missing "Reconstructions", "Resources" or
            "Segmentations" folder is created inside each study directory.
        :return: ``self.DatabaseHierarchyDict``; every study maps the keys
            "reconstructions", "resources" and "segmentations" to lists of
            file paths.
        """
        for patientDirectory in self._listDirectories(self.inputDatasetDirectory):
            studies = collections.OrderedDict()
            self.DatabaseHierarchyDict[patientDirectory] = studies

            for studyDirectory in self._listDirectories(patientDirectory):
                contents = collections.OrderedDict()
                studies[studyDirectory] = contents
                subfolders = self._listDirectories(studyDirectory)

                _, contents["reconstructions"] = self.readReconstructionsDirectory(
                    studyDirectory, subfolders, create=create)
                _, contents["resources"] = self.readResourcesDirectory(
                    studyDirectory, subfolders, create=create)
                _, contents["segmentations"] = self.readSegmentationsDirectory(
                    studyDirectory, subfolders, create=create)

        return self.DatabaseHierarchyDict

    def readReconstructionsDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the reconstructions folder of a study and list its images.

        :param studyDirectory: study directory (used only when creating).
        :param subfolders: candidate subfolder paths of the study.
        :param create: create ``<studyDirectory>/Reconstructions`` if no
            candidate matches.
        :return: ``(directory, images)``; directory is "NONE" when nothing
            matched and ``create`` is False.
        """
        return self._readCategoryDirectory(
            studyDirectory, subfolders, 'reconstructions', "Reconstructions", create, True)

    def readSegmentationsDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the segmentations folder of a study and list its labels.

        :param studyDirectory: study directory (used only when creating).
        :param subfolders: candidate subfolder paths of the study.
        :param create: create ``<studyDirectory>/Segmentations`` if no
            candidate matches.
        :return: ``(directory, labels)``; directory is "NONE" when nothing
            matched and ``create`` is False.
        """
        return self._readCategoryDirectory(
            studyDirectory, subfolders, 'segmentations', "Segmentations", create, True)

    def readResourcesDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the resources folder of a study and list everything in it.

        Unlike reconstructions and segmentations, resources are not filtered
        by ``self.filetype``.

        :param studyDirectory: study directory (used only when creating).
        :param subfolders: candidate subfolder paths of the study.
        :param create: create ``<studyDirectory>/Resources`` if no candidate
            matches.
        :return: ``(directory, resources)``; directory is "NONE" when nothing
            matched and ``create`` is False.
        """
        return self._readCategoryDirectory(
            studyDirectory, subfolders, 'resources', "Resources", create, False)

    def findImageAndLabelPair(self, imageFilepaths, maskFilepaths, keywordSettings):
        """
        Accepts a list of image filepaths, a list of mask/label filepaths, and a
        dict of keyword settings in the form:

        keywordSettings['image'] = ""
        keywordSettings['imageExclusion'] = ""
        keywordSettings['mask'] = ""
        keywordSettings['maskExclusion'] = ""

        where each field is a string of words separated by commas (case does
        not matter and whitespace around each word is ignored). A field that
        is absent is treated like an empty string.

        The output is the image filepath and mask/label filepath pair that
        satisfies the keyword conditions. If there is not exactly one matching
        image and exactly one matching mask, an error is printed and
        ``(None, None)`` is returned.
        """
        keywords = {field: [str(word.strip()) for word in words.split(',')]
                    for field, words in keywordSettings.items()}

        matchedImages = self._filterByKeywords(
            imageFilepaths, keywords.get('image', []), keywords.get('imageExclusion', []))
        matchedMasks = self._filterByKeywords(
            maskFilepaths, keywords.get('mask', []), keywords.get('maskExclusion', []))

        if len(matchedImages) < 1:
            print("ERROR: No Images Matched")
        elif len(matchedImages) > 1:
            print("ERROR: Multiple Images Matched")

        if len(matchedMasks) < 1:
            print("ERROR: No Masks Matched")
        elif len(matchedMasks) > 1:
            print("ERROR: Multiple Masks Matched")

        if len(matchedImages) == 1 and len(matchedMasks) == 1:
            return matchedImages[0], matchedMasks[0]
        return None, None

    def testString(self, fileName, inclusionKeywords, exclusionKeywords):
        """
        Check a file name against inclusion and exclusion keywords.

        The comparison is case-insensitive and empty keywords are ignored.

        :return: True when the name contains every inclusion keyword and none
            of the exclusion keywords (so True when both lists are empty).
        """
        fileName = fileName.upper()
        required = [keyword.upper() for keyword in inclusionKeywords if keyword != '']
        forbidden = [keyword.upper() for keyword in exclusionKeywords if keyword != '']

        # all() of an empty list is True and any() of an empty list is False,
        # so the "no keywords given" cases need no special handling.
        if not all(keyword in fileName for keyword in required):
            return False
        return not any(keyword in fileName for keyword in forbidden)

    @staticmethod
    def _listDirectories(parentDirectory):
        """Return the sorted paths of the directories matched by ``parentDirectory/*``."""
        return sorted(path for path in glob.glob(os.path.join(parentDirectory, '*'))
                      if os.path.isdir(path))

    def _filterByKeywords(self, filepaths, inclusionKeywords, exclusionKeywords):
        """Return the filepaths whose base name passes ``testString``."""
        return [filepath for filepath in filepaths
                if self.testString(str(os.path.basename(filepath)),
                                   inclusionKeywords, exclusionKeywords)]

    def _readCategoryDirectory(self, studyDirectory, subfolders, keyword, defaultName,
                               create, matchFiletype):
        """
        Shared implementation of the three public ``read...Directory`` methods.

        The first subfolder whose lower-cased base name contains ``keyword``
        is used. Its files are returned sorted, restricted to names containing
        ``self.filetype`` when ``matchFiletype`` is True. Without a match the
        result is ``("NONE", [])``, or, when ``create`` is True, the path
        ``<studyDirectory>/<defaultName>`` (created if it does not exist yet)
        and an empty list.
        """
        candidates = [item for item in subfolders
                      if keyword in os.path.basename(item).lower()]
        if candidates:
            directory = candidates[0]
            files = sorted(glob.glob(os.path.join(directory, "*")))
            if matchFiletype:
                files = [item for item in files if self.filetype in os.path.basename(item)]
            return directory, files

        if not create:
            return "NONE", []

        directory = os.path.join(studyDirectory, defaultName)
        if not os.path.exists(directory):
            os.mkdir(directory)
            print("\tCreated:", directory)
        return directory, []