-
Notifications
You must be signed in to change notification settings - Fork 0
/
validateDICOMRawFiles.m
executable file
·287 lines (239 loc) · 11.2 KB
/
validateDICOMRawFiles.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
function [ success , nRuns , seriesNumbers ] = validateDICOMRawFiles( dataPath , datasetConfigs , subIdx , sesIdx , newRawDataFolder )
%VALIDATEDICOMRAWFILES Validate, copy and re-anonymize the raw DICOM files of one session.
%
%   [success, nRuns, seriesNumbers] = validateDICOMRawFiles(dataPath, ...
%       datasetConfigs, subIdx, sesIdx, newRawDataFolder)
%
%   The function
%     1. lists the *.IMA files in dataPath (falling back to *.dcm when
%        fewer than 5 *.IMA files are found),
%     2. compares the patient name of the first DICOM header with
%        datasetConfigs(subIdx).name,
%     3. derives the series number of every file from its file name and
%        cross-checks it against the DICOM header of the first file of
%        each series (optionally re-reading every header),
%     4. interactively resolves extra / missing series and mismatching
%        volume counts,
%     5. copies the files of each retained series to newRawDataFolder and
%        rewrites them with dicomanon under a new file name.
%
%   Inputs
%     dataPath         - folder containing the raw DICOM files.
%     datasetConfigs   - struct array. Fields read by this function:
%                          datasetConfigs(subIdx).name
%                          datasetConfigs(subIdx).sessions(sesIdx).volumes
%                          datasetConfigs(subIdx).sessions(sesIdx).runtypes
%                          datasetConfigs(subIdx).sessions(sesIdx).runs
%     subIdx           - index of the subject in datasetConfigs.
%     sesIdx           - index of the session of that subject.
%     newRawDataFolder - destination folder for the processed files.
%
%   Outputs
%     success       - true only when the whole procedure completed.
%     nRuns         - number of runs processed (0 when the function stops
%                     before the runs are counted).
%     seriesNumbers - series numbers that were retained ([] when the
%                     function stops before they are determined).
%
%   The function is interactive: it asks for confirmation through INPUT
%   whenever an inconsistency is detected and returns with success = false
%   when the user does not answer 'y'.

% Assign every output up front so that early returns never leave an
% output argument undefined.
success = false;
nRuns = 0;
seriesNumbers = [];

session = datasetConfigs(subIdx).sessions(sesIdx);
subjectName = datasetConfigs(subIdx).name;
expectedVolumes = session.volumes;

% DCMinfo = struct();
% idx_info = 1;

% -------------------------------------------------------------------------
% Find DICOM files in dataPath
% -------------------------------------------------------------------------
D = dir(fullfile(dataPath,'*.IMA'));

% Check if the total number of files is incorrect
if length(D) < sum(expectedVolumes)
    if length(D) > 5
        disp('[createFolderStructure] Fewer files than expected...');
    elseif length(D) < 5
        % Maybe the files are *.dcm!
        D = dir(fullfile(dataPath,'*.dcm'));
    end
end

% Nothing to validate: stop here instead of failing on D(1) below.
if isempty(D)
    disp('[createFolderStructure] No DICOM files (*.IMA or *.dcm) found.');
    return
end

% Extract all file names (column cell array)
files = {D.name}';
nFiles = length(files);

% -------------------------------------------------------------------------
% Check if names on files match
% -------------------------------------------------------------------------
% Retrieve first DICOM file
firstDCMfile = dicominfo(fullfile(D(1).folder,D(1).name));

% Check if the patient given name exists. If it does, the DICOM files were
% not anonymized.
if isfield(firstDCMfile.PatientName,'GivenName')
    % Compare the name given in datasetConfigs with GivenName+FamilyName
    % in the DICOM file.
    if ~strcmpi(subjectName,...
            [firstDCMfile.PatientName.GivenName firstDCMfile.PatientName.FamilyName])
        disp('[createFolderStructure] Check if files correspond to subject!')
        fprintf('Name on DCM files: %s %s\n',...
            firstDCMfile.PatientName.GivenName,firstDCMfile.PatientName.FamilyName);
        fprintf('Name provided: %s\n',subjectName);
        if ~localConfirmYes('[createFolderStructure] Do you wish to proceed anyway (Y/N)?')
            return
        end
        disp('[createFolderStructure] Proceeding...');
    end
else % the GivenName field does not exist
    % When using an anonymization standard, the name in datasetConfigs
    % should match the PatientName.FamilyName field in the DICOM file
    if ~strcmpi(subjectName,firstDCMfile.PatientName.FamilyName)
        disp('[createFolderStructure] Check if files correspond to subject!')
        fprintf('Name on DCM files: %s \n',firstDCMfile.PatientName.FamilyName);
        fprintf('Name provided: %s\n',subjectName);
        if ~localConfirmYes('[createFolderStructure] Do you wish to proceed anyway (Y/N)?')
            return
        end
        disp('[createFolderStructure] Proceeding...');
    end
end

% -------------------------------------------------------------------------
% Extract series
% -------------------------------------------------------------------------
series = zeros(nFiles,1);

% Iterate on the files and search for the series number
% The filenames are formatted as <subjectID>.<MR>.<series>.(...)
seriesSplitIdx = 3;
for ii = 1:nFiles
    auxnamesplit = strsplit(files{ii},'.');
    series(ii) = str2double(auxnamesplit{seriesSplitIdx});
    if isnan(series(ii))
        % or maybe the filenames are formatted as
        % <subjectID>.<MR>.<ICNAS_CRANIO>.<series>.(...)
        seriesSplitIdx = 4;
        series(ii) = str2double(auxnamesplit{seriesSplitIdx});
    end
end

% Find the unique series numbers (seriesIdx points to one file per series)
[seriesNumbers , seriesIdx] = unique(series);

% Confirm the series numbers with the information in the DICOM header of
% one file of each series/run
READ_HEADERS = false;
for ii = 1:length(seriesNumbers)
    file_idx = seriesIdx(ii);
    dcmInfo = dicominfo(fullfile(D(file_idx).folder,D(file_idx).name));
    if dcmInfo.SeriesNumber ~= seriesNumbers(ii)
        fprintf('[createFolderStructure] Series numbers do not match between filename and DICOM header: M1=%i M2=%i \n',...
            seriesNumbers(ii),dcmInfo.SeriesNumber);
        if ~localConfirmYes('[createFolderStructure] Do you wish to read all DICOM headers (a Parallel pool will run this) (Y/N)?')
            return
        end
        READ_HEADERS = true;
        break
    end
end

% If the user chooses to read all DICOM headers, this block will run
if READ_HEADERS
    series = zeros(nFiles,1);
    parfor ii = 1:nFiles
        dcmInfo = dicominfo(fullfile(D(ii).folder,D(ii).name));
        series(ii) = dcmInfo.SeriesNumber;
    end
    % Find the unique series numbers
    seriesNumbers = unique(series);
end

% Retrieve number of files per series (row vector aligned with the sorted
% seriesNumbers). Exact counting avoids any dependence on histogram bins.
seriesVolumes = arrayfun(@(s) sum(series == s), seriesNumbers)';

% -------------------------------------------------------------------------
% Check for incomplete runs or extra runs
% -------------------------------------------------------------------------
nRuns = length(session.runtypes);

% Number of series larger than expected number of runs
if length(seriesNumbers) > nRuns

    % Find series with strange number of volumes (kept as a row vector so
    % it can be concatenated with the user input below)
    ignoreS = reshape(seriesNumbers(~ismember(seriesVolumes,expectedVolumes)),1,[]);

    % More than one anatomical
    % This is assessed using the number of volumes of the first run
    % (anatomical).
    anatMask = (seriesVolumes == expectedVolumes(1));
    if sum(anatMask) > 1
        anatSeries = reshape(seriesNumbers(anatMask),1,[]);
        disp(['[createFolderStructure] More than one run of anatomical data detected: ' num2str(anatSeries)])
        while true
            chosen = localParseSeriesList(input('Please input the ones to ignore [<series numbers>]: ','s'));
            % Every entered number must be one of the candidates; an
            % empty answer (ignore none) is accepted.
            if ~all(ismember(chosen,anatSeries))
                disp('!---> ERROR: Incorrect series number.');
            else
                ignoreS = [ chosen ignoreS ];
                break
            end
        end
    end

    disp(['[createFolderStructure] Ignoring files with series number of ' num2str(ignoreS)]);

    % Drop the ignored files; files and series are filtered together so
    % that they stay index-aligned.
    keepFile = ~ismember(series,ignoreS);
    files = files(keepFile);
    series = series(keepFile);

    idx_to_delete = ismember(seriesNumbers,ignoreS);
    seriesNumbers(idx_to_delete) = [];
    seriesVolumes(idx_to_delete) = [];

    % Still more series than expected
    if length(seriesNumbers) > nRuns
        nExtra = length(seriesNumbers) - nRuns;
        disp(['[createFolderStructure] ' num2str(nExtra) ' extra series remain.']);
        while true
            disp(['[createFolderStructure] Current series: ' mat2str(seriesNumbers) '.'])
            chosen = localParseSeriesList(input('Please input the ones to ignore [<series numbers>]: ','s'));
            if length(chosen) > nExtra
                disp(['!---> ERROR: Too many series to delete. Choose only ' num2str(nExtra) ]);
            elseif ~all(ismember(chosen,seriesNumbers))
                disp('!---> ERROR: Incorrect series number.');
            else
                ignoreS = chosen;
                break
            end
        end

        disp(['[createFolderStructure] Ignoring files with series number of ' num2str(ignoreS)]);

        keepFile = ~ismember(series,ignoreS);
        files = files(keepFile); %#ok<NASGU> kept aligned with series
        series = series(keepFile); %#ok<NASGU>

        idx_to_delete = ismember(seriesNumbers,ignoreS);
        seriesNumbers(idx_to_delete) = [];
        seriesVolumes(idx_to_delete) = [];
    end

% Number of series smaller than expected number of runs
elseif length(seriesNumbers) < nRuns
    disp('[createFolderStructure] !---> ERROR: Unsufficient data.')
    if ~localConfirmYes('[createFolderStructure] Do you wish to proceed anyway (Y/N)?')
        return
    end
    % Only the available series will be processed
    nRuns = length(seriesNumbers);
end

% -------------------------------------------------------------------------
% Check for incorrect number of volumes in all runs
% -------------------------------------------------------------------------
% isequal on column vectors also covers the case where the number of
% series differs from the number of configured runs.
if ~isequal(expectedVolumes(:),seriesVolumes(:))
    disp('[createFolderStructure] Run volumes do not match the expected:');
    disp(['Expected: ' num2str(expectedVolumes)])
    disp(['Input: ' num2str(seriesVolumes)]);
    if ~localConfirmYes('[createFolderStructure] Do you wish to proceed anyway (Y/N)?')
        return
    end
end

% -------------------------------------------------------------------------
% Iterate on the runs
% -------------------------------------------------------------------------
runNames = session.runs;

% File name prefix preceding the series token. It is built from the name
% tokens of the last file parsed above: two tokens when the series is the
% third token, three tokens when it is the fourth.
namePrefix = strjoin(auxnamesplit(1:seriesSplitIdx-1),'.');

parfor rr = 1:nRuns

    % Copy DICOM files of the series/run
    fprintf('Copying %s files...\n',runNames{rr});
    search_name = [namePrefix '.' num2str(seriesNumbers(rr),'%.4i') '*'];
    copyfile( fullfile(dataPath,search_name) , newRawDataFolder );

    % List the copied files
    auxdir = dir(fullfile(newRawDataFolder,search_name));

    % Extract important header information
    % if strcmp(datasetConfigs(subIdx).sessions(sesIdx).runtypes{rr},'func')
    %     dcmHeader = dicominfo(fullfile(auxdir(1).folder,auxdir(1).name));
    %     DCMinfo(idx_info).sliceTimes = dcmHeader.Private_0019_1029;
    %     DCMinfo(idx_info).sliceNumber = length(DCMinfo(idx_info).sliceTimes);
    %     [~,DCMinfo(idx_info).sliceVector] = sort(DCMinfo(idx_info).sliceTimes);
    %     DCMinfo(idx_info).TR = dcmHeader.RepetitionTime / 1000;
    %     DCMinfo(idx_info).TA = DCMinfo(idx_info).TR-(DCMinfo(idx_info).TR/DCMinfo(idx_info).sliceNumber);
    %     DCMinfo(idx_info).RefSlice = DCMinfo(idx_info).sliceVector(1);
    %     DCMinfo(idx_info).EchoTime = dcmHeader.EchoTime / 1000;
    %
    %     idx_info = idx_info + 1;
    % end

    % Re-anonymize (necessary due to inconsistent series info on the header)
    disp('Re-anonimizing...')
    values = struct();
    % NOTE(review): a new StudyInstanceUID is generated for every run, so
    % runs of the same session end up with different study UIDs - confirm
    % this is intended.
    values.StudyInstanceUID = dicomuid;
    values.SeriesInstanceUID = dicomuid;
    values.PatientName = subjectName;

    for p = 1:numel(auxdir)
        copiedFile = fullfile(auxdir(p).folder,auxdir(p).name);
        dicomanon(copiedFile, ...
            fullfile(newRawDataFolder, sprintf('%s-%04d-%s-%04d.dcm', subjectName, seriesNumbers(rr), runNames{rr}, p)) , ...
            'update', values, ...
            'WritePrivate',true);
        % Remove the copy with the original name; only the renamed,
        % re-anonymized file is kept.
        delete(copiedFile);
    end

end

% save(fullfile(subFolder,'DCMinfo.mat'),'DCMinfo');
success = true;
% disp('[createFolderStructure] Folder structure creation completed.')

end

function tf = localConfirmYes( promptText )
%LOCALCONFIRMYES Show promptText and return true only for the answer 'y' / 'Y'.
answer = input(promptText,'s');
tf = strcmpi(answer,'y');
end

function numbers = localParseSeriesList( txt )
%LOCALPARSESERIESLIST Convert typed text into a row vector of series numbers.
% NOTE(review): str2num evaluates its input with eval. That is tolerable
% for text typed by the operator at the console, but it must not be fed
% with text coming from files or other external sources.
numbers = reshape(str2num(txt),1,[]); %#ok<ST2NM>
end