bin/DatasetHierarchyReader.py

# -*- coding: utf-8 -*-
from __future__ import print_function

import collections
import glob
import os

import six


class DatasetHierarchyReader(object):
  """
  Index a dataset stored on disk as ``<dataset>/<patient>/<study>/<subfolder>/<file>``.

  Inside every study directory three sub-folders are looked up by a
  case-insensitive substring match on the folder name: ``reconstructions``,
  ``resources`` and ``segmentations``. Files in the reconstructions and
  segmentations folders are kept only when ``filetype`` occurs in their
  filename; every entry of the resources folder is kept.
  """

  def __init__(self, inputDatasetDirectory, filetype='.nrrd'):
    """
    :param inputDatasetDirectory: root directory holding one sub-directory per patient.
    :param filetype: substring a filename must contain to be listed as an image or label.
    """
    self.inputDatasetDirectory = inputDatasetDirectory
    self.filetype = filetype
    # Filled by ReadDatasetHierarchy(). It is only created here, so entries
    # read by earlier calls stay in place until they are overwritten.
    self.DatabaseHierarchyDict = collections.OrderedDict()

  def setInputDatasetDirectory(self, inputDatasetDirectory):
    """Set the root directory scanned by the next ReadDatasetHierarchy() call."""
    self.inputDatasetDirectory = inputDatasetDirectory

  def setFiletype(self, filetype):
    """Set the filename substring used to select image and label files."""
    self.filetype = filetype

  def ReadDatasetHierarchy(self, create=False):
    """
    Scan the dataset directory and record the files found for every study.

    Only directories are treated as patients, studies and study sub-folders;
    stray files at those levels are skipped. All listings are sorted, so the
    resulting order does not depend on the file system.

    :param create: when True, create a missing Reconstructions / Resources /
      Segmentations folder inside each study directory.
    :return: ``self.DatabaseHierarchyDict``, an ordered mapping
      ``{patientDirectory: {studyDirectory: {"reconstructions": [...],
      "resources": [...], "segmentations": [...]}}}`` of file paths.
    """
    for patientDirectory in self._listSubdirectories(self.inputDatasetDirectory):
      studies = collections.OrderedDict()
      self.DatabaseHierarchyDict[patientDirectory] = studies

      for studyDirectory in self._listSubdirectories(patientDirectory):
        study = collections.OrderedDict()
        studies[studyDirectory] = study

        # Listed once, before any folder is created, and shared by the three readers.
        subfolders = self._listSubdirectories(studyDirectory)

        # Only the file lists are stored; the directory paths are not needed here.
        _, images = self.readReconstructionsDirectory(studyDirectory, subfolders, create=create)
        study["reconstructions"] = images

        _, resources = self.readResourcesDirectory(studyDirectory, subfolders, create=create)
        study["resources"] = resources

        _, labels = self.readSegmentationsDirectory(studyDirectory, subfolders, create=create)
        study["segmentations"] = labels

    return self.DatabaseHierarchyDict

  @staticmethod
  def _listSubdirectories(directory):
    """Return the sorted paths of the directories directly inside ``directory``."""
    return sorted(path for path in glob.glob(os.path.join(directory, '*')) if os.path.isdir(path))

  def _readSubdirectory(self, studyDirectory, subfolders, keyword, defaultName, filterByFiletype, create):
    """
    Shared implementation of the three ``read*Directory`` methods.

    :param studyDirectory: study directory in which a missing folder may be created.
    :param subfolders: candidate folder paths; the first one whose basename
      contains ``keyword`` (case-insensitive) is used.
    :param keyword: lowercase substring identifying the folder.
    :param defaultName: name of the folder created when none matches and ``create`` is True.
    :param filterByFiletype: when True, keep only entries whose filename contains ``self.filetype``.
    :param create: when True, create the folder if no candidate matches.
    :return: ``(directory, contents)``; ``directory`` is the string ``"NONE"``
      when nothing matched and nothing was created, and ``contents`` is then empty.
    """
    matches = [item for item in subfolders if keyword in os.path.basename(item).lower()]
    if matches:
      directory = matches[0]
      contents = sorted(glob.glob(os.path.join(directory, "*")))
      if filterByFiletype:
        contents = [item for item in contents if self.filetype in os.path.basename(item)]
      return directory, contents

    directory = "NONE"
    if create:
      directory = os.path.join(studyDirectory, defaultName)
      if not os.path.exists(directory):
        os.mkdir(directory)
        print("\tCreated:", directory)

    return directory, []

  def readReconstructionsDirectory(self, studyDirectory, subfolders, create=False):
    """
    Locate the reconstructions folder among ``subfolders`` and list its image files.

    :return: ``(directory, images)``; see ``_readSubdirectory`` for the fallback values.
    """
    return self._readSubdirectory(studyDirectory, subfolders, 'reconstructions', "Reconstructions", True, create)

  def readSegmentationsDirectory(self, studyDirectory, subfolders, create=False):
    """
    Locate the segmentations folder among ``subfolders`` and list its label files.

    :return: ``(directory, labels)``; see ``_readSubdirectory`` for the fallback values.
    """
    return self._readSubdirectory(studyDirectory, subfolders, 'segmentations', "Segmentations", True, create)

  def readResourcesDirectory(self, studyDirectory, subfolders, create=False):
    """
    Locate the resources folder among ``subfolders`` and list everything inside it.

    Unlike images and labels, resources are not filtered by ``filetype``.

    :return: ``(directory, resources)``; see ``_readSubdirectory`` for the fallback values.
    """
    return self._readSubdirectory(studyDirectory, subfolders, 'resources', "Resources", False, create)

  def findImageAndLabelPair(self, imageFilepaths, maskFilepaths, keywordSettings):
    """
    Accepts a list of image filepaths, a list of mask/label filepaths, and a
    dict of keyword settings in the form:

    keywordSettings['image'] = ""
    keywordSettings['imageExclusion'] = ""
    keywordSettings['mask'] = ""
    keywordSettings['maskExclusion'] = ""

    where each field is a string of words separated by commas (case and
    surrounding spaces do not matter). A field that is missing or None is
    treated as an empty string, i.e. it imposes no condition.

    The output is the image filepath and mask/label filepath pair that satisfies the keyword
    conditions. If the image or the mask is not matched exactly once, an error
    message is printed and ``(None, None)`` is returned.
    """
    keywords = {}
    for key in ('image', 'imageExclusion', 'mask', 'maskExclusion'):
      rawValue = keywordSettings.get(key) or ''
      keywords[key] = [str(keyword.strip()) for keyword in rawValue.split(',')]

    # Keywords are tested against the filename only, never the directory part.
    matchedImages = [filepath for filepath in imageFilepaths
                     if self.testString(str(os.path.basename(filepath)),
                                        keywords['image'], keywords['imageExclusion'])]
    matchedMasks = [filepath for filepath in maskFilepaths
                    if self.testString(str(os.path.basename(filepath)),
                                       keywords['mask'], keywords['maskExclusion'])]

    if len(matchedImages) < 1:
      print("ERROR: No Images Matched")
    elif len(matchedImages) > 1:
      print("ERROR: Multiple Images Matched")

    if len(matchedMasks) < 1:
      print("ERROR: No Masks Matched")
    elif len(matchedMasks) > 1:
      print("ERROR: Multiple Masks Matched")

    if len(matchedImages) == 1 and len(matchedMasks) == 1:
      return matchedImages[0], matchedMasks[0]
    return None, None

  def testString(self, fileName, inclusionKeywords, exclusionKeywords):
    """
    Check a filename against inclusion and exclusion keywords, ignoring case.

    :param fileName: name to test.
    :param inclusionKeywords: keywords that must all occur in ``fileName``.
    :param exclusionKeywords: keywords none of which may occur in ``fileName``.
    :return: True when both conditions hold. Empty keywords are ignored, so
      two empty keyword lists accept every name.
    """
    fileName = fileName.upper()
    required = [keyword.upper() for keyword in inclusionKeywords if keyword != '']
    forbidden = [keyword.upper() for keyword in exclusionKeywords if keyword != '']

    # all() over an empty list is True and any() over an empty list is False,
    # so absent keyword lists impose no condition.
    return (all(keyword in fileName for keyword in required) and
            not any(keyword in fileName for keyword in forbidden))