From f267f8aed33163bde6cbaa1c16c23d24029019b6 Mon Sep 17 00:00:00 2001 From: Boris Sekachev Date: Tue, 20 Apr 2021 11:45:16 +0300 Subject: [PATCH] Added DICOM conversion script (#3095) * Added DICOM conversion script * Updated changelog * Fixed strip to rstrip * Fixed some detected issues * Removed extra variable, updated README.md --- CHANGELOG.md | 1 + utils/dicom_converter/README.md | 21 +++++ utils/dicom_converter/requirements.txt | 4 + utils/dicom_converter/script.py | 113 +++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 utils/dicom_converter/README.md create mode 100644 utils/dicom_converter/requirements.txt create mode 100644 utils/dicom_converter/script.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ad808be..62042ab3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Documentation on mask annotation () - Hotkeys to switch a label of existing object or to change default label (for objects created with N) () +- A script to convert some kinds of DICOM files to regular images () ### Changed diff --git a/utils/dicom_converter/README.md b/utils/dicom_converter/README.md new file mode 100644 index 00000000..e4c5ed80 --- /dev/null +++ b/utils/dicom_converter/README.md @@ -0,0 +1,21 @@ +# Description + +The script is used to convert some kinds of DICOM data to regular images. +Then you can annotate these images on CVAT and get a segmentation mask. +The conversion script was tested on CT, MT and some multi-frame DICOM data. +Each frame of a multi-frame DICOM file is saved under the same name followed by `_` and a zero-based, zero-padded frame index: `_000`, `_001`, `_002`, etc. + +# Installation + +```bash +python3 -m venv .env +. .env/bin/activate +pip install -r requirements.txt +``` + +# Running + +``` +. 
.env/bin/activate # if not activated +python script.py input_data output_data +``` diff --git a/utils/dicom_converter/requirements.txt b/utils/dicom_converter/requirements.txt new file mode 100644 index 00000000..9ed6b39b --- /dev/null +++ b/utils/dicom_converter/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.20.2 +Pillow==8.2.0 +pydicom==2.1.2 +tqdm==4.60.0 diff --git a/utils/dicom_converter/script.py b/utils/dicom_converter/script.py new file mode 100644 index 00000000..5bfbba3a --- /dev/null +++ b/utils/dicom_converter/script.py @@ -0,0 +1,113 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + + +import os +import argparse +import logging +from glob import glob + +import numpy as np +from tqdm import tqdm +from PIL import Image +from pydicom import dcmread +from pydicom.pixel_data_handlers.util import convert_color_space + + +# Script configuration +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s') +parser = argparse.ArgumentParser(description='The script is used to convert some kinds of DICOM (.dcm) files to regular image files (.png)') +parser.add_argument('input', type=str, help='A root directory with medical data files in DICOM format. The script finds all these files based on their extension') +parser.add_argument('output', type=str, help='Where to save converted files. 
The script repeats internal directories structure of the input root directory') +args = parser.parse_args() + + +class Converter: + def __init__(self, filename): + with dcmread(filename) as ds: + self._pixel_array = ds.pixel_array + self._photometric_interpretation = ds.PhotometricInterpretation + self._min_value = ds.pixel_array.min() + self._max_value = ds.pixel_array.max() + self._depth = ds.BitsStored + + logging.debug('File: {}'.format(filename)) + logging.debug('Photometric interpretation: {}'.format(self._photometric_interpretation)) + logging.debug('Min value: {}'.format(self._min_value)) + logging.debug('Max value: {}'.format(self._max_value)) + logging.debug('Depth: {}'.format(self._depth)) + + try: + self._length = ds["NumberOfFrames"].value + except KeyError: + self._length = 1 + + def __len__(self): + return self._length + + def __iter__(self): + if self._length == 1: + self._pixel_array = np.expand_dims(self._pixel_array, axis=0) + + for pixel_array in self._pixel_array: + # Normalization to an output range 0..255, 0..65535 + pixel_array = pixel_array - self._min_value + pixel_array = pixel_array.astype(int) * (2 ** self._depth - 1) + pixel_array = pixel_array // (self._max_value - self._min_value) + + # In some cases we need to convert colors additionally + if 'YBR' in self._photometric_interpretation: + pixel_array = convert_color_space(pixel_array, self._photometric_interpretation, 'RGB') + + if self._depth == 8: + image = Image.fromarray(pixel_array.astype(np.uint8)) + elif self._depth == 16: + image = Image.fromarray(pixel_array.astype(np.uint16)) + else: + raise Exception('Not supported depth {}'.format(self._depth)) + + yield image + + +def main(root_dir, output_root_dir): + dicom_files = glob(os.path.join(root_dir, '**', '*.dcm'), recursive = True) + if not len(dicom_files): + logging.info('DICOM files are not found under the specified path') + else: + logging.info('Number of found DICOM files: ' + str(len(dicom_files))) + + pbar = 
tqdm(dicom_files) + for input_filename in pbar: + pbar.set_description('Conversion: ' + input_filename) + input_basename = os.path.basename(input_filename) + + output_subpath = os.path.relpath(os.path.dirname(input_filename), root_dir) + output_path = os.path.join(output_root_dir, output_subpath) + output_basename = '{}.png'.format(os.path.splitext(input_basename)[0]) + output_filename = os.path.join(output_path, output_basename) + + if not os.path.exists(output_path): + os.makedirs(output_path) + + try: + iterated_converter = Converter(input_filename) + length = len(iterated_converter) + for i, image in enumerate(iterated_converter): + if length == 1: + image.save(output_filename) + else: + filename_index = str(i).zfill(len(str(length))) + list_output_filename = '{}_{}.png'.format(os.path.splitext(output_filename)[0], filename_index) + image.save(list_output_filename) + except Exception as ex: + logging.error('Error while processing ' + input_filename) + logging.error(ex) + +if __name__ == '__main__': + input_root_path = os.path.abspath(args.input.rstrip(os.sep)) + output_root_path = os.path.abspath(args.output.rstrip(os.sep)) + + logging.info('From: {}'.format(input_root_path)) + logging.info('To: {}'.format(output_root_path)) + main(input_root_path, output_root_path)