From 2044d35566bbfb44576a525050806e0cdd391e65 Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Fri, 5 Jun 2020 10:52:14 +0300 Subject: [PATCH] Add directory check on image save (#1636) * Add directory check on image save * Add tests * change exception type * update test --- cvat/apps/dataset_manager/formats/cvat.py | 5 ++--- datumaro/datumaro/cli/contexts/project/diff.py | 4 ++-- .../datumaro/plugins/datumaro_format/converter.py | 8 +++----- datumaro/datumaro/plugins/image_dir.py | 7 +++++-- datumaro/datumaro/plugins/labelme_format.py | 8 +++----- datumaro/datumaro/plugins/voc_format/converter.py | 8 +++++--- datumaro/datumaro/plugins/yolo_format/converter.py | 2 +- datumaro/datumaro/util/image.py | 14 ++++++++++++-- datumaro/tests/test_image.py | 11 +++++++++++ 9 files changed, 44 insertions(+), 23 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 42e0d6cc..ddd81be2 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -518,7 +518,6 @@ def _export(dst_file, task_data, anno_callback, save_images=False): if save_images: img_dir = osp.join(temp_dir, 'images') - os.makedirs(img_dir) frame_provider = FrameProvider(task_data.db_task.data) frames = frame_provider.get_frames( frame_provider.Quality.ORIGINAL, @@ -527,10 +526,10 @@ def _export(dst_file, task_data, anno_callback, save_images=False): frame_name = task_data.frame_info[frame_id]['path'] if '.' in frame_name: save_image(osp.join(img_dir, frame_name), - frame_data, jpeg_quality=100) + frame_data, jpeg_quality=100, create_dir=True) else: save_image(osp.join(img_dir, frame_name + '.png'), - frame_data) + frame_data, create_dir=True) make_zip_archive(temp_dir, dst_file) diff --git a/datumaro/datumaro/cli/contexts/project/diff.py b/datumaro/datumaro/cli/contexts/project/diff.py index 78fdcd51..06684c03 100644 --- a/datumaro/datumaro/cli/contexts/project/diff.py +++ b/datumaro/datumaro/cli/contexts/project/diff.py @@ -220,10 +220,10 @@ class DiffVisualizer: img = np.hstack([img_a, img_b]) - path = osp.join(self.save_dir, 'diff_%s' % item_a.id) + path = osp.join(self.save_dir, item_a.id) if self.output_format is Format.simple: - save_image(path + '.png', img) + save_image(path + '.png', img, create_dir=True) elif self.output_format is Format.tensorboard: self.save_as_tensorboard(img, path) diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index 1976bb1e..a1b7a1e7 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -231,16 +231,14 @@ class _Converter: subsets = self._extractor.subsets() if len(subsets) == 0: subsets = [ None ] - subsets = [n if n else DEFAULT_SUBSET_NAME for n in subsets] + subsets = [n or DEFAULT_SUBSET_NAME for n in subsets] subsets = { name: _SubsetWriter(name, self) for name in subsets } for subset, writer in subsets.items(): writer.write_categories(self._extractor.categories()) for item in self._extractor: - subset = item.subset - if not subset: - subset = DEFAULT_SUBSET_NAME + subset = item.subset or DEFAULT_SUBSET_NAME writer = subsets[subset] writer.write_item(item) @@ -260,7 +258,7 @@ class _Converter: filename = item.id filename += DatumaroPath.IMAGE_EXT image_path = osp.join(self._images_dir, filename) - save_image(image_path, image) + save_image(image_path, image, create_dir=True) return filename class DatumaroConverter(Converter, CliPlugin): diff --git a/datumaro/datumaro/plugins/image_dir.py b/datumaro/datumaro/plugins/image_dir.py index 5f3a1884..0ba68ee3 100644 --- a/datumaro/datumaro/plugins/image_dir.py +++ b/datumaro/datumaro/plugins/image_dir.py @@ -65,8 +65,10 @@ class ImageDirExtractor(SourceExtractor): return self._items[item_id] def _is_image(self, path): + if not osp.isfile(path): + return False for ext in self._SUPPORTED_FORMATS: - if osp.isfile(path) and path.endswith(ext): + if path.endswith(ext): return True return False @@ -83,4 +85,5 @@ class ImageDirConverter(Converter): else: filename = item.id filename += '.jpg' - save_image(osp.join(save_dir, filename), item.image.data) \ No newline at end of file + save_image(osp.join(save_dir, filename), item.image.data, + create_dir=True) \ No newline at end of file diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py index 0607ba9b..96bdf3f0 100644 --- a/datumaro/datumaro/plugins/labelme_format.py +++ b/datumaro/datumaro/plugins/labelme_format.py @@ -54,9 +54,7 @@ class LabelMeExtractor(SourceExtractor): for p in sorted(p for p in os.listdir(path) if p.endswith('.xml')): root = ElementTree.parse(osp.join(path, p)) - image = None image_path = osp.join(path, root.find('filename').text) - image_size = None imagesize_elem = root.find('imagesize') if imagesize_elem is not None: @@ -67,8 +65,8 @@ class LabelMeExtractor(SourceExtractor): annotations = self._parse_annotations(root, path, categories) - items.append(DatasetItem(id=osp.splitext(p)[0], subset=self._subset, - image=image, annotations=annotations)) + items.append(DatasetItem(id=osp.splitext(p)[0], + subset=self._subset, image=image, annotations=annotations)) return items, categories @classmethod @@ -344,7 +342,7 @@ class LabelMeConverter(Converter, CliPlugin): image_filename = item.id image_filename += LabelMePath.IMAGE_EXT save_image(osp.join(subset_dir, image_filename), - item.image.data) + item.image.data, create_dir=True) else: log.debug("Item '%s' has no image" % item.id) diff --git a/datumaro/datumaro/plugins/voc_format/converter.py b/datumaro/datumaro/plugins/voc_format/converter.py index 234b83be..4e4fefd1 100644 --- a/datumaro/datumaro/plugins/voc_format/converter.py +++ b/datumaro/datumaro/plugins/voc_format/converter.py @@ -145,7 +145,7 @@ class _Converter: image_filename = item.id image_filename += VocPath.IMAGE_EXT save_image(osp.join(self._images_dir, image_filename), - item.image.data) + item.image.data, create_dir=True) else: log.debug("Item '%s' has no image" % item.id) @@ -262,7 +262,9 @@ class _Converter: VocTask.detection, VocTask.person_layout, VocTask.action_classification}: - with open(osp.join(self._ann_dir, item.id + '.xml'), 'w') as f: + ann_path = osp.join(self._ann_dir, item.id + '.xml') + os.makedirs(osp.dirname(ann_path), exist_ok=True) + with open(ann_path, 'w') as f: f.write(ET.tostring(root_elem, encoding='unicode', pretty_print=True)) @@ -403,7 +405,7 @@ class _Converter: if colormap is None: colormap = self._categories[AnnotationType.mask].colormap mask = paint_mask(mask, colormap) - save_image(path, mask) + save_image(path, mask, create_dir=True) def save_label_map(self): path = osp.join(self._save_dir, VocPath.LABELMAP_FILE) diff --git a/datumaro/datumaro/plugins/yolo_format/converter.py b/datumaro/datumaro/plugins/yolo_format/converter.py index de30d7d7..a4fe3316 100644 --- a/datumaro/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/datumaro/plugins/yolo_format/converter.py @@ -88,7 +88,7 @@ class YoloConverter(Converter, CliPlugin): item_name = item.id image_name = item_name + '.jpg' save_image(osp.join(subset_dir, image_name), - item.image.data) + item.image.data, create_dir=True) else: log.warning("Item '%s' has no image" % item.id) image_paths[item.id] = osp.join('data', diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index 3a7687a5..47d5fff1 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -5,11 +5,12 @@ # pylint: disable=unused-import +from enum import Enum from io import BytesIO import numpy as np +import os import os.path as osp -from enum import Enum _IMAGE_BACKENDS = Enum('_IMAGE_BACKENDS', ['cv2', 'PIL']) _IMAGE_BACKEND = None try: @@ -45,7 +46,16 @@ def load_image(path): assert image.shape[2] in {3, 4} return image -def save_image(path, image, **kwargs): +def save_image(path, image, create_dir=False, **kwargs): + # NOTE: Check destination path for existence + # OpenCV silently fails if target directory does not exist + dst_dir = osp.dirname(path) + if dst_dir: + if create_dir: + os.makedirs(dst_dir, exist_ok=True) + elif not osp.isdir(dst_dir): + raise FileNotFoundError("Directory does not exist: '%s'" % dst_dir) + if not kwargs: kwargs = {} diff --git a/datumaro/tests/test_image.py b/datumaro/tests/test_image.py index bdb29438..5f4ef81c 100644 --- a/datumaro/tests/test_image.py +++ b/datumaro/tests/test_image.py @@ -51,3 +51,14 @@ class ImageOperationsTest(TestCase): self.assertTrue(np.array_equal(src_image, dst_image), 'save: %s, load: %s' % (save_backend, load_backend)) + + def test_save_image_to_inexistent_dir_raises_error(self): + with self.assertRaises(FileNotFoundError): + image_module.save_image('some/path.jpg', np.ones((5, 4, 3)), + create_dir=False) + + def test_save_image_can_create_dir(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'some', 'path.jpg') + image_module.save_image(path, np.ones((5, 4, 3)), create_dir=True) + self.assertTrue(osp.isfile(path))