From 98a9718e630360932938ce235143c0a7be9b7022 Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Sun, 17 May 2020 08:15:00 +0300 Subject: [PATCH] [Datumaro] Change alignment in mask parsing (#1547) --- CHANGELOG.md | 1 + datumaro/datumaro/components/extractor.py | 18 +++++----- .../datumaro/plugins/voc_format/extractor.py | 3 +- datumaro/tests/test_masks.py | 13 ++++++- datumaro/tests/test_voc_format.py | 35 +++++++++++++++++++ 5 files changed, 57 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3181d404..f341fba1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Task/Job buttons has no "Open in new tab" option () - Delete point context menu option has no shortcut hint () - Fixed issue with unnecessary tag activation in cvat-canvas () +- Fixed an issue with large number of instances in instance mask (https://github.com/opencv/cvat/issues/1539) - Fixed full COCO dataset import error with conflicting labels in keypoints and detection (https://github.com/opencv/cvat/pull/1548) - Fixed COCO keypoints skeleton parsing and saving (https://github.com/opencv/cvat/issues/1539) diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index 461fdd4b..fc35b05c 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -270,7 +270,7 @@ class CompiledMask: if instance_ids is not None: assert len(instance_ids) == len(instance_masks) else: - instance_ids = [1 + i for i in range(len(instance_masks))] + instance_ids = range(1, len(instance_masks) + 1) if instance_labels is not None: assert len(instance_labels) == len(instance_masks) @@ -310,15 +310,13 @@ class CompiledMask: def instance_count(self): return int(self.instance_mask.max()) - def get_instance_labels(self, class_count=None): - if class_count is None: - class_count = np.max(self.class_mask) + 1 - - m = self.class_mask * class_count + self.instance_mask - m = m.astype(int) + def get_instance_labels(self): + class_shift = 16 + m = (self.class_mask.astype(np.uint32) << class_shift) \ + + self.instance_mask.astype(np.uint32) keys = np.unique(m) - instance_labels = {k % class_count: k // class_count - for k in keys if k % class_count != 0 + instance_labels = {k & ((1 << class_shift) - 1): k >> class_shift + for k in keys if k & ((1 << class_shift) - 1) != 0 } return instance_labels @@ -783,4 +781,4 @@ class Transform(Extractor): return self._extractor.categories() def transform_item(self, item): - raise NotImplementedError() \ No newline at end of file + raise NotImplementedError() diff --git a/datumaro/datumaro/plugins/voc_format/extractor.py b/datumaro/datumaro/plugins/voc_format/extractor.py index 96312185..dd340db0 100644 --- a/datumaro/datumaro/plugins/voc_format/extractor.py +++ b/datumaro/datumaro/plugins/voc_format/extractor.py @@ -251,8 +251,7 @@ class VocSegmentationExtractor(_VocExtractor): if class_mask is not None: label_cat = self._categories[AnnotationType.label] - instance_labels = compiled_mask.get_instance_labels( - class_count=len(label_cat.items)) + instance_labels = compiled_mask.get_instance_labels() else: instance_labels = {i: None for i in range(compiled_mask.instance_count)} diff --git a/datumaro/tests/test_masks.py b/datumaro/tests/test_masks.py index a4f54018..43969660 100644 --- a/datumaro/tests/test_masks.py +++ b/datumaro/tests/test_masks.py @@ -3,6 +3,7 @@ import numpy as np from unittest import TestCase import datumaro.util.mask_tools as mask_tools +from datumaro.components.extractor import CompiledMask class PolygonConversionsTest(TestCase): @@ -183,4 +184,14 @@ class ColormapOperationsTest(TestCase): actual = mask_tools.merge_masks(masks) self.assertTrue(np.array_equal(expected, actual), - '%s\nvs.\n%s' % (expected, actual)) \ No newline at end of file + '%s\nvs.\n%s' % (expected, actual)) + + def test_can_decode_compiled_mask(self): + class_idx = 1000 + instance_idx = 10000 + mask = np.array([1]) + compiled_mask = CompiledMask(mask * class_idx, mask * instance_idx) + + labels = compiled_mask.get_instance_labels() + + self.assertEqual({instance_idx: class_idx}, labels) \ No newline at end of file diff --git a/datumaro/tests/test_voc_format.py b/datumaro/tests/test_voc_format.py index 62f6b314..7bb4d082 100644 --- a/datumaro/tests/test_voc_format.py +++ b/datumaro/tests/test_voc_format.py @@ -444,6 +444,41 @@ class VocConverterTest(TestCase): VocSegmentationConverter(label_map='voc'), test_dir, target_dataset=DstExtractor()) + def test_can_save_voc_segm_with_many_instances(self): + def bit(x, y, shape): + mask = np.zeros(shape) + mask[y, x] = 1 + return mask + + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', annotations=[ + Mask(image=bit(x, y, shape=[10, 10]), + label=self._label(VOC.VocLabel(3).name), + z_order=10 * y + x + 1 + ) + for y in range(10) for x in range(10) + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', annotations=[ + Mask(image=bit(x, y, shape=[10, 10]), + label=self._label(VOC.VocLabel(3).name), + group=10 * y + x + 1 + ) + for y in range(10) for x in range(10) + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + VocSegmentationConverter(label_map='voc'), test_dir, + target_dataset=DstExtractor()) + def test_can_save_voc_layout(self): class TestExtractor(TestExtractorBase): def __iter__(self):