From fe862b4abc846750870e12890a1b5c9cb92b9f96 Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Fri, 20 Mar 2020 16:37:49 +0300 Subject: [PATCH] [Datumaro] Update LabelMe format (#1296) * Little refactoring * Add LabelMe format * Add usernames * Update tests * Add extractor test --- datumaro/datumaro/components/project.py | 2 + datumaro/datumaro/plugins/labelme_format.py | 35 +++-- .../labelme_dataset/Masks/img1_mask_1.png | Bin 0 -> 211 bytes .../labelme_dataset/Masks/img1_mask_5.png | Bin 0 -> 388 bytes .../Scribbles/img1_scribble_1.png | Bin 0 -> 206 bytes .../Scribbles/img1_scribble_5.png | Bin 0 -> 387 bytes .../tests/assets/labelme_dataset/img1.png | Bin 0 -> 215 bytes .../tests/assets/labelme_dataset/img1.xml | 1 + datumaro/tests/test_labelme_format.py | 144 +++++++++++++++--- 9 files changed, 148 insertions(+), 34 deletions(-) create mode 100644 datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png create mode 100644 datumaro/tests/assets/labelme_dataset/Masks/img1_mask_5.png create mode 100644 datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png create mode 100644 datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png create mode 100644 datumaro/tests/assets/labelme_dataset/img1.png create mode 100644 datumaro/tests/assets/labelme_dataset/img1.xml diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index bcc210e9..4f23639b 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -367,6 +367,8 @@ class Dataset(Extractor): def get(self, item_id, subset=None, path=None): if path: raise KeyError("Requested dataset item path is not found") + if subset is None: + subset = '' return self._subsets[subset].items[item_id] def put(self, item, item_id=None, subset=None, path=None): diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py index 22a07d70..41069da9 100644 --- a/datumaro/datumaro/plugins/labelme_format.py +++ b/datumaro/datumaro/plugins/labelme_format.py @@ -59,7 +59,9 @@ class LabelMeExtractor(SourceExtractor): def _parse(self, path): categories = { - AnnotationType.label: LabelCategories(attributes={'occluded'}) + AnnotationType.label: LabelCategories(attributes={ + 'occluded', 'username' + }) } items = [] @@ -136,10 +138,17 @@ class LabelMeExtractor(SourceExtractor): if deleted_elem is not None and deleted_elem.text: deleted = bool(int(deleted_elem.text)) + user = '' + poly_elem = obj_elem.find('polygon') segm_elem = obj_elem.find('segm') type_elem = obj_elem.find('type') # the only value is 'bounding_box' if poly_elem is not None: + user_elem = poly_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + attributes.append(('username', user)) + points = [] for point_elem in poly_elem.iter('pt'): x = float(point_elem.find('x').text) @@ -153,20 +162,25 @@ class LabelMeExtractor(SourceExtractor): ymin = min(points[1::2]) ymax = max(points[1::2]) ann_items.append(Bbox(xmin, ymin, xmax - xmin, ymax - ymin, - label=label, attributes=attributes, + label=label, attributes=attributes, id=obj_id, )) else: ann_items.append(Polygon(points, - label=label, attributes=attributes, + label=label, attributes=attributes, id=obj_id, )) elif segm_elem is not None: + user_elem = segm_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + attributes.append(('username', user)) + mask_path = osp.join(dataset_root, LabelMePath.MASKS_DIR, segm_elem.find('mask').text) if not osp.isfile(mask_path): raise Exception("Can't find mask at '%s'" % mask_path) mask = load_mask(mask_path) mask = np.any(mask, axis=2) - ann_items.append(Mask(image=mask, label=label, + ann_items.append(Mask(image=mask, label=label, id=obj_id, attributes=attributes)) if not deleted: @@ -368,7 +382,7 @@ class LabelMeConverter(Converter, CliPlugin): ET.SubElement(obj_elem, 'deleted').text = '0' ET.SubElement(obj_elem, 'verified').text = '0' ET.SubElement(obj_elem, 'occluded').text = \ - 'yes' if ann.attributes.get('occluded') == True else 'no' + 'yes' if ann.attributes.pop('occluded', '') == True else 'no' ET.SubElement(obj_elem, 'date').text = '' ET.SubElement(obj_elem, 'id').text = str(obj_id) @@ -390,7 +404,8 @@ class LabelMeConverter(Converter, CliPlugin): ET.SubElement(point_elem, 'x').text = '%.2f' % x ET.SubElement(point_elem, 'y').text = '%.2f' % y - ET.SubElement(poly_elem, 'username').text = '' + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) elif ann.type == AnnotationType.polygon: poly_elem = ET.SubElement(obj_elem, 'polygon') for x, y in zip(ann.points[::2], ann.points[1::2]): @@ -398,7 +413,8 @@ class LabelMeConverter(Converter, CliPlugin): ET.SubElement(point_elem, 'x').text = '%.2f' % x ET.SubElement(point_elem, 'y').text = '%.2f' % y - ET.SubElement(poly_elem, 'username').text = '' + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) elif ann.type == AnnotationType.mask: mask_filename = '%s_mask_%s.png' % (item.id, obj_id) save_image(osp.join(subset_dir, LabelMePath.MASKS_DIR, @@ -416,13 +432,14 @@ class LabelMeConverter(Converter, CliPlugin): '%.2f' % (bbox[0] + bbox[2]) ET.SubElement(box_elem, 'ymax').text = \ '%.2f' % (bbox[1] + bbox[3]) + + ET.SubElement(segm_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) else: raise NotImplementedError("Unknown shape type '%s'" % ann.type) attrs = [] for k, v in ann.attributes.items(): - if k == 'occluded': - continue if isinstance(v, bool): attrs.append(k) else: diff --git a/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png b/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a37c5508f9b63496a536ed20877da884eb88e854 GIT binary patch literal 211 zcmeAS@N?(olHy`uVBq!ia0vp^X+Z4D!3HElwg^rKQjEnx?oJHr&dIz4a#+$GeH|GX zHuiJ>Nn{1`ISV`@iy0XB4ude`@%$AjK*0=87srr_xVI+`@;U?vxL#bycvQnnei5hc zhLCnYowqYjtas39DV{Xjd?rva4$x4&YSVo4ZO12EPC5RyNn{1`ISV`@iy0XB4ude`@%$AjKn;?fE{-7;ac{31>ahgMuzgTe;cE4J%pt_u z%eaGheZXkv$pvXG0a(833^=Pp#(6nQr1-oLt&-|y8g|NHFh znJ}Oo3=9SvHkRpMIhQl-X`0-L#L%_-w->8>>8IJ94m0LY+59@`*zLU`|95_r+pV`W ztjbXS?Z3<3MYCj7s!w`;db;qgwdpGLXEw`|%vau5b^p0{{vEaaldhL`2HF}1SN>PI zKW);^Ct9wu-`AC_`I7W+(~O+ub6QhWKS#aK3Z1*P`ni|>Gpn81$Kqf4zwAvho#`=e z((DwCDRDl3cg)H=womV~#P0m(=cfL%Pcu60WBmQ=oTRwhl3+)p5eX6@SJh`Nn{1`ISV`@iy0XB4ude`@%$AjK*3~B7srr_xVPsI@;W#Ov|W7o>cJYhySe4J z3IdswmoXIz-1GidGkI?=P$>>@z&7^l=WiXF#?RaC%#hE2{yX~XRr?#Ovh#NO0d>q1 ZEM(}&Wm)G^ko^K=fTydU%Q~loCII=|J+=S< literal 0 HcmV?d00001 diff --git a/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png b/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png new file mode 100644 index 0000000000000000000000000000000000000000..415e1f88b2cafe6d77e4e9b4555664a005460d5a GIT binary patch literal 387 zcmeAS@N?(olHy`uVBq!ia0vp^X+Z4D!3HElwg^rKQjEnx?oJHr&dIz4a#+$GeH|GX zHuiJ>Nn{1`ISV`@iy0XB4ude`@%$AjKn)U}E{-7;ac{5bd$Tw)v_G``Kh4?2Vhcyp z(FBnz3QcaV-6u;r*eL8?AX362lWO7;64@S>5uG=90N5U)pT{QaStLiKg{Se$_7x zoV)C;$<$4mq4wXOU-tZbV14${&Y#D8Z+W`E)VaHS`xWmVFFz-9-dU&JOlMaWKP@%- zKJ9X))a>xc?0u&;y@-~)cKP@7BLRPpt+@U`@$NJCze_ATo!l| OB<<8wB`-xB}__|NjF?B0==h_NhRnoCO|{#S9F5he4R}c>anMprC=Li(^Q{ z;kTz33Nk1NFmFg_-t~VIhs?TZ-Zg5r&*$0pKegz4Y_c3gTJ(MX?#b7j^*rxUX47<# N-JY&~F6*2UngCPUH#YzP literal 0 HcmV?d00001 diff --git a/datumaro/tests/assets/labelme_dataset/img1.xml b/datumaro/tests/assets/labelme_dataset/img1.xml new file mode 100644 index 00000000..ff8ae1b4 --- /dev/null +++ b/datumaro/tests/assets/labelme_dataset/img1.xml @@ -0,0 +1 @@ +img1.pngexample_folderThe MIT-CSAIL database of objects and scenesLabelMe Webtoolwindow0025-May-2012 00:09:480admin433445344537433777102license plate00no27-Jul-2014 02:58:501brussell58666268img1_mask_1.png58666268img1_scribble_1.pngo100yesa13,415-Nov-2019 14:38:512anonymous3012422124261522181422122712q100nokj215-Nov-2019 14:39:003anonymous352143224028283131223225b100yeshg215-Nov-2019 14:39:094bounding_boxanonymous1319231923301330m100nod615-Nov-2019 14:39:305bounding_boxanonymous56147023img1_mask_5.png55137023img1_scribble_5.pnghg00nogfd lkj lkj hi515-Nov-2019 14:41:576anonymous642174247232623460276222 \ No newline at end of file diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py index 2ec731e3..35fa2ca8 100644 --- a/datumaro/tests/test_labelme_format.py +++ b/datumaro/tests/test_labelme_format.py @@ -1,11 +1,14 @@ import numpy as np +import os.path as osp from unittest import TestCase from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, Polygon, LabelCategories ) -from datumaro.plugins.labelme_format import LabelMeImporter, LabelMeConverter +from datumaro.components.project import Dataset +from datumaro.plugins.labelme_format import LabelMeExtractor, LabelMeImporter, \ + LabelMeConverter from datumaro.util.test_utils import TestDir, compare_datasets @@ -35,7 +38,8 @@ class LabelMeConverterTest(TestCase): Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ 'occluded': True }), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + attributes={ 'username': 'test' }), Bbox(1, 2, 3, 4, group=3), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, attributes={ 'occluded': True } @@ -58,20 +62,28 @@ class LabelMeConverterTest(TestCase): DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)), annotations=[ - Bbox(0, 4, 4, 8, label=0, group=2, attributes={ - 'occluded': False - }), - Polygon([0, 4, 4, 4, 5, 6], label=1, attributes={ - 'occluded': True - }), + Bbox(0, 4, 4, 8, label=0, group=2, id=0, + attributes={ + 'occluded': False, 'username': '', + } + ), + Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, + attributes={ + 'occluded': True, 'username': '', + } + ), Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - attributes={ 'occluded': False } + id=2, attributes={ + 'occluded': False, 'username': 'test' + } ), - Bbox(1, 2, 3, 4, group=1, attributes={ - 'occluded': False + Bbox(1, 2, 3, 4, group=1, id=3, attributes={ + 'occluded': False, 'username': '', }), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, - attributes={ 'occluded': True } + id=4, attributes={ + 'occluded': True, 'username': '' + } ), ] ), @@ -90,31 +102,113 @@ class LabelMeConverterTest(TestCase): SrcExtractor(), LabelMeConverter(save_images=True), test_dir, target_dataset=DstExtractor()) -class LabelMeImporterTest(TestCase): - def test_can_detect(self): - class TestExtractor(Extractor): + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') + +class LabelMeExtractorTest(TestCase): + def test_can_load(self): + class DstExtractor(Extractor): def __iter__(self): + img1 = np.ones((77, 102, 3)) * 255 + img1[6:32, 7:41] = 0 + + mask1 = np.zeros((77, 102), dtype=int) + mask1[67:69, 58:63] = 1 + + mask2 = np.zeros((77, 102), dtype=int) + mask2[13:25, 54:71] = [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), + DatasetItem(id='img1', image=img1, annotations=[ - Bbox(0, 4, 4, 8, label=2), + Polygon([43, 34, 45, 34, 45, 37, 43, 37], + label=0, id=0, + attributes={ + 'occluded': False, + 'username': 'admin' + } + ), + Mask(mask1, label=1, id=1, + attributes={ + 'occluded': False, + 'username': 'brussell' + } + ), + Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], + label=2, group=2, id=2, + attributes={ + 'a1': '1', + 'occluded': True, + 'username': 'anonymous' + } + ), + Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], + label=3, group=2, id=3, + attributes={ + 'kj': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), + Bbox(13, 19, 10, 11, label=4, group=2, id=4, + attributes={ + 'hg': '1', + 'occluded': True, + 'username': 'anonymous' + } + ), + Mask(mask2, label=5, group=1, id=5, + attributes={ + 'd': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), + Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], + label=6, group=1, id=6, + attributes={ + 'gfd lkj lkj hi': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), ] ), ]) def categories(self): label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) + label_cat.add('window') + label_cat.add('license plate') + label_cat.add('o1') + label_cat.add('q1') + label_cat.add('b1') + label_cat.add('m1') + label_cat.add('hg') return { AnnotationType.label: label_cat, } - def generate_dummy(path): - LabelMeConverter()(TestExtractor(), save_dir=path) + parsed = Dataset.from_extractors(LabelMeExtractor(DUMMY_DATASET_DIR)) + compare_datasets(self, expected=DstExtractor(), actual=parsed) - with TestDir() as test_dir: - generate_dummy(test_dir) +class LabelMeImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) - self.assertTrue(LabelMeImporter.detect(test_dir)) \ No newline at end of file + def test_can_import(self): + parsed = LabelMeImporter()(DUMMY_DATASET_DIR).make_dataset() + self.assertEqual(1, len(parsed))