[Datumaro] Remove annotation dir for cvat dataset format (#1637)

* Remove annotation dir for cvat dataset format
* fixes
6 years ago · 727fcd50b3
parent 2044d35566
commit 727fcd50b3
4 changed files with 27 additions and 47 deletions
--- a/datumaro/datumaro/plugins/cvat_format/converter.py
+++ b/datumaro/datumaro/plugins/cvat_format/converter.py
@@ -341,10 +341,6 @@ class _Converter:
        os.makedirs(images_dir, exist_ok=True)
        self._images_dir = images_dir

-        annotations_dir = osp.join(self._save_dir, CvatPath.ANNOTATIONS_DIR)
-        os.makedirs(annotations_dir, exist_ok=True)
-        self._annotations_dir = annotations_dir
-
        subsets = self._extractor.subsets()
        if len(subsets) == 0:
            subsets = [ None ]
@@ -356,7 +352,7 @@ class _Converter:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

-            with open(osp.join(annotations_dir, '%s.xml' % subset_name), 'w') as f:
+            with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f:
                writer = _SubsetWriter(f, subset_name, subset, self)
                writer.write()

--- a/datumaro/datumaro/plugins/cvat_format/extractor.py
+++ b/datumaro/datumaro/plugins/cvat_format/extractor.py
@@ -21,11 +21,9 @@ class CvatExtractor(SourceExtractor):

    def __init__(self, path):
        assert osp.isfile(path), path
-        rootpath = ''
-        if path.endswith(osp.join(CvatPath.ANNOTATIONS_DIR, osp.basename(path))):
-            rootpath = path.rsplit(CvatPath.ANNOTATIONS_DIR, maxsplit=1)[0]
+        rootpath = osp.dirname(path)
        images_dir = ''
-        if rootpath and osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)):
+        if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)):
            images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR)
        self._images_dir = images_dir
        self._path = path
@@ -64,7 +62,7 @@ class CvatExtractor(SourceExtractor):
            if ev == 'start':
                if el.tag == 'track':
                    track = {
-                        'id': el.attrib.get('id'),
+                        'id': el.attrib['id'],
                        'label': el.attrib.get('label'),
                        'group': int(el.attrib.get('group_id', 0)),
                        'height': frame_size[0],
@@ -85,6 +83,7 @@ class CvatExtractor(SourceExtractor):
                    }
                    if track:
                        shape.update(track)
+                        shape['track_id'] = int(track['id'])
                    if image:
                        shape.update(image)
                elif el.tag == 'tag' and image:
@@ -165,8 +164,7 @@ class CvatExtractor(SourceExtractor):
        categories = {}

        frame_size = None
-        has_z_order = False
-        mode = 'annotation'
+        mode = None
        labels = OrderedDict()
        label = None

@@ -192,7 +190,7 @@ class CvatExtractor(SourceExtractor):
            if ev == 'start':
                if accepted('annotations', 'meta'): pass
                elif accepted('meta', 'task'): pass
-                elif accepted('task', 'z_order'): pass
+                elif accepted('task', 'mode'): pass
                elif accepted('task', 'original_size'):
                    frame_size = [None, None]
                elif accepted('original_size', 'height', next_state='frame_height'): pass
@@ -214,8 +212,8 @@ class CvatExtractor(SourceExtractor):
                if consumed('meta', 'meta'):
                    break
                elif consumed('task', 'task'): pass
-                elif consumed('z_order', 'z_order'):
-                    has_z_order = (el.text == 'True')
+                elif consumed('mode', 'mode'):
+                    mode = el.text
                elif consumed('original_size', 'original_size'): pass
                elif consumed('frame_height', 'height'):
                    frame_size[0] = int(el.text)
@@ -241,6 +239,7 @@ class CvatExtractor(SourceExtractor):
        if mode == 'interpolation':
            common_attrs.append('keyframe')
            common_attrs.append('outside')
+            common_attrs.append('track_id')

        label_cat = LabelCategories(attributes=common_attrs)
        for label, attrs in labels.items():
@@ -255,13 +254,15 @@ class CvatExtractor(SourceExtractor):
        ann_id = ann.get('id')
        ann_type = ann['type']

-        attributes = ann.get('attributes', {})
+        attributes = ann.get('attributes') or {}
        if 'occluded' in categories[AnnotationType.label].attributes:
            attributes['occluded'] = ann.get('occluded', False)
-        if 'outside' in categories[AnnotationType.label].attributes:
-            attributes['outside'] = ann.get('outside', False)
-        if 'keyframe' in categories[AnnotationType.label].attributes:
-            attributes['keyframe'] = ann.get('keyframe', False)
+        if 'outside' in ann:
+            attributes['outside'] = ann['outside']
+        if 'keyframe' in ann:
+            attributes['keyframe'] = ann['keyframe']
+        if 'track_id' in ann:
+            attributes['track_id'] = ann['track_id']

        group = ann.get('group')

@@ -302,30 +303,17 @@ class CvatExtractor(SourceExtractor):

    def _load_items(self, parsed):
        for frame_id, item_desc in parsed.items():
-            filename = item_desc.get('name')
-            if filename:
-                filename = self._find_image(filename)
-            if not filename:
-                filename = item_desc.get('name')
+            path = item_desc.get('name', 'frame_%06d.png' % int(frame_id))
            image_size = (item_desc.get('height'), item_desc.get('width'))
            if all(image_size):
                image_size = (int(image_size[0]), int(image_size[1]))
            else:
                image_size = None
            image = None
-            if filename:
-                image = Image(path=filename, size=image_size)
+            if path:
+                image = Image(path=osp.join(self._images_dir, path),
+                    size=image_size)

            parsed[frame_id] = DatasetItem(id=frame_id, subset=self._subset,
                image=image, annotations=item_desc.get('annotations'))
        return parsed
-
-    def _find_image(self, file_name):
-        search_paths = []
-        if self._images_dir:
-            search_paths += [ osp.join(self._images_dir, file_name) ]
-        search_paths += [ osp.join(osp.dirname(self._path), file_name) ]
-        for image_path in search_paths:
-            if osp.isfile(image_path):
-                return image_path
-        return None
--- a/datumaro/datumaro/plugins/cvat_format/format.py
+++ b/datumaro/datumaro/plugins/cvat_format/format.py
@@ -5,6 +5,5 @@

 class CvatPath:
    IMAGES_DIR = 'images'
-    ANNOTATIONS_DIR = 'annotations'

    IMAGE_EXT = '.jpg'
--- a/datumaro/tests/test_cvat_format.py
+++ b/datumaro/tests/test_cvat_format.py
@@ -18,10 +18,7 @@ from datumaro.util.test_utils import TestDir, compare_datasets

 def generate_dummy_cvat(path):
    images_dir = osp.join(path, CvatPath.IMAGES_DIR)
-    anno_dir = osp.join(path, CvatPath.ANNOTATIONS_DIR)
-
    os.makedirs(images_dir)
-    os.makedirs(anno_dir)

    root_elem = ET.Element('annotations')
    ET.SubElement(root_elem, 'version').text = '1.1'
@@ -93,7 +90,7 @@ def generate_dummy_cvat(path):
        'label': 'label2', 'points': '1,2;3,4;5,6', 'z_order': '2',
    })

-    with open(osp.join(anno_dir, 'train.xml'), 'w') as f:
+    with open(osp.join(path, 'train.xml'), 'w') as f:
        f.write(ET.tostring(root_elem, encoding='unicode'))

 class CvatImporterTest(TestCase):
@@ -213,13 +210,13 @@ class CvatConverterTest(TestCase):
                return iter([
                    DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
                        annotations=[
-                            Polygon([0, 0, 4, 0, 4, 4], z_order=0,
+                            Polygon([0, 0, 4, 0, 4, 4],
                                label=1, group=4,
                                attributes={ 'occluded': True }),
-                            Polygon([5, 0, 9, 0, 5, 5], z_order=0,
+                            Polygon([5, 0, 9, 0, 5, 5],
                                label=2, group=4,
                                attributes={ 'occluded': False }),
-                            Points([1, 1, 3, 2, 2, 3], z_order=0,
+                            Points([1, 1, 3, 2, 2, 3],
                                label=2,
                                attributes={ 'occluded': False,
                                    'a1': 'x', 'a2': 42 }),
@@ -229,10 +226,10 @@ class CvatConverterTest(TestCase):
                    ),
                    DatasetItem(id=1, subset='s1',
                        annotations=[
-                            PolyLine([0, 0, 4, 0, 4, 4], z_order=0,
+                            PolyLine([0, 0, 4, 0, 4, 4],
                                label=3, group=4,
                                attributes={ 'occluded': False }),
-                            Bbox(5, 0, 1, 9, z_order=0,
+                            Bbox(5, 0, 1, 9,
                                label=3, group=4,
                                attributes={ 'occluded': False }),
                        ]