diff --git a/datumaro/datumaro/plugins/cvat_format/converter.py b/datumaro/datumaro/plugins/cvat_format/converter.py index c04eef49..8249bd0d 100644 --- a/datumaro/datumaro/plugins/cvat_format/converter.py +++ b/datumaro/datumaro/plugins/cvat_format/converter.py @@ -341,10 +341,6 @@ class _Converter: os.makedirs(images_dir, exist_ok=True) self._images_dir = images_dir - annotations_dir = osp.join(self._save_dir, CvatPath.ANNOTATIONS_DIR) - os.makedirs(annotations_dir, exist_ok=True) - self._annotations_dir = annotations_dir - subsets = self._extractor.subsets() if len(subsets) == 0: subsets = [ None ] @@ -356,7 +352,7 @@ class _Converter: subset_name = DEFAULT_SUBSET_NAME subset = self._extractor - with open(osp.join(annotations_dir, '%s.xml' % subset_name), 'w') as f: + with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f: writer = _SubsetWriter(f, subset_name, subset, self) writer.write() diff --git a/datumaro/datumaro/plugins/cvat_format/extractor.py b/datumaro/datumaro/plugins/cvat_format/extractor.py index c268b31d..0478cf05 100644 --- a/datumaro/datumaro/plugins/cvat_format/extractor.py +++ b/datumaro/datumaro/plugins/cvat_format/extractor.py @@ -21,11 +21,9 @@ class CvatExtractor(SourceExtractor): def __init__(self, path): assert osp.isfile(path), path - rootpath = '' - if path.endswith(osp.join(CvatPath.ANNOTATIONS_DIR, osp.basename(path))): - rootpath = path.rsplit(CvatPath.ANNOTATIONS_DIR, maxsplit=1)[0] + rootpath = osp.dirname(path) images_dir = '' - if rootpath and osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)): + if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)): images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR) self._images_dir = images_dir self._path = path @@ -64,7 +62,7 @@ class CvatExtractor(SourceExtractor): if ev == 'start': if el.tag == 'track': track = { - 'id': el.attrib.get('id'), + 'id': el.attrib['id'], 'label': el.attrib.get('label'), 'group': int(el.attrib.get('group_id', 0)), 'height': frame_size[0], @@ -85,6 +83,7 @@ class CvatExtractor(SourceExtractor): } if track: shape.update(track) + shape['track_id'] = int(track['id']) if image: shape.update(image) elif el.tag == 'tag' and image: @@ -165,8 +164,7 @@ class CvatExtractor(SourceExtractor): categories = {} frame_size = None - has_z_order = False - mode = 'annotation' + mode = None labels = OrderedDict() label = None @@ -192,7 +190,7 @@ class CvatExtractor(SourceExtractor): if ev == 'start': if accepted('annotations', 'meta'): pass elif accepted('meta', 'task'): pass - elif accepted('task', 'z_order'): pass + elif accepted('task', 'mode'): pass elif accepted('task', 'original_size'): frame_size = [None, None] elif accepted('original_size', 'height', next_state='frame_height'): pass @@ -214,8 +212,8 @@ class CvatExtractor(SourceExtractor): if consumed('meta', 'meta'): break elif consumed('task', 'task'): pass - elif consumed('z_order', 'z_order'): - has_z_order = (el.text == 'True') + elif consumed('mode', 'mode'): + mode = el.text elif consumed('original_size', 'original_size'): pass elif consumed('frame_height', 'height'): frame_size[0] = int(el.text) @@ -241,6 +239,7 @@ class CvatExtractor(SourceExtractor): if mode == 'interpolation': common_attrs.append('keyframe') common_attrs.append('outside') + common_attrs.append('track_id') label_cat = LabelCategories(attributes=common_attrs) for label, attrs in labels.items(): @@ -255,13 +254,15 @@ class CvatExtractor(SourceExtractor): ann_id = ann.get('id') ann_type = ann['type'] - attributes = ann.get('attributes', {}) + attributes = ann.get('attributes') or {} if 'occluded' in categories[AnnotationType.label].attributes: attributes['occluded'] = ann.get('occluded', False) - if 'outside' in categories[AnnotationType.label].attributes: - attributes['outside'] = ann.get('outside', False) - if 'keyframe' in categories[AnnotationType.label].attributes: - attributes['keyframe'] = ann.get('keyframe', False) + if 'outside' in ann: + attributes['outside'] = ann['outside'] + if 'keyframe' in ann: + attributes['keyframe'] = ann['keyframe'] + if 'track_id' in ann: + attributes['track_id'] = ann['track_id'] group = ann.get('group') @@ -302,30 +303,17 @@ class CvatExtractor(SourceExtractor): def _load_items(self, parsed): for frame_id, item_desc in parsed.items(): - filename = item_desc.get('name') - if filename: - filename = self._find_image(filename) - if not filename: - filename = item_desc.get('name') + path = item_desc.get('name', 'frame_%06d.png' % int(frame_id)) image_size = (item_desc.get('height'), item_desc.get('width')) if all(image_size): image_size = (int(image_size[0]), int(image_size[1])) else: image_size = None image = None - if filename: - image = Image(path=filename, size=image_size) + if path: + image = Image(path=osp.join(self._images_dir, path), + size=image_size) parsed[frame_id] = DatasetItem(id=frame_id, subset=self._subset, image=image, annotations=item_desc.get('annotations')) return parsed - - def _find_image(self, file_name): - search_paths = [] - if self._images_dir: - search_paths += [ osp.join(self._images_dir, file_name) ] - search_paths += [ osp.join(osp.dirname(self._path), file_name) ] - for image_path in search_paths: - if osp.isfile(image_path): - return image_path - return None diff --git a/datumaro/datumaro/plugins/cvat_format/format.py b/datumaro/datumaro/plugins/cvat_format/format.py index e0c7a104..c73fd467 100644 --- a/datumaro/datumaro/plugins/cvat_format/format.py +++ b/datumaro/datumaro/plugins/cvat_format/format.py @@ -5,6 +5,5 @@ class CvatPath: IMAGES_DIR = 'images' - ANNOTATIONS_DIR = 'annotations' IMAGE_EXT = '.jpg' diff --git a/datumaro/tests/test_cvat_format.py b/datumaro/tests/test_cvat_format.py index c2879ef8..35979d9f 100644 --- a/datumaro/tests/test_cvat_format.py +++ b/datumaro/tests/test_cvat_format.py @@ -18,10 +18,7 @@ from datumaro.util.test_utils import TestDir, compare_datasets def generate_dummy_cvat(path): images_dir = osp.join(path, CvatPath.IMAGES_DIR) - anno_dir = osp.join(path, CvatPath.ANNOTATIONS_DIR) - os.makedirs(images_dir) - os.makedirs(anno_dir) root_elem = ET.Element('annotations') ET.SubElement(root_elem, 'version').text = '1.1' @@ -93,7 +90,7 @@ def generate_dummy_cvat(path): 'label': 'label2', 'points': '1,2;3,4;5,6', 'z_order': '2', }) - with open(osp.join(anno_dir, 'train.xml'), 'w') as f: + with open(osp.join(path, 'train.xml'), 'w') as f: f.write(ET.tostring(root_elem, encoding='unicode')) class CvatImporterTest(TestCase): @@ -213,13 +210,13 @@ class CvatConverterTest(TestCase): return iter([ DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), annotations=[ - Polygon([0, 0, 4, 0, 4, 4], z_order=0, + Polygon([0, 0, 4, 0, 4, 4], label=1, group=4, attributes={ 'occluded': True }), - Polygon([5, 0, 9, 0, 5, 5], z_order=0, + Polygon([5, 0, 9, 0, 5, 5], label=2, group=4, attributes={ 'occluded': False }), - Points([1, 1, 3, 2, 2, 3], z_order=0, + Points([1, 1, 3, 2, 2, 3], label=2, attributes={ 'occluded': False, 'a1': 'x', 'a2': 42 }), @@ -229,10 +226,10 @@ class CvatConverterTest(TestCase): ), DatasetItem(id=1, subset='s1', annotations=[ - PolyLine([0, 0, 4, 0, 4, 4], z_order=0, + PolyLine([0, 0, 4, 0, 4, 4], label=3, group=4, attributes={ 'occluded': False }), - Bbox(5, 0, 1, 9, z_order=0, + Bbox(5, 0, 1, 9, label=3, group=4, attributes={ 'occluded': False }), ]