[Datumaro] CVAT format import (#974)

* Add label-specific attributes
* Add CVAT format import
* Register CVAT format
* Add a little more logging
* Minor refactoring of tests
* CVAT format checks
* Add missing check
* Refactor datumaro format
* Minor refactoring
* Regularize dataset importer logic
* Fix project import issue
* Refactor coco extractor
* Refactor tests
* Codacy fixes
zhiltsov-max 6 years ago committed by Nikita Manovich
parent 8edfe0dcb4
commit 36b1e9c1ce

@ -7,6 +7,7 @@ import argparse
import logging as log
import os
import os.path as osp
import shutil
from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
@ -27,10 +28,13 @@ def create_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if not args.overwrite and osp.isdir(project_dir) and os.listdir(project_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=args.overwrite)
if not args.overwrite and osp.isfile(project_path):
@ -78,10 +82,13 @@ def import_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if not args.overwrite and osp.isdir(project_dir) and os.listdir(project_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=args.overwrite)
if not args.overwrite and osp.isfile(project_path):
@ -147,7 +154,11 @@ def export_command(args):
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
project.make_dataset().export(
log.info("Loading the project...")
dataset = project.make_dataset()
log.info("Exporting the project...")
dataset.export(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,

@ -210,8 +210,12 @@ def export_command(args):
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
log.info("Loading the project...")
source_project = project.make_source_project(args.name)
source_project.make_dataset().export(
dataset = source_project.make_dataset()
log.info("Exporting the project...")
dataset.export(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
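The create and import commands above now share one overwrite policy: refuse to touch a non-empty directory unless --overwrite is passed, and wipe it wholesale when it is. A minimal standalone sketch of that pattern (prepare_dir is a hypothetical helper, not part of this commit):

import logging as log
import os
import os.path as osp
import shutil

def prepare_dir(dst_dir, overwrite=False):
    # Non-empty target: bail out unless forced, otherwise remove and recreate.
    if osp.isdir(dst_dir) and os.listdir(dst_dir):
        if not overwrite:
            log.error("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % dst_dir)
            return 1
        shutil.rmtree(dst_dir)
    os.makedirs(dst_dir, exist_ok=overwrite)
    return 0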

@ -24,10 +24,7 @@ from datumaro.components.converters.voc import (
)
from datumaro.components.converters.yolo import YoloConverter
from datumaro.components.converters.tfrecord import (
DetectionApiConverter,
)
from datumaro.components.converters.tfrecord import DetectionApiConverter
items = [

@ -11,8 +11,7 @@ import os.path as osp
from datumaro.components.converter import Converter
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME,
AnnotationType, Annotation,
DEFAULT_SUBSET_NAME, Annotation,
LabelObject, MaskObject, PointsObject, PolygonObject,
PolyLineObject, BboxObject, CaptionObject,
LabelCategories, MaskCategories, PointsCategories
@ -52,11 +51,13 @@ class _SubsetWriter:
def write_item(self, item):
annotations = []
self.items.append({
item_desc = {
'id': item.id,
'path': item.path,
'annotations': annotations,
})
}
if item.path:
item_desc['path'] = item.path
self.items.append(item_desc)
for ann in item.annotations:
if isinstance(ann, LabelObject):

@ -476,6 +476,7 @@ class DatasetItem:
(self.id == other.id) and \
(self.subset == other.subset) and \
(self.annotations == other.annotations) and \
(self.path == other.path) and \
(self.has_image == other.has_image) and \
(self.has_image and np.all(self.image == other.image) or \
not self.has_image)

@ -26,14 +26,9 @@ from datumaro.components.extractors.voc import (
VocComp_9_10_Extractor,
)
from datumaro.components.extractors.yolo import (
YoloExtractor,
)
from datumaro.components.extractors.tfrecord import (
DetectionApiExtractor,
)
from datumaro.components.extractors.yolo import YoloExtractor
from datumaro.components.extractors.tfrecord import DetectionApiExtractor
from datumaro.components.extractors.cvat import CvatExtractor
items = [
('datumaro', DatumaroExtractor),
@ -59,4 +54,6 @@ items = [
('yolo', YoloExtractor),
('tf_detection_api', DetectionApiExtractor),
('cvat', CvatExtractor),
]

@ -0,0 +1,286 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os.path as osp
import xml.etree.ElementTree as ET
from datumaro.components.extractor import (Extractor, DatasetItem,
DEFAULT_SUBSET_NAME, AnnotationType,
PointsObject, PolygonObject, PolyLineObject, BboxObject,
LabelCategories
)
from datumaro.components.formats.cvat import CvatPath
from datumaro.util.image import lazy_image
class CvatExtractor(Extractor):
_SUPPORTED_SHAPES = ('box', 'polygon', 'polyline', 'points')
def __init__(self, path):
super().__init__()
assert osp.isfile(path)
rootpath = path.rsplit(CvatPath.ANNOTATIONS_DIR, maxsplit=1)[0]
self._path = rootpath
subset = osp.splitext(osp.basename(path))[0]
if subset == DEFAULT_SUBSET_NAME:
subset = None
self._subset = subset
items, categories = self._parse(path)
self._items = self._load_items(items)
self._categories = categories
def categories(self):
return self._categories
def __iter__(self):
for item in self._items.values():
yield item
def __len__(self):
return len(self._items)
def subsets(self):
if self._subset:
return [self._subset]
return None
def get_subset(self, name):
if name != self._subset:
return None
return self
@classmethod
def _parse(cls, path):
context = ET.iterparse(path, events=("start", "end"))
context = iter(context)
categories = cls._parse_meta(context)
items = OrderedDict()
track = None
shape = None
image = None
for ev, el in context:
if ev == 'start':
if el.tag == 'track':
track = {
'id': el.attrib.get('id'),
'label': el.attrib.get('label'),
'group': int(el.attrib.get('group_id', 0)),
}
elif el.tag == 'image':
image = {
'name': el.attrib.get('name'),
'frame': el.attrib['id'],
}
elif el.tag in cls._SUPPORTED_SHAPES and (track or image):
shape = {
'type': None,
'attributes': {},
}
if track:
shape.update(track)
if image:
shape.update(image)
elif ev == 'end':
if el.tag == 'attribute' and shape is not None:
shape['attributes'][el.attrib['name']] = el.text
elif el.tag in cls._SUPPORTED_SHAPES:
if track is not None:
shape['frame'] = el.attrib['frame']
shape['outside'] = (el.attrib.get('outside') == '1')
shape['keyframe'] = (el.attrib.get('keyframe') == '1')
if image is not None:
shape['label'] = el.attrib.get('label')
shape['group'] = int(el.attrib.get('group_id', 0))
shape['type'] = el.tag
shape['occluded'] = (el.attrib.get('occluded') == '1')
shape['z_order'] = int(el.attrib.get('z_order', 0))
if el.tag == 'box':
shape['points'] = list(map(float, [
el.attrib['xtl'], el.attrib['ytl'],
el.attrib['xbr'], el.attrib['ybr'],
]))
else:
shape['points'] = []
for pair in el.attrib['points'].split(';'):
shape['points'].extend(map(float, pair.split(',')))
frame_desc = items.get(shape['frame'], {
'name': shape.get('name'),
'annotations': [],
})
frame_desc['annotations'].append(
cls._parse_ann(shape, categories))
items[shape['frame']] = frame_desc
shape = None
elif el.tag == 'track':
track = None
elif el.tag == 'image':
image = None
el.clear()
return items, categories
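# For reference, a minimal annotation file that _parse() accepts, mirroring
# the dummy XML generated in the tests below (values are illustrative):
#
# <annotations>
#   <meta>
#     <task>
#       <z_order>True</z_order>
#       <labels>
#         <label><name>label1</name></label>
#       </labels>
#     </task>
#   </meta>
#   <image id="0" name="img0">
#     <box label="label1" occluded="1" xtl="0" ytl="2" xbr="4" ybr="4">
#       <attribute name="a1">true</attribute>
#     </box>
#   </image>
# </annotations>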
@staticmethod
def _parse_meta(context):
ev, el = next(context)
if not (ev == 'start' and el.tag == 'annotations'):
raise Exception("Unexpected token '%s', expected 'annotations'" % el.tag)
categories = {}
has_z_order = False
mode = 'annotation'
labels = OrderedDict()
label = None
# Recursive descent parser
el = None
states = ['annotations']
def accepted(expected_state, tag, next_state=None):
state = states[-1]
if state == expected_state and el is not None and el.tag == tag:
if not next_state:
next_state = tag
states.append(next_state)
return True
return False
def consumed(expected_state, tag):
state = states[-1]
if state == expected_state and el is not None and el.tag == tag:
states.pop()
return True
return False
for ev, el in context:
if ev == 'start':
if accepted('annotations', 'meta'): pass
elif accepted('meta', 'task'): pass
elif accepted('task', 'z_order'): pass
elif accepted('task', 'labels'): pass
elif accepted('labels', 'label'):
label = { 'name': None, 'attributes': set() }
elif accepted('label', 'name', next_state='label_name'): pass
elif accepted('label', 'attributes'): pass
elif accepted('attributes', 'attribute'): pass
elif accepted('attribute', 'name', next_state='attr_name'): pass
elif accepted('annotations', 'image') or \
accepted('annotations', 'track') or \
accepted('annotations', 'tag'):
break
else:
pass
elif ev == 'end':
if consumed('meta', 'meta'):
break
elif consumed('task', 'task'): pass
elif consumed('z_order', 'z_order'):
has_z_order = (el.text == 'True')
elif consumed('label_name', 'name'):
label['name'] = el.text
elif consumed('attr_name', 'name'):
label['attributes'].add(el.text)
elif consumed('attribute', 'attribute'): pass
elif consumed('attributes', 'attributes'): pass
elif consumed('label', 'label'):
labels[label['name']] = label['attributes']
label = None
elif consumed('labels', 'labels'): pass
else:
pass
assert len(states) == 1 and states[0] == 'annotations', \
"Expected 'meta' section in the annotation file, path: %s" % states
common_attrs = ['occluded']
if has_z_order:
common_attrs.append('z_order')
if mode == 'interpolation':
common_attrs.append('keyframe')
common_attrs.append('outside')
label_cat = LabelCategories(attributes=common_attrs)
for label, attrs in labels.items():
label_cat.add(label, attributes=attrs)
categories[AnnotationType.label] = label_cat
return categories
@classmethod
def _parse_ann(cls, ann, categories):
ann_id = ann.get('id')
ann_type = ann['type']
attributes = ann.get('attributes', {})
if 'occluded' in categories[AnnotationType.label].attributes:
attributes['occluded'] = ann.get('occluded', False)
if 'z_order' in categories[AnnotationType.label].attributes:
attributes['z_order'] = ann.get('z_order', 0)
if 'outside' in categories[AnnotationType.label].attributes:
attributes['outside'] = ann.get('outside', False)
if 'keyframe' in categories[AnnotationType.label].attributes:
attributes['keyframe'] = ann.get('keyframe', False)
group = ann.get('group')
if group == 0:
group = None
label = ann.get('label')
label_id = categories[AnnotationType.label].find(label)[0]
points = ann.get('points', [])
if ann_type == 'polyline':
return PolyLineObject(points, label=label_id,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'polygon':
return PolygonObject(points, label=label_id,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'points':
return PointsObject(points, label=label_id,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'box':
x, y = points[0], points[1]
w, h = points[2] - x, points[3] - y
return BboxObject(x, y, w, h, label=label_id,
id=ann_id, attributes=attributes, group=group)
else:
raise NotImplementedError("Unknown annotation type '%s'" % ann_type)
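# Worked example for the 'box' branch: a CVAT box with xtl=0, ytl=2,
# xbr=4, ybr=4 arrives as points [0, 2, 4, 4] and becomes
# BboxObject(x=0, y=2, w=4, h=2) -- corner coordinates to x, y, width, height.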
def _load_items(self, parsed):
for item_id, item_desc in parsed.items():
file_name = item_desc.get('name')
if not file_name:
file_name = item_id
file_name += CvatPath.IMAGE_EXT
image = self._find_image(file_name)
parsed[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image, annotations=item_desc.get('annotations', None))
return parsed
def _find_image(self, file_name):
images_dir = osp.join(self._path, CvatPath.IMAGES_DIR)
search_paths = [
osp.join(images_dir, file_name),
osp.join(images_dir, self._subset or DEFAULT_SUBSET_NAME, file_name),
]
for image_path in search_paths:
if osp.exists(image_path):
return lazy_image(image_path)
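A quick sketch of driving this extractor directly; the dataset path is hypothetical, everything else follows the code above:

from datumaro.components.extractor import AnnotationType
from datumaro.components.extractors.cvat import CvatExtractor

# One extractor per subset file under annotations/
extractor = CvatExtractor('dataset/annotations/train.xml')
print(extractor.subsets())  # ['train']
for item in extractor:
    print(item.id, len(item.annotations))  # images load lazily on access
label_cat = extractor.categories()[AnnotationType.label]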

@ -3,70 +3,58 @@
#
# SPDX-License-Identifier: MIT
from collections import defaultdict
import json
import logging as log
import os.path as osp
from datumaro.components.extractor import (Extractor, DatasetItem,
DEFAULT_SUBSET_NAME,
AnnotationType,
DEFAULT_SUBSET_NAME, AnnotationType,
LabelObject, MaskObject, PointsObject, PolygonObject,
PolyLineObject, BboxObject, CaptionObject,
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.components.formats.datumaro import DatumaroPath
from datumaro.util import dir_items
from datumaro.util.image import lazy_image
from datumaro.util.mask_tools import lazy_mask
class DatumaroExtractor(Extractor):
class Subset(Extractor):
def __init__(self, name, parent):
super().__init__()
self._parent = parent
self._name = name
self.items = []
def __iter__(self):
for item in self.items:
yield self._parent._get(item, self._name)
def __init__(self, path):
super().__init__()
def __len__(self):
return len(self.items)
assert osp.isfile(path)
rootpath = path.rsplit(DatumaroPath.ANNOTATIONS_DIR, maxsplit=1)[0]
self._path = rootpath
def categories(self):
return self._parent.categories()
subset_name = osp.splitext(osp.basename(path))[0]
if subset_name == DEFAULT_SUBSET_NAME:
subset_name = None
self._subset_name = subset_name
def __init__(self, path):
super().__init__()
with open(path, 'r') as f:
parsed_anns = json.load(f)
self._categories = self._load_categories(parsed_anns)
self._items = self._load_items(parsed_anns)
assert osp.isdir(path)
self._path = path
def categories(self):
return self._categories
annotations = defaultdict(list)
found_subsets = self._find_subsets(path)
parsed_anns = None
subsets = {}
for subset_name, subset_path in found_subsets.items():
if subset_name == DEFAULT_SUBSET_NAME:
subset_name = None
subset = self.Subset(subset_name, self)
with open(subset_path, 'r') as f:
parsed_anns = json.load(f)
def __iter__(self):
for item in self._items:
yield item
for index, _ in enumerate(parsed_anns['items']):
subset.items.append(index)
def __len__(self):
return len(self._items)
annotations[subset_name] = parsed_anns
subsets[subset_name] = subset
self._annotations = dict(annotations)
self._subsets = subsets
def subsets(self):
if self._subset_name:
return [self._subset_name]
return None
self._categories = {}
if parsed_anns is not None:
self._categories = self._load_categories(parsed_anns)
def get_subset(self, name):
if name != self._subset_name:
return None
return self
@staticmethod
def _load_categories(parsed):
@ -101,21 +89,24 @@ class DatumaroExtractor(Extractor):
return categories
def _get(self, index, subset_name):
item = self._annotations[subset_name]['items'][index]
def _load_items(self, parsed):
items = []
for item_desc in parsed['items']:
item_id = item_desc['id']
image = None
image_path = osp.join(self._path, DatumaroPath.IMAGES_DIR,
item_id + DatumaroPath.IMAGE_EXT)
if osp.exists(image_path):
image = lazy_image(image_path)
item_id = item.get('id')
annotations = self._load_annotations(item_desc)
image_path = osp.join(self._path, DatumaroPath.IMAGES_DIR,
item_id + DatumaroPath.IMAGE_EXT)
image = None
if osp.isfile(image_path):
image = lazy_image(image_path)
item = DatasetItem(id=item_id, subset=self._subset_name,
annotations=annotations, image=image)
annotations = self._load_annotations(item)
items.append(item)
return DatasetItem(id=item_id, subset=subset_name,
annotations=annotations, image=image)
return items
def _load_annotations(self, item):
parsed = item['annotations']
@ -182,33 +173,3 @@ class DatumaroExtractor(Extractor):
raise NotImplementedError()
return loaded
def categories(self):
return self._categories
def __iter__(self):
for subset_name, subset in self._subsets.items():
for index in subset.items:
yield self._get(index, subset_name)
def __len__(self):
length = 0
for subset in self._subsets.values():
length += len(subset)
return length
def subsets(self):
return list(self._subsets)
def get_subset(self, name):
return self._subsets[name]
@staticmethod
def _find_subsets(path):
anno_dir = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
if not osp.isdir(anno_dir):
raise Exception('Datumaro dataset not found at "%s"' % path)
return { name: osp.join(anno_dir, name + '.json')
for name in dir_items(anno_dir, '.json', truncate_ext=True)
}
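After this rewrite, DatumaroExtractor follows the same one-file-one-subset contract as the CVAT extractor above. A short sketch (the path is hypothetical, the module path assumed from the registry above):

from datumaro.components.extractors.datumaro import DatumaroExtractor

# The extractor wraps a single annotation file; the importer creates
# one source per *.json it finds under annotations/.
extractor = DatumaroExtractor('dataset/annotations/train.json')
assert extractor.get_subset('train') is extractor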

@ -4,6 +4,7 @@
# SPDX-License-Identifier: MIT
from collections import OrderedDict
from itertools import chain
import numpy as np
import os.path as osp
@ -11,7 +12,7 @@ from pycocotools.coco import COCO
import pycocotools.mask as mask_utils
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType,
DEFAULT_SUBSET_NAME, AnnotationType,
LabelObject, MaskObject, PointsObject, PolygonObject,
BboxObject, CaptionObject,
LabelCategories, PointsCategories
@ -42,46 +43,46 @@ class RleMask(MaskObject):
return super().__eq__(other)
return self._rle == other._rle
class CocoExtractor(Extractor):
class Subset(Extractor):
def __init__(self, name, parent):
super().__init__()
self._name = name
self._parent = parent
self.loaders = {}
self.items = OrderedDict()
def __iter__(self):
for img_id in self.items:
yield self._parent._get(img_id, self._name)
def __len__(self):
return len(self.items)
def categories(self):
return self._parent.categories()
def __init__(self, path, task, merge_instance_polygons=False):
super().__init__()
assert osp.isfile(path)
rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0]
self._path = rootpath
self._task = task
self._subsets = {}
subset_name = osp.splitext(osp.basename(path))[0] \
subset = osp.splitext(osp.basename(path))[0] \
.rsplit('_', maxsplit=1)[1]
subset = CocoExtractor.Subset(subset_name, self)
if subset == DEFAULT_SUBSET_NAME:
subset = None
self._subset = subset
self._merge_instance_polygons = merge_instance_polygons
loader = self._make_subset_loader(path)
subset.loaders[task] = loader
for img_id in loader.getImgIds():
subset.items[img_id] = None
self._subsets[subset_name] = subset
self._load_categories(loader)
self._items = self._load_items(loader)
self._load_categories()
def categories(self):
return self._categories
self._merge_instance_polygons = merge_instance_polygons
def __iter__(self):
for item in self._items.values():
yield item
def __len__(self):
return len(self._items)
def subsets(self):
if self._subset:
return [self._subset]
return None
def get_subset(self, name):
if name != self._subset:
return None
return self
@staticmethod
def _make_subset_loader(path):
@ -95,31 +96,17 @@ class CocoExtractor(Extractor):
coco_api.createIndex()
return coco_api
def _load_categories(self):
loaders = {}
for subset in self._subsets.values():
loaders.update(subset.loaders)
def _load_categories(self, loader):
self._categories = {}
label_loader = loaders.get(CocoTask.labels)
instances_loader = loaders.get(CocoTask.instances)
person_kp_loader = loaders.get(CocoTask.person_keypoints)
if label_loader is None and instances_loader is not None:
label_loader = instances_loader
if label_loader is None and person_kp_loader is not None:
label_loader = person_kp_loader
if label_loader is not None:
label_categories, label_map = \
self._load_label_categories(label_loader)
if self._task in [CocoTask.instances, CocoTask.labels,
CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic]:
label_categories, label_map = self._load_label_categories(loader)
self._categories[AnnotationType.label] = label_categories
self._label_map = label_map
if person_kp_loader is not None:
person_kp_categories = \
self._load_person_kp_categories(person_kp_loader)
if self._task == CocoTask.person_keypoints:
person_kp_categories = self._load_person_kp_categories(loader)
self._categories[AnnotationType.points] = person_kp_categories
# pylint: disable=no-self-use
@ -142,76 +129,47 @@ class CocoExtractor(Extractor):
categories = PointsCategories()
for cat in cats:
label_id, _ = self._categories[AnnotationType.label].find(cat['name'])
label_id = self._label_map[cat['id']]
categories.add(label_id=label_id,
labels=cat['keypoints'], adjacent=cat['skeleton'])
return categories
def categories(self):
return self._categories
def _load_items(self, loader):
items = OrderedDict()
def __iter__(self):
for subset in self._subsets.values():
for item in subset:
yield item
for img_id in loader.getImgIds():
image_info = loader.loadImgs(img_id)[0]
image = self._find_image(image_info['file_name'])
def __len__(self):
length = 0
for subset in self._subsets.values():
length += len(subset)
return length
anns = loader.getAnnIds(imgIds=img_id)
anns = loader.loadAnns(anns)
anns = list(chain(*(
self._load_annotations(ann, image_info) for ann in anns)))
def subsets(self):
return list(self._subsets)
items[img_id] = DatasetItem(id=img_id, subset=self._subset,
image=image, annotations=anns)
def get_subset(self, name):
return self._subsets[name]
def _get(self, img_id, subset):
file_name = None
image_info = None
image = None
annotations = []
for ann_type, loader in self._subsets[subset].loaders.items():
if image is None:
image_info = loader.loadImgs(img_id)[0]
file_name = image_info['file_name']
if file_name != '':
image_dir = osp.join(self._path, CocoPath.IMAGES_DIR)
search_paths = [
osp.join(image_dir, file_name),
osp.join(image_dir, subset, file_name),
]
for image_path in search_paths:
if osp.exists(image_path):
image = lazy_image(image_path)
break
annIds = loader.getAnnIds(imgIds=img_id)
anns = loader.loadAnns(annIds)
for ann in anns:
self._parse_annotation(ann, ann_type, annotations, image_info)
return DatasetItem(id=img_id, subset=subset,
image=image, annotations=annotations)
def _parse_label(self, ann):
return items
def _get_label_id(self, ann):
cat_id = ann.get('category_id')
if cat_id in [0, None]:
return None
return self._label_map[cat_id]
def _parse_annotation(self, ann, ann_type, parsed_annotations,
image_info=None):
def _load_annotations(self, ann, image_info=None):
parsed_annotations = []
ann_id = ann.get('id')
attributes = {}
if 'score' in ann:
attributes['score'] = ann['score']
if ann_type is CocoTask.instances:
if self._task is CocoTask.instances:
x, y, w, h = ann['bbox']
label_id = self._parse_label(ann)
label_id = self._get_label_id(ann)
group = None
is_crowd = bool(ann['iscrowd'])
@ -253,18 +211,17 @@ class CocoExtractor(Extractor):
BboxObject(x, y, w, h, label=label_id,
id=ann_id, attributes=attributes, group=group)
)
elif ann_type is CocoTask.labels:
label_id = self._parse_label(ann)
elif self._task is CocoTask.labels:
label_id = self._get_label_id(ann)
parsed_annotations.append(
LabelObject(label=label_id,
id=ann_id, attributes=attributes)
LabelObject(label=label_id, id=ann_id, attributes=attributes)
)
elif ann_type is CocoTask.person_keypoints:
elif self._task is CocoTask.person_keypoints:
keypoints = ann['keypoints']
points = [p for i, p in enumerate(keypoints) if i % 3 != 2]
visibility = keypoints[2::3]
bbox = ann.get('bbox')
label_id = self._parse_label(ann)
label_id = self._get_label_id(ann)
group = None
if bbox is not None:
group = ann_id
@ -276,7 +233,7 @@ class CocoExtractor(Extractor):
parsed_annotations.append(
BboxObject(*bbox, label=label_id, group=group)
)
elif ann_type is CocoTask.captions:
elif self._task is CocoTask.captions:
caption = ann['caption']
parsed_annotations.append(
CaptionObject(caption,
@ -287,6 +244,16 @@ class CocoExtractor(Extractor):
return parsed_annotations
def _find_image(self, file_name):
images_dir = osp.join(self._path, CocoPath.IMAGES_DIR)
search_paths = [
osp.join(images_dir, file_name),
osp.join(images_dir, self._subset or DEFAULT_SUBSET_NAME, file_name),
]
for image_path in search_paths:
if osp.exists(image_path):
return lazy_image(image_path)
class CocoImageInfoExtractor(CocoExtractor):
def __init__(self, path, **kwargs):
super().__init__(path, task=CocoTask.image_info, **kwargs)
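The COCO extractor gets the same treatment: one loader, one task, one subset per instance. A sketch (the path is hypothetical, the module path assumed from the ms_coco naming used elsewhere in this commit):

from datumaro.components.extractors.ms_coco import CocoExtractor
from datumaro.components.formats.ms_coco import CocoTask

# Subset name comes from the file name: instances_train.json -> 'train'
extractor = CocoExtractor('dataset/annotations/instances_train.json',
    task=CocoTask.instances)
print(extractor.subsets())  # ['train']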

@ -0,0 +1,10 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
class CvatPath:
IMAGES_DIR = 'images'
ANNOTATIONS_DIR = 'annotations'
IMAGE_EXT = '.jpg'
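# These constants pin down the on-disk layout the extractor and importer
# expect:
#
#   dataset/
#     annotations/   # CvatPath.ANNOTATIONS_DIR, one <subset>.xml per subset
#     images/        # CvatPath.IMAGES_DIR, <item name or id> + '.jpg'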

@ -4,19 +4,16 @@
# SPDX-License-Identifier: MIT
from datumaro.components.importers.datumaro import DatumaroImporter
from datumaro.components.importers.ms_coco import (
CocoImporter,
)
from datumaro.components.importers.ms_coco import CocoImporter
from datumaro.components.importers.voc import (
VocImporter,
VocResultsImporter,
)
from datumaro.components.importers.tfrecord import (
DetectionApiImporter,
)
from datumaro.components.importers.tfrecord import DetectionApiImporter
from datumaro.components.importers.yolo import YoloImporter
from datumaro.components.importers.cvat import CvatImporter
items = [
@ -27,5 +24,9 @@ items = [
('voc', VocImporter),
('voc_results', VocResultsImporter),
('yolo', YoloImporter),
('tf_detection_api', DetectionApiImporter),
('cvat', CvatImporter),
]

@ -0,0 +1,46 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.formats.cvat import CvatPath
class CvatImporter:
EXTRACTOR_NAME = 'cvat'
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
if path.endswith('.xml') and osp.isfile(path):
subset_paths = [path]
else:
subset_paths = glob(osp.join(path, '*.xml'))
if osp.basename(osp.normpath(path)) != CvatPath.ANNOTATIONS_DIR:
path = osp.join(path, CvatPath.ANNOTATIONS_DIR)
subset_paths += glob(osp.join(path, '*.xml'))
if len(subset_paths) == 0:
raise Exception("Failed to find 'cvat' dataset at '%s'" % path)
for subset_path in subset_paths:
if not osp.isfile(subset_path):
continue
log.info("Found a dataset at '%s'" % subset_path)
subset_name = osp.splitext(osp.basename(subset_path))[0]
project.add_source(subset_name, {
'url': subset_path,
'format': self.EXTRACTOR_NAME,
'options': extra_params,
})
return project
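The importer only records sources; parsing happens when the dataset is materialized. A sketch (the path is hypothetical):

from datumaro.components.project import Project

# Direct call, as the tests do: CvatImporter()('dataset/').make_dataset()
# Or resolve the format by its registered name:
dataset = Project.import_from('dataset/', 'cvat').make_dataset()
for item in dataset:
    print(item.id, item.subset)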

@ -3,23 +3,44 @@
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.formats.datumaro import DatumaroPath
class DatumaroImporter:
EXTRACTOR_NAME = 'datumaro'
def __call__(self, path):
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
if not osp.exists(path):
if path.endswith('.json') and osp.isfile(path):
subset_paths = [path]
else:
subset_paths = glob(osp.join(path, '*.json'))
if osp.basename(osp.normpath(path)) != DatumaroPath.ANNOTATIONS_DIR:
path = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
subset_paths += glob(osp.join(path, '*.json'))
if len(subset_paths) == 0:
raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)
source_name = osp.splitext(osp.basename(path))[0]
project.add_source(source_name, {
'url': path,
'format': self.EXTRACTOR_NAME,
})
for subset_path in subset_paths:
if not osp.isfile(subset_path):
continue
log.info("Found a dataset at '%s'" % subset_path)
subset_name = osp.splitext(osp.basename(subset_path))[0]
project.add_source(subset_name, {
'url': subset_path,
'format': self.EXTRACTOR_NAME,
'options': extra_params,
})
return project

@ -4,7 +4,8 @@
# SPDX-License-Identifier: MIT
from collections import defaultdict
import os
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.formats.ms_coco import CocoTask, CocoPath
@ -19,9 +20,6 @@ class CocoImporter:
CocoTask.image_info: 'coco_images',
}
def __init__(self, task_filter=None):
self._task_filter = task_filter
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
@ -33,6 +31,8 @@ class CocoImporter:
for ann_files in subsets.values():
for ann_type, ann_file in ann_files.items():
log.info("Found a dataset at '%s'" % ann_file)
source_name = osp.splitext(osp.basename(ann_file))[0]
project.add_source(source_name, {
'url': ann_file,
@ -43,28 +43,29 @@ class CocoImporter:
return project
@staticmethod
def find_subsets(dataset_dir):
ann_dir = os.path.join(dataset_dir, CocoPath.ANNOTATIONS_DIR)
if not osp.isdir(ann_dir):
raise NotADirectoryError(
'COCO annotations directory not found at "%s"' % ann_dir)
def find_subsets(path):
if path.endswith('.json') and osp.isfile(path):
subset_paths = [path]
else:
subset_paths = glob(osp.join(path, '*_*.json'))
if osp.basename(osp.normpath(path)) != CocoPath.ANNOTATIONS_DIR:
path = osp.join(path, CocoPath.ANNOTATIONS_DIR)
subset_paths += glob(osp.join(path, '*_*.json'))
subsets = defaultdict(dict)
for ann_file in os.listdir(ann_dir):
subset_path = osp.join(ann_dir, ann_file)
if not subset_path.endswith('.json'):
continue
for subset_path in subset_paths:
name_parts = osp.splitext(osp.basename(subset_path))[0] \
.rsplit('_', maxsplit=1)
name_parts = osp.splitext(ann_file)[0].rsplit('_', maxsplit=1)
ann_type = name_parts[0]
try:
ann_type = CocoTask[ann_type]
except KeyError:
raise Exception(
'Unknown subset type %s, only known are: %s' % \
(ann_type,
', '.join([e.name for e in CocoTask])
))
log.warning("Skipping '%s': unknown subset "
"type '%s', the known types are: %s" % \
(subset_path, ann_type,
', '.join([e.name for e in CocoTask])))
continue
subset_name = name_parts[1]
subsets[subset_name][ann_type] = subset_path
return dict(subsets)
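For a directory holding instances_train.json and captions_train.json, the grouping above comes back keyed by subset name (paths abbreviated):

# {
#     'train': {
#         CocoTask.instances: 'annotations/instances_train.json',
#         CocoTask.captions: 'annotations/captions_train.json',
#     },
# }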

@ -4,32 +4,39 @@
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
class DetectionApiImporter:
EXTRACTOR_NAME = 'tf_detection_api'
def __call__(self, path):
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
subset_paths = glob(osp.join(path, '*.tfrecord'))
if path.endswith('.tfrecord') and osp.isfile(path):
subset_paths = [path]
else:
subset_paths = glob(osp.join(path, '*.tfrecord'))
if len(subset_paths) == 0:
raise Exception(
"Failed to find 'tf_detection_api' dataset at '%s'" % path)
for subset_path in subset_paths:
if not osp.isfile(subset_path):
continue
log.info("Found a dataset at '%s'" % subset_path)
subset_name = osp.splitext(osp.basename(subset_path))[0]
project.add_source(subset_name, {
'url': subset_path,
'format': self.EXTRACTOR_NAME,
'options': extra_params,
})
if len(project.config.sources) == 0:
raise Exception(
"Failed to find 'tf_detection_api' dataset at '%s'" % path)
return project

@ -19,7 +19,7 @@ class VocImporter:
(VocTask.action_classification, 'voc_action', 'Action'),
]
def __call__(self, path):
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
@ -31,6 +31,7 @@ class VocImporter:
project.add_source(task.name, {
'url': path,
'format': extractor_type,
'options': extra_params,
})
if len(project.config.sources) == 0:
@ -53,7 +54,7 @@ class VocResultsImporter:
('comp10', 'voc_comp_9_10', 'Action'),
]
def __call__(self, path):
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
@ -68,6 +69,7 @@ class VocResultsImporter:
project.add_source(task_name, {
'url': task_dir,
'format': extractor_type,
'options': extra_params,
})
if len(project.config.sources) == 0:

@ -3,8 +3,9 @@
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.util import dir_items
class YoloImporter:
@ -15,13 +16,14 @@ class YoloImporter:
if not osp.exists(path):
raise Exception("Failed to find 'yolo' dataset at '%s'" % path)
configs = []
if osp.isfile(path):
configs = path
elif osp.isdir(path):
configs = [osp.join(path, p) for p in dir_items(path, '.data')]
if path.endswith('.data') and osp.isfile(path):
config_paths = [path]
else:
config_paths = glob(osp.join(path, '*.data'))
for config_path in config_paths:
log.info("Found a dataset at '%s'" % config_path)
for config_path in configs:
source_name = osp.splitext(osp.basename(config_path))[0]
project.add_source(source_name, {
'url': config_path,

@ -269,6 +269,8 @@ class Subset(Extractor):
class DatasetItemWrapper(DatasetItem):
def __init__(self, item, path, annotations, image=None):
self._item = item
if path is None:
path = []
self._path = path
self._annotations = annotations
self._image = image
@ -334,7 +336,10 @@ class ProjectDataset(Extractor):
own_source = None
own_source_dir = osp.join(config.project_dir, config.dataset_dir)
if osp.isdir(own_source_dir):
own_source = env.make_extractor(DEFAULT_FORMAT, own_source_dir)
log.disable(log.INFO)
own_source = env.make_importer(DEFAULT_FORMAT)(own_source_dir) \
.make_dataset()
log.disable(log.NOTSET)
# merge categories
# TODO: implement properly with merging and annotations remapping
@ -351,7 +356,7 @@ class ProjectDataset(Extractor):
# merge items
subsets = defaultdict(lambda: Subset(self))
for source_name, source in self._sources.items():
log.info("Loading '%s' source contents..." % source_name)
log.debug("Loading '%s' source contents..." % source_name)
for item in source:
if dataset_filter and not dataset_filter(item):
continue
@ -387,7 +392,7 @@ class ProjectDataset(Extractor):
# override with our items, fallback to existing images
if own_source is not None:
log.info("Loading own dataset...")
log.debug("Loading own dataset...")
for item in own_source:
if dataset_filter and not dataset_filter(item):
continue
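The log.disable calls above use the standard-library mechanism for temporarily raising the global logging threshold, so the nested importer's INFO messages are muted while the project's own source loads:

import logging as log

log.disable(log.INFO)    # suppress INFO and everything below, process-wide
# ... noisy nested loading ...
log.disable(log.NOTSET)  # restore normal logging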

@ -7,9 +7,7 @@ from PIL import Image
from unittest import TestCase
from datumaro.components.project import Project
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME,
Extractor, DatasetItem,
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, LabelObject, MaskObject, PointsObject, PolygonObject,
BboxObject, CaptionObject,
LabelCategories, PointsCategories
@ -150,10 +148,8 @@ class CocoConverterTest(TestCase):
if target_dataset is not None:
source_dataset = target_dataset
source_subsets = [s if s else DEFAULT_SUBSET_NAME
for s in source_dataset.subsets()]
self.assertListEqual(
sorted(source_subsets),
sorted(source_dataset.subsets()),
sorted(parsed_dataset.subsets()),
)
@ -172,7 +168,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_captions(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, subset='train',
annotations=[
CaptionObject('hello', id=1),
@ -188,11 +184,7 @@ class CocoConverterTest(TestCase):
CaptionObject('word', id=1),
]
),
]
return iter(items)
def subsets(self):
return ['train', 'val']
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
@ -201,7 +193,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_instances(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + single polygon
@ -234,11 +226,7 @@ class CocoConverterTest(TestCase):
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
]),
]
return iter(items)
def subsets(self):
return ['train', 'val']
])
def categories(self):
label_categories = LabelCategories()
@ -255,7 +243,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_instances_with_mask_conversion(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, image=np.zeros((5, 5, 3)), subset='train',
annotations=[
BboxObject(0, 0, 5, 5, label=3, id=4, group=4,
@ -276,11 +264,7 @@ class CocoConverterTest(TestCase):
label=3, id=4, group=4),
]
),
]
return iter(items)
def subsets(self):
return ['train']
])
def categories(self):
label_categories = LabelCategories()
@ -302,7 +286,7 @@ class CocoConverterTest(TestCase):
class SrcTestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, image=np.zeros((5, 10, 3)),
annotations=[
PolygonObject([0, 0, 4, 0, 4, 4],
@ -313,15 +297,14 @@ class CocoConverterTest(TestCase):
attributes={ 'is_crowd': False }),
]
),
]
return iter(items)
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstTestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, image=np.zeros((5, 10, 3)),
annotations=[
BboxObject(1, 0, 8, 4, label=3, id=4, group=4,
@ -339,8 +322,7 @@ class CocoConverterTest(TestCase):
label=3, id=4, group=4),
]
),
]
return iter(items)
])
def categories(self):
return { AnnotationType.label: label_categories }
@ -353,7 +335,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_images(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, subset='train'),
DatasetItem(id=1, subset='train'),
@ -362,11 +344,7 @@ class CocoConverterTest(TestCase):
DatasetItem(id=4, subset='val'),
DatasetItem(id=5, subset='test'),
]
return iter(items)
def subsets(self):
return ['train', 'val', 'test']
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
@ -375,7 +353,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_labels(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, subset='train',
annotations=[
LabelObject(4, id=1),
@ -390,11 +368,7 @@ class CocoConverterTest(TestCase):
annotations=[
LabelObject(2, id=1),
]),
]
return iter(items)
def subsets(self):
return ['train', 'val']
])
def categories(self):
label_categories = LabelCategories()
@ -411,7 +385,7 @@ class CocoConverterTest(TestCase):
def test_can_save_and_load_keypoints(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=0, subset='train',
annotations=[
PointsObject([1, 2, 0, 2, 4, 1], [0, 1, 2],
@ -433,11 +407,7 @@ class CocoConverterTest(TestCase):
group=3, id=3),
BboxObject(0, 2, 4, 4, label=2, group=3),
]),
]
return iter(items)
def subsets(self):
return ['train', 'val']
])
def categories(self):
label_categories = LabelCategories()
@ -458,7 +428,7 @@ class CocoConverterTest(TestCase):
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=1, annotations=[
LabelObject(2, id=1),
]),
@ -470,10 +440,7 @@ class CocoConverterTest(TestCase):
PolygonObject([0, 0, 4, 0, 4, 4], label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
]),
]
for item in items:
yield item
])
def categories(self):
label_cat = LabelCategories()

@ -0,0 +1,148 @@
import numpy as np
import os
import os.path as osp
from xml.etree import ElementTree as ET
from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, PointsObject, PolygonObject, PolyLineObject, BboxObject,
LabelCategories,
)
from datumaro.components.importers.cvat import CvatImporter
import datumaro.components.formats.cvat as Cvat
from datumaro.util.image import save_image
from datumaro.util.test_utils import TestDir
class CvatExtractorTest(TestCase):
@staticmethod
def generate_dummy_cvat(path):
images_dir = osp.join(path, Cvat.CvatPath.IMAGES_DIR)
anno_dir = osp.join(path, Cvat.CvatPath.ANNOTATIONS_DIR)
os.makedirs(images_dir)
os.makedirs(anno_dir)
root_elem = ET.Element('annotations')
ET.SubElement(root_elem, 'version').text = '1.1'
meta_elem = ET.SubElement(root_elem, 'meta')
task_elem = ET.SubElement(meta_elem, 'task')
ET.SubElement(task_elem, 'z_order').text = 'True'
ET.SubElement(task_elem, 'mode').text = 'interpolation'
labels_elem = ET.SubElement(task_elem, 'labels')
label1_elem = ET.SubElement(labels_elem, 'label')
ET.SubElement(label1_elem, 'name').text = 'label1'
label1_attrs_elem = ET.SubElement(label1_elem, 'attributes')
label1_a1_elem = ET.SubElement(label1_attrs_elem, 'attribute')
ET.SubElement(label1_a1_elem, 'name').text = 'a1'
ET.SubElement(label1_a1_elem, 'input_type').text = 'checkbox'
ET.SubElement(label1_a1_elem, 'default_value').text = 'false'
ET.SubElement(label1_a1_elem, 'values').text = 'false\ntrue'
label1_a2_elem = ET.SubElement(label1_attrs_elem, 'attribute')
ET.SubElement(label1_a2_elem, 'name').text = 'a2'
ET.SubElement(label1_a2_elem, 'input_type').text = 'radio'
ET.SubElement(label1_a2_elem, 'default_value').text = 'v1'
ET.SubElement(label1_a2_elem, 'values').text = 'v1\nv2\nv3'
label2_elem = ET.SubElement(labels_elem, 'label')
ET.SubElement(label2_elem, 'name').text = 'label2'
# item 1
save_image(osp.join(images_dir, 'img0.jpg'), np.ones((8, 8, 3)))
item1_elem = ET.SubElement(root_elem, 'image')
item1_elem.attrib.update({
'id': '0', 'name': 'img0', 'width': '8', 'height': '8'
})
item1_ann1_elem = ET.SubElement(item1_elem, 'box')
item1_ann1_elem.attrib.update({
'label': 'label1', 'occluded': '1', 'z_order': '1',
'xtl': '0', 'ytl': '2', 'xbr': '4', 'ybr': '4'
})
item1_ann1_a1_elem = ET.SubElement(item1_ann1_elem, 'attribute')
item1_ann1_a1_elem.attrib['name'] = 'a1'
item1_ann1_a1_elem.text = 'true'
item1_ann1_a2_elem = ET.SubElement(item1_ann1_elem, 'attribute')
item1_ann1_a2_elem.attrib['name'] = 'a2'
item1_ann1_a2_elem.text = 'v3'
item1_ann2_elem = ET.SubElement(item1_elem, 'polyline')
item1_ann2_elem.attrib.update({
'label': '', 'points': '1.0,2;3,4;5,6;7,8'
})
# item 2
save_image(osp.join(images_dir, 'img1.jpg'), np.ones((10, 10, 3)))
item2_elem = ET.SubElement(root_elem, 'image')
item2_elem.attrib.update({
'id': '1', 'name': 'img1', 'width': '8', 'height': '8'
})
item2_ann1_elem = ET.SubElement(item2_elem, 'polygon')
item2_ann1_elem.attrib.update({
'label': '', 'points': '1,2;3,4;6,5', 'z_order': '1',
})
item2_ann2_elem = ET.SubElement(item2_elem, 'points')
item2_ann2_elem.attrib.update({
'label': 'label2', 'points': '1,2;3,4;5,6', 'z_order': '2',
})
with open(osp.join(anno_dir, 'train.xml'), 'w') as f:
f.write(ET.tostring(root_elem, encoding='unicode'))
def test_can_load(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
BboxObject(0, 2, 4, 2, label=0,
attributes={
'occluded': True, 'z_order': 1,
'a1': 'true', 'a2': 'v3'
}),
PolyLineObject([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False, 'z_order': 0}),
]),
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
annotations=[
PolygonObject([1, 2, 3, 4, 6, 5],
attributes={'occluded': False, 'z_order': 1}),
PointsObject([1, 2, 3, 4, 5, 6], label=1,
attributes={'occluded': False, 'z_order': 2}),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir:
self.generate_dummy_cvat(test_dir.path)
source_dataset = TestExtractor()
parsed_dataset = CvatImporter()(test_dir.path).make_dataset()
self.assertListEqual(
sorted(source_dataset.subsets()),
sorted(parsed_dataset.subsets()),
)
self.assertEqual(len(source_dataset), len(parsed_dataset))
for subset_name in source_dataset.subsets():
source_subset = source_dataset.get_subset(subset_name)
parsed_subset = parsed_dataset.get_subset(subset_name)
for item_a, item_b in zip(source_subset, parsed_subset):
self.assertEqual(len(item_a.annotations), len(item_b.annotations))
for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
self.assertEqual(ann_a, ann_b)

@ -1,4 +1,3 @@
from itertools import zip_longest
import numpy as np
from unittest import TestCase
@ -17,7 +16,7 @@ from datumaro.util.mask_tools import generate_colormap
class DatumaroConverterTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
CaptionObject('hello', id=1),
@ -47,11 +46,10 @@ class DatumaroConverterTest(TestCase):
]),
DatasetItem(id=42, subset='test'),
]
return iter(items)
def subsets(self):
return ['train', 'val', 'test']
DatasetItem(id=42),
DatasetItem(id=43),
])
def categories(self):
label_categories = LabelCategories()
@ -91,8 +89,9 @@ class DatumaroConverterTest(TestCase):
for subset_name in source_dataset.subsets():
source_subset = source_dataset.get_subset(subset_name)
parsed_subset = parsed_dataset.get_subset(subset_name)
self.assertEqual(len(source_subset), len(parsed_subset))
for idx, (item_a, item_b) in enumerate(
zip_longest(source_subset, parsed_subset)):
zip(source_subset, parsed_subset)):
self.assertEqual(item_a, item_b, str(idx))
self.assertEqual(

@ -137,9 +137,6 @@ class ProjectTest(TestCase):
for i in range(self.n):
yield DatasetItem(id=i, subset='train', image=i)
def subsets(self):
return ['train']
class TestLauncher(Launcher):
def __init__(self, **kwargs):
pass
@ -178,9 +175,6 @@ class ProjectTest(TestCase):
yield DatasetItem(id=i, subset='train', image=i,
annotations=[ LabelObject(i) ])
def subsets(self):
return ['train']
class TestLauncher(Launcher):
def __init__(self, **kwargs):
pass
@ -207,17 +201,10 @@ class ProjectTest(TestCase):
index = osp.splitext(osp.basename(path))[0]
subset = f.readline()[:-1]
label = int(f.readline()[:-1])
assert(subset == 'train')
assert subset == 'train'
yield DatasetItem(id=index, subset=subset,
annotations=[ LabelObject(label) ])
def __len__(self):
return len(self.items)
def subsets(self):
return ['train']
model_name = 'model'
launcher_name = 'custom_launcher'
extractor_name = 'custom_extractor'
@ -251,9 +238,6 @@ class ProjectTest(TestCase):
for i in range(self.n):
yield DatasetItem(id=self.s + i, subset='train')
def subsets(self):
return ['train']
e_name1 = 'e1'
e_name2 = 'e2'
n1 = 2
@ -279,9 +263,6 @@ class ProjectTest(TestCase):
for i in range(self.n):
yield DatasetItem(id=i, subset='train')
def subsets(self):
return ['train']
e_type = 'type'
project = Project()
project.env.extractors.register(e_type, TestExtractor)
@ -353,9 +334,6 @@ class ProjectTest(TestCase):
else:
yield v2_item
def subsets(self):
return ['train']
project = Project()
project.env.extractors.register('t1', lambda p: TestExtractor(p, v=1))
project.env.extractors.register('t2', lambda p: TestExtractor(p, v=2))
@ -379,9 +357,6 @@ class DatasetFilterTest(TestCase):
for i in range(self.n):
yield DatasetItem(id=i, subset='train')
def subsets(self):
return ['train']
def test_xpathfilter_can_be_applied(self):
extractor = self.TestExtractor('', n=4)
dataset_filter = XPathDatasetFilter('/item[id > 1]')
@ -433,9 +408,6 @@ class ExtractorTest(TestCase):
DatasetItem(id=3, subset='test'),
])
def subsets(self):
return ['train', 'test']
extractor_name = 'ext1'
project = Project()
project.env.extractors.register(extractor_name, CustomExtractor)

@ -6,12 +6,8 @@ from datumaro.components.project import Project
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, BboxObject, LabelCategories
)
from datumaro.components.extractors.tfrecord import (
DetectionApiExtractor,
)
from datumaro.components.converters.tfrecord import (
DetectionApiConverter,
)
from datumaro.components.extractors.tfrecord import DetectionApiExtractor
from datumaro.components.converters.tfrecord import DetectionApiConverter
from datumaro.util import find
from datumaro.util.test_utils import TestDir
@ -47,7 +43,7 @@ class TfrecordConverterTest(TestCase):
def test_can_save_bboxes(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
@ -67,10 +63,7 @@ class TfrecordConverterTest(TestCase):
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
]
for item in items:
yield item
])
def categories(self):
label_cat = LabelCategories()
@ -88,7 +81,7 @@ class TfrecordConverterTest(TestCase):
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
@ -107,10 +100,7 @@ class TfrecordConverterTest(TestCase):
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
),
]
for item in items:
yield item
])
def categories(self):
label_cat = LabelCategories()

@ -27,6 +27,7 @@ from datumaro.components.converters.voc import (
VocSegmentationConverter,
)
from datumaro.components.importers.voc import VocImporter
from datumaro.components.project import Project
from datumaro.util import find
from datumaro.util.test_utils import TestDir
@ -446,7 +447,7 @@ class VocConverterTest(TestCase):
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=1, annotations=[
BboxObject(2, 3, 4, 5, label=2, id=1),
BboxObject(2, 3, 4, 5, label=3, id=2),
@ -455,10 +456,7 @@ class VocConverterTest(TestCase):
DatasetItem(id=2, annotations=[
BboxObject(5, 4, 6, 5, label=3, id=1),
]),
]
for item in items:
yield item
])
def categories(self):
return VOC.make_voc_categories()
@ -582,7 +580,7 @@ class VocImporterTest(TestCase):
dummy_dir = osp.join(test_dir.path, 'dummy')
subsets = generate_dummy_voc(dummy_dir)
dataset = VocImporter()(dummy_dir).make_dataset()
dataset = Project.import_from(dummy_dir, 'voc').make_dataset()
self.assertEqual(len(VOC.VocTask), len(dataset.sources))
self.assertEqual(set(subsets), set(dataset.subsets()))

@ -14,7 +14,7 @@ class YoloFormatTest(TestCase):
def test_can_save_and_load(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
return iter([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
BboxObject(0, 2, 4, 2, label=2),
@ -34,8 +34,7 @@ class YoloFormatTest(TestCase):
BboxObject(0, 2, 4, 2, label=6),
BboxObject(0, 7, 3, 2, label=7),
]),
]
return iter(items)
])
def categories(self):
label_categories = LabelCategories()
