[Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
main
Ilya Kochankov 6 years ago committed by GitHub
parent e372589dc8
commit 7ecdcf182b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -72,6 +72,33 @@ class Categories:
class LabelCategories(Categories): class LabelCategories(Categories):
Category = namedtuple('Category', ['name', 'parent', 'attributes']) Category = namedtuple('Category', ['name', 'parent', 'attributes'])
@classmethod
def from_iterable(cls, iterable):
    """Create a LabelCategories object from an iterable.

    Args:
        iterable: accepted forms are:
            1) a single str - one Category is created with that name
            2) a list of str - interpreted as a list of Category names
            3) a list of positional-argument lists - each entry is
               unpacked and passed to ``add()``
    Returns:
        LabelCategories: the populated LabelCategories object
    """
    result = cls()
    # A bare string is a single category name, not an iterable of names.
    if isinstance(iterable, str):
        iterable = [[iterable]]
    for category in iterable:
        # A bare string entry supplies only the category name.
        if isinstance(category, str):
            category = [category]
        result.add(*category)
    return result
def __init__(self, items=None, attributes=None): def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes) super().__init__(attributes=attributes)
@ -482,6 +509,31 @@ class Bbox(_Shape):
class PointsCategories(Categories): class PointsCategories(Categories):
Category = namedtuple('Category', ['labels', 'joints']) Category = namedtuple('Category', ['labels', 'joints'])
@classmethod
def from_iterable(cls, iterable):
    """Create a PointsCategories object from an iterable.

    Args:
        iterable: accepted forms are:
            1) a single int - one Category is created with that label
            2) a list of int - interpreted as a list of Category labels
            3) a list of positional-argument lists - each entry is
               unpacked and passed to ``add()``
    Returns:
        PointsCategories: the populated PointsCategories object
    """
    result = cls()
    # A bare int is a single category label, not an iterable of labels.
    if isinstance(iterable, int):
        iterable = [[iterable]]
    for category in iterable:
        # A bare int entry supplies only the category label.
        if isinstance(category, int):
            category = [category]
        result.add(*category)
    return result
def __init__(self, items=None, attributes=None): def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes) super().__init__(attributes=attributes)

@ -18,7 +18,8 @@ import sys
from datumaro.components.config import Config, DEFAULT_FORMAT from datumaro.components.config import Config, DEFAULT_FORMAT
from datumaro.components.config_model import (Model, Source, from datumaro.components.config_model import (Model, Source,
PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA) PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
from datumaro.components.extractor import Extractor from datumaro.components.extractor import Extractor, LabelCategories,\
AnnotationType
from datumaro.components.launcher import ModelTransform from datumaro.components.launcher import ModelTransform
from datumaro.components.dataset_filter import \ from datumaro.components.dataset_filter import \
XPathDatasetFilter, XPathAnnotationsFilter XPathDatasetFilter, XPathAnnotationsFilter
@ -319,6 +320,35 @@ class Subset(Extractor):
return self._parent.categories() return self._parent.categories()
class Dataset(Extractor): class Dataset(Extractor):
@classmethod
def from_iterable(cls, iterable, categories=None):
    """Create a Dataset from an iterable of DatasetItems.

    Args:
        iterable: an iterable yielding DatasetItem objects
        categories (dict or list, optional): either a mapping of
            AnnotationType to the corresponding Categories object, or a
            list of label names, which is interpreted as the item list
            of a LabelCategories. Defaults to None, which is treated as
            an empty mapping.
    Returns:
        Dataset: a Dataset built from the given items and categories
    """
    # A plain list of names is shorthand for label categories.
    if isinstance(categories, list):
        categories = {
            AnnotationType.label:
                LabelCategories.from_iterable(categories),
        }
    if not categories:
        categories = {}

    class _TmpExtractor(Extractor):
        # Minimal Extractor adapter over the provided items, so the
        # regular from_extractors() merging path can be reused.
        def __iter__(self):
            return iter(iterable)

        def categories(self):
            return categories

    return cls.from_extractors(_TmpExtractor())
@classmethod @classmethod
def from_extractors(cls, *sources): def from_extractors(cls, *sources):
# merge categories # merge categories

@ -4,7 +4,7 @@ import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Project from datumaro.components.project import (Project, Dataset)
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption, AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories LabelCategories, PointsCategories
@ -26,32 +26,26 @@ DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
class CocoImporterTest(TestCase): class CocoImporterTest(TestCase):
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
])
def categories(self): expected_dataset = Dataset.from_iterable([
label_cat = LabelCategories() DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
label_cat.add('TEST') subset='val', attributes={'id': 1},
return { AnnotationType.label: label_cat } annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
], categories=['TEST',])
dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)
def test_can_detect(self): def test_can_detect(self):
self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@ -71,526 +65,417 @@ class CocoConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load_captions(self): def test_can_save_and_load_captions(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ annotations=[
DatasetItem(id=1, subset='train', Caption('hello', id=1, group=1),
annotations=[ Caption('world', id=2, group=2),
Caption('hello', id=1, group=1), ], attributes={'id': 1}),
Caption('world', id=2, group=2), DatasetItem(id=2, subset='train',
], attributes={'id': 1}), annotations=[
DatasetItem(id=2, subset='train', Caption('test', id=3, group=3),
annotations=[ ], attributes={'id': 2}),
Caption('test', id=3, group=3),
], attributes={'id': 2}), DatasetItem(id=3, subset='val',
annotations=[
DatasetItem(id=3, subset='val', Caption('word', id=1, group=1),
annotations=[ ], attributes={'id': 1}),
Caption('word', id=1, group=1), ])
], attributes={'id': 1}
),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoCaptionsConverter.convert, test_dir) CocoCaptionsConverter.convert, test_dir)
def test_can_save_and_load_instances(self): def test_can_save_and_load_instances(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
label_categories.add(str(i)) annotations=[
categories = { AnnotationType.label: label_categories } # Bbox + single polygon
Bbox(0, 1, 2, 2,
class TestExtractor(Extractor): label=2, group=1, id=1,
def __iter__(self): attributes={ 'is_crowd': False }),
return iter([ Polygon([0, 1, 2, 1, 2, 3, 0, 3],
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), attributes={ 'is_crowd': False },
annotations=[ label=2, group=1, id=1),
# Bbox + single polygon ], attributes={'id': 1}),
Bbox(0, 1, 2, 2, DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
label=2, group=1, id=1, annotations=[
attributes={ 'is_crowd': False }), # Mask + bbox
Polygon([0, 1, 2, 1, 2, 3, 0, 3], Mask(np.array([
attributes={ 'is_crowd': False }, [0, 1, 0, 0],
label=2, group=1, id=1), [0, 1, 0, 0],
], attributes={'id': 1}), [0, 1, 1, 1],
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), [0, 0, 0, 0]],
annotations=[ ),
# Mask + bbox attributes={ 'is_crowd': True },
Mask(np.array([ label=4, group=3, id=3),
[0, 1, 0, 0], Bbox(1, 0, 2, 2, label=4, group=3, id=3,
[0, 1, 0, 0], attributes={ 'is_crowd': True }),
[0, 1, 1, 1], ], attributes={'id': 2}),
[0, 0, 0, 0]],
), DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
attributes={ 'is_crowd': True }, annotations=[
label=4, group=3, id=3), # Bbox + mask
Bbox(1, 0, 2, 2, label=4, group=3, id=3, Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }), attributes={ 'is_crowd': True }),
], attributes={'id': 2}), Mask(np.array([
[0, 0, 0, 0],
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), [1, 1, 1, 0],
annotations=[ [1, 1, 0, 0],
# Bbox + mask [0, 0, 0, 0]],
Bbox(0, 1, 2, 2, label=4, group=3, id=3, ),
attributes={ 'is_crowd': True }), attributes={ 'is_crowd': True },
Mask(np.array([ label=4, group=3, id=3),
[0, 0, 0, 0], ], attributes={'id': 1}),
[1, 1, 1, 0], ], categories=[str(i) for i in range(10)])
[1, 1, 0, 0],
[0, 0, 0, 0]], target_dataset = Dataset.from_iterable([
), DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
attributes={ 'is_crowd': True }, annotations=[
label=4, group=3, id=3), Polygon([0, 1, 2, 1, 2, 3, 0, 3],
], attributes={'id': 1}), attributes={ 'is_crowd': False },
]) label=2, group=1, id=1),
], attributes={'id': 1}),
def categories(self): DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
return categories annotations=[
Mask(np.array([
class DstExtractor(Extractor): [0, 1, 0, 0],
def __iter__(self): [0, 1, 0, 0],
return iter([ [0, 1, 1, 1],
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), [0, 0, 0, 0]],
annotations=[ ),
Polygon([0, 1, 2, 1, 2, 3, 0, 3], attributes={ 'is_crowd': True },
attributes={ 'is_crowd': False }, label=4, group=3, id=3),
label=2, group=1, id=1), ], attributes={'id': 2}),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[ annotations=[
Mask(np.array([ Mask(np.array([
[0, 1, 0, 0], [0, 0, 0, 0],
[0, 1, 0, 0], [1, 1, 1, 0],
[0, 1, 1, 1], [1, 1, 0, 0],
[0, 0, 0, 0]], [0, 0, 0, 0]],
), ),
attributes={ 'is_crowd': True }, attributes={ 'is_crowd': True },
label=4, group=3, id=3), label=4, group=3, id=3),
], attributes={'id': 2}), ], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir, CocoInstancesConverter.convert, test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_can_merge_polygons_on_loading(self): def test_can_merge_polygons_on_loading(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label_categories.add(str(i)) annotations=[
categories = { AnnotationType.label: label_categories } Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
class SrcExtractor(Extractor): Polygon([5, 0, 9, 0, 5, 5],
def __iter__(self): label=3, id=4, group=4),
return iter([ ]
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ),
annotations=[ ], categories=[str(i) for i in range(10)])
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4), target_dataset = Dataset.from_iterable([
Polygon([5, 0, 9, 0, 5, 5], DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label=3, id=4, group=4), annotations=[
] Mask(np.array([
), [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
]) [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
def categories(self): [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
return categories [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
class DstExtractor(Extractor): # only internal fragment (without the border),
def __iter__(self): # but not everywhere...
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
), ),
]) label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
def categories(self): ], attributes={'id': 1}
return categories ),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir, CocoInstancesConverter.convert, test_dir,
importer_args={'merge_instance_polygons': True}, importer_args={'merge_instance_polygons': True},
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_can_crop_covered_segments(self): def test_can_crop_covered_segments(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
label_categories.add(str(i)) annotations=[
Mask(np.array([
class SrcTestExtractor(Extractor): [0, 0, 1, 1, 1],
def __iter__(self): [0, 0, 1, 1, 1],
return iter([ [1, 1, 0, 1, 1],
DatasetItem(id=1, image=np.zeros((5, 5, 3)), [1, 1, 1, 0, 0],
annotations=[ [1, 1, 1, 0, 0]],
Mask(np.array([ ),
[0, 0, 1, 1, 1], label=2, id=1, z_order=0),
[0, 0, 1, 1, 1], Polygon([1, 1, 4, 1, 4, 4, 1, 4],
[1, 1, 0, 1, 1], label=1, id=2, z_order=1),
[1, 1, 1, 0, 0], ]
[1, 1, 1, 0, 0]], ),
), ], categories=[str(i) for i in range(10)])
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], target_dataset = Dataset.from_iterable([
label=1, id=2, z_order=1), DatasetItem(id=1, image=np.zeros((5, 5, 3)),
] annotations=[
), Mask(np.array([
]) [0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
def categories(self): [1, 0, 0, 0, 1],
return { AnnotationType.label: label_categories } [1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
class DstTestExtractor(Extractor): ),
def __iter__(self): attributes={ 'is_crowd': True },
return iter([ label=2, id=1, group=1),
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[ Polygon([1, 1, 4, 1, 4, 4, 1, 4],
Mask(np.array([ label=1, id=2, group=2,
[0, 0, 1, 1, 1], attributes={ 'is_crowd': False }),
[0, 0, 0, 0, 1], ], attributes={'id': 1}
[1, 0, 0, 0, 1], ),
[1, 0, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, crop_covered=True), partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=DstTestExtractor()) test_dir, target_dataset=target_dataset)
def test_can_convert_polygons_to_mask(self): def test_can_convert_polygons_to_mask(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label_categories.add(str(i)) annotations=[
Polygon([0, 0, 4, 0, 4, 4],
class SrcTestExtractor(Extractor): label=3, id=4, group=4),
def __iter__(self): Polygon([5, 0, 9, 0, 5, 5],
return iter([ label=3, id=4, group=4),
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ]
annotations=[ ),
Polygon([0, 0, 4, 0, 4, 4], ], categories=[str(i) for i in range(10)])
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5], target_dataset = Dataset.from_iterable([
label=3, id=4, group=4), DatasetItem(id=1, image=np.zeros((6, 10, 3)),
] annotations=[
), Mask(np.array([
]) [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
def categories(self): [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
return { AnnotationType.label: label_categories } [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
class DstTestExtractor(Extractor): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
def __iter__(self): # only internal fragment (without the border),
return iter([ # but not everywhere...
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ),
annotations=[ attributes={ 'is_crowd': True },
Mask(np.array([ label=3, id=4, group=4),
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0], ], attributes={'id': 1}
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0], ),
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='mask'), partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
test_dir, target_dataset=DstTestExtractor()) test_dir, target_dataset=target_dataset)
def test_can_convert_masks_to_polygons(self): def test_can_convert_masks_to_polygons(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
label_categories.add(str(i)) annotations=[
Mask(np.array([
class SrcExtractor(Extractor): [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
def __iter__(self): [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
return iter([ [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
DatasetItem(id=1, image=np.zeros((5, 10, 3)), [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
annotations=[ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
Mask(np.array([ ]),
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0], label=3, id=4, group=4),
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0], ]
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0], ),
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]), target_dataset = Dataset.from_iterable([
label=3, id=4, group=4), DatasetItem(id=1, image=np.zeros((5, 10, 3)),
] annotations=[
), Polygon(
]) [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
def categories(self): attributes={ 'is_crowd': False }),
return { AnnotationType.label: label_categories } Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
class DstExtractor(Extractor): label=3, id=4, group=4,
def __iter__(self): attributes={ 'is_crowd': False }),
return iter([ ], attributes={'id': 1}
DatasetItem(id=1, image=np.zeros((5, 10, 3)), ),
annotations=[ ], categories=[str(i) for i in range(10)])
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
test_dir, target_dataset=DstExtractor()) test_dir,
target_dataset=target_dataset)
def test_can_save_and_load_images(self): def test_can_save_and_load_images(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', attributes={'id': 1}),
return iter([ DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=2, subset='val', attributes={'id': 2}), DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}), DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}), DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=5, subset='test', attributes={'id': 1}), DatasetItem(id=5, subset='test', attributes={'id': 1}),
]) ])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir) CocoImageInfoConverter.convert, test_dir)
def test_can_save_and_load_labels(self): def test_can_save_and_load_labels(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ annotations=[
DatasetItem(id=1, subset='train', Label(4, id=1, group=1),
annotations=[ Label(9, id=2, group=2),
Label(4, id=1, group=1), ], attributes={'id': 1}),
Label(9, id=2, group=2), ], categories=[str(i) for i in range(10)])
], attributes={'id': 1}
),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoLabelsConverter.convert, test_dir) CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self): def test_can_save_and_load_keypoints(self):
label_categories = LabelCategories()
points_categories = PointsCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
label_categories.add(str(i)) annotations=[
points_categories.add(i, joints=[[0, 1], [1, 2]]) # Full instance annotations: polygon + keypoints
categories = { Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
AnnotationType.label: label_categories, label=3, group=1, id=1),
AnnotationType.points: points_categories, Polygon([0, 0, 4, 0, 4, 4],
} label=3, group=1, id=1),
class TestExtractor(Extractor): # Full instance annotations: bbox + keypoints
def __iter__(self): Points([1, 2, 3, 4, 2, 3], group=2, id=2),
return iter([ Bbox(1, 2, 2, 2, group=2, id=2),
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[ # Solitary keypoints
# Full instance annotations: polygon + keypoints Points([1, 2, 0, 2, 4, 1], label=5, id=3),
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1), # Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
label=3, group=1, id=1),
# Solitary keypoints with no label
# Full instance annotations: bbox + keypoints Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
Points([1, 2, 3, 4, 2, 3], group=2, id=2), ]),
Bbox(1, 2, 2, 2, group=2, id=2), ], categories={
AnnotationType.label: LabelCategories.from_iterable(
# Solitary keypoints str(i) for i in range(10)),
Points([1, 2, 0, 2, 4, 1], label=5, id=3), AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
# Some other solitary annotations (bug #1387) ),
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), })
# Solitary keypoints with no label target_dataset = Dataset.from_iterable([
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
]) annotations=[
]) Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
def categories(self): attributes={'is_crowd': False}),
return categories Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
class DstTestExtractor(TestExtractor): attributes={'is_crowd': False}),
def __iter__(self):
return iter([ Points([1, 2, 3, 4, 2, 3],
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), group=2, id=2,
annotations=[ attributes={'is_crowd': False}),
Points([0, 0, 0, 2, 4, 1], [0, 1, 2], Polygon([1, 2, 3, 2, 3, 4, 1, 4],
label=3, group=1, id=1, group=2, id=2,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1, Points([1, 2, 0, 2, 4, 1],
attributes={'is_crowd': False}), label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3], Polygon([0, 1, 4, 1, 4, 2, 0, 2],
group=2, id=2, label=5, group=3, id=3,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2, Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
attributes={'is_crowd': False}), group=5, id=5,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1], Polygon([1, 2, 3, 2, 3, 4, 1, 4],
label=5, group=3, id=3, group=5, id=5,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2], ], attributes={'id': 1}),
label=5, group=3, id=3, ], categories={
attributes={'is_crowd': False}), AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], AnnotationType.points: PointsCategories.from_iterable(
group=5, id=5, (i, None, [[0, 1], [1, 2]]) for i in range(10)
attributes={'is_crowd': False}), ),
Polygon([1, 2, 3, 2, 3, 4, 1, 4], })
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(source_dataset,
CocoPersonKeypointsConverter.convert, test_dir, CocoPersonKeypointsConverter.convert, test_dir,
target_dataset=DstTestExtractor()) target_dataset=target_dataset)
def test_can_save_dataset_with_no_subsets(self): def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, attributes={'id': 1}),
return iter([ DatasetItem(id=2, attributes={'id': 2}),
DatasetItem(id=1, attributes={'id': 1}), ])
DatasetItem(id=2, attributes={'id': 2}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
CocoConverter.convert, test_dir) CocoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
return iter([ attributes={'id': 1}),
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), ])
attributes={'id': 1}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir) CocoImageInfoConverter.convert, test_dir)
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3)),
return iter([ attributes={'id': 1}),
DatasetItem(id='1', image=np.ones((4, 2, 3)), DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 1}), attributes={'id': 2}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 2}), attributes={'id': 3}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), ])
attributes={'id': 3}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
test_dir)
def test_preserve_coco_ids(self): def test_preserve_coco_ids(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
return iter([ attributes={'id': 40}),
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), ])
attributes={'id': 40}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
test_dir)
def test_annotation_attributes(self): def test_annotation_attributes(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
return iter([ Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, ], attributes={'id': 1})
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), ], categories=[str(i) for i in range(10)])
], attributes={'id': 1})
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return { AnnotationType.label: label_categories, }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoConverter.convert, test_dir) CocoConverter.convert, test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Points, Polygon, PolyLine, Bbox, Label, AnnotationType, Points, Polygon, PolyLine, Bbox, Label,
LabelCategories, LabelCategories,
@ -28,121 +28,115 @@ class CvatImporterTest(TestCase):
self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR)) self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR))
def test_can_load_image(self): def test_can_load_image(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='img0', subset='train',
return iter([ image=np.ones((8, 8, 3)),
DatasetItem(id='img0', subset='train', annotations=[
image=np.ones((8, 8, 3)), Bbox(0, 2, 4, 2, label=0, z_order=1,
annotations=[ attributes={
Bbox(0, 2, 4, 2, label=0, z_order=1, 'occluded': True,
attributes={ 'a1': True, 'a2': 'v3'
'occluded': True, }),
'a1': True, 'a2': 'v3' PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
}), attributes={'occluded': False}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], ], attributes={'frame': 0}),
attributes={'occluded': False}), DatasetItem(id='img1', subset='train',
], attributes={'frame': 0}), image=np.ones((10, 10, 3)),
DatasetItem(id='img1', subset='train', annotations=[
image=np.ones((10, 10, 3)), Polygon([1, 2, 3, 4, 6, 5], z_order=1,
annotations=[ attributes={'occluded': False}),
Polygon([1, 2, 3, 4, 6, 5], z_order=1, Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}), attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2, ], attributes={'frame': 1}),
attributes={'occluded': False}), ], categories={
], attributes={'frame': 1}), AnnotationType.label: LabelCategories.from_iterable([
]) ['label1', '', {'a1', 'a2'}],
['label2'],
def categories(self): ])
label_categories = LabelCategories() })
label_categories.add('label1', attributes={'a1', 'a2'})
label_categories.add('label2')
return { AnnotationType.label: label_categories }
parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset() parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset) compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_load_video(self): def test_can_load_video(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='frame_000010', subset='annotations',
return iter([ image=np.ones((20, 25, 3)),
DatasetItem(id='frame_000010', subset='annotations', annotations=[
image=np.ones((20, 25, 3)), Bbox(3, 4, 7, 1, label=2,
annotations=[ id=0,
Bbox(3, 4, 7, 1, label=2, attributes={
id=0, 'occluded': True,
attributes={ 'outside': False, 'keyframe': True,
'occluded': True, 'track_id': 0
'outside': False, 'keyframe': True, }),
'track_id': 0 Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
}), label=0,
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], id=1,
label=0, attributes={
id=1, 'occluded': False,
attributes={ 'outside': False, 'keyframe': True,
'occluded': False, 'track_id': 1, 'hgl': 'hgkf',
'outside': False, 'keyframe': True, }),
'track_id': 1, 'hgl': 'hgkf', ], attributes={'frame': 10}),
}), DatasetItem(id='frame_000013', subset='annotations',
], attributes={'frame': 10}), image=np.ones((20, 25, 3)),
DatasetItem(id='frame_000013', subset='annotations', annotations=[
image=np.ones((20, 25, 3)), Bbox(7, 6, 7, 2, label=2,
annotations=[ id=0,
Bbox(7, 6, 7, 2, label=2, attributes={
id=0, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 0
'outside': True, 'keyframe': True, }),
'track_id': 0 Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
}), label=0,
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], id=1,
label=0, attributes={
id=1, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 1, 'hgl': 'jk',
'outside': True, 'keyframe': True, }),
'track_id': 1, 'hgl': 'jk', PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
}), label=2,
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], id=2,
label=2, attributes={
id=2, 'occluded': False,
attributes={ 'outside': False, 'keyframe': True,
'occluded': False, 'track_id': 2,
'outside': False, 'keyframe': True, }),
'track_id': 2, ], attributes={'frame': 13}),
}), DatasetItem(id='frame_000016', subset='annotations',
], attributes={'frame': 13}), image=Image(path='frame_0000016.png', size=(20, 25)),
DatasetItem(id='frame_000016', subset='annotations', annotations=[
image=Image(path='frame_0000016.png', size=(20, 25)), Bbox(8, 7, 6, 10, label=2,
annotations=[ id=0,
Bbox(8, 7, 6, 10, label=2, attributes={
id=0, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 0
'outside': True, 'keyframe': True, }),
'track_id': 0 PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
}), label=2,
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], id=2,
label=2, attributes={
id=2, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 2,
'outside': True, 'keyframe': True, }),
'track_id': 2, ], attributes={'frame': 16}),
}), ], categories={
], attributes={'frame': 16}), AnnotationType.label: LabelCategories.from_iterable([
]) ['klhg', '', {'hgl'}],
['z U k'],
def categories(self): ['II']
label_categories = LabelCategories() ]),
label_categories.add('klhg', attributes={'hgl'}) })
label_categories.add('z U k')
label_categories.add('II')
return { AnnotationType.label: label_categories }
parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset() parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset) compare_datasets(self, expected_dataset, parsed_dataset)
class CvatConverterTest(TestCase): class CvatConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir, def _test_save_and_load(self, source_dataset, converter, test_dir,
@ -165,137 +159,120 @@ class CvatConverterTest(TestCase):
label_categories.items[2].attributes.update(['a1', 'a2']) label_categories.items[2].attributes.update(['a1', 'a2'])
label_categories.attributes.update(['occluded']) label_categories.attributes.update(['occluded'])
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4],
annotations=[ label=1, group=4,
Polygon([0, 0, 4, 0, 4, 4], attributes={ 'occluded': True }),
label=1, group=4, Points([1, 1, 3, 2, 2, 3],
attributes={ 'occluded': True }), label=2,
Points([1, 1, 3, 2, 2, 3], attributes={ 'a1': 'x', 'a2': 42,
label=2, 'unknown': 'bar' }),
attributes={ 'a1': 'x', 'a2': 42, Label(1),
'unknown': 'bar' }), Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
Label(1), ]
Label(2, attributes={ 'a1': 'y', 'a2': 44 }), ),
] DatasetItem(id=1, subset='s1',
), annotations=[
DatasetItem(id=1, subset='s1', PolyLine([0, 0, 4, 0, 4, 4],
annotations=[ label=3, id=4, group=4),
PolyLine([0, 0, 4, 0, 4, 4], Bbox(5, 0, 1, 9,
label=3, id=4, group=4), label=3, id=4, group=4),
Bbox(5, 0, 1, 9, ]
label=3, id=4, group=4), ),
]
), DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4], z_order=1,
annotations=[ label=3, group=4,
Polygon([0, 0, 4, 0, 4, 4], z_order=1, attributes={ 'occluded': False }),
label=3, group=4, PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
attributes={ 'occluded': False }), ]
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label ),
]
), DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
DatasetItem(id=3, subset='s3', image=Image( ], categories={
path='3.jpg', size=(2, 4))), AnnotationType.label: label_categories,
]) })
def categories(self): target_dataset = Dataset.from_iterable([
return { AnnotationType.label: label_categories } DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
class DstExtractor(Extractor): Polygon([0, 0, 4, 0, 4, 4],
def __iter__(self): label=1, group=4,
return iter([ attributes={ 'occluded': True }),
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), Points([1, 1, 3, 2, 2, 3],
annotations=[ label=2,
Polygon([0, 0, 4, 0, 4, 4], attributes={ 'occluded': False,
label=1, group=4, 'a1': 'x', 'a2': 42 }),
attributes={ 'occluded': True }), Label(1),
Points([1, 1, 3, 2, 2, 3], Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
label=2, ], attributes={'frame': 0}
attributes={ 'occluded': False, ),
'a1': 'x', 'a2': 42 }), DatasetItem(id=1, subset='s1',
Label(1), annotations=[
Label(2, attributes={ 'a1': 'y', 'a2': 44 }), PolyLine([0, 0, 4, 0, 4, 4],
], attributes={'frame': 0} label=3, group=4,
), attributes={ 'occluded': False }),
DatasetItem(id=1, subset='s1', Bbox(5, 0, 1, 9,
annotations=[ label=3, group=4,
PolyLine([0, 0, 4, 0, 4, 4], attributes={ 'occluded': False }),
label=3, group=4, ], attributes={'frame': 1}
attributes={ 'occluded': False }), ),
Bbox(5, 0, 1, 9,
label=3, group=4, DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
attributes={ 'occluded': False }), annotations=[
], attributes={'frame': 1} Polygon([0, 0, 4, 0, 4, 4], z_order=1,
), label=3, group=4,
attributes={ 'occluded': False }),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), ], attributes={'frame': 0}
annotations=[ ),
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4, DatasetItem(id=3, subset='s3', image=Image(
attributes={ 'occluded': False }), path='3.jpg', size=(2, 4)),
], attributes={'frame': 0} attributes={'frame': 0}),
), ], categories={
AnnotationType.label: label_categories,
DatasetItem(id=3, subset='s3', image=Image( })
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir, partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_relative_paths(self): def test_relative_paths(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3))),
return iter([ DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='1', image=np.ones((4, 2, 3))), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), ], categories={ AnnotationType.label: LabelCategories() })
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
]) target_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
def categories(self): attributes={'frame': 0}),
return { AnnotationType.label: LabelCategories() } DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
class DstExtractor(Extractor): DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
def __iter__(self): attributes={'frame': 2}),
return iter([ ], categories={
DatasetItem(id='1', image=np.ones((4, 2, 3)), AnnotationType.label: LabelCategories()
attributes={'frame': 0}), })
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir, partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_preserve_frame_ids(self): def test_preserve_frame_ids(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
return iter([ attributes={'frame': 40}),
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), ], categories={
attributes={'frame': 40}), AnnotationType.label: LabelCategories()
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CvatConverter.convert, test_dir) CvatConverter.convert, test_dir)

@ -2,7 +2,7 @@ from functools import partial
import numpy as np import numpy as np
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.project import Project from datumaro.components.project import Project
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, AnnotationType, Label, Mask, Points, Polygon,
@ -32,82 +32,75 @@ class DatumaroConverterTest(TestCase):
compare_datasets_strict(self, compare_datasets_strict(self,
expected=target_dataset, actual=parsed_dataset) expected=target_dataset, actual=parsed_dataset)
class TestExtractor(Extractor): label_categories = LabelCategories()
def __iter__(self): for i in range(5):
return iter([ label_categories.add('cat' + str(i))
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[ mask_categories = MaskCategories(
Caption('hello', id=1), generate_colormap(len(label_categories.items)))
Caption('world', id=2, group=5),
Label(2, id=3, attributes={ points_categories = PointsCategories()
'x': 1, for index, _ in enumerate(label_categories.items):
'y': '2', points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ test_dataset = Dataset.from_iterable([
'score': 1.0, DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
}), annotations=[
Bbox(5, 6, 7, 8, id=5, group=5), Caption('hello', id=1),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), Caption('world', id=2, group=5),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), Label(2, id=3, attributes={
]), 'x': 1,
DatasetItem(id=21, subset='train', 'y': '2',
annotations=[ }),
Caption('test'), Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
Label(2), 'score': 1.0,
Bbox(1, 2, 3, 4, 5, id=42, group=42) }),
]), Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
DatasetItem(id=2, subset='val', Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
annotations=[ ]),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), DatasetItem(id=21, subset='train',
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), annotations=[
]), Caption('test'),
Label(2),
DatasetItem(id=42, subset='test', Bbox(1, 2, 3, 4, 5, id=42, group=42)
attributes={'a1': 5, 'a2': '42'}), ]),
DatasetItem(id=42), DatasetItem(id=2, subset='val',
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), annotations=[
]) PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
def categories(self): ]),
label_categories = LabelCategories()
for i in range(5): DatasetItem(id=42, subset='test',
label_categories.add('cat' + str(i)) attributes={'a1': 5, 'a2': '42'}),
mask_categories = MaskCategories( DatasetItem(id=42),
generate_colormap(len(label_categories.items))) DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
points_categories = PointsCategories() AnnotationType.label: label_categories,
for index, _ in enumerate(label_categories.items): AnnotationType.mask: mask_categories,
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) AnnotationType.points: points_categories,
})
return {
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
}
def test_can_save_and_load(self): def test_can_save_and_load(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(self.TestExtractor(), self._test_save_and_load(self.test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir) partial(DatumaroConverter.convert, save_images=True), test_dir)
def test_can_detect(self): def test_can_detect(self):
with TestDir() as test_dir: with TestDir() as test_dir:
DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir) DatumaroConverter.convert(self.test_dataset, save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir)) self.assertTrue(DatumaroImporter.detect(test_dir))
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3))),
return iter([ DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='1', image=np.ones((4, 2, 3))), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), ])
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir) partial(DatumaroConverter.convert, save_images=True), test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, Polygon, LabelCategories AnnotationType, Bbox, Mask, Polygon, LabelCategories
) )
@ -29,101 +29,84 @@ class LabelMeConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load(self): def test_can_save_and_load(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, group=2),
annotations=[ Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
Bbox(0, 4, 4, 8, label=2, group=2), 'occluded': True,
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ 'a1': 'qwe',
'occluded': True, 'a2': True,
'a1': 'qwe', 'a3': 123,
'a2': True, }),
'a3': 123, Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
}), attributes={ 'username': 'test' }),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, Bbox(1, 2, 3, 4, group=3),
attributes={ 'username': 'test' }), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
Bbox(1, 2, 3, 4, group=3), attributes={ 'occluded': True }
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
]
), ),
]) ]
),
def categories(self): ], categories={
label_cat = LabelCategories() AnnotationType.label: LabelCategories.from_iterable(
for label in range(10): 'label_' + str(label) for label in range(10)),
label_cat.add('label_' + str(label)) })
return {
AnnotationType.label: label_cat, target_dataset = Dataset.from_iterable([
} DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
class DstExtractor(Extractor): annotations=[
def __iter__(self): Bbox(0, 4, 4, 8, label=0, group=2, id=0,
return iter([ attributes={
DatasetItem(id=1, subset='train', 'occluded': False, 'username': '',
image=np.ones((16, 16, 3)), }
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
), ),
]) Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
def categories(self): 'occluded': True, 'username': '',
label_cat = LabelCategories() 'a1': 'qwe',
label_cat.add('label_2') 'a2': True,
label_cat.add('label_3') 'a3': 123,
return { }
AnnotationType.label: label_cat, ),
} Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'label_2', 'label_3']),
})
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(
source_dataset,
partial(LabelMeConverter.convert, save_images=True), partial(LabelMeConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor()) test_dir, target_dataset=target_dataset)
def test_cant_save_dataset_with_relative_paths(self): def test_cant_save_dataset_with_relative_paths(self):
class SrcExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
return iter([ ], categories={
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with self.assertRaisesRegex(Exception, r'only supports flat'): with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(expected_dataset,
partial(LabelMeConverter.convert, save_images=True), LabelMeConverter.convert, test_dir)
test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
@ -133,101 +116,91 @@ class LabelMeImporterTest(TestCase):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): img1 = np.ones((77, 102, 3)) * 255
def __iter__(self): img1[6:32, 7:41] = 0
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0 mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1 mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
mask2 = np.zeros((77, 102), dtype=int) [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
mask2[13:25, 54:71] = [ [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], ]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
] target_dataset = Dataset.from_iterable([
DatasetItem(id='img1', image=img1,
return iter([ annotations=[
DatasetItem(id='img1', image=img1, Polygon([43, 34, 45, 34, 45, 37, 43, 37],
annotations=[ label=0, id=0,
Polygon([43, 34, 45, 34, 45, 37, 43, 37], attributes={
label=0, id=0, 'occluded': False,
attributes={ 'username': 'admin'
'occluded': False, }
'username': 'admin' ),
} Mask(mask1, label=1, id=1,
), attributes={
Mask(mask1, label=1, id=1, 'occluded': False,
attributes={ 'username': 'brussell'
'occluded': False, }
'username': 'brussell' ),
} Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
), label=2, group=2, id=2,
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], attributes={
label=2, group=2, id=2, 'a1': True,
attributes={ 'occluded': True,
'a1': True, 'username': 'anonymous'
'occluded': True, }
'username': 'anonymous' ),
} Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
), label=3, group=2, id=3,
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], attributes={
label=3, group=2, id=3, 'kj': True,
attributes={ 'occluded': False,
'kj': True, 'username': 'anonymous'
'occluded': False, }
'username': 'anonymous'
}
),
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
), ),
]) Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
def categories(self): 'hg': True,
label_cat = LabelCategories() 'occluded': True,
label_cat.add('window') 'username': 'anonymous'
label_cat.add('license plate') }
label_cat.add('o1') ),
label_cat.add('q1') Mask(mask2, label=5, group=1, id=5,
label_cat.add('b1') attributes={
label_cat.add('m1') 'd': True,
label_cat.add('hg') 'occluded': False,
return { 'username': 'anonymous'
AnnotationType.label: label_cat, }
} ),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'window', 'license plate', 'o1',
'q1', 'b1', 'm1', 'hg',
]),
})
parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \ parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
.make_dataset() .make_dataset()
compare_datasets(self, expected=DstExtractor(), actual=parsed) compare_datasets(self, expected=target_dataset, actual=parsed)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories AnnotationType, Bbox, LabelCategories
) )
@ -28,96 +28,83 @@ class MotConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self): def test_can_save_bboxes(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, attributes={
annotations=[ 'occluded': True,
Bbox(0, 4, 4, 8, label=2, attributes={ }),
'occluded': True, Bbox(0, 4, 4, 4, label=3, attributes={
}), 'visibility': 0.4,
Bbox(0, 4, 4, 4, label=3, attributes={ }),
'visibility': 0.4, Bbox(2, 4, 4, 4, attributes={
}), 'ignored': True
Bbox(2, 4, 4, 4, attributes={ }),
'ignored': True ]
}), ),
]
), DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
DatasetItem(id=2, subset='val', annotations=[
image=np.ones((8, 8, 3)), Bbox(1, 2, 4, 2, label=3),
annotations=[ ]
Bbox(1, 2, 4, 2, label=3), ),
]
), DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
DatasetItem(id=3, subset='test', ),
image=np.ones((5, 4, 3)) * 3, ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories() target_dataset = Dataset.from_iterable([
for label in range(10): DatasetItem(id=1,
label_cat.add('label_' + str(label)) image=np.ones((16, 16, 3)),
return { annotations=[
AnnotationType.label: label_cat, Bbox(0, 4, 4, 8, label=2, attributes={
} 'occluded': True,
'visibility': 0.0,
class DstExtractor(Extractor): 'ignored': False,
def __iter__(self): }),
return iter([ Bbox(0, 4, 4, 4, label=3, attributes={
DatasetItem(id=1, 'occluded': False,
image=np.ones((16, 16, 3)), 'visibility': 0.4,
annotations=[ 'ignored': False,
Bbox(0, 4, 4, 8, label=2, attributes={ }),
'occluded': True, Bbox(2, 4, 4, 4, attributes={
'visibility': 0.0, 'occluded': False,
'ignored': False, 'visibility': 1.0,
}), 'ignored': True,
Bbox(0, 4, 4, 4, label=3, attributes={ }),
'occluded': False, ]
'visibility': 0.4, ),
'ignored': False,
}), DatasetItem(id=2,
Bbox(2, 4, 4, 4, attributes={ image=np.ones((8, 8, 3)),
'occluded': False, annotations=[
'visibility': 1.0, Bbox(1, 2, 4, 2, label=3, attributes={
'ignored': True, 'occluded': False,
}), 'visibility': 1.0,
] 'ignored': False,
), }),
]
DatasetItem(id=2, ),
image=np.ones((8, 8, 3)),
annotations=[ DatasetItem(id=3,
Bbox(1, 2, 4, 2, label=3, attributes={ image=np.ones((5, 4, 3)) * 3,
'occluded': False, ),
'visibility': 1.0, ], categories={
'ignored': False, AnnotationType.label: LabelCategories.from_iterable(
}), 'label_' + str(label) for label in range(10)),
] })
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(
source_dataset,
partial(MotSeqGtConverter.convert, save_images=True), partial(MotSeqGtConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor()) test_dir, target_dataset=target_dataset)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset') DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
@ -127,30 +114,23 @@ class MotImporterTest(TestCase):
self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1,
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, attributes={
annotations=[ 'occluded': False,
Bbox(0, 4, 4, 8, label=2, attributes={ 'visibility': 1.0,
'occluded': False, 'ignored': False,
'visibility': 1.0, }),
'ignored': False, ]
}), ),
] ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase, skipIf from unittest import TestCase, skipIf
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories AnnotationType, Bbox, Mask, LabelCategories
) )
@ -48,117 +48,96 @@ class TfrecordConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self): def test_can_save_bboxes(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2),
annotations=[ Bbox(0, 4, 4, 4, label=3),
Bbox(0, 4, 4, 8, label=2), Bbox(2, 4, 4, 4),
Bbox(0, 4, 4, 4, label=3), ], attributes={'source_id': ''}
Bbox(2, 4, 4, 4), ),
], attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True), partial(TfDetectionApiConverter.convert, save_images=True),
test_dir) test_dir)
def test_can_save_masks(self): def test_can_save_masks(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), Mask(image=np.array([
annotations=[ [1, 0, 0, 1],
Mask(image=np.array([ [0, 1, 1, 0],
[1, 0, 0, 1], [0, 1, 1, 0],
[0, 1, 1, 0], [1, 0, 0, 1],
[0, 1, 1, 0], ]), label=1),
[1, 0, 0, 1], ],
]), label=1), attributes={'source_id': ''}
], ),
attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_masks=True), partial(TfDetectionApiConverter.convert, save_masks=True),
test_dir) test_dir)
def test_can_save_dataset_with_no_subsets(self): def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1,
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, annotations=[
image=np.ones((16, 16, 3)), Bbox(2, 1, 4, 4, label=2),
annotations=[ Bbox(4, 2, 8, 4, label=3),
Bbox(2, 1, 4, 4, label=2), ],
Bbox(4, 2, 8, 4, label=3), attributes={'source_id': ''}
], ),
attributes={'source_id': ''}
), DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
DatasetItem(id=2, annotations=[
image=np.ones((8, 8, 3)) * 2, Bbox(4, 4, 4, 4, label=3),
annotations=[ ],
Bbox(4, 4, 4, 4, label=3), attributes={'source_id': ''}
], ),
attributes={'source_id': ''}
), DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
DatasetItem(id=3, attributes={'source_id': ''}
image=np.ones((8, 4, 3)) * 3, ),
attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True), partial(TfDetectionApiConverter.convert, save_images=True),
test_dir) test_dir)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1/q.e',
return iter([ image=Image(path='1/q.e', size=(10, 15)),
DatasetItem(id='1/q.e', attributes={'source_id': ''}
image=Image(path='1/q.e', size=(10, 15)), )
attributes={'source_id': ''} ], categories={
) AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
TfDetectionApiConverter.convert, test_dir) TfDetectionApiConverter.convert, test_dir)
def test_labelmap_parsing(self): def test_labelmap_parsing(self):
@ -197,42 +176,35 @@ class TfrecordImporterTest(TestCase):
self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2),
annotations=[ Bbox(0, 4, 4, 4, label=3),
Bbox(0, 4, 4, 8, label=2), Bbox(2, 4, 4, 4),
Bbox(0, 4, 4, 4, label=3), ],
Bbox(2, 4, 4, 4), attributes={'source_id': '1'}
], ),
attributes={'source_id': '1'}
), DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
DatasetItem(id=2, subset='val', annotations=[
image=np.ones((8, 8, 3)), Bbox(1, 2, 4, 2, label=3),
annotations=[ ],
Bbox(1, 2, 4, 2, label=3), attributes={'source_id': '2'}
], ),
attributes={'source_id': '2'}
), DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
DatasetItem(id=3, subset='test', attributes={'source_id': '3'}
image=np.ones((5, 4, 3)) * 3, ),
attributes={'source_id': '3'} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, target_dataset, dataset)

@ -2,7 +2,7 @@ import logging as log
import numpy as np import numpy as np
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
Mask, Polygon, PolyLine, Points, Bbox, Label, Mask, Polygon, PolyLine, Points, Bbox, Label,
LabelCategories, MaskCategories, AnnotationType LabelCategories, MaskCategories, AnnotationType
@ -67,304 +67,269 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, DstExtractor(), actual)
def test_mask_to_polygons_small_polygons_message(self): def test_mask_to_polygons_small_polygons_message(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
items = [ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Mask(np.array([
annotations=[ [0, 0, 0],
Mask(np.array([ [0, 1, 0],
[0, 0, 0], [0, 0, 0],
[0, 1, 0], ]),
[0, 0, 0],
]),
),
]
), ),
] ]
return iter(items) ),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
with self.assertLogs(level=log.DEBUG) as logs: with self.assertLogs(level=log.DEBUG) as logs:
actual = transforms.MasksToPolygons(SrcExtractor()) actual = transforms.MasksToPolygons(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
self.assertRegex('\n'.join(logs.output), 'too small polygons') self.assertRegex('\n'.join(logs.output), 'too small polygons')
def test_polygons_to_masks(self): def test_polygons_to_masks(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4]),
annotations=[ Polygon([5, 0, 9, 0, 5, 5]),
Polygon([0, 0, 4, 0, 4, 4]), ]
Polygon([5, 0, 9, 0, 5, 5]), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Mask(np.array([
annotations=[ [0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
Mask(np.array([ [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0], [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0], ]),
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
), ),
]) Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
),
])
actual = transforms.PolygonsToMasks(SrcExtractor()) actual = transforms.PolygonsToMasks(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_crop_covered_segments(self): def test_crop_covered_segments(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), # The mask is partially covered by the polygon
annotations=[ Mask(np.array([
# The mask is partially covered by the polygon [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 0, 0],
[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]],
[1, 1, 1, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0), z_order=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], ]
z_order=1), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0), z_order=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], ]
z_order=1), ),
] ])
),
])
actual = transforms.CropCoveredSegments(SrcExtractor()) actual = transforms.CropCoveredSegments(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_merge_instance_segments(self): def test_merge_instance_segments(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0, group=1),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0, group=1), z_order=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1, group=1), z_order=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0], ]
z_order=1), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 1, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 1, 1],
[0, 1, 1, 1, 1], [1, 1, 1, 1, 0],
[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]],
[1, 1, 1, 1, 0], ),
[1, 1, 1, 0, 0]], z_order=0, group=1),
), Mask(np.array([
z_order=0, group=1), [1, 1, 0, 0, 0],
Mask(np.array([ [1, 1, 0, 0, 0],
[1, 1, 0, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 0, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=1),
), ]
z_order=1), ),
] ])
),
])
actual = transforms.MergeInstanceSegments(SrcExtractor(), actual = transforms.MergeInstanceSegments(source_dataset,
include_polygons=True) include_polygons=True)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_map_subsets(self): def test_map_subsets(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='a'),
return iter([ DatasetItem(id=2, subset='b'),
DatasetItem(id=1, subset='a'), DatasetItem(id=3, subset='c'),
DatasetItem(id=2, subset='b'), ])
DatasetItem(id=3, subset='c'),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset=''),
return iter([ DatasetItem(id=2, subset='a'),
DatasetItem(id=1, subset=''), DatasetItem(id=3, subset='c'),
DatasetItem(id=2, subset='a'), ])
DatasetItem(id=3, subset='c'),
])
actual = transforms.MapSubsets(SrcExtractor(), actual = transforms.MapSubsets(source_dataset,
{ 'a': '', 'b': 'a' }) { 'a': '', 'b': 'a' })
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_shapes_to_boxes(self): def test_shapes_to_boxes(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ), id=1),
[1, 1, 1, 0, 0]], Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
), id=1), PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2), Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), ]
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Bbox(0, 0, 4, 4, id=1),
annotations=[ Bbox(1, 1, 3, 3, id=2),
Bbox(0, 0, 4, 4, id=1), Bbox(1, 1, 1, 1, id=3),
Bbox(1, 1, 3, 3, id=2), Bbox(2, 2, 2, 2, id=4),
Bbox(1, 1, 1, 1, id=3), ]
Bbox(2, 2, 2, 2, id=4), ),
] ])
),
])
actual = transforms.ShapesToBoxes(SrcExtractor()) actual = transforms.ShapesToBoxes(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_id_from_image(self): def test_id_from_image(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image='path.jpg'),
return iter([ DatasetItem(id=2),
DatasetItem(id=1, image='path.jpg'), ])
DatasetItem(id=2), target_dataset = Dataset.from_iterable([
]) DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
class DstExtractor(Extractor): ])
def __iter__(self):
return iter([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
actual = transforms.IdFromImageName(SrcExtractor()) actual = transforms.IdFromImageName(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_boxes_to_masks(self): def test_boxes_to_masks(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Bbox(0, 0, 3, 3, z_order=1),
annotations=[ Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 0, 3, 3, z_order=1), Bbox(0, 2, 3, 1, z_order=3),
Bbox(0, 0, 3, 1, z_order=2), ]
Bbox(0, 2, 3, 1, z_order=3), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [1, 1, 1, 0, 0],
Mask(np.array([ [1, 1, 1, 0, 0],
[1, 1, 1, 0, 0], [1, 1, 1, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=1),
), Mask(np.array([
z_order=1), [1, 1, 1, 0, 0],
Mask(np.array([ [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=2),
), Mask(np.array([
z_order=2), [0, 0, 0, 0, 0],
Mask(np.array([ [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [1, 1, 1, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=3),
), ]
z_order=3), ),
] ])
),
])
actual = transforms.BoxesToMasks(SrcExtractor()) actual = transforms.BoxesToMasks(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_random_split(self): def test_random_split(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset="a"),
return iter([ DatasetItem(id=2, subset="a"),
DatasetItem(id=1, subset="a"), DatasetItem(id=3, subset="b"),
DatasetItem(id=2, subset="a"), DatasetItem(id=4, subset="b"),
DatasetItem(id=3, subset="b"), DatasetItem(id=5, subset="b"),
DatasetItem(id=4, subset="b"), DatasetItem(id=6, subset=""),
DatasetItem(id=5, subset="b"), DatasetItem(id=7, subset=""),
DatasetItem(id=6, subset=""), ])
DatasetItem(id=7, subset=""),
])
actual = transforms.RandomSplit(SrcExtractor(), splits=[ actual = transforms.RandomSplit(source_dataset, splits=[
('train', 4.0 / 7.0), ('train', 4.0 / 7.0),
('test', 3.0 / 7.0), ('test', 3.0 / 7.0),
]) ])
@ -373,21 +338,19 @@ class TransformsTest(TestCase):
self.assertEqual(3, len(actual.get_subset('test'))) self.assertEqual(3, len(actual.get_subset('test')))
def test_random_split_gives_error_on_wrong_ratios(self): def test_random_split_gives_error_on_wrong_ratios(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([DatasetItem(id=1)])
def __iter__(self):
return iter([DatasetItem(id=1)])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[ transforms.RandomSplit(source_dataset, splits=[
('train', 0.5), ('train', 0.5),
('test', 0.7), ('test', 0.7),
]) ])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[]) transforms.RandomSplit(source_dataset, splits=[])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[ transforms.RandomSplit(source_dataset, splits=[
('train', -0.5), ('train', -0.5),
('test', 1.5), ('test', 1.5),
]) ])
@ -462,24 +425,19 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, DstExtractor(), actual)
def test_remap_labels_delete_unspecified(self): def test_remap_labels_delete_unspecified(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, annotations=[ Label(0) ])
return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ]) ], categories={
AnnotationType.label: LabelCategories.from_iterable('label0'),
def categories(self): })
label_cat = LabelCategories()
label_cat.add('label0') target_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[]),
return { AnnotationType.label: label_cat } ], categories={
AnnotationType.label: LabelCategories(),
class DstExtractor(Extractor): })
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[]) ]) actual = transforms.RemapLabels(source_dataset,
def categories(self):
return { AnnotationType.label: LabelCategories() }
actual = transforms.RemapLabels(SrcExtractor(),
mapping={}, default='delete') mapping={}, default='delete')
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)

@ -6,7 +6,7 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories, AnnotationType, Bbox, LabelCategories,
) )
from datumaro.components.project import Project from datumaro.components.project import Project, Dataset
from datumaro.plugins.yolo_format.importer import YoloImporter from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.image import Image, save_image from datumaro.util.image import Image, save_image
@ -15,40 +15,32 @@ from datumaro.util.test_utils import TestDir, compare_datasets
class YoloFormatTest(TestCase): class YoloFormatTest(TestCase):
def test_can_save_and_load(self): def test_can_save_and_load(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
return iter([ annotations=[
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(0, 1, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(0, 1, 2, 3, label=4), DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
]), annotations=[
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), Bbox(2, 1, 2, 3, label=4),
Bbox(3, 3, 2, 3, label=4), ]),
Bbox(2, 1, 2, 3, label=4),
]), DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), Bbox(0, 1, 5, 2, label=2),
annotations=[ Bbox(0, 2, 3, 2, label=5),
Bbox(0, 1, 5, 2, label=2), Bbox(0, 2, 4, 2, label=6),
Bbox(0, 2, 3, 2, label=5), Bbox(0, 7, 3, 2, label=7),
Bbox(0, 2, 4, 2, label=6), ]),
Bbox(0, 7, 3, 2, label=7), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, save_images=True) YoloConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = YoloImporter()(test_dir).make_dataset() parsed_dataset = YoloImporter()(test_dir).make_dataset()
@ -56,27 +48,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=Image(path='1.jpg', size=(10, 15)),
DatasetItem(id=1, subset='train', annotations=[
image=Image(path='1.jpg', size=(10, 15)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir) YoloConverter.convert(source_dataset, test_dir)
@ -87,27 +71,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_can_load_dataset_with_exact_image_info(self): def test_can_load_dataset_with_exact_image_info(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=Image(path='1.jpg', size=(10, 15)),
DatasetItem(id=1, subset='train', annotations=[
image=Image(path='1.jpg', size=(10, 15)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir) YoloConverter.convert(source_dataset, test_dir)
@ -117,24 +93,20 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', subset='train',
return iter([ image=np.ones((4, 2, 3))),
DatasetItem(id='1', subset='train', DatasetItem(id='subdir1/1', subset='train',
image=np.ones((4, 2, 3))), image=np.ones((2, 6, 3))),
DatasetItem(id='subdir1/1', subset='train', DatasetItem(id='subdir2/1', subset='train',
image=np.ones((2, 6, 3))), image=np.ones((5, 4, 3))),
DatasetItem(id='subdir2/1', subset='train', ], categories={
image=np.ones((5, 4, 3))), AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
for save_images in {True, False}: for save_images in {True, False}:
with self.subTest(save_images=save_images): with self.subTest(save_images=save_images):
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, YoloConverter.convert(source_dataset, test_dir,
save_images=save_images) save_images=save_images)
@ -150,26 +122,19 @@ class YoloImporterTest(TestCase):
self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((10, 15, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((10, 15, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)

Loading…
Cancel
Save