[Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
main
Ilya Kochankov 6 years ago committed by GitHub
parent e372589dc8
commit 7ecdcf182b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -72,6 +72,33 @@ class Categories:
class LabelCategories(Categories):
Category = namedtuple('Category', ['name', 'parent', 'attributes'])
@classmethod
def from_iterable(cls, iterable):
    """Create a LabelCategories object from an iterable.

    Args:
        iterable: one of:
            1) a single str - produces one category with that name;
            2) an iterable of str - interpreted as category names;
            3) an iterable of positional-argument lists - each list is
               passed to ``add()`` as ``(name, parent, attributes)``.

    Returns:
        LabelCategories: the populated categories object.
    """
    categories = cls()
    # Normalize the single-string shortcut to the general list form.
    entries = [[iterable]] if isinstance(iterable, str) else iterable
    for entry in entries:
        args = [entry] if isinstance(entry, str) else entry
        categories.add(*args)
    return categories
def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes)
@ -482,6 +509,31 @@ class Bbox(_Shape):
class PointsCategories(Categories):
Category = namedtuple('Category', ['labels', 'joints'])
@classmethod
def from_iterable(cls, iterable):
    """Create a PointsCategories object from an iterable.

    Args:
        iterable: one of:
            1) a single int - produces one category with that label;
            2) an iterable of int - interpreted as category labels;
            3) an iterable of positional-argument lists - each list is
               passed through to ``add()``.

    Returns:
        PointsCategories: the populated categories object.
    """
    categories = cls()
    # Normalize the single-int shortcut to the general list form.
    entries = [[iterable]] if isinstance(iterable, int) else iterable
    for entry in entries:
        args = [entry] if isinstance(entry, int) else entry
        categories.add(*args)
    return categories
def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes)

@ -18,7 +18,8 @@ import sys
from datumaro.components.config import Config, DEFAULT_FORMAT
from datumaro.components.config_model import (Model, Source,
PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
from datumaro.components.extractor import Extractor
from datumaro.components.extractor import Extractor, LabelCategories,\
AnnotationType
from datumaro.components.launcher import ModelTransform
from datumaro.components.dataset_filter import \
XPathDatasetFilter, XPathAnnotationsFilter
@ -319,6 +320,35 @@ class Subset(Extractor):
return self._parent.categories()
class Dataset(Extractor):
@classmethod
def from_iterable(cls, iterable, categories=None):
    """Create a Dataset from an iterable of DatasetItems.

    Args:
        iterable: an iterable of DatasetItem objects.
        categories (dict or list, optional): either a mapping of
            AnnotationType to the corresponding Categories object, or
            a list of label names, which is converted to
            LabelCategories. Defaults to an empty dict.

    Returns:
        Dataset: the resulting dataset.
    """
    if isinstance(categories, list):
        categories = {
            AnnotationType.label:
                LabelCategories.from_iterable(categories),
        }
    if not categories:
        categories = {}

    # Materialize the items once so the extractor can be iterated
    # repeatedly - a bare generator would be exhausted after the
    # first pass through from_extractors().
    items = list(iterable)

    class _TmpExtractor(Extractor):
        def __iter__(self):
            return iter(items)

        def categories(self):
            return categories

    return cls.from_extractors(_TmpExtractor())
@classmethod
def from_extractors(cls, *sources):
# merge categories

@ -4,7 +4,7 @@ import os.path as osp
from unittest import TestCase
from datumaro.components.project import Project
from datumaro.components.project import (Project, Dataset)
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories
@ -26,32 +26,26 @@ DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
class CocoImporterTest(TestCase):
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('TEST')
return { AnnotationType.label: label_cat }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
], categories=['TEST',])
dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)
def test_can_detect(self):
self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@ -71,526 +65,417 @@ class CocoConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load_captions(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
annotations=[
Caption('hello', id=1, group=1),
Caption('world', id=2, group=2),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train',
annotations=[
Caption('test', id=3, group=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val',
annotations=[
Caption('word', id=1, group=1),
], attributes={'id': 1}
),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
annotations=[
Caption('hello', id=1, group=1),
Caption('world', id=2, group=2),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train',
annotations=[
Caption('test', id=3, group=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val',
annotations=[
Caption('word', id=1, group=1),
], attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoCaptionsConverter.convert, test_dir)
def test_can_save_and_load_instances(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
categories = { AnnotationType.label: label_categories }
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + single polygon
Bbox(0, 1, 2, 2,
label=2, group=1, id=1,
attributes={ 'is_crowd': False }),
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Mask + bbox
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + mask
Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + single polygon
Bbox(0, 1, 2, 2,
label=2, group=1, id=1,
attributes={ 'is_crowd': False }),
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Mask + bbox
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + mask
Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_can_merge_polygons_on_loading(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
categories = { AnnotationType.label: label_categories }
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
])
def categories(self):
return categories
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
])
def categories(self):
return categories
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir,
importer_args={'merge_instance_polygons': True},
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_can_crop_covered_segments(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, z_order=1),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, z_order=1),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=DstTestExtractor())
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=target_dataset)
def test_can_convert_polygons_to_mask(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
test_dir, target_dataset=DstTestExtractor())
test_dir, target_dataset=target_dataset)
def test_can_convert_masks_to_polygons(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
label=3, id=4, group=4),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
test_dir, target_dataset=DstExtractor())
test_dir,
target_dataset=target_dataset)
def test_can_save_and_load_images(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=5, subset='test', attributes={'id': 1}),
])
DatasetItem(id=5, subset='test', attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir)
def test_can_save_and_load_labels(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
annotations=[
Label(4, id=1, group=1),
Label(9, id=2, group=2),
], attributes={'id': 1}
),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return {
AnnotationType.label: label_categories,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
annotations=[
Label(4, id=1, group=1),
Label(9, id=2, group=2),
], attributes={'id': 1}),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self):
label_categories = LabelCategories()
points_categories = PointsCategories()
for i in range(10):
label_categories.add(str(i))
points_categories.add(i, joints=[[0, 1], [1, 2]])
categories = {
AnnotationType.label: label_categories,
AnnotationType.points: points_categories,
}
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
# Full instance annotations: polygon + keypoints
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1),
# Full instance annotations: bbox + keypoints
Points([1, 2, 3, 4, 2, 3], group=2, id=2),
Bbox(1, 2, 2, 2, group=2, id=2),
# Solitary keypoints
Points([1, 2, 0, 2, 4, 1], label=5, id=3),
# Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
# Solitary keypoints with no label
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
])
])
def categories(self):
return categories
class DstTestExtractor(TestExtractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3],
group=2, id=2,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
group=5, id=5,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
# Full instance annotations: polygon + keypoints
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1),
# Full instance annotations: bbox + keypoints
Points([1, 2, 3, 4, 2, 3], group=2, id=2),
Bbox(1, 2, 2, 2, group=2, id=2),
# Solitary keypoints
Points([1, 2, 0, 2, 4, 1], label=5, id=3),
# Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
# Solitary keypoints with no label
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3],
group=2, id=2,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
group=5, id=5,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(source_dataset,
CocoPersonKeypointsConverter.convert, test_dir,
target_dataset=DstTestExtractor())
target_dataset=target_dataset)
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, attributes={'id': 1}),
DatasetItem(id=2, attributes={'id': 2}),
])
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, attributes={'id': 1}),
DatasetItem(id=2, attributes={'id': 2}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
CocoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
attributes={'id': 1}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'id': 1}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 2}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 3}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'id': 1}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 2}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 3}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
def test_preserve_coco_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'id': 40}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'id': 40}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
def test_annotation_attributes(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
], attributes={'id': 1})
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return { AnnotationType.label: label_categories, }
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoConverter.convert, test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Points, Polygon, PolyLine, Bbox, Label,
LabelCategories,
@ -28,121 +28,115 @@ class CvatImporterTest(TestCase):
self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR))
def test_can_load_image(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='img0', subset='train',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, z_order=1,
attributes={
'occluded': True,
'a1': True, 'a2': 'v3'
}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False}),
], attributes={'frame': 0}),
DatasetItem(id='img1', subset='train',
image=np.ones((10, 10, 3)),
annotations=[
Polygon([1, 2, 3, 4, 6, 5], z_order=1,
attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}),
], attributes={'frame': 1}),
])
def categories(self):
label_categories = LabelCategories()
label_categories.add('label1', attributes={'a1', 'a2'})
label_categories.add('label2')
return { AnnotationType.label: label_categories }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='img0', subset='train',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, z_order=1,
attributes={
'occluded': True,
'a1': True, 'a2': 'v3'
}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False}),
], attributes={'frame': 0}),
DatasetItem(id='img1', subset='train',
image=np.ones((10, 10, 3)),
annotations=[
Polygon([1, 2, 3, 4, 6, 5], z_order=1,
attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}),
], attributes={'frame': 1}),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
['label1', '', {'a1', 'a2'}],
['label2'],
])
})
parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset)
compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_load_video(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='frame_000010', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(3, 4, 7, 1, label=2,
id=0,
attributes={
'occluded': True,
'outside': False, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 1, 'hgl': 'hgkf',
}),
], attributes={'frame': 10}),
DatasetItem(id='frame_000013', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(7, 6, 7, 2, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 1, 'hgl': 'jk',
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 13}),
DatasetItem(id='frame_000016', subset='annotations',
image=Image(path='frame_0000016.png', size=(20, 25)),
annotations=[
Bbox(8, 7, 6, 10, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 16}),
])
def categories(self):
label_categories = LabelCategories()
label_categories.add('klhg', attributes={'hgl'})
label_categories.add('z U k')
label_categories.add('II')
return { AnnotationType.label: label_categories }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='frame_000010', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(3, 4, 7, 1, label=2,
id=0,
attributes={
'occluded': True,
'outside': False, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 1, 'hgl': 'hgkf',
}),
], attributes={'frame': 10}),
DatasetItem(id='frame_000013', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(7, 6, 7, 2, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 1, 'hgl': 'jk',
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 13}),
DatasetItem(id='frame_000016', subset='annotations',
image=Image(path='frame_0000016.png', size=(20, 25)),
annotations=[
Bbox(8, 7, 6, 10, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 16}),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
['klhg', '', {'hgl'}],
['z U k'],
['II']
]),
})
parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset)
compare_datasets(self, expected_dataset, parsed_dataset)
class CvatConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir,
@ -165,137 +159,120 @@ class CvatConverterTest(TestCase):
label_categories.items[2].attributes.update(['a1', 'a2'])
label_categories.attributes.update(['occluded'])
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'a1': 'x', 'a2': 42,
'unknown': 'bar' }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
]
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Bbox(5, 0, 1, 9,
label=3, id=4, group=4),
]
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
]
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'occluded': False,
'a1': 'x', 'a2': 42 }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
], attributes={'frame': 0}
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, group=4,
attributes={ 'occluded': False }),
Bbox(5, 0, 1, 9,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 1}
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 0}
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'a1': 'x', 'a2': 42,
'unknown': 'bar' }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
]
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Bbox(5, 0, 1, 9,
label=3, id=4, group=4),
]
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
]
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'occluded': False,
'a1': 'x', 'a2': 42 }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
], attributes={'frame': 0}
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, group=4,
attributes={ 'occluded': False }),
Bbox(5, 0, 1, 9,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 1}
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 0}
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
], categories={
AnnotationType.label: label_categories,
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'frame': 0}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
], categories={ AnnotationType.label: LabelCategories() })
target_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'frame': 0}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
], categories={
AnnotationType.label: LabelCategories()
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_preserve_frame_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'frame': 40}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'frame': 40}),
], categories={
AnnotationType.label: LabelCategories()
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CvatConverter.convert, test_dir)

@ -2,7 +2,7 @@ from functools import partial
import numpy as np
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.project import Project
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon,
@ -32,82 +32,75 @@ class DatumaroConverterTest(TestCase):
compare_datasets_strict(self,
expected=target_dataset, actual=parsed_dataset)
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
])
def categories(self):
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
return {
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
}
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
test_dataset = Dataset.from_iterable([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
})
def test_can_save_and_load(self):
with TestDir() as test_dir:
self._test_save_and_load(self.TestExtractor(),
self._test_save_and_load(self.test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir)
def test_can_detect(self):
with TestDir() as test_dir:
DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir)
DatumaroConverter.convert(self.test_dataset, save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir))
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
test_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, Polygon, LabelCategories
)
@ -29,101 +29,84 @@ class LabelMeConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, group=2),
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
'occluded': True,
'a1': 'qwe',
'a2': True,
'a3': 123,
}),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
attributes={ 'username': 'test' }),
Bbox(1, 2, 3, 4, group=3),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
]
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, group=2),
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
'occluded': True,
'a1': 'qwe',
'a2': True,
'a3': 123,
}),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
attributes={ 'username': 'test' }),
Bbox(1, 2, 3, 4, group=3),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('label_2')
label_cat.add('label_3')
return {
AnnotationType.label: label_cat,
}
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'label_2', 'label_3']),
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(
source_dataset,
partial(LabelMeConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())
test_dir, target_dataset=target_dataset)
def test_cant_save_dataset_with_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
], categories={
AnnotationType.label: LabelCategories(),
})
with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
partial(LabelMeConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
LabelMeConverter.convert, test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
@ -133,101 +116,91 @@ class LabelMeImporterTest(TestCase):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
return iter([
DatasetItem(id='img1', image=img1,
annotations=[
Polygon([43, 34, 45, 34, 45, 37, 43, 37],
label=0, id=0,
attributes={
'occluded': False,
'username': 'admin'
}
),
Mask(mask1, label=1, id=1,
attributes={
'occluded': False,
'username': 'brussell'
}
),
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
label=2, group=2, id=2,
attributes={
'a1': True,
'occluded': True,
'username': 'anonymous'
}
),
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
label=3, group=2, id=3,
attributes={
'kj': True,
'occluded': False,
'username': 'anonymous'
}
),
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
target_dataset = Dataset.from_iterable([
DatasetItem(id='img1', image=img1,
annotations=[
Polygon([43, 34, 45, 34, 45, 37, 43, 37],
label=0, id=0,
attributes={
'occluded': False,
'username': 'admin'
}
),
Mask(mask1, label=1, id=1,
attributes={
'occluded': False,
'username': 'brussell'
}
),
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
label=2, group=2, id=2,
attributes={
'a1': True,
'occluded': True,
'username': 'anonymous'
}
),
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
label=3, group=2, id=3,
attributes={
'kj': True,
'occluded': False,
'username': 'anonymous'
}
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('window')
label_cat.add('license plate')
label_cat.add('o1')
label_cat.add('q1')
label_cat.add('b1')
label_cat.add('m1')
label_cat.add('hg')
return {
AnnotationType.label: label_cat,
}
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'window', 'license plate', 'o1',
'q1', 'b1', 'm1', 'hg',
]),
})
parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
.make_dataset()
compare_datasets(self, expected=DstExtractor(), actual=parsed)
compare_datasets(self, expected=target_dataset, actual=parsed)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories
)
@ -28,96 +28,83 @@ class MotConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'visibility': 0.4,
}),
Bbox(2, 4, 4, 4, attributes={
'ignored': True
}),
]
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
]
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
'visibility': 0.0,
'ignored': False,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'occluded': False,
'visibility': 0.4,
'ignored': False,
}),
Bbox(2, 4, 4, 4, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': True,
}),
]
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'visibility': 0.4,
}),
Bbox(2, 4, 4, 4, attributes={
'ignored': True
}),
]
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
]
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
'visibility': 0.0,
'ignored': False,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'occluded': False,
'visibility': 0.4,
'ignored': False,
}),
Bbox(2, 4, 4, 4, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': True,
}),
]
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(
source_dataset,
partial(MotSeqGtConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())
test_dir, target_dataset=target_dataset)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
@ -127,30 +114,23 @@ class MotImporterTest(TestCase):
self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase, skipIf
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories
)
@ -48,117 +48,96 @@ class TfrecordConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
], attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
], attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_masks(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
annotations=[
Mask(image=np.array([
[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1],
]), label=1),
],
attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
annotations=[
Mask(image=np.array([
[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1],
]), label=1),
],
attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_masks=True),
test_dir)
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(2, 1, 4, 4, label=2),
Bbox(4, 2, 8, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
annotations=[
Bbox(4, 4, 4, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(2, 1, 4, 4, label=2),
Bbox(4, 2, 8, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
annotations=[
Bbox(4, 4, 4, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1/q.e',
image=Image(path='1/q.e', size=(10, 15)),
attributes={'source_id': ''}
)
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
test_dataset = Dataset.from_iterable([
DatasetItem(id='1/q.e',
image=Image(path='1/q.e', size=(10, 15)),
attributes={'source_id': ''}
)
], categories={
AnnotationType.label: LabelCategories(),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
TfDetectionApiConverter.convert, test_dir)
def test_labelmap_parsing(self):
@ -197,42 +176,35 @@ class TfrecordImporterTest(TestCase):
self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
attributes={'source_id': '1'}
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
attributes={'source_id': '2'}
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
attributes={'source_id': '3'}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
attributes={'source_id': '1'}
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
attributes={'source_id': '2'}
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
attributes={'source_id': '3'}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, target_dataset, dataset)

@ -2,7 +2,7 @@ import logging as log
import numpy as np
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
Mask, Polygon, PolyLine, Points, Bbox, Label,
LabelCategories, MaskCategories, AnnotationType
@ -67,304 +67,269 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual)
def test_mask_to_polygons_small_polygons_message(self):
class SrcExtractor(Extractor):
def __iter__(self):
items = [
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0],
[0, 1, 0],
[0, 0, 0],
]),
),
]
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0],
[0, 1, 0],
[0, 0, 0],
]),
),
]
return iter(items)
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
with self.assertLogs(level=log.DEBUG) as logs:
actual = transforms.MasksToPolygons(SrcExtractor())
actual = transforms.MasksToPolygons(source_dataset)
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
self.assertRegex('\n'.join(logs.output), 'too small polygons')
def test_polygons_to_masks(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4]),
Polygon([5, 0, 9, 0, 5, 5]),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4]),
Polygon([5, 0, 9, 0, 5, 5]),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
])
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
),
])
actual = transforms.PolygonsToMasks(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.PolygonsToMasks(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_crop_covered_segments(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
# The mask is partially covered by the polygon
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
# The mask is partially covered by the polygon
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
actual = transforms.CropCoveredSegments(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.CropCoveredSegments(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_merge_instance_segments(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1, group=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1, group=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Mask(np.array([
[1, 1, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Mask(np.array([
[1, 1, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
]
),
])
actual = transforms.MergeInstanceSegments(SrcExtractor(),
actual = transforms.MergeInstanceSegments(source_dataset,
include_polygons=True)
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
def test_map_subsets(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a'),
DatasetItem(id=2, subset='b'),
DatasetItem(id=3, subset='c'),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='a'),
DatasetItem(id=2, subset='b'),
DatasetItem(id=3, subset='c'),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset=''),
DatasetItem(id=2, subset='a'),
DatasetItem(id=3, subset='c'),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset=''),
DatasetItem(id=2, subset='a'),
DatasetItem(id=3, subset='c'),
])
actual = transforms.MapSubsets(SrcExtractor(),
actual = transforms.MapSubsets(source_dataset,
{ 'a': '', 'b': 'a' })
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
def test_shapes_to_boxes(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
), id=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
), id=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 4, 4, id=1),
Bbox(1, 1, 3, 3, id=2),
Bbox(1, 1, 1, 1, id=3),
Bbox(2, 2, 2, 2, id=4),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 4, 4, id=1),
Bbox(1, 1, 3, 3, id=2),
Bbox(1, 1, 1, 1, id=3),
Bbox(2, 2, 2, 2, id=4),
]
),
])
actual = transforms.ShapesToBoxes(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.ShapesToBoxes(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_id_from_image(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image='path.jpg'),
DatasetItem(id=2),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image='path.jpg'),
DatasetItem(id=2),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
actual = transforms.IdFromImageName(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.IdFromImageName(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_boxes_to_masks(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 3, 3, z_order=1),
Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 2, 3, 1, z_order=3),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 3, 3, z_order=1),
Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 2, 3, 1, z_order=3),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
Mask(np.array([
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=2),
Mask(np.array([
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=3),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
Mask(np.array([
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=2),
Mask(np.array([
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=3),
]
),
])
actual = transforms.BoxesToMasks(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.BoxesToMasks(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_random_split(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset="a"),
DatasetItem(id=2, subset="a"),
DatasetItem(id=3, subset="b"),
DatasetItem(id=4, subset="b"),
DatasetItem(id=5, subset="b"),
DatasetItem(id=6, subset=""),
DatasetItem(id=7, subset=""),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset="a"),
DatasetItem(id=2, subset="a"),
DatasetItem(id=3, subset="b"),
DatasetItem(id=4, subset="b"),
DatasetItem(id=5, subset="b"),
DatasetItem(id=6, subset=""),
DatasetItem(id=7, subset=""),
])
actual = transforms.RandomSplit(SrcExtractor(), splits=[
actual = transforms.RandomSplit(source_dataset, splits=[
('train', 4.0 / 7.0),
('test', 3.0 / 7.0),
])
@ -373,21 +338,19 @@ class TransformsTest(TestCase):
self.assertEqual(3, len(actual.get_subset('test')))
def test_random_split_gives_error_on_wrong_ratios(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([DatasetItem(id=1)])
source_dataset = Dataset.from_iterable([DatasetItem(id=1)])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[
transforms.RandomSplit(source_dataset, splits=[
('train', 0.5),
('test', 0.7),
])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[])
transforms.RandomSplit(source_dataset, splits=[])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[
transforms.RandomSplit(source_dataset, splits=[
('train', -0.5),
('test', 1.5),
])
@ -462,24 +425,19 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual)
def test_remap_labels_delete_unspecified(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ])
def categories(self):
label_cat = LabelCategories()
label_cat.add('label0')
return { AnnotationType.label: label_cat }
class DstExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[]) ])
def categories(self):
return { AnnotationType.label: LabelCategories() }
actual = transforms.RemapLabels(SrcExtractor(),
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[ Label(0) ])
], categories={
AnnotationType.label: LabelCategories.from_iterable('label0'),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[]),
], categories={
AnnotationType.label: LabelCategories(),
})
actual = transforms.RemapLabels(source_dataset,
mapping={}, default='delete')
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)

@ -6,7 +6,7 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories,
)
from datumaro.components.project import Project
from datumaro.components.project import Project, Dataset
from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.image import Image, save_image
@ -15,40 +15,32 @@ from datumaro.util.test_utils import TestDir, compare_datasets
class YoloFormatTest(TestCase):
def test_can_save_and_load(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(0, 1, 2, 3, label=4),
]),
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
Bbox(2, 1, 2, 3, label=4),
]),
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 1, 5, 2, label=2),
Bbox(0, 2, 3, 2, label=5),
Bbox(0, 2, 4, 2, label=6),
Bbox(0, 7, 3, 2, label=7),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(0, 1, 2, 3, label=4),
]),
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
Bbox(2, 1, 2, 3, label=4),
]),
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 1, 5, 2, label=2),
Bbox(0, 2, 3, 2, label=5),
Bbox(0, 2, 4, 2, label=6),
Bbox(0, 7, 3, 2, label=7),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = YoloImporter()(test_dir).make_dataset()
@ -56,27 +48,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir)
@ -87,27 +71,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_can_load_dataset_with_exact_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir)
@ -117,24 +93,20 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', subset='train',
image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', subset='train',
image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', subset='train',
image=np.ones((5, 4, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', subset='train',
image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', subset='train',
image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', subset='train',
image=np.ones((5, 4, 3))),
], categories={
AnnotationType.label: LabelCategories(),
})
for save_images in {True, False}:
with self.subTest(save_images=save_images):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir,
save_images=save_images)
@ -150,26 +122,19 @@ class YoloImporterTest(TestCase):
self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)

Loading…
Cancel
Save