[Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
main
Ilya Kochankov 6 years ago committed by GitHub
parent e372589dc8
commit 7ecdcf182b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -72,6 +72,33 @@ class Categories:
class LabelCategories(Categories): class LabelCategories(Categories):
Category = namedtuple('Category', ['name', 'parent', 'attributes']) Category = namedtuple('Category', ['name', 'parent', 'attributes'])
@classmethod
def from_iterable(cls, iterable):
    """Create a LabelCategories object from an iterable.

    Args:
        iterable: accepted forms are:
            1) a single str - one Category is created with that name
            2) a list of str - interpreted as a list of Category names
            3) a list of positional-argument lists - each entry is
               unpacked and passed to ``add()``
    Returns:
        LabelCategories: the populated LabelCategories object
    """
    result = cls()
    # A bare string is a single category name, not an iterable of names.
    if isinstance(iterable, str):
        iterable = [[iterable]]
    for category in iterable:
        # A bare string entry supplies only the category name.
        if isinstance(category, str):
            category = [category]
        result.add(*category)
    return result
def __init__(self, items=None, attributes=None): def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes) super().__init__(attributes=attributes)
@ -482,6 +509,31 @@ class Bbox(_Shape):
class PointsCategories(Categories): class PointsCategories(Categories):
Category = namedtuple('Category', ['labels', 'joints']) Category = namedtuple('Category', ['labels', 'joints'])
@classmethod
def from_iterable(cls, iterable):
    """Create a PointsCategories object from an iterable.

    Args:
        iterable: accepted forms are:
            1) a single int - one Category is created with that label
            2) a list of int - interpreted as a list of Category labels
            3) a list of positional-argument lists - each entry is
               unpacked and passed to ``add()``
    Returns:
        PointsCategories: the populated PointsCategories object
    """
    result = cls()
    # A bare int is a single category label, not an iterable of labels.
    if isinstance(iterable, int):
        iterable = [[iterable]]
    for category in iterable:
        # A bare int entry supplies only the category label.
        if isinstance(category, int):
            category = [category]
        result.add(*category)
    return result
def __init__(self, items=None, attributes=None): def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes) super().__init__(attributes=attributes)

@ -18,7 +18,8 @@ import sys
from datumaro.components.config import Config, DEFAULT_FORMAT from datumaro.components.config import Config, DEFAULT_FORMAT
from datumaro.components.config_model import (Model, Source, from datumaro.components.config_model import (Model, Source,
PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA) PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
from datumaro.components.extractor import Extractor from datumaro.components.extractor import Extractor, LabelCategories,\
AnnotationType
from datumaro.components.launcher import ModelTransform from datumaro.components.launcher import ModelTransform
from datumaro.components.dataset_filter import \ from datumaro.components.dataset_filter import \
XPathDatasetFilter, XPathAnnotationsFilter XPathDatasetFilter, XPathAnnotationsFilter
@ -319,6 +320,35 @@ class Subset(Extractor):
return self._parent.categories() return self._parent.categories()
class Dataset(Extractor): class Dataset(Extractor):
@classmethod
def from_iterable(cls, iterable, categories=None):
    """Create a Dataset from an iterable of DatasetItems.

    Args:
        iterable: an iterable yielding DatasetItem objects
        categories (dict or list, optional): either a mapping of
            AnnotationType to the corresponding Categories object, or a
            list of label names, which is interpreted as the item list
            of a LabelCategories. Defaults to None, which is treated as
            an empty mapping.
    Returns:
        Dataset: a Dataset built from the given items and categories
    """
    # A plain list of names is shorthand for label categories.
    if isinstance(categories, list):
        categories = {
            AnnotationType.label:
                LabelCategories.from_iterable(categories),
        }
    if not categories:
        categories = {}

    class _TmpExtractor(Extractor):
        # Minimal Extractor adapter over the provided items, so the
        # regular from_extractors() merging path can be reused.
        def __iter__(self):
            return iter(iterable)

        def categories(self):
            return categories

    return cls.from_extractors(_TmpExtractor())
@classmethod @classmethod
def from_extractors(cls, *sources): def from_extractors(cls, *sources):
# merge categories # merge categories

@ -4,7 +4,7 @@ import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Project from datumaro.components.project import (Project, Dataset)
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption, AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories LabelCategories, PointsCategories
@ -26,32 +26,26 @@ DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
class CocoImporterTest(TestCase): class CocoImporterTest(TestCase):
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
])
def categories(self): expected_dataset = Dataset.from_iterable([
label_cat = LabelCategories() DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
label_cat.add('TEST') subset='val', attributes={'id': 1},
return { AnnotationType.label: label_cat } annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
], categories=['TEST',])
dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)
def test_can_detect(self): def test_can_detect(self):
self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@ -71,526 +65,417 @@ class CocoConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load_captions(self): def test_can_save_and_load_captions(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ annotations=[
DatasetItem(id=1, subset='train', Caption('hello', id=1, group=1),
annotations=[ Caption('world', id=2, group=2),
Caption('hello', id=1, group=1), ], attributes={'id': 1}),
Caption('world', id=2, group=2), DatasetItem(id=2, subset='train',
], attributes={'id': 1}), annotations=[
DatasetItem(id=2, subset='train', Caption('test', id=3, group=3),
annotations=[ ], attributes={'id': 2}),
Caption('test', id=3, group=3),
], attributes={'id': 2}), DatasetItem(id=3, subset='val',
annotations=[
DatasetItem(id=3, subset='val', Caption('word', id=1, group=1),
annotations=[ ], attributes={'id': 1}),
Caption('word', id=1, group=1), ])
], attributes={'id': 1}
),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoCaptionsConverter.convert, test_dir) CocoCaptionsConverter.convert, test_dir)
def test_can_save_and_load_instances(self): def test_can_save_and_load_instances(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
label_categories.add(str(i)) annotations=[
categories = { AnnotationType.label: label_categories } # Bbox + single polygon
Bbox(0, 1, 2, 2,
class TestExtractor(Extractor): label=2, group=1, id=1,
def __iter__(self): attributes={ 'is_crowd': False }),
return iter([ Polygon([0, 1, 2, 1, 2, 3, 0, 3],
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), attributes={ 'is_crowd': False },
annotations=[ label=2, group=1, id=1),
# Bbox + single polygon ], attributes={'id': 1}),
Bbox(0, 1, 2, 2, DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
label=2, group=1, id=1, annotations=[
attributes={ 'is_crowd': False }), # Mask + bbox
Polygon([0, 1, 2, 1, 2, 3, 0, 3], Mask(np.array([
attributes={ 'is_crowd': False }, [0, 1, 0, 0],
label=2, group=1, id=1), [0, 1, 0, 0],
], attributes={'id': 1}), [0, 1, 1, 1],
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), [0, 0, 0, 0]],
annotations=[ ),
# Mask + bbox attributes={ 'is_crowd': True },
Mask(np.array([ label=4, group=3, id=3),
[0, 1, 0, 0], Bbox(1, 0, 2, 2, label=4, group=3, id=3,
[0, 1, 0, 0], attributes={ 'is_crowd': True }),
[0, 1, 1, 1], ], attributes={'id': 2}),
[0, 0, 0, 0]],
), DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
attributes={ 'is_crowd': True }, annotations=[
label=4, group=3, id=3), # Bbox + mask
Bbox(1, 0, 2, 2, label=4, group=3, id=3, Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }), attributes={ 'is_crowd': True }),
], attributes={'id': 2}), Mask(np.array([
[0, 0, 0, 0],
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), [1, 1, 1, 0],
annotations=[ [1, 1, 0, 0],
# Bbox + mask [0, 0, 0, 0]],
Bbox(0, 1, 2, 2, label=4, group=3, id=3, ),
attributes={ 'is_crowd': True }), attributes={ 'is_crowd': True },
Mask(np.array([ label=4, group=3, id=3),
[0, 0, 0, 0], ], attributes={'id': 1}),
[1, 1, 1, 0], ], categories=[str(i) for i in range(10)])
[1, 1, 0, 0],
[0, 0, 0, 0]], target_dataset = Dataset.from_iterable([
), DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
attributes={ 'is_crowd': True }, annotations=[
label=4, group=3, id=3), Polygon([0, 1, 2, 1, 2, 3, 0, 3],
], attributes={'id': 1}), attributes={ 'is_crowd': False },
]) label=2, group=1, id=1),
], attributes={'id': 1}),
def categories(self): DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
return categories annotations=[
Mask(np.array([
class DstExtractor(Extractor): [0, 1, 0, 0],
def __iter__(self): [0, 1, 0, 0],
return iter([ [0, 1, 1, 1],
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), [0, 0, 0, 0]],
annotations=[ ),
Polygon([0, 1, 2, 1, 2, 3, 0, 3], attributes={ 'is_crowd': True },
attributes={ 'is_crowd': False }, label=4, group=3, id=3),
label=2, group=1, id=1), ], attributes={'id': 2}),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[ annotations=[
Mask(np.array([ Mask(np.array([
[0, 1, 0, 0], [0, 0, 0, 0],
[0, 1, 0, 0], [1, 1, 1, 0],
[0, 1, 1, 1], [1, 1, 0, 0],
[0, 0, 0, 0]], [0, 0, 0, 0]],
), ),
attributes={ 'is_crowd': True }, attributes={ 'is_crowd': True },
label=4, group=3, id=3), label=4, group=3, id=3),
], attributes={'id': 2}), ], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir, CocoInstancesConverter.convert, test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_can_merge_polygons_on_loading(self): def test_can_merge_polygons_on_loading(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label_categories.add(str(i)) annotations=[
categories = { AnnotationType.label: label_categories } Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
class SrcExtractor(Extractor): Polygon([5, 0, 9, 0, 5, 5],
def __iter__(self): label=3, id=4, group=4),
return iter([ ]
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ),
annotations=[ ], categories=[str(i) for i in range(10)])
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4), target_dataset = Dataset.from_iterable([
Polygon([5, 0, 9, 0, 5, 5], DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label=3, id=4, group=4), annotations=[
] Mask(np.array([
), [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
]) [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
def categories(self): [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
return categories [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
class DstExtractor(Extractor): # only internal fragment (without the border),
def __iter__(self): # but not everywhere...
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
), ),
]) label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
def categories(self): ], attributes={'id': 1}
return categories ),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir, CocoInstancesConverter.convert, test_dir,
importer_args={'merge_instance_polygons': True}, importer_args={'merge_instance_polygons': True},
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_can_crop_covered_segments(self): def test_can_crop_covered_segments(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
label_categories.add(str(i)) annotations=[
Mask(np.array([
class SrcTestExtractor(Extractor): [0, 0, 1, 1, 1],
def __iter__(self): [0, 0, 1, 1, 1],
return iter([ [1, 1, 0, 1, 1],
DatasetItem(id=1, image=np.zeros((5, 5, 3)), [1, 1, 1, 0, 0],
annotations=[ [1, 1, 1, 0, 0]],
Mask(np.array([ ),
[0, 0, 1, 1, 1], label=2, id=1, z_order=0),
[0, 0, 1, 1, 1], Polygon([1, 1, 4, 1, 4, 4, 1, 4],
[1, 1, 0, 1, 1], label=1, id=2, z_order=1),
[1, 1, 1, 0, 0], ]
[1, 1, 1, 0, 0]], ),
), ], categories=[str(i) for i in range(10)])
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], target_dataset = Dataset.from_iterable([
label=1, id=2, z_order=1), DatasetItem(id=1, image=np.zeros((5, 5, 3)),
] annotations=[
), Mask(np.array([
]) [0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
def categories(self): [1, 0, 0, 0, 1],
return { AnnotationType.label: label_categories } [1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
class DstTestExtractor(Extractor): ),
def __iter__(self): attributes={ 'is_crowd': True },
return iter([ label=2, id=1, group=1),
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[ Polygon([1, 1, 4, 1, 4, 4, 1, 4],
Mask(np.array([ label=1, id=2, group=2,
[0, 0, 1, 1, 1], attributes={ 'is_crowd': False }),
[0, 0, 0, 0, 1], ], attributes={'id': 1}
[1, 0, 0, 0, 1], ),
[1, 0, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, crop_covered=True), partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=DstTestExtractor()) test_dir, target_dataset=target_dataset)
def test_can_convert_polygons_to_mask(self): def test_can_convert_polygons_to_mask(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((6, 10, 3)),
label_categories.add(str(i)) annotations=[
Polygon([0, 0, 4, 0, 4, 4],
class SrcTestExtractor(Extractor): label=3, id=4, group=4),
def __iter__(self): Polygon([5, 0, 9, 0, 5, 5],
return iter([ label=3, id=4, group=4),
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ]
annotations=[ ),
Polygon([0, 0, 4, 0, 4, 4], ], categories=[str(i) for i in range(10)])
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5], target_dataset = Dataset.from_iterable([
label=3, id=4, group=4), DatasetItem(id=1, image=np.zeros((6, 10, 3)),
] annotations=[
), Mask(np.array([
]) [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
def categories(self): [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
return { AnnotationType.label: label_categories } [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
class DstTestExtractor(Extractor): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
def __iter__(self): # only internal fragment (without the border),
return iter([ # but not everywhere...
DatasetItem(id=1, image=np.zeros((6, 10, 3)), ),
annotations=[ attributes={ 'is_crowd': True },
Mask(np.array([ label=3, id=4, group=4),
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0], ], attributes={'id': 1}
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0], ),
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='mask'), partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
test_dir, target_dataset=DstTestExtractor()) test_dir, target_dataset=target_dataset)
def test_can_convert_masks_to_polygons(self): def test_can_convert_masks_to_polygons(self):
label_categories = LabelCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
label_categories.add(str(i)) annotations=[
Mask(np.array([
class SrcExtractor(Extractor): [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
def __iter__(self): [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
return iter([ [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
DatasetItem(id=1, image=np.zeros((5, 10, 3)), [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
annotations=[ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
Mask(np.array([ ]),
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0], label=3, id=4, group=4),
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0], ]
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0], ),
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0], ], categories=[str(i) for i in range(10)])
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]), target_dataset = Dataset.from_iterable([
label=3, id=4, group=4), DatasetItem(id=1, image=np.zeros((5, 10, 3)),
] annotations=[
), Polygon(
]) [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
def categories(self): attributes={ 'is_crowd': False }),
return { AnnotationType.label: label_categories } Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
class DstExtractor(Extractor): label=3, id=4, group=4,
def __iter__(self): attributes={ 'is_crowd': False }),
return iter([ ], attributes={'id': 1}
DatasetItem(id=1, image=np.zeros((5, 10, 3)), ),
annotations=[ ], categories=[str(i) for i in range(10)])
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
test_dir, target_dataset=DstExtractor()) test_dir,
target_dataset=target_dataset)
def test_can_save_and_load_images(self): def test_can_save_and_load_images(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', attributes={'id': 1}),
return iter([ DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=2, subset='val', attributes={'id': 2}), DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}), DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}), DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=5, subset='test', attributes={'id': 1}), DatasetItem(id=5, subset='test', attributes={'id': 1}),
]) ])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir) CocoImageInfoConverter.convert, test_dir)
def test_can_save_and_load_labels(self): def test_can_save_and_load_labels(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ annotations=[
DatasetItem(id=1, subset='train', Label(4, id=1, group=1),
annotations=[ Label(9, id=2, group=2),
Label(4, id=1, group=1), ], attributes={'id': 1}),
Label(9, id=2, group=2), ], categories=[str(i) for i in range(10)])
], attributes={'id': 1}
),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoLabelsConverter.convert, test_dir) CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self): def test_can_save_and_load_keypoints(self):
label_categories = LabelCategories()
points_categories = PointsCategories() source_dataset = Dataset.from_iterable([
for i in range(10): DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
label_categories.add(str(i)) annotations=[
points_categories.add(i, joints=[[0, 1], [1, 2]]) # Full instance annotations: polygon + keypoints
categories = { Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
AnnotationType.label: label_categories, label=3, group=1, id=1),
AnnotationType.points: points_categories, Polygon([0, 0, 4, 0, 4, 4],
} label=3, group=1, id=1),
class TestExtractor(Extractor): # Full instance annotations: bbox + keypoints
def __iter__(self): Points([1, 2, 3, 4, 2, 3], group=2, id=2),
return iter([ Bbox(1, 2, 2, 2, group=2, id=2),
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[ # Solitary keypoints
# Full instance annotations: polygon + keypoints Points([1, 2, 0, 2, 4, 1], label=5, id=3),
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1), # Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
label=3, group=1, id=1),
# Solitary keypoints with no label
# Full instance annotations: bbox + keypoints Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
Points([1, 2, 3, 4, 2, 3], group=2, id=2), ]),
Bbox(1, 2, 2, 2, group=2, id=2), ], categories={
AnnotationType.label: LabelCategories.from_iterable(
# Solitary keypoints str(i) for i in range(10)),
Points([1, 2, 0, 2, 4, 1], label=5, id=3), AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
# Some other solitary annotations (bug #1387) ),
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), })
# Solitary keypoints with no label target_dataset = Dataset.from_iterable([
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
]) annotations=[
]) Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
def categories(self): attributes={'is_crowd': False}),
return categories Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
class DstTestExtractor(TestExtractor): attributes={'is_crowd': False}),
def __iter__(self):
return iter([ Points([1, 2, 3, 4, 2, 3],
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), group=2, id=2,
annotations=[ attributes={'is_crowd': False}),
Points([0, 0, 0, 2, 4, 1], [0, 1, 2], Polygon([1, 2, 3, 2, 3, 4, 1, 4],
label=3, group=1, id=1, group=2, id=2,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1, Points([1, 2, 0, 2, 4, 1],
attributes={'is_crowd': False}), label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3], Polygon([0, 1, 4, 1, 4, 2, 0, 2],
group=2, id=2, label=5, group=3, id=3,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2, Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
attributes={'is_crowd': False}), group=5, id=5,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1], Polygon([1, 2, 3, 2, 3, 4, 1, 4],
label=5, group=3, id=3, group=5, id=5,
attributes={'is_crowd': False}), attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2], ], attributes={'id': 1}),
label=5, group=3, id=3, ], categories={
attributes={'is_crowd': False}), AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], AnnotationType.points: PointsCategories.from_iterable(
group=5, id=5, (i, None, [[0, 1], [1, 2]]) for i in range(10)
attributes={'is_crowd': False}), ),
Polygon([1, 2, 3, 2, 3, 4, 1, 4], })
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(source_dataset,
CocoPersonKeypointsConverter.convert, test_dir, CocoPersonKeypointsConverter.convert, test_dir,
target_dataset=DstTestExtractor()) target_dataset=target_dataset)
def test_can_save_dataset_with_no_subsets(self): def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, attributes={'id': 1}),
return iter([ DatasetItem(id=2, attributes={'id': 2}),
DatasetItem(id=1, attributes={'id': 1}), ])
DatasetItem(id=2, attributes={'id': 2}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
CocoConverter.convert, test_dir) CocoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
return iter([ attributes={'id': 1}),
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), ])
attributes={'id': 1}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir) CocoImageInfoConverter.convert, test_dir)
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3)),
return iter([ attributes={'id': 1}),
DatasetItem(id='1', image=np.ones((4, 2, 3)), DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 1}), attributes={'id': 2}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 2}), attributes={'id': 3}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), ])
attributes={'id': 3}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
test_dir)
def test_preserve_coco_ids(self): def test_preserve_coco_ids(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
return iter([ attributes={'id': 40}),
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), ])
attributes={'id': 40}),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
test_dir)
def test_annotation_attributes(self): def test_annotation_attributes(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
return iter([ Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, ], attributes={'id': 1})
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), ], categories=[str(i) for i in range(10)])
], attributes={'id': 1})
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return { AnnotationType.label: label_categories, }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CocoConverter.convert, test_dir) CocoConverter.convert, test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Points, Polygon, PolyLine, Bbox, Label, AnnotationType, Points, Polygon, PolyLine, Bbox, Label,
LabelCategories, LabelCategories,
@ -28,121 +28,115 @@ class CvatImporterTest(TestCase):
self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR)) self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR))
def test_can_load_image(self): def test_can_load_image(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='img0', subset='train',
return iter([ image=np.ones((8, 8, 3)),
DatasetItem(id='img0', subset='train', annotations=[
image=np.ones((8, 8, 3)), Bbox(0, 2, 4, 2, label=0, z_order=1,
annotations=[ attributes={
Bbox(0, 2, 4, 2, label=0, z_order=1, 'occluded': True,
attributes={ 'a1': True, 'a2': 'v3'
'occluded': True, }),
'a1': True, 'a2': 'v3' PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
}), attributes={'occluded': False}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], ], attributes={'frame': 0}),
attributes={'occluded': False}), DatasetItem(id='img1', subset='train',
], attributes={'frame': 0}), image=np.ones((10, 10, 3)),
DatasetItem(id='img1', subset='train', annotations=[
image=np.ones((10, 10, 3)), Polygon([1, 2, 3, 4, 6, 5], z_order=1,
annotations=[ attributes={'occluded': False}),
Polygon([1, 2, 3, 4, 6, 5], z_order=1, Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}), attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2, ], attributes={'frame': 1}),
attributes={'occluded': False}), ], categories={
], attributes={'frame': 1}), AnnotationType.label: LabelCategories.from_iterable([
]) ['label1', '', {'a1', 'a2'}],
['label2'],
def categories(self): ])
label_categories = LabelCategories() })
label_categories.add('label1', attributes={'a1', 'a2'})
label_categories.add('label2')
return { AnnotationType.label: label_categories }
parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset() parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset) compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_load_video(self): def test_can_load_video(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='frame_000010', subset='annotations',
return iter([ image=np.ones((20, 25, 3)),
DatasetItem(id='frame_000010', subset='annotations', annotations=[
image=np.ones((20, 25, 3)), Bbox(3, 4, 7, 1, label=2,
annotations=[ id=0,
Bbox(3, 4, 7, 1, label=2, attributes={
id=0, 'occluded': True,
attributes={ 'outside': False, 'keyframe': True,
'occluded': True, 'track_id': 0
'outside': False, 'keyframe': True, }),
'track_id': 0 Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
}), label=0,
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], id=1,
label=0, attributes={
id=1, 'occluded': False,
attributes={ 'outside': False, 'keyframe': True,
'occluded': False, 'track_id': 1, 'hgl': 'hgkf',
'outside': False, 'keyframe': True, }),
'track_id': 1, 'hgl': 'hgkf', ], attributes={'frame': 10}),
}), DatasetItem(id='frame_000013', subset='annotations',
], attributes={'frame': 10}), image=np.ones((20, 25, 3)),
DatasetItem(id='frame_000013', subset='annotations', annotations=[
image=np.ones((20, 25, 3)), Bbox(7, 6, 7, 2, label=2,
annotations=[ id=0,
Bbox(7, 6, 7, 2, label=2, attributes={
id=0, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 0
'outside': True, 'keyframe': True, }),
'track_id': 0 Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
}), label=0,
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], id=1,
label=0, attributes={
id=1, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 1, 'hgl': 'jk',
'outside': True, 'keyframe': True, }),
'track_id': 1, 'hgl': 'jk', PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
}), label=2,
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], id=2,
label=2, attributes={
id=2, 'occluded': False,
attributes={ 'outside': False, 'keyframe': True,
'occluded': False, 'track_id': 2,
'outside': False, 'keyframe': True, }),
'track_id': 2, ], attributes={'frame': 13}),
}), DatasetItem(id='frame_000016', subset='annotations',
], attributes={'frame': 13}), image=Image(path='frame_0000016.png', size=(20, 25)),
DatasetItem(id='frame_000016', subset='annotations', annotations=[
image=Image(path='frame_0000016.png', size=(20, 25)), Bbox(8, 7, 6, 10, label=2,
annotations=[ id=0,
Bbox(8, 7, 6, 10, label=2, attributes={
id=0, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 0
'outside': True, 'keyframe': True, }),
'track_id': 0 PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
}), label=2,
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], id=2,
label=2, attributes={
id=2, 'occluded': False,
attributes={ 'outside': True, 'keyframe': True,
'occluded': False, 'track_id': 2,
'outside': True, 'keyframe': True, }),
'track_id': 2, ], attributes={'frame': 16}),
}), ], categories={
], attributes={'frame': 16}), AnnotationType.label: LabelCategories.from_iterable([
]) ['klhg', '', {'hgl'}],
['z U k'],
def categories(self): ['II']
label_categories = LabelCategories() ]),
label_categories.add('klhg', attributes={'hgl'}) })
label_categories.add('z U k')
label_categories.add('II')
return { AnnotationType.label: label_categories }
parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset() parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset) compare_datasets(self, expected_dataset, parsed_dataset)
class CvatConverterTest(TestCase): class CvatConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir, def _test_save_and_load(self, source_dataset, converter, test_dir,
@ -165,137 +159,120 @@ class CvatConverterTest(TestCase):
label_categories.items[2].attributes.update(['a1', 'a2']) label_categories.items[2].attributes.update(['a1', 'a2'])
label_categories.attributes.update(['occluded']) label_categories.attributes.update(['occluded'])
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4],
annotations=[ label=1, group=4,
Polygon([0, 0, 4, 0, 4, 4], attributes={ 'occluded': True }),
label=1, group=4, Points([1, 1, 3, 2, 2, 3],
attributes={ 'occluded': True }), label=2,
Points([1, 1, 3, 2, 2, 3], attributes={ 'a1': 'x', 'a2': 42,
label=2, 'unknown': 'bar' }),
attributes={ 'a1': 'x', 'a2': 42, Label(1),
'unknown': 'bar' }), Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
Label(1), ]
Label(2, attributes={ 'a1': 'y', 'a2': 44 }), ),
] DatasetItem(id=1, subset='s1',
), annotations=[
DatasetItem(id=1, subset='s1', PolyLine([0, 0, 4, 0, 4, 4],
annotations=[ label=3, id=4, group=4),
PolyLine([0, 0, 4, 0, 4, 4], Bbox(5, 0, 1, 9,
label=3, id=4, group=4), label=3, id=4, group=4),
Bbox(5, 0, 1, 9, ]
label=3, id=4, group=4), ),
]
), DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4], z_order=1,
annotations=[ label=3, group=4,
Polygon([0, 0, 4, 0, 4, 4], z_order=1, attributes={ 'occluded': False }),
label=3, group=4, PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
attributes={ 'occluded': False }), ]
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label ),
]
), DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
DatasetItem(id=3, subset='s3', image=Image( ], categories={
path='3.jpg', size=(2, 4))), AnnotationType.label: label_categories,
]) })
def categories(self): target_dataset = Dataset.from_iterable([
return { AnnotationType.label: label_categories } DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
class DstExtractor(Extractor): Polygon([0, 0, 4, 0, 4, 4],
def __iter__(self): label=1, group=4,
return iter([ attributes={ 'occluded': True }),
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), Points([1, 1, 3, 2, 2, 3],
annotations=[ label=2,
Polygon([0, 0, 4, 0, 4, 4], attributes={ 'occluded': False,
label=1, group=4, 'a1': 'x', 'a2': 42 }),
attributes={ 'occluded': True }), Label(1),
Points([1, 1, 3, 2, 2, 3], Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
label=2, ], attributes={'frame': 0}
attributes={ 'occluded': False, ),
'a1': 'x', 'a2': 42 }), DatasetItem(id=1, subset='s1',
Label(1), annotations=[
Label(2, attributes={ 'a1': 'y', 'a2': 44 }), PolyLine([0, 0, 4, 0, 4, 4],
], attributes={'frame': 0} label=3, group=4,
), attributes={ 'occluded': False }),
DatasetItem(id=1, subset='s1', Bbox(5, 0, 1, 9,
annotations=[ label=3, group=4,
PolyLine([0, 0, 4, 0, 4, 4], attributes={ 'occluded': False }),
label=3, group=4, ], attributes={'frame': 1}
attributes={ 'occluded': False }), ),
Bbox(5, 0, 1, 9,
label=3, group=4, DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
attributes={ 'occluded': False }), annotations=[
], attributes={'frame': 1} Polygon([0, 0, 4, 0, 4, 4], z_order=1,
), label=3, group=4,
attributes={ 'occluded': False }),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), ], attributes={'frame': 0}
annotations=[ ),
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4, DatasetItem(id=3, subset='s3', image=Image(
attributes={ 'occluded': False }), path='3.jpg', size=(2, 4)),
], attributes={'frame': 0} attributes={'frame': 0}),
), ], categories={
AnnotationType.label: label_categories,
DatasetItem(id=3, subset='s3', image=Image( })
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
])
def categories(self):
return { AnnotationType.label: label_categories }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir, partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_relative_paths(self): def test_relative_paths(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3))),
return iter([ DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='1', image=np.ones((4, 2, 3))), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), ], categories={ AnnotationType.label: LabelCategories() })
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
]) target_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
def categories(self): attributes={'frame': 0}),
return { AnnotationType.label: LabelCategories() } DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
class DstExtractor(Extractor): DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
def __iter__(self): attributes={'frame': 2}),
return iter([ ], categories={
DatasetItem(id='1', image=np.ones((4, 2, 3)), AnnotationType.label: LabelCategories()
attributes={'frame': 0}), })
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir, partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor()) target_dataset=target_dataset)
def test_preserve_frame_ids(self): def test_preserve_frame_ids(self):
class TestExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
return iter([ attributes={'frame': 40}),
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), ], categories={
attributes={'frame': 40}), AnnotationType.label: LabelCategories()
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(expected_dataset,
CvatConverter.convert, test_dir) CvatConverter.convert, test_dir)

@ -2,7 +2,7 @@ from functools import partial
import numpy as np import numpy as np
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.project import Project from datumaro.components.project import Project
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, AnnotationType, Label, Mask, Points, Polygon,
@ -32,82 +32,75 @@ class DatumaroConverterTest(TestCase):
compare_datasets_strict(self, compare_datasets_strict(self,
expected=target_dataset, actual=parsed_dataset) expected=target_dataset, actual=parsed_dataset)
class TestExtractor(Extractor): label_categories = LabelCategories()
def __iter__(self): for i in range(5):
return iter([ label_categories.add('cat' + str(i))
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[ mask_categories = MaskCategories(
Caption('hello', id=1), generate_colormap(len(label_categories.items)))
Caption('world', id=2, group=5),
Label(2, id=3, attributes={ points_categories = PointsCategories()
'x': 1, for index, _ in enumerate(label_categories.items):
'y': '2', points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ test_dataset = Dataset.from_iterable([
'score': 1.0, DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
}), annotations=[
Bbox(5, 6, 7, 8, id=5, group=5), Caption('hello', id=1),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), Caption('world', id=2, group=5),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), Label(2, id=3, attributes={
]), 'x': 1,
DatasetItem(id=21, subset='train', 'y': '2',
annotations=[ }),
Caption('test'), Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
Label(2), 'score': 1.0,
Bbox(1, 2, 3, 4, 5, id=42, group=42) }),
]), Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
DatasetItem(id=2, subset='val', Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
annotations=[ ]),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), DatasetItem(id=21, subset='train',
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), annotations=[
]), Caption('test'),
Label(2),
DatasetItem(id=42, subset='test', Bbox(1, 2, 3, 4, 5, id=42, group=42)
attributes={'a1': 5, 'a2': '42'}), ]),
DatasetItem(id=42), DatasetItem(id=2, subset='val',
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), annotations=[
]) PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
def categories(self): ]),
label_categories = LabelCategories()
for i in range(5): DatasetItem(id=42, subset='test',
label_categories.add('cat' + str(i)) attributes={'a1': 5, 'a2': '42'}),
mask_categories = MaskCategories( DatasetItem(id=42),
generate_colormap(len(label_categories.items))) DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
points_categories = PointsCategories() AnnotationType.label: label_categories,
for index, _ in enumerate(label_categories.items): AnnotationType.mask: mask_categories,
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) AnnotationType.points: points_categories,
})
return {
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
}
def test_can_save_and_load(self): def test_can_save_and_load(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(self.TestExtractor(), self._test_save_and_load(self.test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir) partial(DatumaroConverter.convert, save_images=True), test_dir)
def test_can_detect(self): def test_can_detect(self):
with TestDir() as test_dir: with TestDir() as test_dir:
DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir) DatumaroConverter.convert(self.test_dataset, save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir)) self.assertTrue(DatumaroImporter.detect(test_dir))
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', image=np.ones((4, 2, 3))),
return iter([ DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='1', image=np.ones((4, 2, 3))), DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), ])
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir) partial(DatumaroConverter.convert, save_images=True), test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, Polygon, LabelCategories AnnotationType, Bbox, Mask, Polygon, LabelCategories
) )
@ -29,101 +29,84 @@ class LabelMeConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load(self): def test_can_save_and_load(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, group=2),
annotations=[ Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
Bbox(0, 4, 4, 8, label=2, group=2), 'occluded': True,
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ 'a1': 'qwe',
'occluded': True, 'a2': True,
'a1': 'qwe', 'a3': 123,
'a2': True, }),
'a3': 123, Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
}), attributes={ 'username': 'test' }),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, Bbox(1, 2, 3, 4, group=3),
attributes={ 'username': 'test' }), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
Bbox(1, 2, 3, 4, group=3), attributes={ 'occluded': True }
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
]
), ),
]) ]
),
def categories(self): ], categories={
label_cat = LabelCategories() AnnotationType.label: LabelCategories.from_iterable(
for label in range(10): 'label_' + str(label) for label in range(10)),
label_cat.add('label_' + str(label)) })
return {
AnnotationType.label: label_cat, target_dataset = Dataset.from_iterable([
} DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
class DstExtractor(Extractor): annotations=[
def __iter__(self): Bbox(0, 4, 4, 8, label=0, group=2, id=0,
return iter([ attributes={
DatasetItem(id=1, subset='train', 'occluded': False, 'username': '',
image=np.ones((16, 16, 3)), }
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
), ),
]) Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
def categories(self): 'occluded': True, 'username': '',
label_cat = LabelCategories() 'a1': 'qwe',
label_cat.add('label_2') 'a2': True,
label_cat.add('label_3') 'a3': 123,
return { }
AnnotationType.label: label_cat, ),
} Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'label_2', 'label_3']),
})
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(
source_dataset,
partial(LabelMeConverter.convert, save_images=True), partial(LabelMeConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor()) test_dir, target_dataset=target_dataset)
def test_cant_save_dataset_with_relative_paths(self): def test_cant_save_dataset_with_relative_paths(self):
class SrcExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
return iter([ ], categories={
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with self.assertRaisesRegex(Exception, r'only supports flat'): with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(expected_dataset,
partial(LabelMeConverter.convert, save_images=True), LabelMeConverter.convert, test_dir)
test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
@ -133,101 +116,91 @@ class LabelMeImporterTest(TestCase):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): img1 = np.ones((77, 102, 3)) * 255
def __iter__(self): img1[6:32, 7:41] = 0
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0 mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1 mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
mask2 = np.zeros((77, 102), dtype=int) [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
mask2[13:25, 54:71] = [ [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], ]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
] target_dataset = Dataset.from_iterable([
DatasetItem(id='img1', image=img1,
return iter([ annotations=[
DatasetItem(id='img1', image=img1, Polygon([43, 34, 45, 34, 45, 37, 43, 37],
annotations=[ label=0, id=0,
Polygon([43, 34, 45, 34, 45, 37, 43, 37], attributes={
label=0, id=0, 'occluded': False,
attributes={ 'username': 'admin'
'occluded': False, }
'username': 'admin' ),
} Mask(mask1, label=1, id=1,
), attributes={
Mask(mask1, label=1, id=1, 'occluded': False,
attributes={ 'username': 'brussell'
'occluded': False, }
'username': 'brussell' ),
} Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
), label=2, group=2, id=2,
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], attributes={
label=2, group=2, id=2, 'a1': True,
attributes={ 'occluded': True,
'a1': True, 'username': 'anonymous'
'occluded': True, }
'username': 'anonymous' ),
} Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
), label=3, group=2, id=3,
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], attributes={
label=3, group=2, id=3, 'kj': True,
attributes={ 'occluded': False,
'kj': True, 'username': 'anonymous'
'occluded': False, }
'username': 'anonymous'
}
),
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
), ),
]) Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
def categories(self): 'hg': True,
label_cat = LabelCategories() 'occluded': True,
label_cat.add('window') 'username': 'anonymous'
label_cat.add('license plate') }
label_cat.add('o1') ),
label_cat.add('q1') Mask(mask2, label=5, group=1, id=5,
label_cat.add('b1') attributes={
label_cat.add('m1') 'd': True,
label_cat.add('hg') 'occluded': False,
return { 'username': 'anonymous'
AnnotationType.label: label_cat, }
} ),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'window', 'license plate', 'o1',
'q1', 'b1', 'm1', 'hg',
]),
})
parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \ parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
.make_dataset() .make_dataset()
compare_datasets(self, expected=DstExtractor(), actual=parsed) compare_datasets(self, expected=target_dataset, actual=parsed)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories AnnotationType, Bbox, LabelCategories
) )
@ -28,96 +28,83 @@ class MotConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self): def test_can_save_bboxes(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, attributes={
annotations=[ 'occluded': True,
Bbox(0, 4, 4, 8, label=2, attributes={ }),
'occluded': True, Bbox(0, 4, 4, 4, label=3, attributes={
}), 'visibility': 0.4,
Bbox(0, 4, 4, 4, label=3, attributes={ }),
'visibility': 0.4, Bbox(2, 4, 4, 4, attributes={
}), 'ignored': True
Bbox(2, 4, 4, 4, attributes={ }),
'ignored': True ]
}), ),
]
), DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
DatasetItem(id=2, subset='val', annotations=[
image=np.ones((8, 8, 3)), Bbox(1, 2, 4, 2, label=3),
annotations=[ ]
Bbox(1, 2, 4, 2, label=3), ),
]
), DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
DatasetItem(id=3, subset='test', ),
image=np.ones((5, 4, 3)) * 3, ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories() target_dataset = Dataset.from_iterable([
for label in range(10): DatasetItem(id=1,
label_cat.add('label_' + str(label)) image=np.ones((16, 16, 3)),
return { annotations=[
AnnotationType.label: label_cat, Bbox(0, 4, 4, 8, label=2, attributes={
} 'occluded': True,
'visibility': 0.0,
class DstExtractor(Extractor): 'ignored': False,
def __iter__(self): }),
return iter([ Bbox(0, 4, 4, 4, label=3, attributes={
DatasetItem(id=1, 'occluded': False,
image=np.ones((16, 16, 3)), 'visibility': 0.4,
annotations=[ 'ignored': False,
Bbox(0, 4, 4, 8, label=2, attributes={ }),
'occluded': True, Bbox(2, 4, 4, 4, attributes={
'visibility': 0.0, 'occluded': False,
'ignored': False, 'visibility': 1.0,
}), 'ignored': True,
Bbox(0, 4, 4, 4, label=3, attributes={ }),
'occluded': False, ]
'visibility': 0.4, ),
'ignored': False,
}), DatasetItem(id=2,
Bbox(2, 4, 4, 4, attributes={ image=np.ones((8, 8, 3)),
'occluded': False, annotations=[
'visibility': 1.0, Bbox(1, 2, 4, 2, label=3, attributes={
'ignored': True, 'occluded': False,
}), 'visibility': 1.0,
] 'ignored': False,
), }),
]
DatasetItem(id=2, ),
image=np.ones((8, 8, 3)),
annotations=[ DatasetItem(id=3,
Bbox(1, 2, 4, 2, label=3, attributes={ image=np.ones((5, 4, 3)) * 3,
'occluded': False, ),
'visibility': 1.0, ], categories={
'ignored': False, AnnotationType.label: LabelCategories.from_iterable(
}), 'label_' + str(label) for label in range(10)),
] })
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(), self._test_save_and_load(
source_dataset,
partial(MotSeqGtConverter.convert, save_images=True), partial(MotSeqGtConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor()) test_dir, target_dataset=target_dataset)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset') DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
@ -127,30 +114,23 @@ class MotImporterTest(TestCase):
self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1,
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2, attributes={
annotations=[ 'occluded': False,
Bbox(0, 4, 4, 8, label=2, attributes={ 'visibility': 1.0,
'occluded': False, 'ignored': False,
'visibility': 1.0, }),
'ignored': False, ]
}), ),
] ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp import os.path as osp
from unittest import TestCase, skipIf from unittest import TestCase, skipIf
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories AnnotationType, Bbox, Mask, LabelCategories
) )
@ -48,117 +48,96 @@ class TfrecordConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset) compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self): def test_can_save_bboxes(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2),
annotations=[ Bbox(0, 4, 4, 4, label=3),
Bbox(0, 4, 4, 8, label=2), Bbox(2, 4, 4, 4),
Bbox(0, 4, 4, 4, label=3), ], attributes={'source_id': ''}
Bbox(2, 4, 4, 4), ),
], attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True), partial(TfDetectionApiConverter.convert, save_images=True),
test_dir) test_dir)
def test_can_save_masks(self): def test_can_save_masks(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), Mask(image=np.array([
annotations=[ [1, 0, 0, 1],
Mask(image=np.array([ [0, 1, 1, 0],
[1, 0, 0, 1], [0, 1, 1, 0],
[0, 1, 1, 0], [1, 0, 0, 1],
[0, 1, 1, 0], ]), label=1),
[1, 0, 0, 1], ],
]), label=1), attributes={'source_id': ''}
], ),
attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_masks=True), partial(TfDetectionApiConverter.convert, save_masks=True),
test_dir) test_dir)
def test_can_save_dataset_with_no_subsets(self): def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1,
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, annotations=[
image=np.ones((16, 16, 3)), Bbox(2, 1, 4, 4, label=2),
annotations=[ Bbox(4, 2, 8, 4, label=3),
Bbox(2, 1, 4, 4, label=2), ],
Bbox(4, 2, 8, 4, label=3), attributes={'source_id': ''}
], ),
attributes={'source_id': ''}
), DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
DatasetItem(id=2, annotations=[
image=np.ones((8, 8, 3)) * 2, Bbox(4, 4, 4, 4, label=3),
annotations=[ ],
Bbox(4, 4, 4, 4, label=3), attributes={'source_id': ''}
], ),
attributes={'source_id': ''}
), DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
DatasetItem(id=3, attributes={'source_id': ''}
image=np.ones((8, 4, 3)) * 3, ),
attributes={'source_id': ''} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True), partial(TfDetectionApiConverter.convert, save_images=True),
test_dir) test_dir)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): test_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1/q.e',
return iter([ image=Image(path='1/q.e', size=(10, 15)),
DatasetItem(id='1/q.e', attributes={'source_id': ''}
image=Image(path='1/q.e', size=(10, 15)), )
attributes={'source_id': ''} ], categories={
) AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(), self._test_save_and_load(test_dataset,
TfDetectionApiConverter.convert, test_dir) TfDetectionApiConverter.convert, test_dir)
def test_labelmap_parsing(self): def test_labelmap_parsing(self):
@ -197,42 +176,35 @@ class TfrecordImporterTest(TestCase):
self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((16, 16, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((16, 16, 3)), Bbox(0, 4, 4, 8, label=2),
annotations=[ Bbox(0, 4, 4, 4, label=3),
Bbox(0, 4, 4, 8, label=2), Bbox(2, 4, 4, 4),
Bbox(0, 4, 4, 4, label=3), ],
Bbox(2, 4, 4, 4), attributes={'source_id': '1'}
], ),
attributes={'source_id': '1'}
), DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
DatasetItem(id=2, subset='val', annotations=[
image=np.ones((8, 8, 3)), Bbox(1, 2, 4, 2, label=3),
annotations=[ ],
Bbox(1, 2, 4, 2, label=3), attributes={'source_id': '2'}
], ),
attributes={'source_id': '2'}
), DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
DatasetItem(id=3, subset='test', attributes={'source_id': '3'}
image=np.ones((5, 4, 3)) * 3, ),
attributes={'source_id': '3'} ], categories={
), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(label) for label in range(10)),
})
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, target_dataset, dataset)

@ -2,7 +2,7 @@ import logging as log
import numpy as np import numpy as np
from unittest import TestCase from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
Mask, Polygon, PolyLine, Points, Bbox, Label, Mask, Polygon, PolyLine, Points, Bbox, Label,
LabelCategories, MaskCategories, AnnotationType LabelCategories, MaskCategories, AnnotationType
@ -67,304 +67,269 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, DstExtractor(), actual)
def test_mask_to_polygons_small_polygons_message(self): def test_mask_to_polygons_small_polygons_message(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
items = [ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Mask(np.array([
annotations=[ [0, 0, 0],
Mask(np.array([ [0, 1, 0],
[0, 0, 0], [0, 0, 0],
[0, 1, 0], ]),
[0, 0, 0],
]),
),
]
), ),
] ]
return iter(items) ),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
with self.assertLogs(level=log.DEBUG) as logs: with self.assertLogs(level=log.DEBUG) as logs:
actual = transforms.MasksToPolygons(SrcExtractor()) actual = transforms.MasksToPolygons(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
self.assertRegex('\n'.join(logs.output), 'too small polygons') self.assertRegex('\n'.join(logs.output), 'too small polygons')
def test_polygons_to_masks(self): def test_polygons_to_masks(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Polygon([0, 0, 4, 0, 4, 4]),
annotations=[ Polygon([5, 0, 9, 0, 5, 5]),
Polygon([0, 0, 4, 0, 4, 4]), ]
Polygon([5, 0, 9, 0, 5, 5]), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 10, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 10, 3)), Mask(np.array([
annotations=[ [0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
Mask(np.array([ [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0], [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0], ]),
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
), ),
]) Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
),
])
actual = transforms.PolygonsToMasks(SrcExtractor()) actual = transforms.PolygonsToMasks(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_crop_covered_segments(self): def test_crop_covered_segments(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), # The mask is partially covered by the polygon
annotations=[ Mask(np.array([
# The mask is partially covered by the polygon [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 0, 0],
[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]],
[1, 1, 1, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0), z_order=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], ]
z_order=1), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0), z_order=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], ]
z_order=1), ),
] ])
),
])
actual = transforms.CropCoveredSegments(SrcExtractor()) actual = transforms.CropCoveredSegments(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_merge_instance_segments(self): def test_merge_instance_segments(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ),
[1, 1, 1, 0, 0]], z_order=0, group=1),
), Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=0, group=1), z_order=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1, group=1), z_order=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0], ]
z_order=1), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 1, 1, 1, 1],
[0, 0, 1, 1, 1], [1, 1, 1, 1, 1],
[0, 1, 1, 1, 1], [1, 1, 1, 1, 0],
[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]],
[1, 1, 1, 1, 0], ),
[1, 1, 1, 0, 0]], z_order=0, group=1),
), Mask(np.array([
z_order=0, group=1), [1, 1, 0, 0, 0],
Mask(np.array([ [1, 1, 0, 0, 0],
[1, 1, 0, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 0, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=1),
), ]
z_order=1), ),
] ])
),
])
actual = transforms.MergeInstanceSegments(SrcExtractor(), actual = transforms.MergeInstanceSegments(source_dataset,
include_polygons=True) include_polygons=True)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_map_subsets(self): def test_map_subsets(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='a'),
return iter([ DatasetItem(id=2, subset='b'),
DatasetItem(id=1, subset='a'), DatasetItem(id=3, subset='c'),
DatasetItem(id=2, subset='b'), ])
DatasetItem(id=3, subset='c'),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset=''),
return iter([ DatasetItem(id=2, subset='a'),
DatasetItem(id=1, subset=''), DatasetItem(id=3, subset='c'),
DatasetItem(id=2, subset='a'), ])
DatasetItem(id=3, subset='c'),
])
actual = transforms.MapSubsets(SrcExtractor(), actual = transforms.MapSubsets(source_dataset,
{ 'a': '', 'b': 'a' }) { 'a': '', 'b': 'a' })
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_shapes_to_boxes(self): def test_shapes_to_boxes(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [0, 0, 1, 1, 1],
Mask(np.array([ [0, 0, 0, 0, 1],
[0, 0, 1, 1, 1], [1, 0, 0, 0, 1],
[0, 0, 0, 0, 1], [1, 0, 0, 0, 0],
[1, 0, 0, 0, 1], [1, 1, 1, 0, 0]],
[1, 0, 0, 0, 0], ), id=1),
[1, 1, 1, 0, 0]], Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
), id=1), PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2), Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), ]
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Bbox(0, 0, 4, 4, id=1),
annotations=[ Bbox(1, 1, 3, 3, id=2),
Bbox(0, 0, 4, 4, id=1), Bbox(1, 1, 1, 1, id=3),
Bbox(1, 1, 3, 3, id=2), Bbox(2, 2, 2, 2, id=4),
Bbox(1, 1, 1, 1, id=3), ]
Bbox(2, 2, 2, 2, id=4), ),
] ])
),
])
actual = transforms.ShapesToBoxes(SrcExtractor()) actual = transforms.ShapesToBoxes(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_id_from_image(self): def test_id_from_image(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image='path.jpg'),
return iter([ DatasetItem(id=2),
DatasetItem(id=1, image='path.jpg'), ])
DatasetItem(id=2), target_dataset = Dataset.from_iterable([
]) DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
class DstExtractor(Extractor): ])
def __iter__(self):
return iter([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
actual = transforms.IdFromImageName(SrcExtractor()) actual = transforms.IdFromImageName(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_boxes_to_masks(self): def test_boxes_to_masks(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Bbox(0, 0, 3, 3, z_order=1),
annotations=[ Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 0, 3, 3, z_order=1), Bbox(0, 2, 3, 1, z_order=3),
Bbox(0, 0, 3, 1, z_order=2), ]
Bbox(0, 2, 3, 1, z_order=3), ),
] ])
),
])
class DstExtractor(Extractor): target_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, image=np.zeros((5, 5, 3)),
return iter([ annotations=[
DatasetItem(id=1, image=np.zeros((5, 5, 3)), Mask(np.array([
annotations=[ [1, 1, 1, 0, 0],
Mask(np.array([ [1, 1, 1, 0, 0],
[1, 1, 1, 0, 0], [1, 1, 1, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=1),
), Mask(np.array([
z_order=1), [1, 1, 1, 0, 0],
Mask(np.array([ [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=2),
), Mask(np.array([
z_order=2), [0, 0, 0, 0, 0],
Mask(np.array([ [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0], [1, 1, 1, 0, 0],
[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
[1, 1, 1, 0, 0], [0, 0, 0, 0, 0]],
[0, 0, 0, 0, 0], ),
[0, 0, 0, 0, 0]], z_order=3),
), ]
z_order=3), ),
] ])
),
])
actual = transforms.BoxesToMasks(SrcExtractor()) actual = transforms.BoxesToMasks(source_dataset)
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)
def test_random_split(self): def test_random_split(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset="a"),
return iter([ DatasetItem(id=2, subset="a"),
DatasetItem(id=1, subset="a"), DatasetItem(id=3, subset="b"),
DatasetItem(id=2, subset="a"), DatasetItem(id=4, subset="b"),
DatasetItem(id=3, subset="b"), DatasetItem(id=5, subset="b"),
DatasetItem(id=4, subset="b"), DatasetItem(id=6, subset=""),
DatasetItem(id=5, subset="b"), DatasetItem(id=7, subset=""),
DatasetItem(id=6, subset=""), ])
DatasetItem(id=7, subset=""),
])
actual = transforms.RandomSplit(SrcExtractor(), splits=[ actual = transforms.RandomSplit(source_dataset, splits=[
('train', 4.0 / 7.0), ('train', 4.0 / 7.0),
('test', 3.0 / 7.0), ('test', 3.0 / 7.0),
]) ])
@ -373,21 +338,19 @@ class TransformsTest(TestCase):
self.assertEqual(3, len(actual.get_subset('test'))) self.assertEqual(3, len(actual.get_subset('test')))
def test_random_split_gives_error_on_wrong_ratios(self): def test_random_split_gives_error_on_wrong_ratios(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([DatasetItem(id=1)])
def __iter__(self):
return iter([DatasetItem(id=1)])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[ transforms.RandomSplit(source_dataset, splits=[
('train', 0.5), ('train', 0.5),
('test', 0.7), ('test', 0.7),
]) ])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[]) transforms.RandomSplit(source_dataset, splits=[])
with self.assertRaises(Exception): with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[ transforms.RandomSplit(source_dataset, splits=[
('train', -0.5), ('train', -0.5),
('test', 1.5), ('test', 1.5),
]) ])
@ -462,24 +425,19 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, DstExtractor(), actual)
def test_remap_labels_delete_unspecified(self): def test_remap_labels_delete_unspecified(self):
class SrcExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, annotations=[ Label(0) ])
return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ]) ], categories={
AnnotationType.label: LabelCategories.from_iterable('label0'),
def categories(self): })
label_cat = LabelCategories()
label_cat.add('label0') target_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[]),
return { AnnotationType.label: label_cat } ], categories={
AnnotationType.label: LabelCategories(),
class DstExtractor(Extractor): })
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[]) ]) actual = transforms.RemapLabels(source_dataset,
def categories(self):
return { AnnotationType.label: LabelCategories() }
actual = transforms.RemapLabels(SrcExtractor(),
mapping={}, default='delete') mapping={}, default='delete')
compare_datasets(self, DstExtractor(), actual) compare_datasets(self, target_dataset, actual)

@ -6,7 +6,7 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories, AnnotationType, Bbox, LabelCategories,
) )
from datumaro.components.project import Project from datumaro.components.project import Project, Dataset
from datumaro.plugins.yolo_format.importer import YoloImporter from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.image import Image, save_image from datumaro.util.image import Image, save_image
@ -15,40 +15,32 @@ from datumaro.util.test_utils import TestDir, compare_datasets
class YoloFormatTest(TestCase): class YoloFormatTest(TestCase):
def test_can_save_and_load(self): def test_can_save_and_load(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
return iter([ annotations=[
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(0, 1, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(0, 1, 2, 3, label=4), DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
]), annotations=[
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), Bbox(2, 1, 2, 3, label=4),
Bbox(3, 3, 2, 3, label=4), ]),
Bbox(2, 1, 2, 3, label=4),
]), DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), Bbox(0, 1, 5, 2, label=2),
annotations=[ Bbox(0, 2, 3, 2, label=5),
Bbox(0, 1, 5, 2, label=2), Bbox(0, 2, 4, 2, label=6),
Bbox(0, 2, 3, 2, label=5), Bbox(0, 7, 3, 2, label=7),
Bbox(0, 2, 4, 2, label=6), ]),
Bbox(0, 7, 3, 2, label=7), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, save_images=True) YoloConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = YoloImporter()(test_dir).make_dataset() parsed_dataset = YoloImporter()(test_dir).make_dataset()
@ -56,27 +48,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_can_save_dataset_with_image_info(self): def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=Image(path='1.jpg', size=(10, 15)),
DatasetItem(id=1, subset='train', annotations=[
image=Image(path='1.jpg', size=(10, 15)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir) YoloConverter.convert(source_dataset, test_dir)
@ -87,27 +71,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_can_load_dataset_with_exact_image_info(self): def test_can_load_dataset_with_exact_image_info(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=Image(path='1.jpg', size=(10, 15)),
DatasetItem(id=1, subset='train', annotations=[
image=Image(path='1.jpg', size=(10, 15)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir) YoloConverter.convert(source_dataset, test_dir)
@ -117,24 +93,20 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset) compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self): def test_relative_paths(self):
class TestExtractor(Extractor): source_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id='1', subset='train',
return iter([ image=np.ones((4, 2, 3))),
DatasetItem(id='1', subset='train', DatasetItem(id='subdir1/1', subset='train',
image=np.ones((4, 2, 3))), image=np.ones((2, 6, 3))),
DatasetItem(id='subdir1/1', subset='train', DatasetItem(id='subdir2/1', subset='train',
image=np.ones((2, 6, 3))), image=np.ones((5, 4, 3))),
DatasetItem(id='subdir2/1', subset='train', ], categories={
image=np.ones((5, 4, 3))), AnnotationType.label: LabelCategories(),
]) })
def categories(self):
return { AnnotationType.label: LabelCategories() }
for save_images in {True, False}: for save_images in {True, False}:
with self.subTest(save_images=save_images): with self.subTest(save_images=save_images):
with TestDir() as test_dir: with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, YoloConverter.convert(source_dataset, test_dir,
save_images=save_images) save_images=save_images)
@ -150,26 +122,19 @@ class YoloImporterTest(TestCase):
self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR)) self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self): def test_can_import(self):
class DstExtractor(Extractor): expected_dataset = Dataset.from_iterable([
def __iter__(self): DatasetItem(id=1, subset='train',
return iter([ image=np.ones((10, 15, 3)),
DatasetItem(id=1, subset='train', annotations=[
image=np.ones((10, 15, 3)), Bbox(0, 2, 4, 2, label=2),
annotations=[ Bbox(3, 3, 2, 3, label=4),
Bbox(0, 2, 4, 2, label=2), ]),
Bbox(3, 3, 2, 3, label=4), ], categories={
]), AnnotationType.label: LabelCategories.from_iterable(
]) 'label_' + str(i) for i in range(10)),
})
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \ dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
.make_dataset() .make_dataset()
compare_datasets(self, DstExtractor(), dataset) compare_datasets(self, expected_dataset, dataset)

Loading…
Cancel
Save