[Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
main
Ilya Kochankov 6 years ago committed by GitHub
parent e372589dc8
commit 7ecdcf182b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -72,6 +72,33 @@ class Categories:
class LabelCategories(Categories):
Category = namedtuple('Category', ['name', 'parent', 'attributes'])
@classmethod
def from_iterable(cls, iterable):
    """Create a LabelCategories object from an iterable.

    Args:
        iterable: one of:
            1) a single str - produces one category with that name;
            2) an iterable of str - interpreted as category names;
            3) an iterable of positional-argument lists - each list is
               passed to ``add()`` as ``(name, parent, attributes)``.

    Returns:
        LabelCategories: the populated categories object.
    """
    categories = cls()
    # Normalize the single-string shortcut to the general list form.
    entries = [[iterable]] if isinstance(iterable, str) else iterable
    for entry in entries:
        args = [entry] if isinstance(entry, str) else entry
        categories.add(*args)
    return categories
def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes)
@ -482,6 +509,31 @@ class Bbox(_Shape):
class PointsCategories(Categories):
Category = namedtuple('Category', ['labels', 'joints'])
@classmethod
def from_iterable(cls, iterable):
    """Create a PointsCategories object from an iterable.

    Args:
        iterable: one of:
            1) a single int - produces one category with that label;
            2) an iterable of int - interpreted as category labels;
            3) an iterable of positional-argument lists - each list is
               passed through to ``add()``.

    Returns:
        PointsCategories: the populated categories object.
    """
    categories = cls()
    # Normalize the single-int shortcut to the general list form.
    entries = [[iterable]] if isinstance(iterable, int) else iterable
    for entry in entries:
        args = [entry] if isinstance(entry, int) else entry
        categories.add(*args)
    return categories
def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes)

@ -18,7 +18,8 @@ import sys
from datumaro.components.config import Config, DEFAULT_FORMAT
from datumaro.components.config_model import (Model, Source,
PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
from datumaro.components.extractor import Extractor
from datumaro.components.extractor import Extractor, LabelCategories,\
AnnotationType
from datumaro.components.launcher import ModelTransform
from datumaro.components.dataset_filter import \
XPathDatasetFilter, XPathAnnotationsFilter
@ -319,6 +320,35 @@ class Subset(Extractor):
return self._parent.categories()
class Dataset(Extractor):
@classmethod
def from_iterable(cls, iterable, categories=None):
    """Create a Dataset from an iterable of DatasetItems.

    Args:
        iterable: an iterable of DatasetItem objects.
        categories (dict or list, optional): either a mapping of
            AnnotationType to the corresponding Categories object, or
            a list of label names, which is converted to
            LabelCategories. Defaults to an empty dict.

    Returns:
        Dataset: the resulting dataset.
    """
    if isinstance(categories, list):
        categories = {
            AnnotationType.label:
                LabelCategories.from_iterable(categories),
        }
    if not categories:
        categories = {}

    # Materialize the items once so the extractor can be iterated
    # repeatedly - a bare generator would be exhausted after the
    # first pass through from_extractors().
    items = list(iterable)

    class _TmpExtractor(Extractor):
        def __iter__(self):
            return iter(items)

        def categories(self):
            return categories

    return cls.from_extractors(_TmpExtractor())
@classmethod
def from_extractors(cls, *sources):
# merge categories

@ -4,7 +4,7 @@ import os.path as osp
from unittest import TestCase
from datumaro.components.project import Project
from datumaro.components.project import (Project, Dataset)
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories
@ -26,32 +26,26 @@ DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
class CocoImporterTest(TestCase):
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('TEST')
return { AnnotationType.label: label_cat }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
Mask(np.array(
[[1, 0, 0, 1, 0]] * 5 +
[[1, 1, 1, 1, 0]] * 5
), label=0,
id=2, group=2, attributes={'is_crowd': True}),
]
),
], categories=['TEST',])
dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)
def test_can_detect(self):
self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@ -71,526 +65,417 @@ class CocoConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load_captions(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
annotations=[
Caption('hello', id=1, group=1),
Caption('world', id=2, group=2),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train',
annotations=[
Caption('test', id=3, group=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val',
annotations=[
Caption('word', id=1, group=1),
], attributes={'id': 1}
),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
annotations=[
Caption('hello', id=1, group=1),
Caption('world', id=2, group=2),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train',
annotations=[
Caption('test', id=3, group=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val',
annotations=[
Caption('word', id=1, group=1),
], attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoCaptionsConverter.convert, test_dir)
def test_can_save_and_load_instances(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
categories = { AnnotationType.label: label_categories }
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + single polygon
Bbox(0, 1, 2, 2,
label=2, group=1, id=1,
attributes={ 'is_crowd': False }),
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Mask + bbox
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + mask
Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
])
def categories(self):
return categories
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + single polygon
Bbox(0, 1, 2, 2,
label=2, group=1, id=1,
attributes={ 'is_crowd': False }),
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Mask + bbox
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
# Bbox + mask
Bbox(0, 1, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1}),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 1, 1],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0],
[1, 1, 1, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]],
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_can_merge_polygons_on_loading(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
categories = { AnnotationType.label: label_categories }
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
])
def categories(self):
return categories
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
])
def categories(self):
return categories
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
CocoInstancesConverter.convert, test_dir,
importer_args={'merge_instance_polygons': True},
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_can_crop_covered_segments(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, z_order=1),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
label=2, id=1, z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, z_order=1),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
attributes={ 'is_crowd': True },
label=2, id=1, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=DstTestExtractor())
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=target_dataset)
def test_can_convert_polygons_to_mask(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstTestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Polygon([5, 0, 9, 0, 5, 5],
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
test_dir, target_dataset=DstTestExtractor())
test_dir, target_dataset=target_dataset)
def test_can_convert_masks_to_polygons(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
label=3, id=4, group=4),
]
),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
label=3, id=4, group=4),
]
),
], categories=[str(i) for i in range(10)])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon(
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
Polygon(
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
], attributes={'id': 1}
),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
test_dir, target_dataset=DstExtractor())
test_dir,
target_dataset=target_dataset)
def test_can_save_and_load_images(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=5, subset='test', attributes={'id': 1}),
])
DatasetItem(id=5, subset='test', attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir)
def test_can_save_and_load_labels(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
annotations=[
Label(4, id=1, group=1),
Label(9, id=2, group=2),
], attributes={'id': 1}
),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return {
AnnotationType.label: label_categories,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
annotations=[
Label(4, id=1, group=1),
Label(9, id=2, group=2),
], attributes={'id': 1}),
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self):
label_categories = LabelCategories()
points_categories = PointsCategories()
for i in range(10):
label_categories.add(str(i))
points_categories.add(i, joints=[[0, 1], [1, 2]])
categories = {
AnnotationType.label: label_categories,
AnnotationType.points: points_categories,
}
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
# Full instance annotations: polygon + keypoints
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1),
# Full instance annotations: bbox + keypoints
Points([1, 2, 3, 4, 2, 3], group=2, id=2),
Bbox(1, 2, 2, 2, group=2, id=2),
# Solitary keypoints
Points([1, 2, 0, 2, 4, 1], label=5, id=3),
# Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
# Solitary keypoints with no label
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
])
])
def categories(self):
return categories
class DstTestExtractor(TestExtractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3],
group=2, id=2,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
group=5, id=5,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
# Full instance annotations: polygon + keypoints
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1),
# Full instance annotations: bbox + keypoints
Points([1, 2, 3, 4, 2, 3], group=2, id=2),
Bbox(1, 2, 2, 2, group=2, id=2),
# Solitary keypoints
Points([1, 2, 0, 2, 4, 1], label=5, id=3),
# Some other solitary annotations (bug #1387)
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
# Solitary keypoints with no label
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Polygon([0, 0, 4, 0, 4, 4],
label=3, group=1, id=1,
attributes={'is_crowd': False}),
Points([1, 2, 3, 4, 2, 3],
group=2, id=2,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=2, id=2,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
group=5, id=5,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(source_dataset,
CocoPersonKeypointsConverter.convert, test_dir,
target_dataset=DstTestExtractor())
target_dataset=target_dataset)
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, attributes={'id': 1}),
DatasetItem(id=2, attributes={'id': 2}),
])
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, attributes={'id': 1}),
DatasetItem(id=2, attributes={'id': 2}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
CocoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
attributes={'id': 1}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoImageInfoConverter.convert, test_dir)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'id': 1}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 2}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 3}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'id': 1}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 2}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 3}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
def test_preserve_coco_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'id': 40}),
])
expected_dataset = Dataset.from_iterable([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'id': 40}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
def test_annotation_attributes(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
], attributes={'id': 1})
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return { AnnotationType.label: label_categories, }
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
], attributes={'id': 1})
], categories=[str(i) for i in range(10)])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CocoConverter.convert, test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Points, Polygon, PolyLine, Bbox, Label,
LabelCategories,
@ -28,121 +28,115 @@ class CvatImporterTest(TestCase):
self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR))
def test_can_load_image(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='img0', subset='train',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, z_order=1,
attributes={
'occluded': True,
'a1': True, 'a2': 'v3'
}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False}),
], attributes={'frame': 0}),
DatasetItem(id='img1', subset='train',
image=np.ones((10, 10, 3)),
annotations=[
Polygon([1, 2, 3, 4, 6, 5], z_order=1,
attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}),
], attributes={'frame': 1}),
])
def categories(self):
label_categories = LabelCategories()
label_categories.add('label1', attributes={'a1', 'a2'})
label_categories.add('label2')
return { AnnotationType.label: label_categories }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='img0', subset='train',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, z_order=1,
attributes={
'occluded': True,
'a1': True, 'a2': 'v3'
}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False}),
], attributes={'frame': 0}),
DatasetItem(id='img1', subset='train',
image=np.ones((10, 10, 3)),
annotations=[
Polygon([1, 2, 3, 4, 6, 5], z_order=1,
attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}),
], attributes={'frame': 1}),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
['label1', '', {'a1', 'a2'}],
['label2'],
])
})
parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset)
compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_load_video(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='frame_000010', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(3, 4, 7, 1, label=2,
id=0,
attributes={
'occluded': True,
'outside': False, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 1, 'hgl': 'hgkf',
}),
], attributes={'frame': 10}),
DatasetItem(id='frame_000013', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(7, 6, 7, 2, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 1, 'hgl': 'jk',
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 13}),
DatasetItem(id='frame_000016', subset='annotations',
image=Image(path='frame_0000016.png', size=(20, 25)),
annotations=[
Bbox(8, 7, 6, 10, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 16}),
])
def categories(self):
label_categories = LabelCategories()
label_categories.add('klhg', attributes={'hgl'})
label_categories.add('z U k')
label_categories.add('II')
return { AnnotationType.label: label_categories }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='frame_000010', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(3, 4, 7, 1, label=2,
id=0,
attributes={
'occluded': True,
'outside': False, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 1, 'hgl': 'hgkf',
}),
], attributes={'frame': 10}),
DatasetItem(id='frame_000013', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(7, 6, 7, 2, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
label=0,
id=1,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 1, 'hgl': 'jk',
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': False, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 13}),
DatasetItem(id='frame_000016', subset='annotations',
image=Image(path='frame_0000016.png', size=(20, 25)),
annotations=[
Bbox(8, 7, 6, 10, label=2,
id=0,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 0
}),
PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21],
label=2,
id=2,
attributes={
'occluded': False,
'outside': True, 'keyframe': True,
'track_id': 2,
}),
], attributes={'frame': 16}),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
['klhg', '', {'hgl'}],
['z U k'],
['II']
]),
})
parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset()
compare_datasets(self, DstExtractor(), parsed_dataset)
compare_datasets(self, expected_dataset, parsed_dataset)
class CvatConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir,
@ -165,137 +159,120 @@ class CvatConverterTest(TestCase):
label_categories.items[2].attributes.update(['a1', 'a2'])
label_categories.attributes.update(['occluded'])
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'a1': 'x', 'a2': 42,
'unknown': 'bar' }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
]
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Bbox(5, 0, 1, 9,
label=3, id=4, group=4),
]
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
]
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
])
def categories(self):
return { AnnotationType.label: label_categories }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'occluded': False,
'a1': 'x', 'a2': 42 }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
], attributes={'frame': 0}
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, group=4,
attributes={ 'occluded': False }),
Bbox(5, 0, 1, 9,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 1}
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 0}
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
])
def categories(self):
return { AnnotationType.label: label_categories }
source_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'a1': 'x', 'a2': 42,
'unknown': 'bar' }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
]
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4),
Bbox(5, 0, 1, 9,
label=3, id=4, group=4),
]
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label
]
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'occluded': False,
'a1': 'x', 'a2': 42 }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
], attributes={'frame': 0}
),
DatasetItem(id=1, subset='s1',
annotations=[
PolyLine([0, 0, 4, 0, 4, 4],
label=3, group=4,
attributes={ 'occluded': False }),
Bbox(5, 0, 1, 9,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 1}
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
], attributes={'frame': 0}
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
], categories={
AnnotationType.label: label_categories,
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'frame': 0}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
], categories={ AnnotationType.label: LabelCategories() })
target_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'frame': 0}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
], categories={
AnnotationType.label: LabelCategories()
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
target_dataset=target_dataset)
def test_preserve_frame_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'frame': 40}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'frame': 40}),
], categories={
AnnotationType.label: LabelCategories()
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(expected_dataset,
CvatConverter.convert, test_dir)

@ -2,7 +2,7 @@ from functools import partial
import numpy as np
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.project import Project
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Mask, Points, Polygon,
@ -32,82 +32,75 @@ class DatumaroConverterTest(TestCase):
compare_datasets_strict(self,
expected=target_dataset, actual=parsed_dataset)
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
])
def categories(self):
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
return {
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
}
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
test_dataset = Dataset.from_iterable([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
})
def test_can_save_and_load(self):
with TestDir() as test_dir:
self._test_save_and_load(self.TestExtractor(),
self._test_save_and_load(self.test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir)
def test_can_detect(self):
with TestDir() as test_dir:
DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir)
DatumaroConverter.convert(self.test_dataset, save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir))
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
test_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
partial(DatumaroConverter.convert, save_images=True), test_dir)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, Polygon, LabelCategories
)
@ -29,101 +29,84 @@ class LabelMeConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_and_load(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, group=2),
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
'occluded': True,
'a1': 'qwe',
'a2': True,
'a3': 123,
}),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
attributes={ 'username': 'test' }),
Bbox(1, 2, 3, 4, group=3),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
]
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, group=2),
Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={
'occluded': True,
'a1': 'qwe',
'a2': True,
'a3': 123,
}),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
attributes={ 'username': 'test' }),
Bbox(1, 2, 3, 4, group=3),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3,
attributes={ 'occluded': True }
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=0, group=2, id=0,
attributes={
'occluded': False, 'username': '',
}
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('label_2')
label_cat.add('label_3')
return {
AnnotationType.label: label_cat,
}
Polygon([0, 4, 4, 4, 5, 6], label=1, id=1,
attributes={
'occluded': True, 'username': '',
'a1': 'qwe',
'a2': True,
'a3': 123,
}
),
Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2,
id=2, attributes={
'occluded': False, 'username': 'test'
}
),
Bbox(1, 2, 3, 4, group=1, id=3, attributes={
'occluded': False, 'username': '',
}),
Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1,
id=4, attributes={
'occluded': True, 'username': ''
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'label_2', 'label_3']),
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(
source_dataset,
partial(LabelMeConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())
test_dir, target_dataset=target_dataset)
def test_cant_save_dataset_with_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
expected_dataset = Dataset.from_iterable([
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
], categories={
AnnotationType.label: LabelCategories(),
})
with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
partial(LabelMeConverter.convert, save_images=True),
test_dir)
self._test_save_and_load(expected_dataset,
LabelMeConverter.convert, test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
@ -133,101 +116,91 @@ class LabelMeImporterTest(TestCase):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
return iter([
DatasetItem(id='img1', image=img1,
annotations=[
Polygon([43, 34, 45, 34, 45, 37, 43, 37],
label=0, id=0,
attributes={
'occluded': False,
'username': 'admin'
}
),
Mask(mask1, label=1, id=1,
attributes={
'occluded': False,
'username': 'brussell'
}
),
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
label=2, group=2, id=2,
attributes={
'a1': True,
'occluded': True,
'username': 'anonymous'
}
),
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
label=3, group=2, id=3,
attributes={
'kj': True,
'occluded': False,
'username': 'anonymous'
}
),
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
img1 = np.ones((77, 102, 3)) * 255
img1[6:32, 7:41] = 0
mask1 = np.zeros((77, 102), dtype=int)
mask1[67:69, 58:63] = 1
mask2 = np.zeros((77, 102), dtype=int)
mask2[13:25, 54:71] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
target_dataset = Dataset.from_iterable([
DatasetItem(id='img1', image=img1,
annotations=[
Polygon([43, 34, 45, 34, 45, 37, 43, 37],
label=0, id=0,
attributes={
'occluded': False,
'username': 'admin'
}
),
Mask(mask1, label=1, id=1,
attributes={
'occluded': False,
'username': 'brussell'
}
),
Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12],
label=2, group=2, id=2,
attributes={
'a1': True,
'occluded': True,
'username': 'anonymous'
}
),
Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
label=3, group=2, id=3,
attributes={
'kj': True,
'occluded': False,
'username': 'anonymous'
}
),
])
def categories(self):
label_cat = LabelCategories()
label_cat.add('window')
label_cat.add('license plate')
label_cat.add('o1')
label_cat.add('q1')
label_cat.add('b1')
label_cat.add('m1')
label_cat.add('hg')
return {
AnnotationType.label: label_cat,
}
Bbox(13, 19, 10, 11, label=4, group=2, id=4,
attributes={
'hg': True,
'occluded': True,
'username': 'anonymous'
}
),
Mask(mask2, label=5, group=1, id=5,
attributes={
'd': True,
'occluded': False,
'username': 'anonymous'
}
),
Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
label=6, group=1, id=6,
attributes={
'gfd lkj lkj hi': True,
'occluded': False,
'username': 'anonymous'
}
),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable([
'window', 'license plate', 'o1',
'q1', 'b1', 'm1', 'hg',
]),
})
parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
.make_dataset()
compare_datasets(self, expected=DstExtractor(), actual=parsed)
compare_datasets(self, expected=target_dataset, actual=parsed)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories
)
@ -28,96 +28,83 @@ class MotConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'visibility': 0.4,
}),
Bbox(2, 4, 4, 4, attributes={
'ignored': True
}),
]
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
]
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
'visibility': 0.0,
'ignored': False,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'occluded': False,
'visibility': 0.4,
'ignored': False,
}),
Bbox(2, 4, 4, 4, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': True,
}),
]
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'visibility': 0.4,
}),
Bbox(2, 4, 4, 4, attributes={
'ignored': True
}),
]
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
]
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': True,
'visibility': 0.0,
'ignored': False,
}),
Bbox(0, 4, 4, 4, label=3, attributes={
'occluded': False,
'visibility': 0.4,
'ignored': False,
}),
Bbox(2, 4, 4, 4, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': True,
}),
]
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
DatasetItem(id=3,
image=np.ones((5, 4, 3)) * 3,
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
self._test_save_and_load(
source_dataset,
partial(MotSeqGtConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())
test_dir, target_dataset=target_dataset)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
@ -127,30 +114,23 @@ class MotImporterTest(TestCase):
self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)

@ -3,7 +3,7 @@ import numpy as np
import os.path as osp
from unittest import TestCase, skipIf
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories
)
@ -48,117 +48,96 @@ class TfrecordConverterTest(TestCase):
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
def test_can_save_bboxes(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
], attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
], attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_masks(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
annotations=[
Mask(image=np.array([
[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1],
]), label=1),
],
attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
annotations=[
Mask(image=np.array([
[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1],
]), label=1),
],
attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_masks=True),
test_dir)
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(2, 1, 4, 4, label=2),
Bbox(4, 2, 8, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
annotations=[
Bbox(4, 4, 4, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
attributes={'source_id': ''}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
test_dataset = Dataset.from_iterable([
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(2, 1, 4, 4, label=2),
Bbox(4, 2, 8, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
annotations=[
Bbox(4, 4, 4, 4, label=3),
],
attributes={'source_id': ''}
),
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
attributes={'source_id': ''}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(
test_dataset,
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1/q.e',
image=Image(path='1/q.e', size=(10, 15)),
attributes={'source_id': ''}
)
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
test_dataset = Dataset.from_iterable([
DatasetItem(id='1/q.e',
image=Image(path='1/q.e', size=(10, 15)),
attributes={'source_id': ''}
)
], categories={
AnnotationType.label: LabelCategories(),
})
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
self._test_save_and_load(test_dataset,
TfDetectionApiConverter.convert, test_dir)
def test_labelmap_parsing(self):
@ -197,42 +176,35 @@ class TfrecordImporterTest(TestCase):
self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
attributes={'source_id': '1'}
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
attributes={'source_id': '2'}
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
attributes={'source_id': '3'}
),
])
def categories(self):
label_cat = LabelCategories()
for label in range(10):
label_cat.add('label_' + str(label))
return {
AnnotationType.label: label_cat,
}
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
attributes={'source_id': '1'}
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
attributes={'source_id': '2'}
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
attributes={'source_id': '3'}
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, target_dataset, dataset)

@ -2,7 +2,7 @@ import logging as log
import numpy as np
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (Extractor, DatasetItem,
Mask, Polygon, PolyLine, Points, Bbox, Label,
LabelCategories, MaskCategories, AnnotationType
@ -67,304 +67,269 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual)
def test_mask_to_polygons_small_polygons_message(self):
class SrcExtractor(Extractor):
def __iter__(self):
items = [
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0],
[0, 1, 0],
[0, 0, 0],
]),
),
]
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0],
[0, 1, 0],
[0, 0, 0],
]),
),
]
return iter(items)
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3))), ])
with self.assertLogs(level=log.DEBUG) as logs:
actual = transforms.MasksToPolygons(SrcExtractor())
actual = transforms.MasksToPolygons(source_dataset)
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
self.assertRegex('\n'.join(logs.output), 'too small polygons')
def test_polygons_to_masks(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4]),
Polygon([5, 0, 9, 0, 5, 5]),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4]),
Polygon([5, 0, 9, 0, 5, 5]),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
annotations=[
Mask(np.array([
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
])
Mask(np.array([
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]),
),
]
),
])
actual = transforms.PolygonsToMasks(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.PolygonsToMasks(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_crop_covered_segments(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
# The mask is partially covered by the polygon
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
# The mask is partially covered by the polygon
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1),
]
),
])
actual = transforms.CropCoveredSegments(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.CropCoveredSegments(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_merge_instance_segments(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1, group=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1, group=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Mask(np.array([
[1, 1, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0]],
),
z_order=0, group=1),
Mask(np.array([
[1, 1, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
]
),
])
actual = transforms.MergeInstanceSegments(SrcExtractor(),
actual = transforms.MergeInstanceSegments(source_dataset,
include_polygons=True)
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
def test_map_subsets(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a'),
DatasetItem(id=2, subset='b'),
DatasetItem(id=3, subset='c'),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='a'),
DatasetItem(id=2, subset='b'),
DatasetItem(id=3, subset='c'),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset=''),
DatasetItem(id=2, subset='a'),
DatasetItem(id=3, subset='c'),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset=''),
DatasetItem(id=2, subset='a'),
DatasetItem(id=3, subset='c'),
])
actual = transforms.MapSubsets(SrcExtractor(),
actual = transforms.MapSubsets(source_dataset,
{ 'a': '', 'b': 'a' })
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)
def test_shapes_to_boxes(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
), id=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[0, 0, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
), id=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 4, 4, id=1),
Bbox(1, 1, 3, 3, id=2),
Bbox(1, 1, 1, 1, id=3),
Bbox(2, 2, 2, 2, id=4),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 4, 4, id=1),
Bbox(1, 1, 3, 3, id=2),
Bbox(1, 1, 1, 1, id=3),
Bbox(2, 2, 2, 2, id=4),
]
),
])
actual = transforms.ShapesToBoxes(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.ShapesToBoxes(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_id_from_image(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image='path.jpg'),
DatasetItem(id=2),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image='path.jpg'),
DatasetItem(id=2),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id='path', image='path.jpg'),
DatasetItem(id=2),
])
actual = transforms.IdFromImageName(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.IdFromImageName(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_boxes_to_masks(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 3, 3, z_order=1),
Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 2, 3, 1, z_order=3),
]
),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Bbox(0, 0, 3, 3, z_order=1),
Bbox(0, 0, 3, 1, z_order=2),
Bbox(0, 2, 3, 1, z_order=3),
]
),
])
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
Mask(np.array([
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=2),
Mask(np.array([
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=3),
]
),
])
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
annotations=[
Mask(np.array([
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
Mask(np.array([
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=2),
Mask(np.array([
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=3),
]
),
])
actual = transforms.BoxesToMasks(SrcExtractor())
compare_datasets(self, DstExtractor(), actual)
actual = transforms.BoxesToMasks(source_dataset)
compare_datasets(self, target_dataset, actual)
def test_random_split(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset="a"),
DatasetItem(id=2, subset="a"),
DatasetItem(id=3, subset="b"),
DatasetItem(id=4, subset="b"),
DatasetItem(id=5, subset="b"),
DatasetItem(id=6, subset=""),
DatasetItem(id=7, subset=""),
])
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset="a"),
DatasetItem(id=2, subset="a"),
DatasetItem(id=3, subset="b"),
DatasetItem(id=4, subset="b"),
DatasetItem(id=5, subset="b"),
DatasetItem(id=6, subset=""),
DatasetItem(id=7, subset=""),
])
actual = transforms.RandomSplit(SrcExtractor(), splits=[
actual = transforms.RandomSplit(source_dataset, splits=[
('train', 4.0 / 7.0),
('test', 3.0 / 7.0),
])
@ -373,21 +338,19 @@ class TransformsTest(TestCase):
self.assertEqual(3, len(actual.get_subset('test')))
def test_random_split_gives_error_on_wrong_ratios(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([DatasetItem(id=1)])
source_dataset = Dataset.from_iterable([DatasetItem(id=1)])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[
transforms.RandomSplit(source_dataset, splits=[
('train', 0.5),
('test', 0.7),
])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[])
transforms.RandomSplit(source_dataset, splits=[])
with self.assertRaises(Exception):
transforms.RandomSplit(SrcExtractor(), splits=[
transforms.RandomSplit(source_dataset, splits=[
('train', -0.5),
('test', 1.5),
])
@ -462,24 +425,19 @@ class TransformsTest(TestCase):
compare_datasets(self, DstExtractor(), actual)
def test_remap_labels_delete_unspecified(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ])
def categories(self):
label_cat = LabelCategories()
label_cat.add('label0')
return { AnnotationType.label: label_cat }
class DstExtractor(Extractor):
def __iter__(self):
return iter([ DatasetItem(id=1, annotations=[]) ])
def categories(self):
return { AnnotationType.label: LabelCategories() }
actual = transforms.RemapLabels(SrcExtractor(),
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[ Label(0) ])
], categories={
AnnotationType.label: LabelCategories.from_iterable('label0'),
})
target_dataset = Dataset.from_iterable([
DatasetItem(id=1, annotations=[]),
], categories={
AnnotationType.label: LabelCategories(),
})
actual = transforms.RemapLabels(source_dataset,
mapping={}, default='delete')
compare_datasets(self, DstExtractor(), actual)
compare_datasets(self, target_dataset, actual)

@ -6,7 +6,7 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories,
)
from datumaro.components.project import Project
from datumaro.components.project import Project, Dataset
from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.image import Image, save_image
@ -15,40 +15,32 @@ from datumaro.util.test_utils import TestDir, compare_datasets
class YoloFormatTest(TestCase):
def test_can_save_and_load(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(0, 1, 2, 3, label=4),
]),
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
Bbox(2, 1, 2, 3, label=4),
]),
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 1, 5, 2, label=2),
Bbox(0, 2, 3, 2, label=5),
Bbox(0, 2, 4, 2, label=6),
Bbox(0, 7, 3, 2, label=7),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(0, 1, 2, 3, label=4),
]),
DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
Bbox(2, 1, 2, 3, label=4),
]),
DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 1, 5, 2, label=2),
Bbox(0, 2, 3, 2, label=5),
Bbox(0, 2, 4, 2, label=6),
Bbox(0, 7, 3, 2, label=7),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = YoloImporter()(test_dir).make_dataset()
@ -56,27 +48,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir)
@ -87,27 +71,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_can_load_dataset_with_exact_image_info(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir)
@ -117,24 +93,20 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', subset='train',
image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', subset='train',
image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', subset='train',
image=np.ones((5, 4, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', subset='train',
image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', subset='train',
image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', subset='train',
image=np.ones((5, 4, 3))),
], categories={
AnnotationType.label: LabelCategories(),
})
for save_images in {True, False}:
with self.subTest(save_images=save_images):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter.convert(source_dataset, test_dir,
save_images=save_images)
@ -150,26 +122,19 @@ class YoloImporterTest(TestCase):
self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
])
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add('label_' + str(i))
return {
AnnotationType.label: label_categories,
}
expected_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(10)),
})
dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
.make_dataset()
compare_datasets(self, DstExtractor(), dataset)
compare_datasets(self, expected_dataset, dataset)

Loading…
Cancel
Save