diff --git a/cvat/apps/dataset_manager/formats/camvid.py b/cvat/apps/dataset_manager/formats/camvid.py
index bcd00b7a..a8fb5059 100644
--- a/cvat/apps/dataset_manager/formats/camvid.py
+++ b/cvat/apps/dataset_manager/formats/camvid.py
@@ -4,7 +4,7 @@
 
 from tempfile import TemporaryDirectory
 
-from datumaro.components.project import Dataset
+from datumaro.components.dataset import Dataset
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
@@ -17,16 +17,15 @@ from .utils import make_colormap
 
 @exporter(name='CamVid', ext='ZIP', version='1.0')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    envt = dm_env.transforms
-    extractor = extractor.transform(envt.get('polygons_to_masks'))
-    extractor = extractor.transform(envt.get('boxes_to_masks'))
-    extractor = extractor.transform(envt.get('merge_instance_segments'))
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
+    dataset.transform('polygons_to_masks')
+    dataset.transform('boxes_to_masks')
+    dataset.transform('merge_instance_segments')
     label_map = make_colormap(task_data)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('camvid').convert(extractor,
-            save_dir=temp_dir, save_images=save_images, apply_colormap=True,
+        dataset.export(temp_dir, 'camvid',
+            save_images=save_images, apply_colormap=True,
             label_map={label: label_map[label][0] for label in label_map})
 
         make_zip_archive(temp_dir, dst_file)
@@ -36,7 +35,6 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('camvid')(tmp_dir).make_dataset()
-        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
-        dataset = dataset.transform(masks_to_polygons)
+        dataset = Dataset.import_from(tmp_dir, 'camvid', env=dm_env)
+        dataset.transform('masks_to_polygons')
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py
index 84472d3a..3ec3ab18 100644
--- a/cvat/apps/dataset_manager/formats/coco.py
+++ b/cvat/apps/dataset_manager/formats/coco.py
@@ -5,7 +5,8 @@
 import zipfile
 from tempfile import TemporaryDirectory
 
-from datumaro.components.project import Dataset
+from datumaro.components.dataset import Dataset
+
 from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
     import_dm_annotations
 from cvat.apps.dataset_manager.util import make_zip_archive
@@ -15,11 +16,10 @@ from .registry import dm_env, exporter, importer
 
 @exporter(name='COCO', ext='ZIP', version='1.0')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('coco_instances').convert(extractor,
-            save_dir=temp_dir, save_images=save_images)
+        dataset.export(temp_dir, 'coco_instances', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -29,8 +29,9 @@ def _import(src_file, task_data):
         with TemporaryDirectory() as tmp_dir:
             zipfile.ZipFile(src_file).extractall(tmp_dir)
 
-            dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset()
+            dataset = Dataset.import_from(tmp_dir, 'coco', env=dm_env)
             import_dm_annotations(dataset, task_data)
     else:
-        dataset = dm_env.make_extractor('coco_instances', src_file.name)
+        dataset = Dataset.import_from(src_file.name,
+            'coco_instances', env=dm_env)
         import_dm_annotations(dataset, task_data)
\ No newline at end of file
diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py
index d825dae0..02025afc 100644
--- a/cvat/apps/dataset_manager/formats/cvat.py
+++ b/cvat/apps/dataset_manager/formats/cvat.py
@@ -9,10 +9,11 @@ from collections import OrderedDict
 from glob import glob
 from tempfile import TemporaryDirectory
 
+from datumaro.components.extractor import DatasetItem
+
 from cvat.apps.dataset_manager.bindings import match_dm_item
 from cvat.apps.dataset_manager.util import make_zip_archive
 from cvat.apps.engine.frame_provider import FrameProvider
-from datumaro.components.extractor import DatasetItem
 
 from .registry import exporter, importer
 
diff --git a/cvat/apps/dataset_manager/formats/imagenet.py b/cvat/apps/dataset_manager/formats/imagenet.py
index d9847549..2ed0cb47 100644
--- a/cvat/apps/dataset_manager/formats/imagenet.py
+++ b/cvat/apps/dataset_manager/formats/imagenet.py
@@ -3,12 +3,12 @@
 # SPDX-License-Identifier: MIT
 
 import os.path as osp
-from glob import glob
-
 import zipfile
+from glob import glob
 from tempfile import TemporaryDirectory
 
-from datumaro.components.project import Dataset
+from datumaro.components.dataset import Dataset
+
 from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
     import_dm_annotations
 from cvat.apps.dataset_manager.util import make_zip_archive
@@ -18,15 +18,13 @@ from .registry import dm_env, exporter, importer
 
 @exporter(name='ImageNet', ext='ZIP', version='1.0')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transform
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
         if save_images:
-            dm_env.converters.get('imagenet').convert(extractor,
-                save_dir=temp_dir, save_images=save_images)
+            dataset.export(temp_dir, 'imagenet', save_images=save_images)
         else:
-            dm_env.converters.get('imagenet_txt').convert(extractor,
-                save_dir=temp_dir, save_images=save_images)
+            dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -35,7 +33,7 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         zipfile.ZipFile(src_file).extractall(tmp_dir)
         if glob(osp.join(tmp_dir, '*.txt')):
-            dataset = dm_env.make_importer('imagenet_txt')(tmp_dir).make_dataset()
+            dataset = Dataset.import_from(tmp_dir, 'imagenet_txt', env=dm_env)
         else:
-            dataset = dm_env.make_importer('imagenet')(tmp_dir).make_dataset()
+            dataset = Dataset.import_from(tmp_dir, 'imagenet', env=dm_env)
         import_dm_annotations(dataset, task_data)
\ No newline at end of file
diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py
index d3bd074d..744b11fa 100644
--- a/cvat/apps/dataset_manager/formats/labelme.py
+++ b/cvat/apps/dataset_manager/formats/labelme.py
@@ -4,23 +4,22 @@
 
 from tempfile import TemporaryDirectory
 
+from datumaro.components.dataset import Dataset
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
     import_dm_annotations)
 from cvat.apps.dataset_manager.util import make_zip_archive
-from datumaro.components.project import Dataset
 
 from .registry import dm_env, exporter, importer
 
 
 @exporter(name='LabelMe', ext='ZIP', version='3.0')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir,
-            save_images=save_images)
+        dataset.export(temp_dir, 'label_me', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -29,7 +28,6 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset()
-        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
-        dataset = dataset.transform(masks_to_polygons)
+        dataset = Dataset.import_from(tmp_dir, 'label_me', env=dm_env)
+        dataset.transform('masks_to_polygons')
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py
index b1307a9c..3e3780e8 100644
--- a/cvat/apps/dataset_manager/formats/mask.py
+++ b/cvat/apps/dataset_manager/formats/mask.py
@@ -4,12 +4,12 @@
 
 from tempfile import TemporaryDirectory
 
+from datumaro.components.dataset import Dataset
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
     import_dm_annotations)
 from cvat.apps.dataset_manager.util import make_zip_archive
-from datumaro.components.project import Dataset
 
 from .registry import dm_env, exporter, importer
 from .utils import make_colormap
@@ -17,15 +17,13 @@ from .utils import make_colormap
 
 @exporter(name='Segmentation mask', ext='ZIP', version='1.1')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    envt = dm_env.transforms
-    extractor = extractor.transform(envt.get('polygons_to_masks'))
-    extractor = extractor.transform(envt.get('boxes_to_masks'))
-    extractor = extractor.transform(envt.get('merge_instance_segments'))
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
+    dataset.transform('polygons_to_masks')
+    dataset.transform('boxes_to_masks')
+    dataset.transform('merge_instance_segments')
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('voc_segmentation').convert(extractor,
-            save_dir=temp_dir, save_images=save_images,
+        dataset.export(temp_dir, 'voc_segmentation', save_images=save_images,
             apply_colormap=True, label_map=make_colormap(task_data))
 
         make_zip_archive(temp_dir, dst_file)
@@ -35,7 +33,6 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
-        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
-        dataset = dataset.transform(masks_to_polygons)
+        dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
+        dataset.transform('masks_to_polygons')
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py
index 81131dc1..29d5182a 100644
--- a/cvat/apps/dataset_manager/formats/mot.py
+++ b/cvat/apps/dataset_manager/formats/mot.py
@@ -4,23 +4,22 @@
 
 from tempfile import TemporaryDirectory
 
+import datumaro.components.extractor as datumaro
+from datumaro.components.dataset import Dataset
 from pyunpack import Archive
 
-import datumaro.components.extractor as datumaro
 from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor
 from cvat.apps.dataset_manager.util import make_zip_archive
-from datumaro.components.project import Dataset
 
 from .registry import dm_env, exporter, importer
 
 
 @exporter(name='MOT', ext='ZIP', version='1.1')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('mot_seq_gt').convert(extractor,
-            save_dir=temp_dir, save_images=save_images)
+        dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -29,7 +28,7 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('mot_seq')(tmp_dir).make_dataset()
+        dataset = Dataset.import_from(tmp_dir, 'mot_seq', env=dm_env)
 
         tracks = {}
         label_cat = dataset.categories()[datumaro.AnnotationType.label]
diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py
index 52bf0fa6..22b9dd08 100644
--- a/cvat/apps/dataset_manager/formats/mots.py
+++ b/cvat/apps/dataset_manager/formats/mots.py
@@ -4,13 +4,13 @@
 
 from tempfile import TemporaryDirectory
 
+from datumaro.components.dataset import Dataset
+from datumaro.components.extractor import AnnotationType, Transform
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
     find_dataset_root, match_dm_item)
 from cvat.apps.dataset_manager.util import make_zip_archive
-from datumaro.components.extractor import AnnotationType, Transform
-from datumaro.components.project import Dataset
 
 from .registry import dm_env, exporter, importer
 
@@ -22,16 +22,14 @@ class KeepTracks(Transform):
 
 @exporter(name='MOTS PNG', ext='ZIP', version='1.0')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    envt = dm_env.transforms
-    extractor = extractor.transform(KeepTracks) # can only export tracks
-    extractor = extractor.transform(envt.get('polygons_to_masks'))
-    extractor = extractor.transform(envt.get('boxes_to_masks'))
-    extractor = extractor.transform(envt.get('merge_instance_segments'))
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
+    dataset.transform(KeepTracks) # can only export tracks
+    dataset.transform('polygons_to_masks')
+    dataset.transform('boxes_to_masks')
+    dataset.transform('merge_instance_segments')
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('mots_png').convert(extractor,
-            save_dir=temp_dir, save_images=save_images)
+        dataset.export(temp_dir, 'mots_png', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -40,9 +38,8 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('mots')(tmp_dir).make_dataset()
-        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
-        dataset = dataset.transform(masks_to_polygons)
+        dataset = Dataset.import_from(tmp_dir, 'mots', env=dm_env)
+        dataset.transform('masks_to_polygons')
 
         tracks = {}
         label_cat = dataset.categories()[AnnotationType.label]
diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py
index ee30564b..3f10b93a 100644
--- a/cvat/apps/dataset_manager/formats/pascal_voc.py
+++ b/cvat/apps/dataset_manager/formats/pascal_voc.py
@@ -6,26 +6,25 @@ import os
 import os.path as osp
 import shutil
 from glob import glob
-
 from tempfile import TemporaryDirectory
 
+from datumaro.components.dataset import Dataset
 from pyunpack import Archive
 
 from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
     import_dm_annotations)
 from cvat.apps.dataset_manager.util import make_zip_archive
-from datumaro.components.project import Dataset
 
 from .registry import dm_env, exporter, importer
 
 
 @exporter(name='PASCAL VOC', ext='ZIP', version='1.1')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('voc').convert(extractor,
-            save_dir=temp_dir, save_images=save_images, label_map='source')
+        dataset.export(temp_dir, 'voc', save_images=save_images,
+            label_map='source')
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -56,7 +55,6 @@ def _import(src_file, task_data):
         for f in anno_files:
             shutil.move(f, anno_dir)
 
-        dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
-        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
-        dataset = dataset.transform(masks_to_polygons)
+        dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
+        dataset.transform('masks_to_polygons')
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py
index 3b7e123e..9847bf61 100644
--- a/cvat/apps/dataset_manager/formats/tfrecord.py
+++ b/cvat/apps/dataset_manager/formats/tfrecord.py
@@ -24,11 +24,10 @@ except ImportError:
 
 @exporter(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available)
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('tf_detection_api').convert(extractor,
-            save_dir=temp_dir, save_images=save_images)
+        dataset.export(temp_dir, 'tf_detection_api', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -37,5 +36,5 @@ def _import(src_file, task_data):
     with TemporaryDirectory() as tmp_dir:
         Archive(src_file.name).extractall(tmp_dir)
 
-        dataset = dm_env.make_importer('tf_detection_api')(tmp_dir).make_dataset()
+        dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py
index bea73b3c..0df6f5fe 100644
--- a/cvat/apps/dataset_manager/formats/yolo.py
+++ b/cvat/apps/dataset_manager/formats/yolo.py
@@ -20,11 +20,10 @@ from .registry import dm_env, exporter, importer
 
 @exporter(name='YOLO', ext='ZIP', version='1.1')
 def _export(dst_file, task_data, save_images=False):
-    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
-    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
     with TemporaryDirectory() as temp_dir:
-        dm_env.converters.get('yolo').convert(extractor,
-            save_dir=temp_dir, save_images=save_images)
+        dataset.export(temp_dir, 'yolo', save_images=save_images)
 
         make_zip_archive(temp_dir, dst_file)
 
@@ -44,11 +43,11 @@ def _import(src_file, task_data):
                 frame_id = match_dm_item(DatasetItem(id=frame), task_data,
                     root_hint=root_hint)
                 frame_info = task_data.frame_info[frame_id]
-            except Exception:
+            except Exception: # nosec
                 pass
             if frame_info is not None:
                 image_info[frame] = (frame_info['height'], frame_info['width'])
 
-        dataset = dm_env.make_importer('yolo')(tmp_dir, image_info=image_info) \
-            .make_dataset()
+        dataset = Dataset.import_from(tmp_dir, 'yolo',
+            env=dm_env, image_info=image_info)
         import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/tests/test_formats.py b/cvat/apps/dataset_manager/tests/test_formats.py
index 0c23eea7..5103da65 100644
--- a/cvat/apps/dataset_manager/tests/test_formats.py
+++ b/cvat/apps/dataset_manager/tests/test_formats.py
@@ -357,17 +357,18 @@ class TaskExportTest(_DbTestBase):
                     project.config.remove('sources')
 
                     return project.make_dataset()
-            return dm_env.make_importer(importer_name)(src) \
-                .make_dataset()
+            return datumaro.components.dataset. \
+                Dataset.import_from(src, importer_name, env=dm_env)
 
             if zipfile.is_zipfile(file_path):
                 with tempfile.TemporaryDirectory() as tmp_dir:
                     zipfile.ZipFile(file_path).extractall(tmp_dir)
                     dataset = load_dataset(tmp_dir)
+                    self.assertEqual(len(dataset), task["size"])
             else:
                 dataset = load_dataset(file_path)
+                self.assertEqual(len(dataset), task["size"])
 
-            self.assertEqual(len(dataset), task["size"])
             self._test_export(check, task, format_name, save_images=False)
 
     def test_can_skip_outside(self):
diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt
index c8a6bda4..98ea74c5 100644
--- a/cvat/requirements/base.txt
+++ b/cvat/requirements/base.txt
@@ -45,5 +45,9 @@ tensorflow==2.4.1 # Optional requirement of Datumaro
 patool==1.12
 diskcache==5.0.2
 open3d==0.11.2
-# workaround for binary incompatibility with numpy when pycocotools is installed by wheel
-datumaro==0.1.5.1 --no-binary=datumaro --no-binary=pycocotools
+# --no-binary=datumaro: workaround for pip to install
+# opencv-headless instead of regular opencv, to actually run setup script
+# --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20
+# of pycocotools and tensorflow 2.4.1
+# when pycocotools is installed by wheel in python 3.8+
+datumaro==0.1.6.1 --no-binary=datumaro --no-binary=pycocotools
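
Note for reviewers: every format module above is moved to the same Datumaro Dataset API, replacing the old dm_env.make_importer(...)(...).make_dataset() and dm_env.converters.get(...).convert(...) calls. A rough standalone sketch of the pattern follows; the paths are placeholders, and env=dm_env is omitted on the assumption that Datumaro's default environment already registers the built-in formats and transforms used in this patch.

    from datumaro.components.dataset import Dataset

    # Import: build a dataset directly from a directory in a given format.
    # Previously: dm_env.make_importer('voc')(path).make_dataset()
    dataset = Dataset.import_from('path/to/extracted/archive', 'voc')

    # Transforms are looked up by name and applied to the dataset itself.
    # Previously: dataset = dataset.transform(dm_env.transforms.get('masks_to_polygons'))
    dataset.transform('masks_to_polygons')

    # Export: write the dataset out through the dataset object.
    # Previously: dm_env.converters.get('coco_instances').convert(dataset, save_dir=out_dir)
    dataset.export('path/to/output', 'coco_instances', save_images=True)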