Update CVAT formats to use changes in Datumaro API (#2794)

* update cvat formats to use datumaro changes

* Update datumaro dependency

* Add comments on datumaro install in requirements

* fix linter

* Fix test

* fix linter

* Update datumaro version to 0.1.6.1
main
Maxim Zhiltsov 5 years ago committed by GitHub
parent d5312df891
commit 9b62b71ab1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -4,7 +4,7 @@
from tempfile import TemporaryDirectory
from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
@ -17,16 +17,15 @@ from .utils import make_colormap
@exporter(name='CamVid', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
label_map = make_colormap(task_data)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('camvid').convert(extractor,
save_dir=temp_dir, save_images=save_images, apply_colormap=True,
dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})
make_zip_archive(temp_dir, dst_file)
@ -36,7 +35,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('camvid')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'camvid', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)

@ -5,7 +5,8 @@
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive
@ -15,11 +16,10 @@ from .registry import dm_env, exporter, importer
@exporter(name='COCO', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('coco_instances').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'coco_instances', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -29,8 +29,9 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'coco', env=dm_env)
import_dm_annotations(dataset, task_data)
else:
dataset = dm_env.make_extractor('coco_instances', src_file.name)
dataset = Dataset.import_from(src_file.name,
'coco_instances', env=dm_env)
import_dm_annotations(dataset, task_data)

@ -9,10 +9,11 @@ from collections import OrderedDict
from glob import glob
from tempfile import TemporaryDirectory
from datumaro.components.extractor import DatasetItem
from cvat.apps.dataset_manager.bindings import match_dm_item
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.frame_provider import FrameProvider
from datumaro.components.extractor import DatasetItem
from .registry import exporter, importer

@ -3,12 +3,12 @@
# SPDX-License-Identifier: MIT
import os.path as osp
from glob import glob
import zipfile
from glob import glob
from tempfile import TemporaryDirectory
from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive
@ -18,15 +18,13 @@ from .registry import dm_env, exporter, importer
@exporter(name='ImageNet', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transform
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
if save_images:
dm_env.converters.get('imagenet').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'imagenet', save_images=save_images)
else:
dm_env.converters.get('imagenet_txt').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -35,7 +33,7 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
if glob(osp.join(tmp_dir, '*.txt')):
dataset = dm_env.make_importer('imagenet_txt')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'imagenet_txt', env=dm_env)
else:
dataset = dm_env.make_importer('imagenet')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'imagenet', env=dm_env)
import_dm_annotations(dataset, task_data)

@ -4,23 +4,22 @@
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
@exporter(name='LabelMe', ext='ZIP', version='3.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir,
save_images=save_images)
dataset.export(temp_dir, 'label_me', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -29,7 +28,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'label_me', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)

@ -4,12 +4,12 @@
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
from .utils import make_colormap
@ -17,15 +17,13 @@ from .utils import make_colormap
@exporter(name='Segmentation mask', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('voc_segmentation').convert(extractor,
save_dir=temp_dir, save_images=save_images,
dataset.export(temp_dir, 'voc_segmentation', save_images=save_images,
apply_colormap=True, label_map=make_colormap(task_data))
make_zip_archive(temp_dir, dst_file)
@ -35,7 +33,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)

@ -4,23 +4,22 @@
from tempfile import TemporaryDirectory
import datumaro.components.extractor as datumaro
from datumaro.components.dataset import Dataset
from pyunpack import Archive
import datumaro.components.extractor as datumaro
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
@exporter(name='MOT', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('mot_seq_gt').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -29,7 +28,7 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('mot_seq')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'mot_seq', env=dm_env)
tracks = {}
label_cat = dataset.categories()[datumaro.AnnotationType.label]

@ -4,13 +4,13 @@
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import AnnotationType, Transform
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
find_dataset_root, match_dm_item)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.extractor import AnnotationType, Transform
from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
@ -22,16 +22,14 @@ class KeepTracks(Transform):
@exporter(name='MOTS PNG', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(KeepTracks) # can only export tracks
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform(KeepTracks) # can only export tracks
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('mots_png').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'mots_png', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -40,9 +38,8 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('mots')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'mots', env=dm_env)
dataset.transform('masks_to_polygons')
tracks = {}
label_cat = dataset.categories()[AnnotationType.label]

@ -6,26 +6,25 @@ import os
import os.path as osp
import shutil
from glob import glob
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
@exporter(name='PASCAL VOC', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('voc').convert(extractor,
save_dir=temp_dir, save_images=save_images, label_map='source')
dataset.export(temp_dir, 'voc', save_images=save_images,
label_map='source')
make_zip_archive(temp_dir, dst_file)
@ -56,7 +55,6 @@ def _import(src_file, task_data):
for f in anno_files:
shutil.move(f, anno_dir)
dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)

@ -24,11 +24,10 @@ except ImportError:
@exporter(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available)
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('tf_detection_api').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'tf_detection_api', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -37,5 +36,5 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm_env.make_importer('tf_detection_api')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
import_dm_annotations(dataset, task_data)

@ -20,11 +20,10 @@ from .registry import dm_env, exporter, importer
@exporter(name='YOLO', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dm_env.converters.get('yolo').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'yolo', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@ -44,11 +43,11 @@ def _import(src_file, task_data):
frame_id = match_dm_item(DatasetItem(id=frame), task_data,
root_hint=root_hint)
frame_info = task_data.frame_info[frame_id]
except Exception:
except Exception: # nosec
pass
if frame_info is not None:
image_info[frame] = (frame_info['height'], frame_info['width'])
dataset = dm_env.make_importer('yolo')(tmp_dir, image_info=image_info) \
.make_dataset()
dataset = Dataset.import_from(tmp_dir, 'yolo',
env=dm_env, image_info=image_info)
import_dm_annotations(dataset, task_data)

@ -357,17 +357,18 @@ class TaskExportTest(_DbTestBase):
project.config.remove('sources')
return project.make_dataset()
return dm_env.make_importer(importer_name)(src) \
.make_dataset()
return datumaro.components.dataset. \
Dataset.import_from(src, importer_name, env=dm_env)
if zipfile.is_zipfile(file_path):
with tempfile.TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(file_path).extractall(tmp_dir)
dataset = load_dataset(tmp_dir)
self.assertEqual(len(dataset), task["size"])
else:
dataset = load_dataset(file_path)
self.assertEqual(len(dataset), task["size"])
self.assertEqual(len(dataset), task["size"])
self._test_export(check, task, format_name, save_images=False)
def test_can_skip_outside(self):

@ -45,5 +45,9 @@ tensorflow==2.4.1 # Optional requirement of Datumaro
patool==1.12
diskcache==5.0.2
open3d==0.11.2
# workaround for binary incompatibility with numpy when pycocotools is installed by wheel
datumaro==0.1.5.1 --no-binary=datumaro --no-binary=pycocotools
# --no-binary=datumaro: workaround to make pip install
# opencv-headless instead of regular opencv, so the setup script can actually run
# --no-binary=pycocotools: workaround for a binary incompatibility between
# pycocotools and tensorflow 2.4.1 on numpy 1.20,
# which occurs when pycocotools is installed from a wheel on python 3.8+
datumaro==0.1.6.1 --no-binary=datumaro --no-binary=pycocotools

Loading…
Cancel
Save