Update Datumaro dependency to 0.2.0 (#3813)

main
Kirill Sizov 4 years ago committed by GitHub
parent 78158cbcf5
commit b57467937a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- UI tracking has been reworked (<https://github.com/openvinotoolkit/cvat/pull/3571>) - UI tracking has been reworked (<https://github.com/openvinotoolkit/cvat/pull/3571>)
- Manifest generation: Reduce creating time (<https://github.com/openvinotoolkit/cvat/pull/3712>) - Manifest generation: Reduce creating time (<https://github.com/openvinotoolkit/cvat/pull/3712>)
- Migration from NPM 6 to NPM 7 (<https://github.com/openvinotoolkit/cvat/pull/3773>) - Migration from NPM 6 to NPM 7 (<https://github.com/openvinotoolkit/cvat/pull/3773>)
- Update Datumaro dependency to 0.2.0 (<https://github.com/openvinotoolkit/cvat/pull/3813>)
### Deprecated ### Deprecated

@ -3,21 +3,25 @@
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import sys
import os.path as osp import os.path as osp
import sys
from collections import namedtuple from collections import namedtuple
from typing import Any, Callable, DefaultDict, Dict, List, Literal, Mapping, NamedTuple, OrderedDict, Tuple, Union
from pathlib import Path from pathlib import Path
from typing import (Any, Callable, DefaultDict, Dict, List, Literal, Mapping,
NamedTuple, OrderedDict, Tuple, Union)
import datumaro.components.annotation as datum_annotation
import datumaro.components.extractor as datum_extractor
from datumaro.util import cast
from datumaro.util.image import ByteImage, Image
from django.utils import timezone from django.utils import timezone
import datumaro.components.extractor as datumaro
from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType, Project, Task, Label, DimensionType, Image as Img from cvat.apps.engine.models import AttributeType, DimensionType
from datumaro.util import cast from cvat.apps.engine.models import Image as Img
from datumaro.util.image import ByteImage, Image from cvat.apps.engine.models import Label, Project, ShapeType, Task
from .annotation import AnnotationManager, TrackManager, AnnotationIR from .annotation import AnnotationIR, AnnotationManager, TrackManager
class InstanceLabelData: class InstanceLabelData:
@ -192,7 +196,7 @@ class TaskData(InstanceLabelData):
("bugtracker", db_task.bug_tracker), ("bugtracker", db_task.bug_tracker),
("created", str(timezone.localtime(db_task.created_date))), ("created", str(timezone.localtime(db_task.created_date))),
("updated", str(timezone.localtime(db_task.updated_date))), ("updated", str(timezone.localtime(db_task.updated_date))),
("subset", db_task.subset or datumaro.DEFAULT_SUBSET_NAME), ("subset", db_task.subset or datum_extractor.DEFAULT_SUBSET_NAME),
("start_frame", str(db_task.data.start_frame)), ("start_frame", str(db_task.data.start_frame)),
("stop_frame", str(db_task.data.stop_frame)), ("stop_frame", str(db_task.data.stop_frame)),
("frame_filter", db_task.data.frame_filter), ("frame_filter", db_task.data.frame_filter),
@ -800,9 +804,10 @@ class CVATDataExtractorMixin:
@staticmethod @staticmethod
def _load_categories(labels: list): def _load_categories(labels: list):
categories: Dict[datumaro.AnnotationType, datumaro.Categories] = {} categories: Dict[datum_annotation.AnnotationType,
datum_annotation.Categories] = {}
label_categories = datumaro.LabelCategories(attributes=['occluded']) label_categories = datum_annotation.LabelCategories(attributes=['occluded'])
for _, label in labels: for _, label in labels:
label_categories.add(label['name']) label_categories.add(label['name'])
@ -810,7 +815,7 @@ class CVATDataExtractorMixin:
label_categories.attributes.add(attr['name']) label_categories.attributes.add(attr['name'])
categories[datumaro.AnnotationType.label] = label_categories categories[datum_annotation.AnnotationType.label] = label_categories
return categories return categories
@ -824,7 +829,7 @@ class CVATDataExtractorMixin:
def _read_cvat_anno(self, cvat_frame_anno: Union[ProjectData.Frame, TaskData.Frame], labels: list): def _read_cvat_anno(self, cvat_frame_anno: Union[ProjectData.Frame, TaskData.Frame], labels: list):
categories = self.categories() categories = self.categories()
label_cat = categories[datumaro.AnnotationType.label] label_cat = categories[datum_annotation.AnnotationType.label]
def map_label(name): return label_cat.find(name)[0] def map_label(name): return label_cat.find(name)[0]
label_attrs = { label_attrs = {
label['name']: label['attributes'] label['name']: label['attributes']
@ -834,7 +839,7 @@ class CVATDataExtractorMixin:
return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label) return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label)
class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin): class CvatTaskDataExtractor(datum_extractor.SourceExtractor, CVATDataExtractorMixin):
def __init__(self, task_data, include_images=False, format_type=None, dimension=DimensionType.DIM_2D): def __init__(self, task_data, include_images=False, format_type=None, dimension=DimensionType.DIM_2D):
super().__init__() super().__init__()
self._categories = self._load_categories(task_data.meta['task']['labels']) self._categories = self._load_categories(task_data.meta['task']['labels'])
@ -893,7 +898,8 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin):
dm_anno = self._read_cvat_anno(frame_data, task_data.meta['task']['labels']) dm_anno = self._read_cvat_anno(frame_data, task_data.meta['task']['labels'])
if dimension == DimensionType.DIM_2D: if dimension == DimensionType.DIM_2D:
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], dm_item = datum_extractor.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image, annotations=dm_anno, image=dm_image,
attributes={'frame': frame_data.frame attributes={'frame': frame_data.frame
}) })
@ -908,9 +914,11 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]}) attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]})
attributes["track_id"] = -1 attributes["track_id"] = -1
dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0], dm_item = datum_extractor.DatasetItem(
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], id=osp.splitext(osp.split(frame_data.name)[-1])[0],
attributes=attributes) annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes
)
dm_items.append(dm_item) dm_items.append(dm_item)
@ -918,7 +926,7 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin):
def _read_cvat_anno(self, cvat_frame_anno: TaskData.Frame, labels: list): def _read_cvat_anno(self, cvat_frame_anno: TaskData.Frame, labels: list):
categories = self.categories() categories = self.categories()
label_cat = categories[datumaro.AnnotationType.label] label_cat = categories[datum_annotation.AnnotationType.label]
def map_label(name): return label_cat.find(name)[0] def map_label(name): return label_cat.find(name)[0]
label_attrs = { label_attrs = {
label['name']: label['attributes'] label['name']: label['attributes']
@ -927,7 +935,7 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin):
return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, self._format_type, self._dimension) return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, self._format_type, self._dimension)
class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin): class CVATProjectDataExtractor(datum_extractor.Extractor, CVATDataExtractorMixin):
def __init__(self, project_data: ProjectData, include_images: bool = False, format_type: str = None, dimension: DimensionType = DimensionType.DIM_2D): def __init__(self, project_data: ProjectData, include_images: bool = False, format_type: str = None, dimension: DimensionType = DimensionType.DIM_2D):
super().__init__() super().__init__()
self._categories = self._load_categories(project_data.meta['project']['labels']) self._categories = self._load_categories(project_data.meta['project']['labels'])
@ -935,7 +943,7 @@ class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin):
self._dimension = dimension self._dimension = dimension
self._format_type = format_type self._format_type = format_type
dm_items: List[datumaro.DatasetItem] = [] dm_items: List[datum_extractor.DatasetItem] = []
ext_per_task: Dict[int, str] = {} ext_per_task: Dict[int, str] = {}
image_maker_per_task: Dict[int, Callable] = {} image_maker_per_task: Dict[int, Callable] = {}
@ -996,7 +1004,8 @@ class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin):
dm_image = Image(**image_args) dm_image = Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, project_data.meta['project']['labels']) dm_anno = self._read_cvat_anno(frame_data, project_data.meta['project']['labels'])
if self._dimension == DimensionType.DIM_2D: if self._dimension == DimensionType.DIM_2D:
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], dm_item = datum_extractor.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image, annotations=dm_anno, image=dm_image,
subset=frame_data.subset, subset=frame_data.subset,
attributes={'frame': frame_data.frame} attributes={'frame': frame_data.frame}
@ -1012,9 +1021,11 @@ class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]}) attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]})
attributes["track_id"] = -1 attributes["track_id"] = -1
dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0], dm_item = datum_extractor.DatasetItem(
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], id=osp.splitext(osp.split(frame_data.name)[-1])[0],
attributes=attributes, subset=frame_data.subset) annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes, subset=frame_data.subset
)
dm_items.append(dm_item) dm_items.append(dm_item)
self._items = dm_items self._items = dm_items
@ -1063,13 +1074,13 @@ def get_defaulted_subset(subset: str, subsets: List[str]) -> str:
if subset: if subset:
return subset return subset
else: else:
if datumaro.DEFAULT_SUBSET_NAME not in subsets: if datum_extractor.DEFAULT_SUBSET_NAME not in subsets:
return datumaro.DEFAULT_SUBSET_NAME return datum_extractor.DEFAULT_SUBSET_NAME
else: else:
i = 1 i = 1
while i < sys.maxsize: while i < sys.maxsize:
if f'{datumaro.DEFAULT_SUBSET_NAME}_{i}' not in subsets: if f'{datum_extractor.DEFAULT_SUBSET_NAME}_{i}' not in subsets:
return f'{datumaro.DEFAULT_SUBSET_NAME}_{i}' return f'{datum_extractor.DEFAULT_SUBSET_NAME}_{i}'
i += 1 i += 1
raise Exception('Cannot find default name for subset') raise Exception('Cannot find default name for subset')
@ -1100,7 +1111,7 @@ def convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, format_name
anno_label = map_label(tag_obj.label) anno_label = map_label(tag_obj.label)
anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes) anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes)
anno = datumaro.Label(label=anno_label, anno = datum_annotation.Label(label=anno_label,
attributes=anno_attr, group=anno_group) attributes=anno_attr, group=anno_group)
item_anno.append(anno) item_anno.append(anno)
@ -1121,20 +1132,20 @@ def convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, format_name
anno_points = shape_obj.points anno_points = shape_obj.points
if shape_obj.type == ShapeType.POINTS: if shape_obj.type == ShapeType.POINTS:
anno = datumaro.Points(anno_points, anno = datum_annotation.Points(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group, label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order) z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.POLYLINE: elif shape_obj.type == ShapeType.POLYLINE:
anno = datumaro.PolyLine(anno_points, anno = datum_annotation.PolyLine(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group, label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order) z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.POLYGON: elif shape_obj.type == ShapeType.POLYGON:
anno = datumaro.Polygon(anno_points, anno = datum_annotation.Polygon(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group, label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order) z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.RECTANGLE: elif shape_obj.type == ShapeType.RECTANGLE:
x0, y0, x1, y1 = anno_points x0, y0, x1, y1 = anno_points
anno = datumaro.Bbox(x0, y0, x1 - x0, y1 - y0, anno = datum_annotation.Bbox(x0, y0, x1 - x0, y1 - y0,
label=anno_label, attributes=anno_attr, group=anno_group, label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order) z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.CUBOID: elif shape_obj.type == ShapeType.CUBOID:
@ -1144,9 +1155,10 @@ def convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, format_name
else: else:
anno_id = index anno_id = index
position, rotation, scale = anno_points[0:3], anno_points[3:6], anno_points[6:9] position, rotation, scale = anno_points[0:3], anno_points[3:6], anno_points[6:9]
anno = datumaro.Cuboid3d(id=anno_id, position=position, rotation=rotation, scale=scale, anno = datum_annotation.Cuboid3d(
label=anno_label, attributes=anno_attr, group=anno_group id=anno_id, position=position, rotation=rotation, scale=scale,
) label=anno_label, attributes=anno_attr, group=anno_group
)
else: else:
continue continue
else: else:
@ -1192,17 +1204,17 @@ def find_dataset_root(dm_dataset, task_data):
def import_dm_annotations(dm_dataset, task_data): def import_dm_annotations(dm_dataset, task_data):
shapes = { shapes = {
datumaro.AnnotationType.bbox: ShapeType.RECTANGLE, datum_annotation.AnnotationType.bbox: ShapeType.RECTANGLE,
datumaro.AnnotationType.polygon: ShapeType.POLYGON, datum_annotation.AnnotationType.polygon: ShapeType.POLYGON,
datumaro.AnnotationType.polyline: ShapeType.POLYLINE, datum_annotation.AnnotationType.polyline: ShapeType.POLYLINE,
datumaro.AnnotationType.points: ShapeType.POINTS, datum_annotation.AnnotationType.points: ShapeType.POINTS,
datumaro.AnnotationType.cuboid_3d: ShapeType.CUBOID datum_annotation.AnnotationType.cuboid_3d: ShapeType.CUBOID
} }
if len(dm_dataset) == 0: if len(dm_dataset) == 0:
return return
label_cat = dm_dataset.categories()[datumaro.AnnotationType.label] label_cat = dm_dataset.categories()[datum_annotation.AnnotationType.label]
root_hint = find_dataset_root(dm_dataset, task_data) root_hint = find_dataset_root(dm_dataset, task_data)
@ -1231,7 +1243,7 @@ def import_dm_annotations(dm_dataset, task_data):
if hasattr(ann, 'label') and ann.label is None: if hasattr(ann, 'label') and ann.label is None:
raise CvatImportError("annotation has no label") raise CvatImportError("annotation has no label")
if ann.type in shapes: if ann.type in shapes:
if ann.type == datumaro.AnnotationType.cuboid_3d: if ann.type == datum_annotation.AnnotationType.cuboid_3d:
try: try:
ann.points = [*ann.position,*ann.rotation,*ann.scale,0,0,0,0,0,0,0] ann.points = [*ann.position,*ann.rotation,*ann.scale,0,0,0,0,0,0,0]
except Exception as e: except Exception as e:
@ -1249,7 +1261,7 @@ def import_dm_annotations(dm_dataset, task_data):
attributes=[task_data.Attribute(name=n, value=str(v)) attributes=[task_data.Attribute(name=n, value=str(v))
for n, v in ann.attributes.items()], for n, v in ann.attributes.items()],
)) ))
elif ann.type == datumaro.AnnotationType.label: elif ann.type == datum_annotation.AnnotationType.label:
task_data.add_tag(task_data.Tag( task_data.add_tag(task_data.Tag(
frame=frame_number, frame=frame_number,
label=label_cat.items[ann.label].name, label=label_cat.items[ann.label].name,

@ -0,0 +1,67 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform
from datumaro.util.image import Image
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.models import DimensionType
from .registry import dm_env, exporter, importer
class DeleteImagePath(ItemTransform):
    """Item transform that rebuilds each item without path-based media.

    The image is kept only when the item has one with data available, and is
    then re-created from its data and size alone. The point cloud and the
    related images are always reset to empty values.
    """

    def transform_item(self, item):
        keep_pixels = item.has_image and item.image.has_data
        if keep_pixels:
            # Re-create the image from data and size only; no path is passed.
            stripped_image = Image(data=item.image.data, size=item.image.size)
        else:
            stripped_image = None

        return item.wrap(image=stripped_image, point_cloud='', related_images=[])
@exporter(name="Datumaro", ext="ZIP", version="1.0")
def _export(dst_file, instance_data, save_images=False):
    """Write ``instance_data`` to ``dst_file`` as a zipped 'datumaro' dataset.

    When ``save_images`` is false, the ``DeleteImagePath`` transform is applied
    to the dataset before it is exported.
    """
    extractor = GetCVATDataExtractor(
        instance_data=instance_data, include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)

    if not save_images:
        dataset.transform(DeleteImagePath)

    # Export into a scratch directory, then pack that directory into the archive.
    with TemporaryDirectory() as export_dir:
        dataset.export(export_dir, 'datumaro', save_images=save_images)
        make_zip_archive(export_dir, dst_file)
@importer(name="Datumaro", ext="ZIP", version="1.0")
def _import(src_file, instance_data):
    """Load annotations from a zipped 'datumaro' dataset into ``instance_data``.

    ``src_file`` must expose a ``name`` attribute holding the archive path.
    """
    with TemporaryDirectory() as unpack_dir:
        archive = Archive(src_file.name)
        archive.extractall(unpack_dir)

        # Annotations are imported while the unpacked files still exist.
        parsed = Dataset.import_from(unpack_dir, 'datumaro', env=dm_env)
        import_dm_annotations(parsed, instance_data)
@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _export(dst_file, instance_data, save_images=False):
    """Write 3D ``instance_data`` to ``dst_file`` as a zipped 'datumaro' dataset.

    The extractor is created with ``DimensionType.DIM_3D``. When
    ``save_images`` is false, the ``DeleteImagePath`` transform is applied to
    the dataset before it is exported.
    """
    extractor = GetCVATDataExtractor(
        instance_data=instance_data,
        include_images=save_images,
        dimension=DimensionType.DIM_3D,
    )
    dataset = Dataset.from_extractors(extractor, env=dm_env)

    if not save_images:
        dataset.transform(DeleteImagePath)

    # Export into a scratch directory, then pack that directory into the archive.
    with TemporaryDirectory() as export_dir:
        dataset.export(export_dir, 'datumaro', save_images=save_images)
        make_zip_archive(export_dir, dst_file)
@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data):
    """Load annotations from a zipped 'datumaro' dataset into ``instance_data``.

    Registered as the 3D variant of the importer. ``src_file`` must expose a
    ``name`` attribute holding the archive path.
    """
    with TemporaryDirectory() as unpack_dir:
        archive = Archive(src_file.name)
        archive.extractall(unpack_dir)

        # Annotations are imported while the unpacked files still exist.
        parsed = Dataset.import_from(unpack_dir, 'datumaro', env=dm_env)
        import_dm_annotations(parsed, instance_data)

@ -1,102 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import json
import os
import os.path as osp
import shutil
from tempfile import TemporaryDirectory
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
import_dm_annotations, ProjectData)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.settings.base import BASE_DIR
from datumaro.components.project import Project
from ..registry import dm_env, exporter
@exporter(name="Datumaro", ext="ZIP", version="1.0")
class DatumaroProjectExporter:
    """Callable exporter that packs a CVAT task or project as a Datumaro project.

    Calling an instance converts the data into a temporary directory and zips
    that directory into the destination file.
    """

    # Name of the plugin module (and source format) used for remote images.
    _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images'
    # Directory holding the README and plugin templates copied into exports.
    _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates')

    @staticmethod
    def _save_image_info(save_dir, instance_data):
        """Write ``config.json`` and ``images_meta.json`` into ``save_dir``.

        ``config.json`` holds the server URL plus either ``project_id`` or
        ``task_id``. ``images_meta.json`` holds one entry per frame with its
        id, file name, width and height (and subset for projects).

        Args:
            save_dir: Directory to create (if missing) and write into.
            instance_data: ``ProjectData`` or task data being exported.
        """
        os.makedirs(save_dir, exist_ok=True)

        is_project = isinstance(instance_data, ProjectData)

        config = {
            'server_url': instance_data._host or 'localhost'
        }
        if is_project:
            config['project_id'] = instance_data.db_project.id
        else:
            config['task_id'] = instance_data.db_task.id

        images = []
        for frame_id, frame in enumerate(instance_data.frame_info.values()):
            image_info = {
                'id': frame_id,
                'name': osp.basename(frame['path']),
                'width': frame['width'],
                'height': frame['height'],
            }
            if is_project:
                image_info['subset'] = frame['subset']
            # Record the entry; without this the metadata list stays empty.
            images.append(image_info)
        images_meta = {'images': images}

        with open(osp.join(save_dir, 'config.json'),
                'w', encoding='utf-8') as config_file:
            json.dump(config, config_file)

        with open(osp.join(save_dir, 'images_meta.json'),
                'w', encoding='utf-8') as images_file:
            json.dump(images_meta, images_file)

    def _export(self, instance_data, save_dir, save_images=False):
        """Build the Datumaro project for ``instance_data`` inside ``save_dir``.

        Args:
            instance_data: ``ProjectData`` or task data being exported.
            save_dir: Directory that receives the project files.
            save_images: Whether images are written into the project. When
                false, a remote-images source and its plugin are added instead.
        """
        dataset = GetCVATDataExtractor(instance_data, include_images=save_images)
        db_instance = instance_data.db_project if isinstance(instance_data, ProjectData) else instance_data.db_task
        dm_env.converters.get('datumaro_project').convert(dataset,
            save_dir=save_dir, save_images=save_images,
            project_config={ 'project_name': db_instance.name, }
        )

        project = Project.load(save_dir)
        target_dir = project.config.project_dir
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(
            osp.join(self._TEMPLATES_DIR, 'README.md'),
            osp.join(target_dir, 'README.md'))

        if not save_images:
            # add remote links to images
            source_name = '{}_{}_images'.format(
                'project' if isinstance(instance_data, ProjectData) else 'task',
                db_instance.id,
            )
            project.add_source(source_name, {
                'format': self._REMOTE_IMAGES_EXTRACTOR,
            })
            self._save_image_info(
                osp.join(save_dir, project.local_source_dir(source_name)),
                instance_data)
            project.save()

            # Ship the extractor plugin that the added source refers to.
            templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins')
            target_dir = osp.join(project.config.project_dir,
                project.config.env_dir, project.config.plugins_dir)
            os.makedirs(target_dir, exist_ok=True)
            shutil.copyfile(
                osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'),
                osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'))

        # Make CVAT CLI module available to the user
        cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils')
        os.makedirs(cvat_utils_dst_dir)
        shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'),
            osp.join(cvat_utils_dst_dir, 'cli'))

    def __call__(self, dst_file, instance_data, save_images=False):
        """Export ``instance_data`` and zip the result into ``dst_file``."""
        with TemporaryDirectory() as temp_dir:
            self._export(instance_data, save_dir=temp_dir, save_images=save_images)
            make_zip_archive(temp_dir, dst_file)

@ -1,20 +0,0 @@
# Quick start
``` bash
# optionally make a virtualenv
python -m virtualenv .venv
. .venv/bin/activate
# install dependencies
pip install 'git+https://github.com/openvinotoolkit/datumaro'
pip install -r cvat/utils/cli/requirements.txt
# set up environment
PYTHONPATH=':'
export PYTHONPATH
# use Datumaro
datum --help
```
Check the [Datumaro docs](https://github.com/openvinotoolkit/datumaro#readme) for more info.

@ -1,105 +0,0 @@
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import getpass
import json
import os
import os.path as osp
from collections import OrderedDict
import requests
from cvat.utils.cli.core import CLI as CVAT_CLI
from cvat.utils.cli.core import CVAT_API_V1
from datumaro.components.config import Config, SchemaBuilder
from datumaro.components.extractor import SourceExtractor, DatasetItem
from datumaro.util.image import Image, lazy_image, load_image
# Schema for the source's config.json: the CVAT task id and the server URL.
CONFIG_SCHEMA = (
    SchemaBuilder()
    .add('task_id', int)
    .add('server_url', str)
    .build()
)
class cvat_rest_api_task_images(SourceExtractor):
    """Source extractor whose images are downloaded from a CVAT server on demand.

    The source directory must contain ``config.json`` (validated against
    ``CONFIG_SCHEMA``) and ``images_meta.json`` with an ``images`` list.
    Downloaded frames are stored in an ``images`` subdirectory of the source.
    """

    def __init__(self, url):
        """Read the config and image metadata found in directory ``url``."""
        super().__init__()

        self._local_dir = url
        self._cache_dir = osp.join(url, 'images')

        config_path = osp.join(url, 'config.json')
        with open(config_path, 'r', encoding='utf-8') as config_file:
            raw_config = json.load(config_file)
        self._config = Config(raw_config, schema=CONFIG_SCHEMA)

        meta_path = osp.join(url, 'images_meta.json')
        with open(meta_path, 'r', encoding='utf-8') as images_file:
            image_list = json.load(images_file)['images']

        items = OrderedDict()
        for entry in image_list:
            item_id = entry['id']
            item_filename = entry.get('name', str(item_id))

            # The size is only set when both dimensions are present and non-zero.
            if entry.get('height') and entry.get('width'):
                size = (entry['height'], entry['width'])
            else:
                size = None

            image = Image(data=self._make_image_loader(item_id),
                path=self._image_local_path(item_id), size=size)
            item = DatasetItem(id=osp.splitext(item_filename)[0], image=image)
            items[item.id] = item
        self._items = items

        # The CLI client is created on first download, see _connect().
        self._cvat_cli = None

    def __iter__(self):
        yield from self._items.values()

    def __len__(self):
        return len(self._items)

    def _image_local_path(self, item_id):
        """Return the cache file path used for frame ``item_id``."""
        file_name = 'task_{}_frame_{:06d}.jpg'.format(
            self._config.task_id, int(item_id))
        return osp.join(self._cache_dir, file_name)

    def _is_image_cached(self, item_id):
        """Tell whether the frame's cache file already exists."""
        cached_path = self._image_local_path(item_id)
        return osp.isfile(cached_path)

    def _make_image_loader(self, item_id):
        """Wrap the loading of frame ``item_id`` in a ``lazy_image``."""
        return lazy_image(item_id,
            lambda item_id: self._image_loader(item_id, self))

    @staticmethod
    def _image_loader(item_id, extractor):
        """Load frame ``item_id``, downloading it first if it is not cached."""
        if not extractor._is_image_cached(item_id):
            extractor._download_image(item_id)
        return load_image(extractor._image_local_path(item_id))

    def _download_image(self, item_id):
        """Fetch frame ``item_id`` in original quality into the cache directory."""
        self._connect()
        os.makedirs(self._cache_dir, exist_ok=True)
        self._cvat_cli.tasks_frame(task_id=self._config.task_id,
            frame_ids=[item_id], outdir=self._cache_dir, quality='original')

    def _connect(self):
        """Prompt for credentials and create the CLI client, once per instance."""
        if self._cvat_cli is not None:
            return

        server_url = self._config.server_url
        print("Enter credentials for '%s' to read task data:" % \
            (server_url))
        user_name = input('User: ')
        secret = getpass.getpass()

        http_session = requests.Session()
        rest_api = CVAT_API_V1(self._config.server_url)
        self._cvat_cli = CVAT_CLI(http_session, rest_api,
            credentials=(user_name, secret))

@ -5,9 +5,10 @@
import zipfile import zipfile
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from datumaro.components.annotation import (AnnotationType, Caption, Label,
LabelCategories)
from datumaro.components.dataset import Dataset from datumaro.components.dataset import Dataset
from datumaro.components.extractor import (AnnotationType, Caption, Label, from datumaro.components.extractor import ItemTransform
LabelCategories, ItemTransform)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
import_dm_annotations) import_dm_annotations)

@ -5,9 +5,10 @@
import zipfile import zipfile
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from datumaro.components.annotation import (AnnotationType, Label,
LabelCategories)
from datumaro.components.dataset import Dataset from datumaro.components.dataset import Dataset
from datumaro.components.extractor import (AnnotationType, Label, from datumaro.components.extractor import ItemTransform
LabelCategories, ItemTransform)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
import_dm_annotations) import_dm_annotations)

@ -4,8 +4,9 @@
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from datumaro.components.annotation import AnnotationType
from datumaro.components.dataset import Dataset from datumaro.components.dataset import Dataset
from datumaro.components.extractor import AnnotationType, ItemTransform from datumaro.components.extractor import ItemTransform
from pyunpack import Archive from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,

@ -5,7 +5,7 @@
import os.path as osp import os.path as osp
from hashlib import blake2s from hashlib import blake2s
from datumaro.cli.util import make_file_name from datumaro.util.os_util import make_file_name
def get_color_from_index(index): def get_color_from_index(index):

@ -244,7 +244,7 @@
"type": "polygon", "type": "polygon",
"occluded": false, "occluded": false,
"z_order": 0, "z_order": 0,
"points": [35.0, 22.5, 53.32, 30.63, 22.34, 29.45, 47.43, 38.21], "points": [35.0, 22.5, 53.32, 30.63, 22.34, 29.45, 47.43, 38.21],
"frame": 0, "frame": 0,
"label_id": null, "label_id": null,
"group": 0, "group": 0,
@ -661,7 +661,7 @@
"points": [27.15, 26.7, 53.25, 24.8], "points": [27.15, 26.7, 53.25, 24.8],
"frame": 0, "frame": 0,
"label_id": null, "label_id": null,
"group": 2, "group": 1,
"source": "manual", "source": "manual",
"attributes": [] "attributes": []
}, },
@ -675,34 +675,6 @@
"group": 0, "group": 0,
"source": "manual", "source": "manual",
"attributes": [] "attributes": []
},
{
"type": "cuboid",
"occluded": false,
"z_order": 2,
"points": [
51.65,
37.3,
51.65,
46.8,
70.25,
37.2,
70.25,
46.8,
72.11,
36.34,
72.11,
45.74,
53.51,
36.34,
53.51,
45.74
],
"frame": 0,
"label_id": null,
"group": 1,
"source": "manual",
"attributes": []
} }
], ],
"tracks": [] "tracks": []
@ -920,7 +892,7 @@
} }
] ]
}, },
"CVAT for images 1.1 tag": { "CVAT for images 1.1 tag": {
"version": 0, "version": 0,
"tags": [ "tags": [
{ {
@ -982,7 +954,7 @@
"shapes": [], "shapes": [],
"tracks": [] "tracks": []
}, },
"CVAT for images 1.1 different types": { "CVAT for images 1.1 different types": {
"version": 0, "version": 0,
"tags": [], "tags": [],
"shapes": [ "shapes": [
@ -1028,7 +1000,7 @@
} }
] ]
}, },
"CVAT for video 1.1 polygon": { "CVAT for video 1.1 polygon": {
"version": 0, "version": 0,
"tags": [], "tags": [],
"shapes": [], "shapes": [],
@ -1069,7 +1041,7 @@
], ],
"attributes": [] "attributes": []
}, },
{ {
"frame": 0, "frame": 0,
"label_id": null, "label_id": null,
"group": 1, "group": 1,

@ -11,7 +11,7 @@ from io import BytesIO
import datumaro import datumaro
from datumaro.components.dataset import Dataset, DatasetItem from datumaro.components.dataset import Dataset, DatasetItem
from datumaro.components.extractor import Mask from datumaro.components.annotation import Mask
from django.contrib.auth.models import Group, User from django.contrib.auth.models import Group, User
from PIL import Image from PIL import Image
@ -278,6 +278,7 @@ class TaskExportTest(_DbTestBase):
'CVAT for images 1.1', 'CVAT for images 1.1',
'CVAT for video 1.1', 'CVAT for video 1.1',
'Datumaro 1.0', 'Datumaro 1.0',
'Datumaro 3D 1.0',
'LabelMe 3.0', 'LabelMe 3.0',
'MOT 1.1', 'MOT 1.1',
'MOTS PNG 1.0', 'MOTS PNG 1.0',
@ -321,7 +322,9 @@ class TaskExportTest(_DbTestBase):
'ICDAR Localization 1.0', 'ICDAR Localization 1.0',
'ICDAR Segmentation 1.0', 'ICDAR Segmentation 1.0',
'Kitti Raw Format 1.0', 'Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0' 'Sly Point Cloud Format 1.0',
'Datumaro 1.0',
'Datumaro 3D 1.0'
}) })
def test_exports(self): def test_exports(self):
@ -352,7 +355,7 @@ class TaskExportTest(_DbTestBase):
('COCO 1.0', 'coco'), ('COCO 1.0', 'coco'),
('CVAT for images 1.1', 'cvat'), ('CVAT for images 1.1', 'cvat'),
# ('CVAT for video 1.1', 'cvat'), # does not support # ('CVAT for video 1.1', 'cvat'), # does not support
('Datumaro 1.0', 'datumaro_project'), ('Datumaro 1.0', 'datumaro'),
('LabelMe 3.0', 'label_me'), ('LabelMe 3.0', 'label_me'),
# ('MOT 1.1', 'mot_seq'), # does not support # ('MOT 1.1', 'mot_seq'), # does not support
# ('MOTS PNG 1.0', 'mots_png'), # does not support # ('MOTS PNG 1.0', 'mots_png'), # does not support

@ -347,8 +347,8 @@ class TaskDumpUploadTest(_DbTestBase):
# Dump annotations with objects type is shape # Dump annotations with objects type is shape
for dump_format in dump_formats: for dump_format in dump_formats:
if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [ if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [
'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0' 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0'
]: ]:
continue continue
dump_format_name = dump_format.DISPLAY_NAME dump_format_name = dump_format.DISPLAY_NAME
@ -368,7 +368,7 @@ class TaskDumpUploadTest(_DbTestBase):
"MOT 1.1", "MOTS PNG 1.0", \ "MOT 1.1", "MOTS PNG 1.0", \
"PASCAL VOC 1.1", "Segmentation mask 1.1", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \
"WiderFace 1.0", "VGGFace2 1.0", \ "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0"\
]: ]:
self._create_annotations(task, dump_format_name, "default") self._create_annotations(task, dump_format_name, "default")
else: else:
@ -452,8 +452,8 @@ class TaskDumpUploadTest(_DbTestBase):
# Dump annotations with objects type is track # Dump annotations with objects type is track
for dump_format in dump_formats: for dump_format in dump_formats:
if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [ if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [
'Kitti Raw Format 1.0','Sly Point Cloud Format 1.0' 'Kitti Raw Format 1.0','Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0'
]: ]:
continue continue
dump_format_name = dump_format.DISPLAY_NAME dump_format_name = dump_format.DISPLAY_NAME
@ -897,10 +897,9 @@ class TaskDumpUploadTest(_DbTestBase):
with self.subTest(format=dump_format_name): with self.subTest(format=dump_format_name):
if dump_format_name in [ if dump_format_name in [
"MOTS PNG 1.0", # issue #2925 and changed points values "MOTS PNG 1.0", # issue #2925 and changed points values
"Datumaro 1.0", # Datumaro 1.0 is not in the list of import format
'Kitti Raw Format 1.0', 'Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0' 'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0'
]: ]:
self.skipTest("Format is fail") self.skipTest("Format is fail")
images = self._generate_task_images(3) images = self._generate_task_images(3)
@ -917,7 +916,7 @@ class TaskDumpUploadTest(_DbTestBase):
"MOT 1.1", "MOTS PNG 1.0", \ "MOT 1.1", "MOTS PNG 1.0", \
"PASCAL VOC 1.1", "Segmentation mask 1.1", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \
"WiderFace 1.0", "VGGFace2 1.0", \ "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0" \
]: ]:
self._create_annotations(task, dump_format_name, "default") self._create_annotations(task, dump_format_name, "default")
else: else:
@ -1002,13 +1001,13 @@ class TaskDumpUploadTest(_DbTestBase):
with self.subTest(dump_format_name): with self.subTest(dump_format_name):
if dump_format_name in [ if dump_format_name in [
"MOT 1.1", "MOT 1.1",
"Datumaro 1.0", # not uploaded
"CamVid 1.0", # issue #2840 and changed points values "CamVid 1.0", # issue #2840 and changed points values
"MOTS PNG 1.0", # changed points values "MOTS PNG 1.0", # changed points values
"Segmentation mask 1.1", # changed points values "Segmentation mask 1.1", # changed points values
"ICDAR Segmentation 1.0", # changed points values "ICDAR Segmentation 1.0", # changed points values
'Kitti Raw Format 1.0', 'Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0' 'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0'
]: ]:
self.skipTest("Format is fail") self.skipTest("Format is fail")
@ -1029,7 +1028,7 @@ class TaskDumpUploadTest(_DbTestBase):
"MOT 1.1", "MOTS PNG 1.0", \ "MOT 1.1", "MOTS PNG 1.0", \
"PASCAL VOC 1.1", "Segmentation mask 1.1", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \
"WiderFace 1.0", "VGGFace2 1.0", \ "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0", \
]: ]:
self._create_annotations(task, dump_format_name, "default") self._create_annotations(task, dump_format_name, "default")
else: else:

@ -8,7 +8,7 @@ import tempfile
from datetime import timedelta from datetime import timedelta
import django_rq import django_rq
from datumaro.cli.util import make_file_name from datumaro.util.os_util import make_file_name
from datumaro.util import to_snake_case from datumaro.util import to_snake_case
from django.utils import timezone from django.utils import timezone

@ -4488,7 +4488,8 @@ class TaskAnnotationAPITestCase(JobAnnotationAPITestCase):
def _get_initial_annotation(annotation_format): def _get_initial_annotation(annotation_format):
if annotation_format not in ["Market-1501 1.0", "ICDAR Recognition 1.0", if annotation_format not in ["Market-1501 1.0", "ICDAR Recognition 1.0",
"ICDAR Localization 1.0", "ICDAR Segmentation 1.0", "ICDAR Localization 1.0", "ICDAR Segmentation 1.0",
'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0']: 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0']:
rectangle_tracks_with_attrs = [{ rectangle_tracks_with_attrs = [{
"frame": 0, "frame": 0,
"label_id": task["labels"][0]["id"], "label_id": task["labels"][0]["id"],
@ -4832,7 +4833,8 @@ class TaskAnnotationAPITestCase(JobAnnotationAPITestCase):
], ],
}] }]
annotations["tags"] = tags_with_attrs annotations["tags"] = tags_with_attrs
elif annotation_format in ['Kitti Raw Format 1.0','Sly Point Cloud Format 1.0']: elif annotation_format in ['Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0', 'Datumaro 3D 1.0']:
velodyne_wo_attrs = [{ velodyne_wo_attrs = [{
"frame": 0, "frame": 0,
"label_id": task["labels"][0]["id"], "label_id": task["labels"][0]["id"],

@ -53,5 +53,5 @@ google-cloud-storage==1.42.0
# --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20 # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20
# of pycocotools and tensorflow 2.4.1 # of pycocotools and tensorflow 2.4.1
# when pycocotools is installed by wheel in python 3.8+ # when pycocotools is installed by wheel in python 3.8+
datumaro==0.1.10.1 --no-binary=datumaro --no-binary=pycocotools datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability

@ -13,3 +13,91 @@ is possible in Datumaro too, but Datumaro can offer dataset operations.
- supported annotations: any 2D shapes, labels - supported annotations: any 2D shapes, labels
- supported attributes: any - supported attributes: any
# Import annotations in Datumaro format
Uploaded file: a zip archive of the following structure:
```bash
<archive_name>.zip/
└── annotations/
├── subset1.json # full description of classes and all dataset items
└── subset2.json # full description of classes and all dataset items
```
JSON annotation files in the `annotations` directory should have a similar structure:
```json
{
"info": {},
"categories": {
"label": {
"labels": [
{
"name": "label_0",
"parent": "",
"attributes": []
},
{
"name": "label_1",
"parent": "",
"attributes": []
}
],
"attributes": []
}
},
"items": [
{
"id": "img1",
"annotations": [
{
"id": 0,
"type": "polygon",
"attributes": {},
"group": 0,
"label_id": 1,
"points": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
"z_order": 0
},
{
"id": 1,
"type": "bbox",
"attributes": {},
"group": 1,
"label_id": 0,
"z_order": 0,
"bbox": [1.0, 2.0, 3.0, 4.0]
},
{
"id": 2,
"type": "mask",
"attributes": {},
"group": 1,
"label_id": 0,
"rle": {
"counts": "d0d0:F\\0",
"size": [10, 10]
},
"z_order": 0
}
]
}
]
}
```
# Export annotations in Datumaro format
Downloaded file: a zip archive of the following structure:
```bash
taskname.zip/
├── annotations/
│ └── default.json # full description of classes and all dataset items
└── images/ # if the option `save images` was selected
└── default
├── image1.jpg
├── image2.jpg
├── ...
```

Loading…
Cancel
Save