Change default temporary directory for import and export (#5613)

main
Maxim Zhiltsov 3 years ago committed by GitHub
parent a9476cb623
commit a0b70fcc40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,9 +1,8 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
@ -17,7 +16,7 @@ from .utils import make_colormap
@exporter(name='CamVid', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
@ -25,20 +24,19 @@ def _export(dst_file, instance_data, save_images=False):
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
label_map = make_colormap(instance_data)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})
make_zip_archive(temp_dir, dst_file)
@importer(name='CamVid', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'camvid', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'camvid', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.plugins.cityscapes_format import write_label_map
@ -19,33 +19,32 @@ from .utils import make_colormap
@exporter(name='Cityscapes', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'cityscapes', save_images=save_images,
apply_colormap=True, label_map={label: info[0]
for label, info in make_colormap(instance_data).items()})
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'cityscapes', save_images=save_images,
apply_colormap=True, label_map={label: info[0]
for label, info in make_colormap(instance_data).items()})
make_zip_archive(temp_dir, dst_file)
@importer(name='Cityscapes', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
labelmap_file = osp.join(tmp_dir, 'label_colors.txt')
if not osp.isfile(labelmap_file):
colormap = {label: info[0]
for label, info in make_colormap(instance_data).items()}
write_label_map(labelmap_file, colormap)
dataset = Dataset.import_from(tmp_dir, 'cityscapes', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
labelmap_file = osp.join(temp_dir, 'label_colors.txt')
if not osp.isfile(labelmap_file):
colormap = {label: info[0]
for label, info in make_colormap(instance_data).items()}
write_label_map(labelmap_file, colormap)
dataset = Dataset.import_from(temp_dir, 'cityscapes', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,9 @@
# Copyright (C) 2018-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.components.annotation import AnnotationType
@ -15,41 +15,38 @@ from cvat.apps.dataset_manager.util import make_zip_archive
from .registry import dm_env, exporter, importer
@exporter(name='COCO', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'coco_instances', save_images=save_images,
merge_images=True)
dataset.export(temp_dir, 'coco_instances', save_images=save_images,
merge_images=True)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='COCO', ext='JSON, ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
if zipfile.is_zipfile(src_file):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'coco_instances', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'coco_instances', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
dataset = Dataset.import_from(src_file.name,
'coco_instances', env=dm_env)
import_dm_annotations(dataset, instance_data)
@exporter(name='COCO Keypoints', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images,
merge_images=True)
dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images,
merge_images=True)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='COCO Keypoints', ext='JSON, ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
def remove_extra_annotations(dataset):
for item in dataset:
annotations = [ann for ann in item.annotations
@ -57,13 +54,12 @@ def _import(src_file, instance_data, load_data_callback=None, **kwargs):
item.annotations = annotations
if zipfile.is_zipfile(src_file):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'coco_person_keypoints', env=dm_env)
remove_extra_annotations(dataset)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'coco_person_keypoints', env=dm_env)
remove_extra_annotations(dataset)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
dataset = Dataset.import_from(src_file.name,
'coco_person_keypoints', env=dm_env)

@ -1,5 +1,5 @@
# Copyright (C) 2018-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@ -9,7 +9,6 @@ import zipfile
from collections import OrderedDict
from glob import glob
from io import BufferedWriter
from tempfile import TemporaryDirectory
from typing import Callable
from datumaro.components.annotation import (AnnotationType, Bbox, Label,
@ -1349,64 +1348,63 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj
with open(img_path, 'wb') as f:
f.write(frame_data.getvalue())
def _export_task_or_job(dst_file, instance_data, anno_callback, save_images=False):
with TemporaryDirectory() as temp_dir:
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_task_or_job_anno(f, instance_data, anno_callback)
def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False):
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_task_or_job_anno(f, instance_data, anno_callback)
if save_images:
dump_media_files(instance_data, osp.join(temp_dir, 'images'))
if save_images:
dump_media_files(instance_data, osp.join(temp_dir, 'images'))
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
def _export_project(dst_file: str, project_data: ProjectData, anno_callback: Callable, save_images: bool=False):
with TemporaryDirectory() as temp_dir:
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_project_anno(f, project_data, anno_callback)
def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData,
anno_callback: Callable, save_images: bool=False
):
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_project_anno(f, project_data, anno_callback)
if save_images:
for task_data in project_data.task_data:
subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets)
subset_dir = osp.join(temp_dir, 'images', subset)
os.makedirs(subset_dir, exist_ok=True)
dump_media_files(task_data, subset_dir, project_data)
if save_images:
for task_data in project_data.task_data:
subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets)
subset_dir = osp.join(temp_dir, 'images', subset)
os.makedirs(subset_dir, exist_ok=True)
dump_media_files(task_data, subset_dir, project_data)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@exporter(name='CVAT for video', ext='ZIP', version='1.1')
def _export_video(dst_file, instance_data, save_images=False):
def _export_video(dst_file, temp_dir, instance_data, save_images=False):
if isinstance(instance_data, ProjectData):
_export_project(dst_file, instance_data,
_export_project(dst_file, temp_dir, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)
else:
_export_task_or_job(dst_file, instance_data,
_export_task_or_job(dst_file, temp_dir, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)
@exporter(name='CVAT for images', ext='ZIP', version='1.1')
def _export_images(dst_file, instance_data, save_images=False):
def _export_images(dst_file, temp_dir, instance_data, save_images=False):
if isinstance(instance_data, ProjectData):
_export_project(dst_file, instance_data,
_export_project(dst_file, temp_dir, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)
else:
_export_task_or_job(dst_file, instance_data,
_export_task_or_job(dst_file, temp_dir, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)
@importer(name='CVAT', ext='XML, ZIP', version='1.1')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
is_zip = zipfile.is_zipfile(src_file)
src_file.seek(0)
if is_zip:
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
if isinstance(instance_data, ProjectData):
dataset = Dataset.import_from(tmp_dir, 'cvat', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
anno_paths = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True)
for p in anno_paths:
load_anno(p, instance_data)
zipfile.ZipFile(src_file).extractall(temp_dir)
if isinstance(instance_data, ProjectData):
dataset = Dataset.import_from(temp_dir, 'cvat', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
anno_paths = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True)
for p in anno_paths:
load_anno(p, instance_data)
else:
load_anno(src_file, instance_data)

@ -1,9 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform
from datumaro.util.image import Image
@ -25,47 +24,43 @@ class DeleteImagePath(ItemTransform):
@exporter(name="Datumaro", ext="ZIP", version="1.0")
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images), env=dm_env)
if not save_images:
dataset.transform(DeleteImagePath)
with TemporaryDirectory() as tmp_dir:
dataset.export(tmp_dir, 'datumaro', save_images=save_images)
dataset.export(temp_dir, 'datumaro', save_images=save_images)
make_zip_archive(tmp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name="Datumaro", ext="ZIP", version="1.0")
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images,
dimension=DimensionType.DIM_3D), env=dm_env)
if not save_images:
dataset.transform(DeleteImagePath)
with TemporaryDirectory() as tmp_dir:
dataset.export(tmp_dir, 'datumaro', save_images=save_images)
dataset.export(temp_dir, 'datumaro', save_images=save_images)
make_zip_archive(tmp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,10 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.annotation import (AnnotationType, Caption, Label,
LabelCategories)
@ -78,64 +77,58 @@ class LabelToCaption(ItemTransform):
return item.wrap(annotations=annotations)
@exporter(name='ICDAR Recognition', ext='ZIP', version='1.0')
def _export_recognition(dst_file, instance_data, save_images=False):
def _export_recognition(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(LabelToCaption)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='ICDAR Recognition', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'icdar_word_recognition', env=dm_env)
dataset.transform(CaptionToLabel, label='icdar')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'icdar_word_recognition', env=dm_env)
dataset.transform(CaptionToLabel, label='icdar')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
@exporter(name='ICDAR Localization', ext='ZIP', version='1.0')
def _export_localization(dst_file, instance_data, save_images=False):
def _export_localization(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='ICDAR Localization', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(tmp_dir, 'icdar_text_localization', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
dataset = Dataset.import_from(temp_dir, 'icdar_text_localization', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
@exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0')
def _export_segmentation(dst_file, instance_data, save_images=False):
def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='ICDAR Segmentation', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'icdar_text_segmentation', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'icdar_text_segmentation', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,11 +1,11 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
import zipfile
from glob import glob
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
@ -17,25 +17,23 @@ from .registry import dm_env, exporter, importer
@exporter(name='ImageNet', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
if save_images:
dataset.export(temp_dir, 'imagenet', save_images=save_images)
else:
dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)
if save_images:
dataset.export(temp_dir, 'imagenet', save_images=save_images)
else:
dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='ImageNet', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
if glob(osp.join(tmp_dir, '*.txt')):
dataset = Dataset.import_from(tmp_dir, 'imagenet_txt', env=dm_env)
else:
dataset = Dataset.import_from(tmp_dir, 'imagenet', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
if glob(osp.join(temp_dir, '*.txt')):
dataset = Dataset.import_from(temp_dir, 'imagenet_txt', env=dm_env)
else:
dataset = Dataset.import_from(temp_dir, 'imagenet', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,10 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from datumaro.plugins.kitti_format.format import KittiPath, write_label_map
@ -19,38 +18,36 @@ from .utils import make_colormap
@exporter(name='KITTI', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(instance_data,
include_images=save_images), env=dm_env)
with TemporaryDirectory() as tmp_dir:
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('merge_instance_segments')
dataset.export(tmp_dir, format='kitti',
label_map={k: v[0] for k, v in make_colormap(instance_data).items()},
apply_colormap=True, save_images=save_images
)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('merge_instance_segments')
dataset.export(temp_dir, format='kitti',
label_map={k: v[0] for k, v in make_colormap(instance_data).items()},
apply_colormap=True, save_images=save_images
)
make_zip_archive(tmp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='KITTI', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
color_map = {k: v[0] for k, v in make_colormap(instance_data).items()}
color_map_path = osp.join(tmp_dir, KittiPath.LABELMAP_FILE)
if not osp.isfile(color_map_path):
write_label_map(color_map_path, color_map)
dataset = Dataset.import_from(tmp_dir, format='kitti', env=dm_env)
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
if 'background' not in [label['name'] for _, label in labels_meta]:
dataset.filter('/item/annotation[label != "background"]',
filter_annotations=True)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
color_map = {k: v[0] for k, v in make_colormap(instance_data).items()}
color_map_path = osp.join(temp_dir, KittiPath.LABELMAP_FILE)
if not osp.isfile(color_map_path):
write_label_map(color_map_path, color_map)
dataset = Dataset.import_from(temp_dir, format='kitti', env=dm_env)
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
if 'background' not in [label['name'] for _, label in labels_meta]:
dataset.filter('/item/annotation[label != "background"]',
filter_annotations=True)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
@ -16,21 +15,20 @@ from .registry import dm_env, exporter, importer
@exporter(name='LabelMe', ext='ZIP', version='3.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'label_me', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'label_me', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='LabelMe', ext='ZIP', version='3.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'label_me', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'label_me', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,7 +1,7 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
@ -14,20 +14,18 @@ from .registry import dm_env, exporter, importer
@importer(name='LFW', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(tmp_dir, 'lfw')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
dataset = Dataset.import_from(temp_dir, 'lfw')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
@exporter(name='LFW', ext='ZIP', version='1.0')
def _exporter(dst_file, instance_data, save_images=False):
def _exporter(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(instance_data,
include_images=save_images), env=dm_env)
with TemporaryDirectory() as tmp_dir:
dataset.export(tmp_dir, format='lfw', save_images=save_images)
make_zip_archive(tmp_dir, dst_file)
dataset.export(temp_dir, format='lfw', save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -1,10 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.annotation import (AnnotationType, Label,
LabelCategories)
@ -62,21 +61,20 @@ class LabelAttrToAttr(ItemTransform):
@exporter(name='Market-1501', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.transform(LabelAttrToAttr, label='market-1501')
dataset.export(temp_dir, 'market1501', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.transform(LabelAttrToAttr, label='market-1501')
dataset.export(temp_dir, 'market1501', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='Market-1501', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'market1501', env=dm_env)
dataset.transform(AttrToLabelAttr, label='market-1501')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'market1501', env=dm_env)
dataset.transform(AttrToLabelAttr, label='market-1501')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
@ -16,26 +15,25 @@ from .registry import dm_env, exporter, importer
from .utils import make_colormap
@exporter(name='Segmentation mask', ext='ZIP', version='1.1')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'voc_segmentation', save_images=save_images,
apply_colormap=True, label_map=make_colormap(instance_data))
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'voc_segmentation', save_images=save_images,
apply_colormap=True, label_map=make_colormap(instance_data))
make_zip_archive(temp_dir, dst_file)
@importer(name='Segmentation mask', ext='ZIP', version='1.1')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,10 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
import datumaro as dm
from pyunpack import Archive
@ -95,26 +93,25 @@ def _import_to_task(dataset, instance_data):
@exporter(name='MOT', ext='ZIP', version='1.1')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = dm.Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='MOT', ext='ZIP', version='1.1')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = dm.Dataset.import_from(tmp_dir, 'mot_seq', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_to_task(sub_dataset, task_data)
else:
_import_to_task(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = dm.Dataset.import_from(temp_dir, 'mot_seq', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_to_task(sub_dataset, task_data)
else:
_import_to_task(dataset, instance_data)

@ -1,10 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from datumaro.components.annotation import AnnotationType
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform
@ -95,7 +93,7 @@ def _import_to_task(dataset, instance_data):
instance_data.add_track(track)
@exporter(name='MOTS PNG', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(KeepTracks) # can only export tracks
@ -103,25 +101,24 @@ def _export(dst_file, instance_data, save_images=False):
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'mots_png', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'mots_png', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='MOTS PNG', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'mots', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_to_task(sub_dataset, task_data)
else:
_import_to_task(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'mots', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_to_task(sub_dataset, task_data)
else:
_import_to_task(dataset, instance_data)

@ -1,10 +1,10 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import glob
import os.path as osp
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset, DatasetItem
from datumaro.plugins.open_images_format import OpenImagesPath
@ -38,50 +38,48 @@ def find_item_ids(path):
yield row.split(',')[0]
@exporter(name='Open Images V6', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
def _export(dst_file, temp_dir, task_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'open_images', save_images=save_images)
dataset.export(temp_dir, 'open_images', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='Open Images V6', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
image_meta_path = osp.join(tmp_dir, OpenImagesPath.ANNOTATIONS_DIR,
DEFAULT_IMAGE_META_FILE_NAME)
image_meta = None
if not osp.isfile(image_meta_path):
image_meta = {}
item_ids = list(find_item_ids(tmp_dir))
root_hint = find_dataset_root(
[DatasetItem(id=item_id) for item_id in item_ids], instance_data)
for item_id in item_ids:
frame_info = None
try:
frame_id = match_dm_item(DatasetItem(id=item_id),
instance_data, root_hint)
frame_info = instance_data.frame_info[frame_id]
except Exception: # nosec
pass
if frame_info is not None:
image_meta[item_id] = (frame_info['height'], frame_info['width'])
dataset = Dataset.import_from(tmp_dir, 'open_images',
image_meta=image_meta, env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
image_meta_path = osp.join(temp_dir, OpenImagesPath.ANNOTATIONS_DIR,
DEFAULT_IMAGE_META_FILE_NAME)
image_meta = None
if not osp.isfile(image_meta_path):
image_meta = {}
item_ids = list(find_item_ids(temp_dir))
root_hint = find_dataset_root(
[DatasetItem(id=item_id) for item_id in item_ids], instance_data)
for item_id in item_ids:
frame_info = None
try:
frame_id = match_dm_item(DatasetItem(id=item_id),
instance_data, root_hint)
frame_info = instance_data.frame_info[frame_id]
except Exception: # nosec
pass
if frame_info is not None:
image_meta[item_id] = (frame_info['height'], frame_info['width'])
dataset = Dataset.import_from(temp_dir, 'open_images',
image_meta=image_meta, env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,5 +1,5 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@ -7,7 +7,6 @@ import os
import os.path as osp
import shutil
from glob import glob
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
@ -20,44 +19,43 @@ from .registry import dm_env, exporter, importer
@exporter(name='PASCAL VOC', ext='ZIP', version='1.1')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'voc', save_images=save_images,
label_map='source')
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'voc', save_images=save_images,
label_map='source')
make_zip_archive(temp_dir, dst_file)
@importer(name='PASCAL VOC', ext='ZIP', version='1.1')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
# put label map from the task if not present
labelmap_file = osp.join(tmp_dir, 'labelmap.txt')
if not osp.isfile(labelmap_file):
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
labels = (label['name'] + ':::' for _, label in labels_meta)
with open(labelmap_file, 'w') as f:
f.write('\n'.join(labels))
# support flat archive layout
anno_dir = osp.join(tmp_dir, 'Annotations')
if not osp.isdir(anno_dir):
anno_files = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True)
subsets_dir = osp.join(tmp_dir, 'ImageSets', 'Main')
os.makedirs(subsets_dir, exist_ok=True)
with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file:
for f in anno_files:
subset_file.write(osp.splitext(osp.basename(f))[0] + '\n')
os.makedirs(anno_dir, exist_ok=True)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
# put label map from the task if not present
labelmap_file = osp.join(temp_dir, 'labelmap.txt')
if not osp.isfile(labelmap_file):
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
labels = (label['name'] + ':::' for _, label in labels_meta)
with open(labelmap_file, 'w') as f:
f.write('\n'.join(labels))
# support flat archive layout
anno_dir = osp.join(temp_dir, 'Annotations')
if not osp.isdir(anno_dir):
anno_files = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True)
subsets_dir = osp.join(temp_dir, 'ImageSets', 'Main')
os.makedirs(subsets_dir, exist_ok=True)
with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file:
for f in anno_files:
shutil.move(f, anno_dir)
subset_file.write(osp.splitext(osp.basename(f))[0] + '\n')
os.makedirs(anno_dir, exist_ok=True)
for f in anno_files:
shutil.move(f, anno_dir)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
@ -16,29 +16,24 @@ from .registry import dm_env, exporter, importer
@exporter(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D)
def _export_images(dst_file, task_data, save_images=False):
def _export_images(dst_file, temp_dir, task_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
task_data, include_images=save_images, format_type='sly_pointcloud', dimension=DimensionType.DIM_3D), env=dm_env)
task_data, include_images=save_images, format_type='sly_pointcloud',
dimension=DimensionType.DIM_3D), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images)
dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'sly_pointcloud', env=dm_env)
else:
dataset = Dataset.import_from(src_file.name,
'sly_pointcloud', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'sly_pointcloud', env=dm_env)
else:
dataset = Dataset.import_from(src_file.name, 'sly_pointcloud', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,4 +1,3 @@
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
@ -17,11 +16,11 @@ class _Format:
ENABLED = True
class Exporter(_Format):
def __call__(self, dst_file, instance_data, **options):
def __call__(self, dst_file, temp_dir, instance_data, **options):
raise NotImplementedError()
class Importer(_Format):
def __call__(self, src_file, instance_data, load_data_callback=None, **options):
def __call__(self, src_file, temp_dir, instance_data, load_data_callback=None, **options):
raise NotImplementedError()
def _wrap_format(f_or_cls, klass, name, version, ext, display_name, enabled, dimension=DimensionType.DIM_2D):

@ -1,9 +1,8 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from tempfile import TemporaryDirectory
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
@ -13,7 +12,6 @@ from datumaro.components.project import Dataset
from .registry import dm_env, exporter, importer
from datumaro.util.tf_util import import_tf
try:
import_tf()
@ -23,20 +21,19 @@ except ImportError:
@exporter(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available)
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'tf_detection_api', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'tf_detection_api', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available)
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'tf_detection_api', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
@ -18,24 +18,23 @@ from .registry import exporter, importer
@exporter(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D)
def _export_images(dst_file, task_data, save_images=False):
def _export_images(dst_file, temp_dir, task_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
task_data, include_images=save_images, format_type="kitti_raw", dimension=DimensionType.DIM_3D), env=dm_env)
task_data, include_images=save_images, format_type="kitti_raw",
dimension=DimensionType.DIM_3D), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True)
dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True)
make_zip_archive(temp_dir, dst_file)
make_zip_archive(temp_dir, dst_file)
@importer(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'kitti_raw', env=dm_env)
else:
dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'kitti_raw', env=dm_env)
else:
dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,10 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
@ -16,22 +15,21 @@ from .registry import dm_env, exporter, importer
@exporter(name='VGGFace2', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'vgg_face2', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'vgg_face2', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='VGGFace2', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env)
if isinstance(instance_data, TaskData):
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'vgg_face2', env=dm_env)
if isinstance(instance_data, TaskData):
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,9 +1,9 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import zipfile
from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
@ -15,20 +15,19 @@ from .registry import dm_env, exporter, importer
@exporter(name='WiderFace', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'wider_face', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'wider_face', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='WiderFace', ext='ZIP', version='1.0')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'wider_face', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)
dataset = Dataset.import_from(temp_dir, 'wider_face', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,10 +1,10 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
from glob import glob
from tempfile import TemporaryDirectory
from pyunpack import Archive
@ -19,37 +19,36 @@ from .registry import dm_env, exporter, importer
@exporter(name='YOLO', ext='ZIP', version='1.1')
def _export(dst_file, instance_data, save_images=False):
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
dataset.export(temp_dir, 'yolo', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
dataset.export(temp_dir, 'yolo', save_images=save_images)
make_zip_archive(temp_dir, dst_file)
@importer(name='YOLO', ext='ZIP', version='1.1')
def _import(src_file, instance_data, load_data_callback=None, **kwargs):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
image_info = {}
frames = [YoloExtractor.name_from_path(osp.relpath(p, tmp_dir))
for p in glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)]
root_hint = find_dataset_root(
[DatasetItem(id=frame) for frame in frames], instance_data)
for frame in frames:
frame_info = None
try:
frame_id = match_dm_item(DatasetItem(id=frame), instance_data,
root_hint=root_hint)
frame_info = instance_data.frame_info[frame_id]
except Exception: # nosec
pass
if frame_info is not None:
image_info[frame] = (frame_info['height'], frame_info['width'])
dataset = Dataset.import_from(tmp_dir, 'yolo',
env=dm_env, image_info=image_info)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
image_info = {}
frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir))
for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)]
root_hint = find_dataset_root(
[DatasetItem(id=frame) for frame in frames], instance_data)
for frame in frames:
frame_info = None
try:
frame_id = match_dm_item(DatasetItem(id=frame), instance_data,
root_hint=root_hint)
frame_info = instance_data.frame_info[frame_id]
except Exception: # nosec
pass
if frame_info is not None:
image_info[frame] = (frame_info['height'], frame_info['width'])
dataset = Dataset.import_from(temp_dir, 'yolo',
env=dm_env, image_info=image_info)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@ -1,7 +1,10 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import os
from tempfile import TemporaryDirectory
import rq
from typing import Any, Callable, List, Mapping, Tuple
@ -127,7 +130,11 @@ class ProjectAnnotationAndData:
db_project=self.db_project,
host=host
)
exporter(dst_file, project_data, **options)
temp_dir_base = self.db_project.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
exporter(dst_file, temp_dir, project_data, **options)
def load_dataset_data(self, *args, **kwargs):
load_dataset_data(self, *args, **kwargs)
@ -141,7 +148,10 @@ class ProjectAnnotationAndData:
)
project_data.soft_attribute_import = True
importer(dataset_file, project_data, self.load_dataset_data, **options)
temp_dir_base = self.db_project.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
importer(dataset_file, temp_dir, project_data, self.load_dataset_data, **options)
self.create({tid: ir.serialize() for tid, ir in self.annotation_irs.items() if tid in project_data.new_tasks})

@ -1,11 +1,12 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
from enum import Enum
import os
from tempfile import TemporaryDirectory
from django.db import transaction
from django.db.models.query import Prefetch
@ -567,7 +568,11 @@ class JobAnnotation:
db_job=self.db_job,
host=host,
)
exporter(dst_file, job_data, **options)
temp_dir_base = self.db_job.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
exporter(dst_file, temp_dir, job_data, **options)
def import_annotations(self, src_file, importer, **options):
job_data = JobData(
@ -577,7 +582,10 @@ class JobAnnotation:
)
self.delete()
importer(src_file, job_data, **options)
temp_dir_base = self.db_job.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
importer(src_file, temp_dir, job_data, **options)
self.create(job_data.data.slice(self.start_frame, self.stop_frame).serialize())
@ -654,7 +662,11 @@ class TaskAnnotation:
db_task=self.db_task,
host=host,
)
exporter(dst_file, task_data, **options)
temp_dir_base = self.db_task.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
exporter(dst_file, temp_dir, task_data, **options)
def import_annotations(self, src_file, importer, **options):
task_data = TaskData(
@ -664,7 +676,10 @@ class TaskAnnotation:
)
self.delete()
importer(src_file, task_data, **options)
temp_dir_base = self.db_task.get_tmp_dirname()
os.makedirs(temp_dir_base, exist_ok=True)
with TemporaryDirectory(dir=temp_dir_base) as temp_dir:
importer(src_file, temp_dir, task_data, **options)
self.create(task_data.data.serialize())
@ -709,8 +724,7 @@ def delete_job_data(pk):
annotation = JobAnnotation(pk)
annotation.delete()
def export_job(job_id, dst_file, format_name,
server_url=None, save_images=False):
def export_job(job_id, dst_file, format_name, server_url=None, save_images=False):
# For big tasks dump function may run for a long time and
# we dont need to acquire lock after the task has been initialized from DB.
# But there is the bug with corrupted dump file in case 2 or
@ -759,8 +773,7 @@ def delete_task_data(pk):
annotation = TaskAnnotation(pk)
annotation.delete()
def export_task(task_id, dst_file, format_name,
server_url=None, save_images=False):
def export_task(task_id, dst_file, format_name, server_url=None, save_images=False):
# For big tasks dump function may run for a long time and
# we dont need to acquire lock after the task has been initialized from DB.
# But there is the bug with corrupted dump file in case 2 or

@ -1,4 +1,5 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@ -72,7 +73,8 @@ def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=No
instance_time = timezone.localtime(db_instance.updated_date).timestamp()
if isinstance(db_instance, Project):
tasks_update = list(map(lambda db_task: timezone.localtime(db_task.updated_date).timestamp(), db_instance.tasks.all()))
tasks_update = list(map(lambda db_task: timezone.localtime(
db_task.updated_date).timestamp(), db_instance.tasks.all()))
instance_time = max(tasks_update + [instance_time])
if not (osp.exists(output_path) and \
instance_time <= osp.getmtime(output_path)):
@ -120,10 +122,10 @@ def export_task_annotations(task_id, dst_format=None, server_url=None):
def export_project_as_dataset(project_id, dst_format=None, server_url=None):
return export(dst_format, project_id=project_id, server_url=server_url, save_images=True)
def export_project_annotations(project_id, dst_format=None, server_url=None):
return export(dst_format, project_id=project_id, server_url=server_url, save_images=False)
def clear_export_cache(file_path, file_ctime, logger):
try:
if osp.exists(file_path) and osp.getctime(file_path) == file_ctime:

@ -468,6 +468,9 @@ class Job(models.Model):
def get_dirname(self):
return os.path.join(settings.JOBS_ROOT, str(self.id))
def get_tmp_dirname(self):
return os.path.join(self.get_dirname(), 'tmp')
@extend_schema_field(OpenApiTypes.INT)
def get_project_id(self):
project = self.segment.task.project

Loading…
Cancel
Save