From a0b70fcc40f4c65ffa6c7fb499f4f8a3e2021827 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 23 Jan 2023 17:07:27 +0300 Subject: [PATCH] Change default temporary directory for import and export (#5613) --- cvat/apps/dataset_manager/formats/camvid.py | 32 ++++---- .../dataset_manager/formats/cityscapes.py | 43 +++++----- cvat/apps/dataset_manager/formats/coco.py | 48 ++++++----- cvat/apps/dataset_manager/formats/cvat.py | 76 +++++++++--------- cvat/apps/dataset_manager/formats/datumaro.py | 43 +++++----- cvat/apps/dataset_manager/formats/icdar.py | 79 +++++++++---------- cvat/apps/dataset_manager/formats/imagenet.py | 34 ++++---- cvat/apps/dataset_manager/formats/kitti.py | 59 +++++++------- cvat/apps/dataset_manager/formats/labelme.py | 28 +++---- cvat/apps/dataset_manager/formats/lfw.py | 22 +++--- .../dataset_manager/formats/market1501.py | 30 ++++--- cvat/apps/dataset_manager/formats/mask.py | 30 ++++--- cvat/apps/dataset_manager/formats/mot.py | 39 +++++---- cvat/apps/dataset_manager/formats/mots.py | 41 +++++----- .../dataset_manager/formats/openimages.py | 72 ++++++++--------- .../dataset_manager/formats/pascal_voc.py | 70 ++++++++-------- .../dataset_manager/formats/pointcloud.py | 37 ++++----- cvat/apps/dataset_manager/formats/registry.py | 5 +- cvat/apps/dataset_manager/formats/tfrecord.py | 27 +++---- .../dataset_manager/formats/velodynepoint.py | 31 ++++---- cvat/apps/dataset_manager/formats/vggface2.py | 30 ++++--- .../apps/dataset_manager/formats/widerface.py | 25 +++--- cvat/apps/dataset_manager/formats/yolo.py | 59 +++++++------- cvat/apps/dataset_manager/project.py | 14 +++- cvat/apps/dataset_manager/task.py | 33 +++++--- cvat/apps/dataset_manager/views.py | 6 +- cvat/apps/engine/models.py | 3 + 27 files changed, 493 insertions(+), 523 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/camvid.py b/cvat/apps/dataset_manager/formats/camvid.py index b6476e53..7e103cd8 100644 --- a/cvat/apps/dataset_manager/formats/camvid.py +++ b/cvat/apps/dataset_manager/formats/camvid.py @@ -1,9 +1,8 @@ # Copyright (C) 2020-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from datumaro.components.dataset import Dataset from pyunpack import Archive @@ -17,7 +16,7 @@ from .utils import make_colormap @exporter(name='CamVid', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) dataset.transform(RotatedBoxesToPolygons) @@ -25,20 +24,19 @@ def _export(dst_file, instance_data, save_images=False): dataset.transform('boxes_to_masks') dataset.transform('merge_instance_segments') label_map = make_colormap(instance_data) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'camvid', - save_images=save_images, apply_colormap=True, - label_map={label: label_map[label][0] for label in label_map}) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'camvid', + save_images=save_images, apply_colormap=True, + label_map={label: label_map[label][0] for label in label_map}) + + make_zip_archive(temp_dir, dst_file) @importer(name='CamVid', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'camvid', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'camvid', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/cityscapes.py b/cvat/apps/dataset_manager/formats/cityscapes.py index c660ad8d..b05af362 100644 --- a/cvat/apps/dataset_manager/formats/cityscapes.py +++ b/cvat/apps/dataset_manager/formats/cityscapes.py @@ -1,9 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import os.path as osp -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset from datumaro.plugins.cityscapes_format import write_label_map @@ -19,33 +19,32 @@ from .utils import make_colormap @exporter(name='Cityscapes', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') dataset.transform('boxes_to_masks') dataset.transform('merge_instance_segments') - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'cityscapes', save_images=save_images, - apply_colormap=True, label_map={label: info[0] - for label, info in make_colormap(instance_data).items()}) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'cityscapes', save_images=save_images, + apply_colormap=True, label_map={label: info[0] + for label, info in make_colormap(instance_data).items()}) + + make_zip_archive(temp_dir, dst_file) @importer(name='Cityscapes', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - labelmap_file = osp.join(tmp_dir, 'label_colors.txt') - if not osp.isfile(labelmap_file): - colormap = {label: info[0] - for label, info in make_colormap(instance_data).items()} - write_label_map(labelmap_file, colormap) - - dataset = Dataset.import_from(tmp_dir, 'cityscapes', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + labelmap_file = osp.join(temp_dir, 'label_colors.txt') + if not osp.isfile(labelmap_file): + colormap = {label: info[0] + for label, info in make_colormap(instance_data).items()} + write_label_map(labelmap_file, colormap) + + dataset = Dataset.import_from(temp_dir, 'cityscapes', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 0674556c..761ee057 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -1,9 +1,9 @@ # Copyright (C) 2018-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset from datumaro.components.annotation import AnnotationType @@ -15,41 +15,38 @@ from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer @exporter(name='COCO', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'coco_instances', save_images=save_images, - merge_images=True) + dataset.export(temp_dir, 'coco_instances', save_images=save_images, + merge_images=True) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='COCO', ext='JSON, ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - dataset = Dataset.import_from(tmp_dir, 'coco_instances', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + zipfile.ZipFile(src_file).extractall(temp_dir) + dataset = Dataset.import_from(temp_dir, 'coco_instances', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) else: dataset = Dataset.import_from(src_file.name, 'coco_instances', env=dm_env) import_dm_annotations(dataset, instance_data) @exporter(name='COCO Keypoints', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images, - merge_images=True) + dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images, + merge_images=True) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='COCO Keypoints', ext='JSON, ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): def remove_extra_annotations(dataset): for item in dataset: annotations = [ann for ann in item.annotations @@ -57,13 +54,12 @@ def _import(src_file, instance_data, load_data_callback=None, **kwargs): item.annotations = annotations if zipfile.is_zipfile(src_file): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - dataset = Dataset.import_from(tmp_dir, 'coco_person_keypoints', env=dm_env) - remove_extra_annotations(dataset) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + zipfile.ZipFile(src_file).extractall(temp_dir) + dataset = Dataset.import_from(temp_dir, 'coco_person_keypoints', env=dm_env) + remove_extra_annotations(dataset) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) else: dataset = Dataset.import_from(src_file.name, 'coco_person_keypoints', env=dm_env) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 4064edba..681b2d71 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -1,5 +1,5 @@ # Copyright (C) 2018-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -9,7 +9,6 @@ import zipfile from collections import OrderedDict from glob import glob from io import BufferedWriter -from tempfile import TemporaryDirectory from typing import Callable from datumaro.components.annotation import (AnnotationType, Bbox, Label, @@ -1349,64 +1348,63 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj with open(img_path, 'wb') as f: f.write(frame_data.getvalue()) -def _export_task_or_job(dst_file, instance_data, anno_callback, save_images=False): - with TemporaryDirectory() as temp_dir: - with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: - dump_task_or_job_anno(f, instance_data, anno_callback) +def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False): + with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: + dump_task_or_job_anno(f, instance_data, anno_callback) - if save_images: - dump_media_files(instance_data, osp.join(temp_dir, 'images')) + if save_images: + dump_media_files(instance_data, osp.join(temp_dir, 'images')) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) -def _export_project(dst_file: str, project_data: ProjectData, anno_callback: Callable, save_images: bool=False): - with TemporaryDirectory() as temp_dir: - with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: - dump_project_anno(f, project_data, anno_callback) +def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData, + anno_callback: Callable, save_images: bool=False +): + with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: + dump_project_anno(f, project_data, anno_callback) - if save_images: - for task_data in project_data.task_data: - subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets) - subset_dir = osp.join(temp_dir, 'images', subset) - os.makedirs(subset_dir, exist_ok=True) - dump_media_files(task_data, subset_dir, project_data) + if save_images: + for task_data in project_data.task_data: + subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets) + subset_dir = osp.join(temp_dir, 'images', subset) + os.makedirs(subset_dir, exist_ok=True) + dump_media_files(task_data, subset_dir, project_data) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @exporter(name='CVAT for video', ext='ZIP', version='1.1') -def _export_video(dst_file, instance_data, save_images=False): +def _export_video(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project(dst_file, instance_data, + _export_project(dst_file, temp_dir, instance_data, anno_callback=dump_as_cvat_interpolation, save_images=save_images) else: - _export_task_or_job(dst_file, instance_data, + _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback=dump_as_cvat_interpolation, save_images=save_images) @exporter(name='CVAT for images', ext='ZIP', version='1.1') -def _export_images(dst_file, instance_data, save_images=False): +def _export_images(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project(dst_file, instance_data, + _export_project(dst_file, temp_dir, instance_data, anno_callback=dump_as_cvat_annotation, save_images=save_images) else: - _export_task_or_job(dst_file, instance_data, + _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback=dump_as_cvat_annotation, save_images=save_images) @importer(name='CVAT', ext='XML, ZIP', version='1.1') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - - if isinstance(instance_data, ProjectData): - dataset = Dataset.import_from(tmp_dir, 'cvat', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) - else: - anno_paths = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + zipfile.ZipFile(src_file).extractall(temp_dir) + + if isinstance(instance_data, ProjectData): + dataset = Dataset.import_from(temp_dir, 'cvat', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) + else: + anno_paths = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) else: load_anno(src_file, instance_data) diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py index 3ef84824..40a10600 100644 --- a/cvat/apps/dataset_manager/formats/datumaro.py +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -1,9 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform from datumaro.util.image import Image @@ -25,47 +24,43 @@ class DeleteImagePath(ItemTransform): @exporter(name="Datumaro", ext="ZIP", version="1.0") -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data=instance_data, include_images=save_images), env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - with TemporaryDirectory() as tmp_dir: - dataset.export(tmp_dir, 'datumaro', save_images=save_images) + dataset.export(temp_dir, 'datumaro', save_images=save_images) - make_zip_archive(tmp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name="Datumaro", ext="ZIP", version="1.0") -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) - dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) @exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data=instance_data, include_images=save_images, dimension=DimensionType.DIM_3D), env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - with TemporaryDirectory() as tmp_dir: - dataset.export(tmp_dir, 'datumaro', save_images=save_images) + dataset.export(temp_dir, 'datumaro', save_images=save_images) - make_zip_archive(tmp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) - dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index 7e404823..3effe805 100644 --- a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -1,10 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.annotation import (AnnotationType, Caption, Label, LabelCategories) @@ -78,64 +77,58 @@ class LabelToCaption(ItemTransform): return item.wrap(annotations=annotations) @exporter(name='ICDAR Recognition', ext='ZIP', version='1.0') -def _export_recognition(dst_file, instance_data, save_images=False): +def _export_recognition(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) dataset.transform(LabelToCaption) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) + make_zip_archive(temp_dir, dst_file) @importer(name='ICDAR Recognition', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - dataset = Dataset.import_from(tmp_dir, 'icdar_word_recognition', env=dm_env) - dataset.transform(CaptionToLabel, label='icdar') - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + dataset = Dataset.import_from(temp_dir, 'icdar_word_recognition', env=dm_env) + dataset.transform(CaptionToLabel, label='icdar') + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) @exporter(name='ICDAR Localization', ext='ZIP', version='1.0') -def _export_localization(dst_file, instance_data, save_images=False): +def _export_localization(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) + make_zip_archive(temp_dir, dst_file) @importer(name='ICDAR Localization', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) - dataset = Dataset.import_from(tmp_dir, 'icdar_text_localization', env=dm_env) - dataset.transform(AddLabelToAnns, label='icdar') - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + dataset = Dataset.import_from(temp_dir, 'icdar_text_localization', env=dm_env) + dataset.transform(AddLabelToAnns, label='icdar') + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) @exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0') -def _export_segmentation(dst_file, instance_data, save_images=False): +def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') - dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.transform(RotatedBoxesToPolygons) + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') + dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images) + make_zip_archive(temp_dir, dst_file) @importer(name='ICDAR Segmentation', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - dataset = Dataset.import_from(tmp_dir, 'icdar_text_segmentation', env=dm_env) - dataset.transform(AddLabelToAnns, label='icdar') - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + dataset = Dataset.import_from(temp_dir, 'icdar_text_segmentation', env=dm_env) + dataset.transform(AddLabelToAnns, label='icdar') + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/imagenet.py b/cvat/apps/dataset_manager/formats/imagenet.py index 51cb2ee1..f4fccef5 100644 --- a/cvat/apps/dataset_manager/formats/imagenet.py +++ b/cvat/apps/dataset_manager/formats/imagenet.py @@ -1,11 +1,11 @@ # Copyright (C) 2020-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import os.path as osp import zipfile from glob import glob -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset @@ -17,25 +17,23 @@ from .registry import dm_env, exporter, importer @exporter(name='ImageNet', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - if save_images: - dataset.export(temp_dir, 'imagenet', save_images=save_images) - else: - dataset.export(temp_dir, 'imagenet_txt', save_images=save_images) + if save_images: + dataset.export(temp_dir, 'imagenet', save_images=save_images) + else: + dataset.export(temp_dir, 'imagenet_txt', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='ImageNet', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - if glob(osp.join(tmp_dir, '*.txt')): - dataset = Dataset.import_from(tmp_dir, 'imagenet_txt', env=dm_env) - else: - dataset = Dataset.import_from(tmp_dir, 'imagenet', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + if glob(osp.join(temp_dir, '*.txt')): + dataset = Dataset.import_from(temp_dir, 'imagenet_txt', env=dm_env) + else: + dataset = Dataset.import_from(temp_dir, 'imagenet', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/kitti.py b/cvat/apps/dataset_manager/formats/kitti.py index 57d46a94..8de1f524 100644 --- a/cvat/apps/dataset_manager/formats/kitti.py +++ b/cvat/apps/dataset_manager/formats/kitti.py @@ -1,10 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import os.path as osp -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset from datumaro.plugins.kitti_format.format import KittiPath, write_label_map @@ -19,38 +18,36 @@ from .utils import make_colormap @exporter(name='KITTI', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor(instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as tmp_dir: - dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('merge_instance_segments') - dataset.export(tmp_dir, format='kitti', - label_map={k: v[0] for k, v in make_colormap(instance_data).items()}, - apply_colormap=True, save_images=save_images - ) + dataset.transform(RotatedBoxesToPolygons) + dataset.transform('polygons_to_masks') + dataset.transform('merge_instance_segments') + dataset.export(temp_dir, format='kitti', + label_map={k: v[0] for k, v in make_colormap(instance_data).items()}, + apply_colormap=True, save_images=save_images + ) - make_zip_archive(tmp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='KITTI', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - color_map = {k: v[0] for k, v in make_colormap(instance_data).items()} - color_map_path = osp.join(tmp_dir, KittiPath.LABELMAP_FILE) - if not osp.isfile(color_map_path): - write_label_map(color_map_path, color_map) - - dataset = Dataset.import_from(tmp_dir, format='kitti', env=dm_env) - labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] - if 'background' not in [label['name'] for _, label in labels_meta]: - dataset.filter('/item/annotation[label != "background"]', - filter_annotations=True) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + color_map = {k: v[0] for k, v in make_colormap(instance_data).items()} + color_map_path = osp.join(temp_dir, KittiPath.LABELMAP_FILE) + if not osp.isfile(color_map_path): + write_label_map(color_map_path, color_map) + + dataset = Dataset.import_from(temp_dir, format='kitti', env=dm_env) + labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] + if 'background' not in [label['name'] for _, label in labels_meta]: + dataset.filter('/item/annotation[label != "background"]', + filter_annotations=True) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 8a475363..910cb7f8 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -1,9 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from datumaro.components.dataset import Dataset from pyunpack import Archive @@ -16,21 +15,20 @@ from .registry import dm_env, exporter, importer @exporter(name='LabelMe', ext='ZIP', version='3.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'label_me', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'label_me', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='LabelMe', ext='ZIP', version='3.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'label_me', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'label_me', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/lfw.py b/cvat/apps/dataset_manager/formats/lfw.py index d1b5138c..4fed9649 100644 --- a/cvat/apps/dataset_manager/formats/lfw.py +++ b/cvat/apps/dataset_manager/formats/lfw.py @@ -1,7 +1,7 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset from pyunpack import Archive @@ -14,20 +14,18 @@ from .registry import dm_env, exporter, importer @importer(name='LFW', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) - dataset = Dataset.import_from(tmp_dir, 'lfw') - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + dataset = Dataset.import_from(temp_dir, 'lfw') + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) @exporter(name='LFW', ext='ZIP', version='1.0') -def _exporter(dst_file, instance_data, save_images=False): +def _exporter(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor(instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as tmp_dir: - dataset.export(tmp_dir, format='lfw', save_images=save_images) - make_zip_archive(tmp_dir, dst_file) + dataset.export(temp_dir, format='lfw', save_images=save_images) + make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index 07bdf21b..bb50bbaf 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -1,10 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.annotation import (AnnotationType, Label, LabelCategories) @@ -62,21 +61,20 @@ class LabelAttrToAttr(ItemTransform): @exporter(name='Market-1501', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.transform(LabelAttrToAttr, label='market-1501') - dataset.export(temp_dir, 'market1501', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + + dataset.transform(LabelAttrToAttr, label='market-1501') + dataset.export(temp_dir, 'market1501', save_images=save_images) + make_zip_archive(temp_dir, dst_file) @importer(name='Market-1501', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'market1501', env=dm_env) - dataset.transform(AttrToLabelAttr, label='market-1501') - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'market1501', env=dm_env) + dataset.transform(AttrToLabelAttr, label='market-1501') + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index fb84ffab..b584264b 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -1,9 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from datumaro.components.dataset import Dataset from pyunpack import Archive @@ -16,26 +15,25 @@ from .registry import dm_env, exporter, importer from .utils import make_colormap @exporter(name='Segmentation mask', ext='ZIP', version='1.1') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') dataset.transform('boxes_to_masks') dataset.transform('merge_instance_segments') - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'voc_segmentation', save_images=save_images, - apply_colormap=True, label_map=make_colormap(instance_data)) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'voc_segmentation', save_images=save_images, + apply_colormap=True, label_map=make_colormap(instance_data)) + + make_zip_archive(temp_dir, dst_file) @importer(name='Segmentation mask', ext='ZIP', version='1.1') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 368105a6..359dd28c 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -1,10 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - import datumaro as dm from pyunpack import Archive @@ -95,26 +93,25 @@ def _import_to_task(dataset, instance_data): @exporter(name='MOT', ext='ZIP', version='1.1') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = dm.Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='MOT', ext='ZIP', version='1.1') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = dm.Dataset.import_from(tmp_dir, 'mot_seq', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - - # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, '_db_project'): - for sub_dataset, task_data in instance_data.split_dataset(dataset): - _import_to_task(sub_dataset, task_data) - else: - _import_to_task(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = dm.Dataset.import_from(temp_dir, 'mot_seq', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + + # Dirty way to determine instance type to avoid circular dependency + if hasattr(instance_data, '_db_project'): + for sub_dataset, task_data in instance_data.split_dataset(dataset): + _import_to_task(sub_dataset, task_data) + else: + _import_to_task(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index b602f0a0..eb967811 100644 --- a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -1,10 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from datumaro.components.annotation import AnnotationType from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform @@ -95,7 +93,7 @@ def _import_to_task(dataset, instance_data): instance_data.add_track(track) @exporter(name='MOTS PNG', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) dataset.transform(KeepTracks) # can only export tracks @@ -103,25 +101,24 @@ def _export(dst_file, instance_data, save_images=False): dataset.transform('polygons_to_masks') dataset.transform('boxes_to_masks') dataset.transform('merge_instance_segments') - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'mots_png', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'mots_png', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='MOTS PNG', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'mots', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - - # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, '_db_project'): - for sub_dataset, task_data in instance_data.split_dataset(dataset): - _import_to_task(sub_dataset, task_data) - else: - _import_to_task(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'mots', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + + # Dirty way to determine instance type to avoid circular dependency + if hasattr(instance_data, '_db_project'): + for sub_dataset, task_data in instance_data.split_dataset(dataset): + _import_to_task(sub_dataset, task_data) + else: + _import_to_task(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/openimages.py b/cvat/apps/dataset_manager/formats/openimages.py index 21526c39..0430ebc1 100644 --- a/cvat/apps/dataset_manager/formats/openimages.py +++ b/cvat/apps/dataset_manager/formats/openimages.py @@ -1,10 +1,10 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import glob import os.path as osp -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset, DatasetItem from datumaro.plugins.open_images_format import OpenImagesPath @@ -38,50 +38,48 @@ def find_item_ids(path): yield row.split(',')[0] @exporter(name='Open Images V6', ext='ZIP', version='1.0') -def _export(dst_file, task_data, save_images=False): +def _export(dst_file, temp_dir, task_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( task_data, include_images=save_images), env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') dataset.transform('merge_instance_segments') - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'open_images', save_images=save_images) + dataset.export(temp_dir, 'open_images', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='Open Images V6', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - image_meta_path = osp.join(tmp_dir, OpenImagesPath.ANNOTATIONS_DIR, - DEFAULT_IMAGE_META_FILE_NAME) - image_meta = None - - if not osp.isfile(image_meta_path): - image_meta = {} - item_ids = list(find_item_ids(tmp_dir)) - - root_hint = find_dataset_root( - [DatasetItem(id=item_id) for item_id in item_ids], instance_data) - - for item_id in item_ids: - frame_info = None - try: - frame_id = match_dm_item(DatasetItem(id=item_id), - instance_data, root_hint) - frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec - pass - if frame_info is not None: - image_meta[item_id] = (frame_info['height'], frame_info['width']) - - dataset = Dataset.import_from(tmp_dir, 'open_images', - image_meta=image_meta, env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + image_meta_path = osp.join(temp_dir, OpenImagesPath.ANNOTATIONS_DIR, + DEFAULT_IMAGE_META_FILE_NAME) + image_meta = None + + if not osp.isfile(image_meta_path): + image_meta = {} + item_ids = list(find_item_ids(temp_dir)) + + root_hint = find_dataset_root( + [DatasetItem(id=item_id) for item_id in item_ids], instance_data) + + for item_id in item_ids: + frame_info = None + try: + frame_id = match_dm_item(DatasetItem(id=item_id), + instance_data, root_hint) + frame_info = instance_data.frame_info[frame_id] + except Exception: # nosec + pass + if frame_info is not None: + image_meta[item_id] = (frame_info['height'], frame_info['width']) + + dataset = Dataset.import_from(temp_dir, 'open_images', + image_meta=image_meta, env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index bd297e62..a1b3fe99 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -1,5 +1,5 @@ # Copyright (C) 2020-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -7,7 +7,6 @@ import os import os.path as osp import shutil from glob import glob -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset from pyunpack import Archive @@ -20,44 +19,43 @@ from .registry import dm_env, exporter, importer @exporter(name='PASCAL VOC', ext='ZIP', version='1.1') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'voc', save_images=save_images, - label_map='source') - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'voc', save_images=save_images, + label_map='source') + + make_zip_archive(temp_dir, dst_file) @importer(name='PASCAL VOC', ext='ZIP', version='1.1') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - # put label map from the task if not present - labelmap_file = osp.join(tmp_dir, 'labelmap.txt') - if not osp.isfile(labelmap_file): - labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] - labels = (label['name'] + ':::' for _, label in labels_meta) - with open(labelmap_file, 'w') as f: - f.write('\n'.join(labels)) - - # support flat archive layout - anno_dir = osp.join(tmp_dir, 'Annotations') - if not osp.isdir(anno_dir): - anno_files = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) - subsets_dir = osp.join(tmp_dir, 'ImageSets', 'Main') - os.makedirs(subsets_dir, exist_ok=True) - with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file: - for f in anno_files: - subset_file.write(osp.splitext(osp.basename(f))[0] + '\n') - - os.makedirs(anno_dir, exist_ok=True) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + # put label map from the task if not present + labelmap_file = osp.join(temp_dir, 'labelmap.txt') + if not osp.isfile(labelmap_file): + labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] + labels = (label['name'] + ':::' for _, label in labels_meta) + with open(labelmap_file, 'w') as f: + f.write('\n'.join(labels)) + + # support flat archive layout + anno_dir = osp.join(temp_dir, 'Annotations') + if not osp.isdir(anno_dir): + anno_files = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) + subsets_dir = osp.join(temp_dir, 'ImageSets', 'Main') + os.makedirs(subsets_dir, exist_ok=True) + with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file: for f in anno_files: - shutil.move(f, anno_dir) + subset_file.write(osp.splitext(osp.basename(f))[0] + '\n') + + os.makedirs(anno_dir, exist_ok=True) + for f in anno_files: + shutil.move(f, anno_dir) - dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env) - dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) + dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) + dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/pointcloud.py b/cvat/apps/dataset_manager/formats/pointcloud.py index f92c036d..5be57e59 100644 --- a/cvat/apps/dataset_manager/formats/pointcloud.py +++ b/cvat/apps/dataset_manager/formats/pointcloud.py @@ -1,9 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset @@ -16,29 +16,24 @@ from .registry import dm_env, exporter, importer @exporter(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _export_images(dst_file, task_data, save_images=False): - +def _export_images(dst_file, temp_dir, task_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( - task_data, include_images=save_images, format_type='sly_pointcloud', dimension=DimensionType.DIM_3D), env=dm_env) + task_data, include_images=save_images, format_type='sly_pointcloud', + dimension=DimensionType.DIM_3D), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images) + dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - - with TemporaryDirectory() as tmp_dir: - if zipfile.is_zipfile(src_file): - zipfile.ZipFile(src_file).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'sly_pointcloud', env=dm_env) - else: - dataset = Dataset.import_from(src_file.name, - 'sly_pointcloud', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) - +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + if zipfile.is_zipfile(src_file): + zipfile.ZipFile(src_file).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'sly_pointcloud', env=dm_env) + else: + dataset = Dataset.import_from(src_file.name, 'sly_pointcloud', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index 37ef8f87..defb530a 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT @@ -17,11 +16,11 @@ class _Format: ENABLED = True class Exporter(_Format): - def __call__(self, dst_file, instance_data, **options): + def __call__(self, dst_file, temp_dir, instance_data, **options): raise NotImplementedError() class Importer(_Format): - def __call__(self, src_file, instance_data, load_data_callback=None, **options): + def __call__(self, src_file, temp_dir, instance_data, load_data_callback=None, **options): raise NotImplementedError() def _wrap_format(f_or_cls, klass, name, version, ext, display_name, enabled, dimension=DimensionType.DIM_2D): diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index f0b15f4a..42ee5a8b 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -1,9 +1,8 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT -from tempfile import TemporaryDirectory - from pyunpack import Archive from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, @@ -13,7 +12,6 @@ from datumaro.components.project import Dataset from .registry import dm_env, exporter, importer - from datumaro.util.tf_util import import_tf try: import_tf() @@ -23,20 +21,19 @@ except ImportError: @exporter(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available) -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'tf_detection_api', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'tf_detection_api', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available) -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'tf_detection_api', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/velodynepoint.py b/cvat/apps/dataset_manager/formats/velodynepoint.py index 887a4056..1c917b3e 100644 --- a/cvat/apps/dataset_manager/formats/velodynepoint.py +++ b/cvat/apps/dataset_manager/formats/velodynepoint.py @@ -1,9 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset @@ -18,24 +18,23 @@ from .registry import exporter, importer @exporter(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _export_images(dst_file, task_data, save_images=False): +def _export_images(dst_file, temp_dir, task_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( - task_data, include_images=save_images, format_type="kitti_raw", dimension=DimensionType.DIM_3D), env=dm_env) + task_data, include_images=save_images, format_type="kitti_raw", + dimension=DimensionType.DIM_3D), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True) + dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_file) @importer(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - if zipfile.is_zipfile(src_file): - zipfile.ZipFile(src_file).extractall(tmp_dir) - dataset = Dataset.import_from(tmp_dir, 'kitti_raw', env=dm_env) - else: - dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + if zipfile.is_zipfile(src_file): + zipfile.ZipFile(src_file).extractall(temp_dir) + dataset = Dataset.import_from(temp_dir, 'kitti_raw', env=dm_env) + else: + dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index bc296ca1..b1cbbaee 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -1,10 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset @@ -16,22 +15,21 @@ from .registry import dm_env, exporter, importer @exporter(name='VGGFace2', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'vgg_face2', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'vgg_face2', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='VGGFace2', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env) - if isinstance(instance_data, TaskData): - dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'vgg_face2', env=dm_env) + if isinstance(instance_data, TaskData): + dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/widerface.py b/cvat/apps/dataset_manager/formats/widerface.py index afa3cdac..04dd9d69 100644 --- a/cvat/apps/dataset_manager/formats/widerface.py +++ b/cvat/apps/dataset_manager/formats/widerface.py @@ -1,9 +1,9 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import zipfile -from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset @@ -15,20 +15,19 @@ from .registry import dm_env, exporter, importer @exporter(name='WiderFace', ext='ZIP', version='1.0') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'wider_face', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'wider_face', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='WiderFace', ext='ZIP', version='1.0') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - zipfile.ZipFile(src_file).extractall(tmp_dir) - - dataset = Dataset.import_from(tmp_dir, 'wider_face', env=dm_env) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + zipfile.ZipFile(src_file).extractall(temp_dir) + + dataset = Dataset.import_from(temp_dir, 'wider_face', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index f72bc980..d7861c5c 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -1,10 +1,10 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import os.path as osp from glob import glob -from tempfile import TemporaryDirectory from pyunpack import Archive @@ -19,37 +19,36 @@ from .registry import dm_env, exporter, importer @exporter(name='YOLO', ext='ZIP', version='1.1') -def _export(dst_file, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False): dataset = Dataset.from_extractors(GetCVATDataExtractor( instance_data, include_images=save_images), env=dm_env) - with TemporaryDirectory() as temp_dir: - dataset.export(temp_dir, 'yolo', save_images=save_images) - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'yolo', save_images=save_images) + + make_zip_archive(temp_dir, dst_file) @importer(name='YOLO', ext='ZIP', version='1.1') -def _import(src_file, instance_data, load_data_callback=None, **kwargs): - with TemporaryDirectory() as tmp_dir: - Archive(src_file.name).extractall(tmp_dir) - - image_info = {} - frames = [YoloExtractor.name_from_path(osp.relpath(p, tmp_dir)) - for p in glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)] - root_hint = find_dataset_root( - [DatasetItem(id=frame) for frame in frames], instance_data) - for frame in frames: - frame_info = None - try: - frame_id = match_dm_item(DatasetItem(id=frame), instance_data, - root_hint=root_hint) - frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec - pass - if frame_info is not None: - image_info[frame] = (frame_info['height'], frame_info['width']) - - dataset = Dataset.import_from(tmp_dir, 'yolo', - env=dm_env, image_info=image_info) - if load_data_callback is not None: - load_data_callback(dataset, instance_data) - import_dm_annotations(dataset, instance_data) +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + Archive(src_file.name).extractall(temp_dir) + + image_info = {} + frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) + for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)] + root_hint = find_dataset_root( + [DatasetItem(id=frame) for frame in frames], instance_data) + for frame in frames: + frame_info = None + try: + frame_id = match_dm_item(DatasetItem(id=frame), instance_data, + root_hint=root_hint) + frame_info = instance_data.frame_info[frame_id] + except Exception: # nosec + pass + if frame_info is not None: + image_info[frame] = (frame_info['height'], frame_info['width']) + + dataset = Dataset.import_from(temp_dir, 'yolo', + env=dm_env, image_info=image_info) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index 4858429f..e52fb2eb 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -1,7 +1,10 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT +import os +from tempfile import TemporaryDirectory import rq from typing import Any, Callable, List, Mapping, Tuple @@ -127,7 +130,11 @@ class ProjectAnnotationAndData: db_project=self.db_project, host=host ) - exporter(dst_file, project_data, **options) + + temp_dir_base = self.db_project.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + exporter(dst_file, temp_dir, project_data, **options) def load_dataset_data(self, *args, **kwargs): load_dataset_data(self, *args, **kwargs) @@ -141,7 +148,10 @@ class ProjectAnnotationAndData: ) project_data.soft_attribute_import = True - importer(dataset_file, project_data, self.load_dataset_data, **options) + temp_dir_base = self.db_project.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + importer(dataset_file, temp_dir, project_data, self.load_dataset_data, **options) self.create({tid: ir.serialize() for tid, ir in self.annotation_irs.items() if tid in project_data.new_tasks}) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index fdedb494..66acbf17 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -1,11 +1,12 @@ - # Copyright (C) 2019-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT from collections import OrderedDict from enum import Enum +import os +from tempfile import TemporaryDirectory from django.db import transaction from django.db.models.query import Prefetch @@ -567,7 +568,11 @@ class JobAnnotation: db_job=self.db_job, host=host, ) - exporter(dst_file, job_data, **options) + + temp_dir_base = self.db_job.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + exporter(dst_file, temp_dir, job_data, **options) def import_annotations(self, src_file, importer, **options): job_data = JobData( @@ -577,7 +582,10 @@ class JobAnnotation: ) self.delete() - importer(src_file, job_data, **options) + temp_dir_base = self.db_job.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + importer(src_file, temp_dir, job_data, **options) self.create(job_data.data.slice(self.start_frame, self.stop_frame).serialize()) @@ -654,7 +662,11 @@ class TaskAnnotation: db_task=self.db_task, host=host, ) - exporter(dst_file, task_data, **options) + + temp_dir_base = self.db_task.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + exporter(dst_file, temp_dir, task_data, **options) def import_annotations(self, src_file, importer, **options): task_data = TaskData( @@ -664,7 +676,10 @@ class TaskAnnotation: ) self.delete() - importer(src_file, task_data, **options) + temp_dir_base = self.db_task.get_tmp_dirname() + os.makedirs(temp_dir_base, exist_ok=True) + with TemporaryDirectory(dir=temp_dir_base) as temp_dir: + importer(src_file, temp_dir, task_data, **options) self.create(task_data.data.serialize()) @@ -709,8 +724,7 @@ def delete_job_data(pk): annotation = JobAnnotation(pk) annotation.delete() -def export_job(job_id, dst_file, format_name, - server_url=None, save_images=False): +def export_job(job_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. # But there is the bug with corrupted dump file in case 2 or @@ -759,8 +773,7 @@ def delete_task_data(pk): annotation = TaskAnnotation(pk) annotation.delete() -def export_task(task_id, dst_file, format_name, - server_url=None, save_images=False): +def export_task(task_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. # But there is the bug with corrupted dump file in case 2 or diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 50263e07..ba133cc6 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -1,4 +1,5 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -72,7 +73,8 @@ def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=No instance_time = timezone.localtime(db_instance.updated_date).timestamp() if isinstance(db_instance, Project): - tasks_update = list(map(lambda db_task: timezone.localtime(db_task.updated_date).timestamp(), db_instance.tasks.all())) + tasks_update = list(map(lambda db_task: timezone.localtime( + db_task.updated_date).timestamp(), db_instance.tasks.all())) instance_time = max(tasks_update + [instance_time]) if not (osp.exists(output_path) and \ instance_time <= osp.getmtime(output_path)): @@ -120,10 +122,10 @@ def export_task_annotations(task_id, dst_format=None, server_url=None): def export_project_as_dataset(project_id, dst_format=None, server_url=None): return export(dst_format, project_id=project_id, server_url=server_url, save_images=True) - def export_project_annotations(project_id, dst_format=None, server_url=None): return export(dst_format, project_id=project_id, server_url=server_url, save_images=False) + def clear_export_cache(file_path, file_ctime, logger): try: if osp.exists(file_path) and osp.getctime(file_path) == file_ctime: diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index cd47e516..1084b254 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -468,6 +468,9 @@ class Job(models.Model): def get_dirname(self): return os.path.join(settings.JOBS_ROOT, str(self.id)) + def get_tmp_dirname(self): + return os.path.join(self.get_dirname(), 'tmp') + @extend_schema_field(OpenApiTypes.INT) def get_project_id(self): project = self.segment.task.project