You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
86 lines
3.0 KiB
Python
86 lines
3.0 KiB
Python
# Copyright (C) 2021-2022 Intel Corporation
|
|
# Copyright (C) 2023 CVAT.ai Corporation
|
|
#
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
import glob
|
|
import os.path as osp
|
|
|
|
from datumaro.components.dataset import Dataset, DatasetItem
|
|
from datumaro.plugins.open_images_format import OpenImagesPath
|
|
from datumaro.util.image import DEFAULT_IMAGE_META_FILE_NAME
|
|
from pyunpack import Archive
|
|
|
|
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
|
|
find_dataset_root, import_dm_annotations, match_dm_item)
|
|
from cvat.apps.dataset_manager.util import make_zip_archive
|
|
|
|
from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons
|
|
from .registry import dm_env, exporter, importer
|
|
|
|
|
|
def find_item_ids(path):
|
|
image_desc_patterns = (
|
|
OpenImagesPath.FULL_IMAGE_DESCRIPTION_FILE_NAME,
|
|
*OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS
|
|
)
|
|
|
|
image_desc_patterns = (
|
|
osp.join(path, OpenImagesPath.ANNOTATIONS_DIR, pattern)
|
|
for pattern in image_desc_patterns
|
|
)
|
|
|
|
for pattern in image_desc_patterns:
|
|
for path in glob.glob(pattern):
|
|
with open(path, 'r') as desc:
|
|
next(desc)
|
|
for row in desc:
|
|
yield row.split(',')[0]
|
|
|
|
@exporter(name='Open Images V6', ext='ZIP', version='1.0')
|
|
def _export(dst_file, temp_dir, task_data, save_images=False):
|
|
dataset = Dataset.from_extractors(GetCVATDataExtractor(
|
|
task_data, include_images=save_images), env=dm_env)
|
|
dataset.transform(RotatedBoxesToPolygons)
|
|
dataset.transform('polygons_to_masks')
|
|
dataset.transform('merge_instance_segments')
|
|
|
|
dataset.export(temp_dir, 'open_images', save_images=save_images)
|
|
|
|
make_zip_archive(temp_dir, dst_file)
|
|
|
|
@importer(name='Open Images V6', ext='ZIP', version='1.0')
|
|
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
|
|
Archive(src_file.name).extractall(temp_dir)
|
|
|
|
image_meta_path = osp.join(temp_dir, OpenImagesPath.ANNOTATIONS_DIR,
|
|
DEFAULT_IMAGE_META_FILE_NAME)
|
|
image_meta = None
|
|
|
|
if not osp.isfile(image_meta_path):
|
|
image_meta = {}
|
|
item_ids = list(find_item_ids(temp_dir))
|
|
|
|
root_hint = find_dataset_root(
|
|
[DatasetItem(id=item_id) for item_id in item_ids], instance_data)
|
|
|
|
for item_id in item_ids:
|
|
frame_info = None
|
|
try:
|
|
frame_id = match_dm_item(DatasetItem(id=item_id),
|
|
instance_data, root_hint)
|
|
frame_info = instance_data.frame_info[frame_id]
|
|
except Exception: # nosec
|
|
pass
|
|
if frame_info is not None:
|
|
image_meta[item_id] = (frame_info['height'], frame_info['width'])
|
|
|
|
dataset = Dataset.import_from(temp_dir, 'open_images',
|
|
image_meta=image_meta, env=dm_env)
|
|
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
|
|
if load_data_callback is not None:
|
|
load_data_callback(dataset, instance_data)
|
|
import_dm_annotations(dataset, instance_data)
|
|
|
|
|