[Datumaro] Support relative paths (#1715)

* Support relative image paths in Datumaro

* Update bindings

* Fix merge

* linter
main
zhiltsov-max 6 years ago committed by GitHub
parent 7f8348458d
commit 5912bf0447
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -11,6 +11,7 @@ from django.utils import timezone
import datumaro.components.extractor as datumaro
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType
from datumaro.util import cast
from datumaro.util.image import Image
from .annotation import AnnotationManager, TrackManager
@ -422,8 +423,9 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor):
size=(frame_data.height, frame_data.width)
)
dm_anno = self._read_cvat_anno(frame_data, task_data)
dm_item = datumaro.DatasetItem(id=frame_data.frame,
annotations=dm_anno, image=dm_image)
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image,
attributes={'frame': frame_data.frame})
dm_items.append(dm_item)
self._items = dm_items
@ -533,23 +535,21 @@ def match_frame(item, task_data):
is_video = task_data.meta['task']['mode'] == 'interpolation'
frame_number = None
if frame_number is None:
try:
frame_number = task_data.match_frame(item.id)
except Exception:
pass
if frame_number is None and item.has_image:
try:
frame_number = task_data.match_frame(item.image.filename)
frame_number = task_data.match_frame(item.image.path)
except Exception:
pass
if frame_number is None:
try:
frame_number = int(item.id)
frame_number = task_data.match_frame(item.id)
except Exception:
pass
if frame_number is None and is_video and item.id.startswith('frame_'):
frame_number = int(item.id[len('frame_'):])
if frame_number is None:
frame_number = cast(item.attributes.get('frame', item.id), int)
if frame_number is None and is_video:
frame_number = cast(osp.basename(item.id)[len('frame_'):], int)
if not frame_number in task_data.frame_info:
raise Exception("Could not match item id: '%s' with any task frame" %
item.id)

@ -17,8 +17,6 @@ from .registry import dm_env, exporter, importer
@exporter(name='LabelMe', ext='ZIP', version='3.0')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('id_from_image_name'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('label_me', save_images=save_images)

@ -24,7 +24,6 @@ def _export(dst_file, task_data, save_images=False):
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = extractor.transform(envt.get('id_from_image_name'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc_segmentation',

@ -7,8 +7,7 @@ from tempfile import TemporaryDirectory
from pyunpack import Archive
import datumaro.components.extractor as datumaro
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
match_frame)
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset
@ -18,8 +17,6 @@ from .registry import dm_env, exporter, importer
@exporter(name='MOT', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('id_from_image_name'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('mot_seq_gt',
@ -39,8 +36,8 @@ def _import(src_file, task_data):
label_cat = dataset.categories()[datumaro.AnnotationType.label]
for item in dataset:
item = item.wrap(id=int(item.id) - 1) # NOTE: MOT frames start from 1
frame_number = task_data.abs_frame_id(match_frame(item, task_data))
frame_number = int(item.id) - 1 # NOTE: MOT frames start from 1
frame_number = task_data.abs_frame_id(frame_number)
for ann in item.annotations:
if ann.type != datumaro.AnnotationType.bbox:

@ -22,8 +22,6 @@ from .registry import dm_env, exporter, importer
@exporter(name='PASCAL VOC', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('id_from_image_name'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc', label_map='source',

@ -578,7 +578,7 @@ class DatasetItem:
def __init__(self, id=None, annotations=None,
subset=None, path=None, image=None, attributes=None):
assert id is not None
self._id = str(id)
self._id = str(id).replace('\\', '/')
if subset is None:
subset = ''

@ -496,25 +496,19 @@ class _Converter:
def _get_image_id(self, item):
image_id = self._image_ids.get(item.id)
if image_id is None:
image_id = cast(item.id, int, len(self._image_ids) + 1)
image_id = cast(item.attributes.get('id'), int,
len(self._image_ids) + 1)
self._image_ids[item.id] = image_id
return image_id
def _save_image(self, item):
def _save_image(self, item, filename):
image = item.image.data
if image is None:
log.warning("Item '%s' has no image" % item.id)
return ''
filename = item.image.filename
if filename:
filename = osp.splitext(filename)[0]
else:
filename = item.id
filename += CocoPath.IMAGE_EXT
path = osp.join(self._images_dir, filename)
save_image(path, image)
return path
save_image(osp.join(self._images_dir, filename), image,
create_dir=True)
def convert(self):
self._make_dirs()
@ -534,12 +528,10 @@ class _Converter:
for task_conv in task_converters.values():
task_conv.save_categories(subset)
for item in subset:
filename = ''
if item.has_image:
filename = item.image.path
filename = item.id + CocoPath.IMAGE_EXT
if self._save_images:
if item.has_image:
filename = self._save_image(item)
self._save_image(item, filename)
else:
log.debug("Item '%s' has no image info" % item.id)
for task_conv in task_converters.values():

@ -126,8 +126,10 @@ class _CocoExtractor(SourceExtractor):
anns = loader.loadAnns(anns)
anns = sum((self._load_annotations(a, image_info) for a in anns), [])
items[img_id] = DatasetItem(id=img_id, subset=self._subset,
image=image, annotations=anns)
items[img_id] = DatasetItem(
id=osp.splitext(image_info['file_name'])[0],
subset=self._subset, image=image, annotations=anns,
attributes={'id': img_id})
return items

@ -163,26 +163,21 @@ class _SubsetWriter:
self._writer.close_root()
def _save_image(self, item):
def _save_image(self, item, filename):
image = item.image.data
if image is None:
log.warning("Item '%s' has no image" % item.id)
return ''
filename = item.image.filename
if filename:
filename = osp.splitext(filename)[0]
else:
filename = item.id
filename += CvatPath.IMAGE_EXT
image_path = osp.join(self._context._images_dir, filename)
save_image(image_path, image)
return filename
save_image(osp.join(self._context._images_dir, filename), image,
create_dir=True)
def _write_item(self, item, index):
image_info = OrderedDict([
("id", str(cast(item.id, int, index))),
("id", str(cast(item.attributes.get('frame'), int, index))),
])
filename = item.id + CvatPath.IMAGE_EXT
image_info["name"] = filename
if item.has_image:
size = item.image.size
if size:
@ -190,10 +185,8 @@ class _SubsetWriter:
image_info["width"] = str(w)
image_info["height"] = str(h)
filename = item.image.filename
if self._context._save_images:
filename = self._save_image(item)
image_info["name"] = filename
self._save_image(item, filename)
else:
log.debug("Item '%s' has no image info" % item.id)
self._writer.open_image(image_info)

@ -303,17 +303,14 @@ class CvatExtractor(SourceExtractor):
def _load_items(self, parsed):
for frame_id, item_desc in parsed.items():
path = item_desc.get('name', 'frame_%06d.png' % int(frame_id))
name = item_desc.get('name', 'frame_%06d.png' % int(frame_id))
image = osp.join(self._images_dir, name)
image_size = (item_desc.get('height'), item_desc.get('width'))
if all(image_size):
image_size = (int(image_size[0]), int(image_size[1]))
else:
image_size = None
image = None
if path:
image = Image(path=osp.join(self._images_dir, path),
size=image_size)
parsed[frame_id] = DatasetItem(id=frame_id, subset=self._subset,
image=image, annotations=item_desc.get('annotations'))
image = Image(path=image, size=tuple(map(int, image_size)))
parsed[frame_id] = DatasetItem(id=osp.splitext(name)[0],
subset=self._subset, image=image,
annotations=item_desc.get('annotations'),
attributes={'frame': int(frame_id)})
return parsed

@ -253,12 +253,7 @@ class _Converter:
if image is None:
return ''
filename = item.image.filename
if filename:
filename = osp.splitext(filename)[0]
else:
filename = item.id
filename += DatumaroPath.IMAGE_EXT
filename = item.id + DatumaroPath.IMAGE_EXT
image_path = osp.join(self._images_dir, filename)
save_image(image_path, image, create_dir=True)
return filename

@ -82,10 +82,11 @@ class DatumaroExtractor(SourceExtractor):
item_id = item_desc['id']
image = None
image_info = item_desc.get('image', {})
image_info = item_desc.get('image')
if image_info:
image_path = osp.join(self._images_dir,
image_info.get('path', '')) # relative or absolute fits
image_path = image_info.get('path') or \
item_id + DatumaroPath.IMAGE_EXT
image_path = osp.join(self._images_dir, image_path)
image = Image(path=image_path, size=image_info.get('size'))
annotations = self._load_annotations(item_desc)

@ -3,7 +3,6 @@
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os
import os.path as osp
@ -41,29 +40,24 @@ class ImageDirExtractor(SourceExtractor):
assert osp.isdir(url), url
items = []
for name in os.listdir(url):
path = osp.join(url, name)
if self._is_image(path):
item_id = osp.splitext(name)[0]
item = DatasetItem(id=item_id, image=path)
items.append((item.id, item))
items = sorted(items, key=lambda e: e[0])
items = OrderedDict(items)
for dirpath, _, filenames in os.walk(url):
for name in filenames:
path = osp.join(dirpath, name)
if not self._is_image(path):
continue
item_id = osp.relpath(osp.splitext(path)[0], url)
items.append(DatasetItem(id=item_id, image=path))
self._items = items
def __iter__(self):
for item in self._items.values():
for item in self._items:
yield item
def __len__(self):
return len(self._items)
def get(self, item_id, subset=None, path=None):
if path or subset:
raise KeyError()
return self._items[item_id]
def _is_image(self, path):
if not osp.isfile(path):
return False
@ -79,11 +73,5 @@ class ImageDirConverter(Converter):
for item in extractor:
if item.has_image and item.image.has_data:
filename = item.image.filename
if filename:
filename = osp.splitext(filename)[0]
else:
filename = item.id
filename += '.jpg'
save_image(osp.join(save_dir, filename), item.image.data,
create_dir=True)
save_image(osp.join(save_dir, item.id + '.jpg'),
item.image.data, create_dir=True)

@ -331,16 +331,13 @@ class LabelMeConverter(Converter, CliPlugin):
log.debug("Converting item '%s'", item.id)
image_filename = ''
if item.has_image:
image_filename = item.image.filename
if '/' in item.id:
raise Exception("Can't export item '%s': "
"LabelMe format only supports flat image layout" % item.id)
image_filename = item.id + LabelMePath.IMAGE_EXT
if self._save_images:
if item.has_image and item.image.has_data:
if image_filename:
image_filename = osp.splitext(image_filename)[0]
else:
image_filename = item.id
image_filename += LabelMePath.IMAGE_EXT
save_image(osp.join(subset_dir, image_filename),
item.image.data, create_dir=True)
else:

@ -311,7 +311,9 @@ class MotSeqGtConverter(Converter, CliPlugin):
if self._save_images:
if item.has_image and item.image.has_data:
self._save_image(item, index=frame_id)
save_image(osp.join(self._images_dir,
'%06d%s' % (frame_id, MotPath.IMAGE_EXT)),
item.image.data)
else:
log.debug("Item '%s' has no image" % item.id)
@ -320,13 +322,3 @@ class MotSeqGtConverter(Converter, CliPlugin):
f.write('\n'.join(l.name
for l in extractor.categories()[AnnotationType.label].items)
)
def _save_image(self, item, index):
if item.image.filename:
frame_id = osp.splitext(item.image.filename)[0]
else:
frame_id = item.id
frame_id = cast(frame_id, int, index)
image_filename = '%06d%s' % (frame_id, MotPath.IMAGE_EXT)
save_image(osp.join(self._images_dir, image_filename),
item.image.data)

@ -162,14 +162,12 @@ class TfDetectionApiConverter(Converter, CliPlugin):
def _make_tf_example(self, item):
features = {
'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
'image/source_id': bytes_feature(
str(item.attributes.get('source_id') or '').encode('utf-8')
),
}
filename = ''
if item.has_image:
filename = item.image.filename
if not filename:
filename = item.id + DetectionApiPath.IMAGE_EXT
filename = item.id + DetectionApiPath.IMAGE_EXT
features['image/filename'] = bytes_feature(filename.encode('utf-8'))
if not item.has_image:

@ -145,9 +145,7 @@ class TfDetectionApiExtractor(SourceExtractor):
continue
dataset_labels[label] = label_id - 1
item_id = frame_id
if not item_id:
item_id = osp.splitext(frame_filename)[0]
item_id = osp.splitext(frame_filename)[0]
annotations = []
for shape_id, shape in enumerate(
@ -188,6 +186,7 @@ class TfDetectionApiExtractor(SourceExtractor):
image = Image(**image_params, size=image_size)
dataset_items.append(DatasetItem(id=item_id, subset=subset,
image=image, annotations=annotations))
image=image, annotations=annotations,
attributes={'source_id': frame_id}))
return dataset_items, dataset_labels

@ -364,10 +364,13 @@ class RandomSplit(Transform, CliPlugin):
class IdFromImageName(Transform, CliPlugin):
def transform_item(self, item):
name = item.id
if item.has_image and item.image.filename:
name = osp.splitext(item.image.filename)[0]
return self.wrap_item(item, id=name)
if item.has_image and item.image.path:
name = osp.splitext(osp.basename(item.image.path))[0]
return self.wrap_item(item, id=name)
else:
log.debug("Can't change item id for item '%s': "
"item has no image info" % item.id)
return item
class RemapLabels(Transform, CliPlugin):
DefaultAction = Enum('DefaultAction', ['keep', 'delete'])

@ -135,16 +135,9 @@ class _Converter:
for item in subset:
log.debug("Converting item '%s'", item.id)
image_filename = ''
if item.has_image:
image_filename = item.image.filename
image_filename = item.id + VocPath.IMAGE_EXT
if self._save_images:
if item.has_image and item.image.has_data:
if image_filename:
image_filename = osp.splitext(image_filename)[0]
else:
image_filename = item.id
image_filename += VocPath.IMAGE_EXT
save_image(osp.join(self._images_dir, image_filename),
item.image.data, create_dir=True)
else:

@ -108,8 +108,8 @@ class _VocXmlExtractor(_VocExtractor):
for item_id in self._items:
log.debug("Reading item '%s'" % item_id)
image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
item_id + VocPath.IMAGE_EXT)
image = item_id + VocPath.IMAGE_EXT
height, width = 0, 0
anns = []
ann_file = osp.join(anno_dir, item_id + '.xml')
@ -121,11 +121,15 @@ class _VocXmlExtractor(_VocExtractor):
width = root_elem.find('size/width')
if width is not None:
width = int(width.text)
if height and width:
image = Image(path=image, size=(height, width))
filename_elem = root_elem.find('filename')
if filename_elem is not None:
image = filename_elem.text
anns = self._parse_annotations(root_elem)
image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image)
if height and width:
image = Image(path=image, size=(height, width))
yield DatasetItem(id=item_id, subset=self._subset,
image=image, annotations=anns)

@ -80,13 +80,9 @@ class YoloConverter(Converter, CliPlugin):
"item has no image info" % item.id)
height, width = item.image.size
image_name = item.image.filename
item_name = osp.splitext(item.image.filename)[0]
image_name = item.id + '.jpg'
if self._save_images:
if item.has_image and item.image.has_data:
if not item_name:
item_name = item.id
image_name = item_name + '.jpg'
save_image(osp.join(subset_dir, image_name),
item.image.data, create_dir=True)
else:
@ -105,7 +101,8 @@ class YoloConverter(Converter, CliPlugin):
yolo_bb = ' '.join('%.6f' % p for p in yolo_bb)
yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb)
annotation_path = osp.join(subset_dir, '%s.txt' % item_name)
annotation_path = osp.join(subset_dir, '%s.txt' % item.id)
os.makedirs(osp.dirname(annotation_path), exist_ok=True)
with open(annotation_path, 'w') as f:
f.write(yolo_annotation)

@ -10,6 +10,7 @@ import re
from datumaro.components.extractor import (SourceExtractor, Extractor,
DatasetItem, AnnotationType, Bbox, LabelCategories
)
from datumaro.util import split_path
from datumaro.util.image import Image
from .format import YoloPath
@ -83,14 +84,14 @@ class YoloExtractor(SourceExtractor):
config_path)
for subset_name, list_path in subsets.items():
list_path = self._make_local_path(list_path)
list_path = osp.join(self._path, self.localize_path(list_path))
if not osp.isfile(list_path):
raise Exception("Not found '%s' subset list file" % subset_name)
subset = YoloExtractor.Subset(subset_name, self)
with open(list_path, 'r') as f:
subset.items = OrderedDict(
(osp.splitext(osp.basename(p.strip()))[0], p.strip())
(self.name_from_path(p), self.localize_path(p))
for p in f
)
subsets[subset_name] = subset
@ -99,25 +100,38 @@ class YoloExtractor(SourceExtractor):
self._categories = {
AnnotationType.label:
self._load_categories(self._make_local_path(names_path))
self._load_categories(
osp.join(self._path, self.localize_path(names_path)))
}
def _make_local_path(self, path):
@staticmethod
def localize_path(path):
path = path.strip()
default_base = osp.join('data', '')
if path.startswith(default_base): # default path
path = path[len(default_base) : ]
return osp.join(self._path, path) # relative or absolute path
return path
@classmethod
def name_from_path(cls, path):
path = cls.localize_path(path)
parts = split_path(path)
if 1 < len(parts) and not osp.isabs(path):
# NOTE: when path is like [data/]<subset_obj>/<image_name>
# drop everything but <image name>
# <image name> can be <a/b/c/filename.ext>, so no just basename()
path = osp.join(*parts[1:])
return osp.splitext(path)[0]
def _get(self, item_id, subset_name):
subset = self._subsets[subset_name]
item = subset.items[item_id]
if isinstance(item, str):
image_path = self._make_local_path(item)
image_size = self._image_info.get(item_id)
image = Image(path=image_path, size=image_size)
image = Image(path=osp.join(self._path, item), size=image_size)
anno_path = osp.splitext(image_path)[0] + '.txt'
anno_path = osp.splitext(image.path)[0] + '.txt'
annotations = self._parse_annotations(anno_path, image)
item = DatasetItem(id=item_id, subset=subset_name,
@ -137,8 +151,10 @@ class YoloExtractor(SourceExtractor):
annotations = []
if lines:
# use image info as late as possible
image_height, image_width = image.size
size = image.size # use image info as late as possible
if size is None:
raise Exception("Can't find image info for '%s'" % image.path)
image_height, image_width = size
for line in lines:
label_id, xc, yc, w, h = line.split()
label_id = int(label_id)

@ -215,10 +215,6 @@ class Image:
def path(self):
return self._path
@property
def filename(self):
return osp.basename(self._path)
@property
def data(self):
if callable(self._data):

@ -87,6 +87,7 @@ def compare_datasets(test, expected, actual):
item_b = find(actual, lambda x: x.id == item_a.id and \
x.subset == item_a.subset)
test.assertFalse(item_b is None, item_a.id)
test.assertEqual(item_a.attributes, item_b.attributes)
test.assertEqual(len(item_a.annotations), len(item_b.annotations))
for ann_a in item_a.annotations:
# We might find few corresponding items, so check them all

@ -51,4 +51,4 @@
<walking>1</walking>
</actions>
</object>
</annotation>
</annotation>

@ -28,7 +28,8 @@ class CocoImporterTest(TestCase):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((10, 5, 3)), subset='val',
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
annotations=[
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
id=1, group=1, attributes={'is_crowd': False}),
@ -76,16 +77,16 @@ class CocoConverterTest(TestCase):
annotations=[
Caption('hello', id=1, group=1),
Caption('world', id=2, group=2),
]),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train',
annotations=[
Caption('test', id=3, group=3),
]),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val',
annotations=[
Caption('word', id=1, group=1),
]
], attributes={'id': 1}
),
])
@ -111,7 +112,7 @@ class CocoConverterTest(TestCase):
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
]),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
# Mask + bbox
@ -125,7 +126,7 @@ class CocoConverterTest(TestCase):
label=4, group=3, id=3),
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
]),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
@ -140,7 +141,7 @@ class CocoConverterTest(TestCase):
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
]),
], attributes={'id': 1}),
])
def categories(self):
@ -154,7 +155,7 @@ class CocoConverterTest(TestCase):
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
attributes={ 'is_crowd': False },
label=2, group=1, id=1),
]),
], attributes={'id': 1}),
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
annotations=[
Mask(np.array([
@ -165,7 +166,7 @@ class CocoConverterTest(TestCase):
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
]),
], attributes={'id': 2}),
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
annotations=[
@ -177,7 +178,7 @@ class CocoConverterTest(TestCase):
),
attributes={ 'is_crowd': True },
label=4, group=3, id=3),
]),
], attributes={'id': 1}),
])
def categories(self):
@ -227,7 +228,7 @@ class CocoConverterTest(TestCase):
),
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
]
], attributes={'id': 1}
),
])
@ -285,7 +286,7 @@ class CocoConverterTest(TestCase):
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
label=1, id=2, group=2,
attributes={ 'is_crowd': False }),
]
], attributes={'id': 1}
),
])
@ -335,7 +336,7 @@ class CocoConverterTest(TestCase):
),
attributes={ 'is_crowd': True },
label=3, id=4, group=4),
]
], attributes={'id': 1}
),
])
@ -385,7 +386,7 @@ class CocoConverterTest(TestCase):
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
]
], attributes={'id': 1}
),
])
@ -401,14 +402,14 @@ class CocoConverterTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train'),
DatasetItem(id=2, subset='train'),
DatasetItem(id=1, subset='train', attributes={'id': 1}),
DatasetItem(id=2, subset='train', attributes={'id': 2}),
DatasetItem(id=2, subset='val'),
DatasetItem(id=3, subset='val'),
DatasetItem(id=4, subset='val'),
DatasetItem(id=2, subset='val', attributes={'id': 2}),
DatasetItem(id=3, subset='val', attributes={'id': 3}),
DatasetItem(id=4, subset='val', attributes={'id': 4}),
DatasetItem(id=5, subset='test'),
DatasetItem(id=5, subset='test', attributes={'id': 1}),
])
with TestDir() as test_dir:
@ -423,7 +424,7 @@ class CocoConverterTest(TestCase):
annotations=[
Label(4, id=1, group=1),
Label(9, id=2, group=2),
]
], attributes={'id': 1}
),
])
@ -511,7 +512,7 @@ class CocoConverterTest(TestCase):
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
group=5, id=5,
attributes={'is_crowd': False}),
]),
], attributes={'id': 1}),
])
with TestDir() as test_dir:
@ -523,8 +524,8 @@ class CocoConverterTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1),
DatasetItem(id=2),
DatasetItem(id=1, attributes={'id': 1}),
DatasetItem(id=2, attributes={'id': 2}),
])
def categories(self):
@ -538,9 +539,38 @@ class CocoConverterTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15))),
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
attributes={'id': 1}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info'), test_dir)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'id': 1}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'id': 2}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'id': 3}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info', save_images=True), test_dir)
def test_preserve_coco_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'id': 40}),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info'), test_dir)
CocoConverter(tasks='image_info', save_images=True), test_dir)

@ -30,7 +30,7 @@ class CvatImporterTest(TestCase):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='train',
DatasetItem(id='img0', subset='train',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, z_order=1,
@ -40,15 +40,15 @@ class CvatImporterTest(TestCase):
}),
PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False}),
]),
DatasetItem(id=1, subset='train',
], attributes={'frame': 0}),
DatasetItem(id='img1', subset='train',
image=np.ones((10, 10, 3)),
annotations=[
Polygon([1, 2, 3, 4, 6, 5], z_order=1,
attributes={'occluded': False}),
Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
attributes={'occluded': False}),
]),
], attributes={'frame': 1}),
])
def categories(self):
@ -65,7 +65,7 @@ class CvatImporterTest(TestCase):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=10, subset='annotations',
DatasetItem(id='frame_000010', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(3, 4, 7, 1, label=2,
@ -83,8 +83,8 @@ class CvatImporterTest(TestCase):
'outside': False, 'keyframe': True,
'track_id': 1, 'hgl': 'hgkf',
}),
]),
DatasetItem(id=13, subset='annotations',
], attributes={'frame': 10}),
DatasetItem(id='frame_000013', subset='annotations',
image=np.ones((20, 25, 3)),
annotations=[
Bbox(7, 6, 7, 2, label=2,
@ -110,10 +110,9 @@ class CvatImporterTest(TestCase):
'outside': False, 'keyframe': True,
'track_id': 2,
}),
]),
DatasetItem(id=16, subset='annotations',
image=Image(path='frame_0000016.png',
size=(20, 25)), # no image in the dataset files
], attributes={'frame': 13}),
DatasetItem(id='frame_000016', subset='annotations',
image=Image(path='frame_0000016.png', size=(20, 25)),
annotations=[
Bbox(8, 7, 6, 10, label=2,
id=0,
@ -130,7 +129,7 @@ class CvatImporterTest(TestCase):
'outside': True, 'keyframe': True,
'track_id': 2,
}),
]),
], attributes={'frame': 16}),
])
def categories(self):
@ -220,7 +219,7 @@ class CvatConverterTest(TestCase):
'a1': 'x', 'a2': 42 }),
Label(1),
Label(2, attributes={ 'a1': 'y', 'a2': 44 }),
]
], attributes={'frame': 0}
),
DatasetItem(id=1, subset='s1',
annotations=[
@ -230,7 +229,7 @@ class CvatConverterTest(TestCase):
Bbox(5, 0, 1, 9,
label=3, group=4,
attributes={ 'occluded': False }),
]
], attributes={'frame': 1}
),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
@ -238,11 +237,12 @@ class CvatConverterTest(TestCase):
Polygon([0, 0, 4, 0, 4, 4], z_order=1,
label=3, group=4,
attributes={ 'occluded': False }),
]
], attributes={'frame': 0}
),
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
])
def categories(self):
@ -252,3 +252,49 @@ class CvatConverterTest(TestCase):
self._test_save_and_load(SrcExtractor(),
CvatConverter(save_images=True), test_dir,
target_dataset=DstExtractor())
def test_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3)),
attributes={'frame': 0}),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
attributes={'frame': 1}),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
attributes={'frame': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
CvatConverter(save_images=True), test_dir,
target_dataset=DstExtractor())
def test_preserve_frame_ids(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
attributes={'frame': 40}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CvatConverter(save_images=True), test_dir)

@ -14,6 +14,7 @@ from datumaro.util.mask_tools import generate_colormap
from datumaro.util.image import Image
from datumaro.util.test_utils import TestDir, compare_datasets_strict
class DatumaroConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir,
target_dataset=None, importer_args=None):
@ -96,3 +97,16 @@ class DatumaroConverterTest(TestCase):
DatumaroConverter()(self.TestExtractor(), save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir))
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
DatumaroConverter(save_images=True), test_dir)

@ -26,3 +26,23 @@ class ImageDirFormatTest(TestCase):
parsed_dataset = project.make_dataset()
compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir:
source_dataset = TestExtractor()
ImageDirConverter()(source_dataset, save_dir=test_dir)
project = Project.import_from(test_dir, 'image_dir')
parsed_dataset = project.make_dataset()
compare_datasets(self, source_dataset, parsed_dataset)

@ -108,6 +108,21 @@ class LabelMeConverterTest(TestCase):
SrcExtractor(), LabelMeConverter(save_images=True),
test_dir, target_dataset=DstExtractor())
def test_cant_save_dataset_with_relative_paths(self):
class SrcExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
LabelMeConverter(save_images=True), test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')

@ -56,7 +56,7 @@ class TfrecordConverterTest(TestCase):
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
]
], attributes={'source_id': ''}
),
])
@ -85,7 +85,8 @@ class TfrecordConverterTest(TestCase):
[0, 1, 1, 0],
[1, 0, 0, 1],
]), label=1),
]
],
attributes={'source_id': ''}
),
])
@ -111,18 +112,21 @@ class TfrecordConverterTest(TestCase):
annotations=[
Bbox(2, 1, 4, 4, label=2),
Bbox(4, 2, 8, 4, label=3),
]
],
attributes={'source_id': ''}
),
DatasetItem(id=2,
image=np.ones((8, 8, 3)) * 2,
annotations=[
Bbox(4, 4, 4, 4, label=3),
]
],
attributes={'source_id': ''}
),
DatasetItem(id=3,
image=np.ones((8, 4, 3)) * 3,
attributes={'source_id': ''}
),
])
@ -143,7 +147,10 @@ class TfrecordConverterTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=Image(path='1/q.e', size=(10, 15))),
DatasetItem(id='1/q.e',
image=Image(path='1/q.e', size=(10, 15)),
attributes={'source_id': ''}
)
])
def categories(self):
@ -199,6 +206,7 @@ class TfrecordImporterTest(TestCase):
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
attributes={'source_id': '1'}
),
DatasetItem(id=2, subset='val',
@ -206,10 +214,12 @@ class TfrecordImporterTest(TestCase):
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
attributes={'source_id': '2'}
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
attributes={'source_id': '3'}
),
])

@ -145,7 +145,7 @@ class VocConverterTest(TestCase):
class TestExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=0, subset='a', annotations=[
DatasetItem(id='a/0', subset='a', annotations=[
Label(1),
Label(2),
Label(3),
@ -164,7 +164,7 @@ class VocConverterTest(TestCase):
class TestExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/1', subset='a', annotations=[
Bbox(2, 3, 4, 5, label=2,
attributes={ 'occluded': True }
),
@ -183,7 +183,7 @@ class VocConverterTest(TestCase):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/1', subset='a', annotations=[
Bbox(2, 3, 4, 5, label=2, id=1, group=1,
attributes={
'truncated': False,
@ -220,7 +220,7 @@ class VocConverterTest(TestCase):
class TestExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/b/1', subset='a', annotations=[
# overlapping masks, the first should be truncated
# the second and third are different instances
Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3,
@ -235,7 +235,7 @@ class VocConverterTest(TestCase):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/b/1', subset='a', annotations=[
Mask(image=np.array([[0, 0, 1, 0, 0]]), label=4,
group=1),
Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3,
@ -323,7 +323,7 @@ class VocConverterTest(TestCase):
class TestExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/b/1', subset='a', annotations=[
Bbox(2, 3, 4, 5, label=2, id=1, group=1,
attributes={
'pose': VOC.VocPose(1).name,
@ -347,7 +347,7 @@ class VocConverterTest(TestCase):
class TestExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/b/1', subset='a', annotations=[
Bbox(2, 3, 4, 5, label=2,
attributes={
'truncated': True,
@ -368,7 +368,7 @@ class VocConverterTest(TestCase):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='a', annotations=[
DatasetItem(id='a/b/1', subset='a', annotations=[
Bbox(2, 3, 4, 5, label=2,
id=1, group=1, attributes={
'truncated': True,
@ -666,3 +666,16 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc'), test_dir)
def test_relative_paths(self):
    """VOC export/import must round-trip items with nested ids."""
    class RelPathExtractor(TestExtractorBase):
        def __iter__(self):
            specs = [
                ('1', (4, 2, 3)),
                ('subdir1/1', (2, 6, 3)),
                ('subdir2/1', (5, 4, 3)),
            ]
            return iter(DatasetItem(id=item_id, image=np.ones(shape))
                for item_id, shape in specs)

    with TestDir() as test_dir:
        self._test_save_and_load(RelPathExtractor(),
            VocConverter(label_map='voc', save_images=True), test_dir)

@ -116,6 +116,32 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
def test_relative_paths(self):
    """YOLO export/import must round-trip items with nested ids, with
    and without saving images."""
    class RelPathExtractor(Extractor):
        def __iter__(self):
            for item_id, shape in [
                    ('1', (4, 2, 3)),
                    ('subdir1/1', (2, 6, 3)),
                    ('subdir2/1', (5, 4, 3))]:
                yield DatasetItem(id=item_id, subset='train',
                    image=np.ones(shape))

        def categories(self):
            return { AnnotationType.label: LabelCategories() }

    for save_images in {True, False}:
        with self.subTest(save_images=save_images):
            with TestDir() as test_dir:
                source = RelPathExtractor()
                YoloConverter(save_images=save_images)(source, test_dir)
                # Import back and verify nothing was lost or renamed
                parsed = YoloImporter()(test_dir).make_dataset()
                compare_datasets(self, source, parsed)
# Pre-built YOLO dataset fixture used by the importer tests below.
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'yolo_dataset')

Loading…
Cancel
Save