[Datumaro] Image control in converters (#1799)

* Update converter interface, add image control

* Update bindings

* update changelog

* Fix build
main
zhiltsov-max 6 years ago committed by GitHub
parent fd81d72111
commit f807714870
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -55,6 +55,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appearance block in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- Keyframe navigations and some switchers in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- [Datumaro] Added `convert` command to convert datasets directly (<https://github.com/opencv/cvat/pull/1837>)
- [Datumaro] Added an option to specify image extension when exporting datasets (<https://github.com/opencv/cvat/pull/1799>)
- [Datumaro] Added image copying when exporting datasets, if possible (<https://github.com/opencv/cvat/pull/1799>)
### Changed
- Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)
@ -65,6 +67,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added support for attributes in VOC XML format (https://github.com/opencv/cvat/pull/1792)
- Added annotation attributes in COCO format (https://github.com/opencv/cvat/pull/1782)
- Colorized object items in the side panel (<https://github.com/opencv/cvat/pull/1753>)
- [Datumaro] Annotation-less files are not generated anymore in COCO format, unless explicitly requested via the task list (<https://github.com/opencv/cvat/pull/1799>)
### Deprecated
-

@ -18,9 +18,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('coco_instances',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('coco_instances').convert(extractor,
save_dir=temp_dir, save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -48,11 +48,10 @@ class DatumaroProjectExporter:
def _export(self, task_data, save_dir, save_images=False):
dataset = CvatTaskDataExtractor(task_data, include_images=save_images)
converter = dm_env.make_converter('datumaro_project',
save_images=save_images,
config={ 'project_name': task_data.db_task.name, }
dm_env.converters.get('datumaro_project').convert(dataset,
save_dir=save_dir, save_images=save_images,
project_config={ 'project_name': task_data.db_task.name, }
)
converter(dataset, save_dir=save_dir)
project = Project.load(save_dir)
target_dir = project.config.project_dir

@ -19,8 +19,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('label_me', save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir,
save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -26,10 +26,9 @@ def _export(dst_file, task_data, save_images=False):
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc_segmentation',
apply_colormap=True, label_map=make_colormap(task_data),
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('voc_segmentation').convert(extractor,
save_dir=temp_dir, save_images=save_images,
apply_colormap=True, label_map=make_colormap(task_data))
make_zip_archive(temp_dir, dst_file)
@ -68,8 +67,9 @@ def normalize_label(label):
def make_colormap(task_data):
labels = sorted([label['name']
for _, label in task_data.meta['task']['labels']])
if 'background' not in labels:
labels.insert(0, 'background')
if 'background' in labels:
labels.remove('background')
labels.insert(0, 'background')
predefined = parse_default_colors()

@ -19,9 +19,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('mot_seq_gt',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('mot_seq_gt').convert(extractor,
save_dir=temp_dir, save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -24,9 +24,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc', label_map='source',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('voc').convert(extractor,
save_dir=temp_dir, save_images=save_images, label_map='source')
make_zip_archive(temp_dir, dst_file)

@ -27,9 +27,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('tf_detection_api',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('tf_detection_api').convert(extractor,
save_dir=temp_dir, save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -23,8 +23,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('yolo', save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('yolo').convert(extractor,
save_dir=temp_dir, save_images=save_images)
make_zip_archive(temp_dir, dst_file)

@ -340,9 +340,9 @@ def export_command(args):
raise CliException("Converter for format '%s' is not found" % \
args.format)
if hasattr(converter, 'from_cmdline'):
extra_args = converter.from_cmdline(args.extra_args)
converter = converter(**extra_args)
extra_args = converter.from_cmdline(args.extra_args)
def converter_proxy(extractor, save_dir):
return converter.convert(extractor, save_dir, **extra_args)
filter_args = FilterModes.make_filter_args(args.filter_mode)
@ -352,7 +352,7 @@ def export_command(args):
log.info("Exporting the project...")
dataset.export_project(
save_dir=dst_dir,
converter=converter,
converter=converter_proxy,
filter_expr=args.filter,
**filter_args)
log.info("Project exported to '%s' as '%s'" % \

@ -3,17 +3,77 @@
#
# SPDX-License-Identifier: MIT
class Converter:
def __init__(self, cmdline_args=None):
pass
import logging as log
import os
import os.path as osp
import shutil
def __call__(self, extractor, save_dir):
raise NotImplementedError()
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import save_image
def _parse_cmdline(self, cmdline):
parser = self.build_cmdline_parser()
if len(cmdline) != 0 and cmdline[0] == '--':
cmdline = cmdline[1:]
args = parser.parse_args(cmdline)
return vars(args)
class IConverter:
    """Interface for dataset converters: a one-shot export entry point."""

    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Export the items of *extractor* into *save_dir*.

        Subclasses must override this; *options* are format-specific
        keyword arguments.
        """
        raise NotImplementedError("Should be implemented in a subclass")
class Converter(IConverter, CliPlugin):
    """Base class for dataset converters with image-saving support.

    Provides common CLI options (``--save-images``, ``--image-ext``) and
    helpers to pick an output image extension and to write or copy images.
    Subclasses set DEFAULT_IMAGE_EXT and implement apply().
    """

    # Fallback output image extension; must be overridden by subclasses
    # (asserted non-empty in __init__).
    DEFAULT_IMAGE_EXT = None

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        """Extend the CliPlugin parser with common image-control options."""
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--save-images', action='store_true',
            help="Save images (default: %(default)s)")
        parser.add_argument('--image-ext', default=None,
            help="Image extension (default: keep or use format default%s)" % \
                (' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else ''))
        return parser

    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Instantiate the converter and run the export (IConverter impl.)."""
        converter = cls(extractor, save_dir, **options)
        return converter.apply()

    def apply(self):
        """Perform the actual export; implemented by format subclasses."""
        raise NotImplementedError("Should be implemented in a subclass")

    def __init__(self, extractor, save_dir, save_images=False,
            image_ext=None, default_image_ext=None):
        """
        Parameters:
            extractor: source dataset to export
            save_dir: output directory
            save_images: whether to write image files alongside annotations
            image_ext: forced output image extension (overrides source ext)
            default_image_ext: fallback extension when the item has none;
                defaults to the class-level DEFAULT_IMAGE_EXT
        """
        default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT
        assert default_image_ext
        self._default_image_ext = default_image_ext

        self._save_images = save_images
        self._image_ext = image_ext
        self._extractor = extractor
        self._save_dir = save_dir

    def _find_image_ext(self, item):
        """Return the output extension for item: forced ext, else the
        source file's ext, else the format default."""
        src_ext = None
        if item.has_image:
            src_ext = osp.splitext(osp.basename(item.image.path))[1]
        return self._image_ext or src_ext or self._default_image_ext

    def _make_image_filename(self, item):
        """Output image filename: item id plus the chosen extension."""
        return item.id + self._find_image_ext(item)

    def _save_image(self, item, path=None):
        """Write the item's image to *path* (or the default filename).

        Copies the original file when the extension is unchanged and the
        source file exists (avoids re-encoding); otherwise re-encodes via
        save_image(). Returns the original image path when there is no
        pixel data to save.
        """
        image = item.image.data
        if image is None:
            log.warning("Item '%s' has no image", item.id)
            return item.image.path

        path = path or self._make_image_filename(item)

        src_ext = osp.splitext(osp.basename(item.image.path))[1]
        dst_ext = osp.splitext(osp.basename(path))[1]

        os.makedirs(osp.dirname(path), exist_ok=True)
        # Same extension and source file on disk: plain copy is lossless
        # and cheaper than decode + re-encode.
        if src_ext == dst_ext and osp.isfile(item.image.path):
            shutil.copyfile(item.image.path, path)
        else:
            save_image(path, image)

@ -624,9 +624,8 @@ class ProjectDataset(Dataset):
if merge:
# merge and save the resulting dataset
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self, dataset_save_dir)
self.env.converters.get(DEFAULT_FORMAT).convert(
self, dataset_save_dir, **converter_kwargs)
else:
if recursive:
# children items should already be updated
@ -635,9 +634,8 @@ class ProjectDataset(Dataset):
if isinstance(source, ProjectDataset):
source.save(**converter_kwargs)
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self.iterate_own(), dataset_save_dir)
self.env.converters.get(DEFAULT_FORMAT).convert(
self.iterate_own(), dataset_save_dir, **converter_kwargs)
project.save(save_dir)
except BaseException:

@ -3,26 +3,23 @@
#
# SPDX-License-Identifier: MIT
from enum import Enum
from itertools import groupby
import json
import logging as log
import os
import os.path as osp
from enum import Enum
from itertools import groupby
import pycocotools.mask as mask_utils
from datumaro.components.converter import Converter
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME,
AnnotationType, Points
)
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util import find, cast, str_to_bool
from datumaro.util.image import save_image
import datumaro.util.mask_tools as mask_tools
import datumaro.util.annotation_tools as anno_tools
import datumaro.util.mask_tools as mask_tools
from datumaro.components.converter import Converter
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
Points)
from datumaro.util import cast, find, str_to_bool
from .format import CocoTask, CocoPath
from .format import CocoPath, CocoTask
SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask'])
@ -443,7 +440,40 @@ class _LabelsConverter(_TaskConverter):
self.annotations.append(elem)
class _Converter:
class CocoConverter(Converter):
@staticmethod
def _split_tasks_string(s):
return [CocoTask[i.strip()] for i in s.split(',')]
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--segmentation-mode',
choices=[m.name for m in SegmentationMode],
default=SegmentationMode.guess.name,
help="""
Save mode for instance segmentation:|n
- '{sm.guess.name}': guess the mode for each instance,|n
|s|suse 'is_crowd' attribute as hint|n
- '{sm.polygons.name}': save polygons,|n
|s|smerge and convert masks, prefer polygons|n
- '{sm.mask.name}': save masks,|n
|s|smerge and convert polygons, prefer masks|n
Default: %(default)s.
""".format(sm=SegmentationMode))
parser.add_argument('--crop-covered', action='store_true',
help="Crop covered segments so that background objects' "
"segmentation was more accurate (default: %(default)s)")
parser.add_argument('--allow-attributes',
type=str_to_bool, default=True,
help="Allow export of attributes (default: %(default)s)")
parser.add_argument('--tasks', type=cls._split_tasks_string,
help="COCO task filter, comma-separated list of {%s} "
"(default: all)" % ', '.join(t.name for t in CocoTask))
return parser
DEFAULT_IMAGE_EXT = CocoPath.IMAGE_EXT
_TASK_CONVERTER = {
CocoTask.image_info: _ImageInfoConverter,
CocoTask.instances: _InstancesConverter,
@ -453,16 +483,16 @@ class _Converter:
}
def __init__(self, extractor, save_dir,
tasks=None, save_images=False, segmentation_mode=None,
crop_covered=False, allow_attributes=True):
tasks=None, segmentation_mode=None, crop_covered=False,
allow_attributes=True, **kwargs):
super().__init__(extractor, save_dir, **kwargs)
assert tasks is None or isinstance(tasks, (CocoTask, list, str))
if tasks is None:
tasks = list(self._TASK_CONVERTER)
elif isinstance(tasks, CocoTask):
if isinstance(tasks, CocoTask):
tasks = [tasks]
elif isinstance(tasks, str):
tasks = [CocoTask[tasks]]
else:
elif tasks:
for i, t in enumerate(tasks):
if isinstance(t, str):
tasks[i] = CocoTask[t]
@ -470,11 +500,6 @@ class _Converter:
assert t in CocoTask, t
self._tasks = tasks
self._extractor = extractor
self._save_dir = save_dir
self._save_images = save_images
assert segmentation_mode is None or \
isinstance(segmentation_mode, str) or \
segmentation_mode in SegmentationMode
@ -502,9 +527,8 @@ class _Converter:
return self._TASK_CONVERTER[task](self)
def _make_task_converters(self):
return {
task: self._make_task_converter(task) for task in self._tasks
}
return { task: self._make_task_converter(task)
for task in (self._tasks or self._TASK_CONVERTER) }
def _get_image_id(self, item):
image_id = self._image_ids.get(item.id)
@ -514,23 +538,14 @@ class _Converter:
self._image_ids[item.id] = image_id
return image_id
def _save_image(self, item, filename):
image = item.image.data
if image is None:
log.warning("Item '%s' has no image" % item.id)
return ''
def _save_image(self, item, path=None):
super()._save_image(item,
osp.join(self._images_dir, self._make_image_filename(item)))
save_image(osp.join(self._images_dir, filename), image,
create_dir=True)
def convert(self):
def apply(self):
self._make_dirs()
subsets = self._extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
for subset_name in subsets:
for subset_name in self._extractor.subsets() or [None]:
if subset_name:
subset = self._extractor.get_subset(subset_name)
else:
@ -541,94 +556,43 @@ class _Converter:
for task_conv in task_converters.values():
task_conv.save_categories(subset)
for item in subset:
filename = item.id + CocoPath.IMAGE_EXT
if self._save_images:
if item.has_image:
self._save_image(item, filename)
self._save_image(item)
else:
log.debug("Item '%s' has no image info" % item.id)
log.debug("Item '%s' has no image info", item.id)
for task_conv in task_converters.values():
task_conv.save_image_info(item, filename)
task_conv.save_image_info(item,
self._make_image_filename(item))
task_conv.save_annotations(item)
for task, task_conv in task_converters.items():
if task_conv.is_empty() and not self._tasks:
continue
task_conv.write(osp.join(self._ann_dir,
'%s_%s.json' % (task.name, subset_name)))
class CocoConverter(Converter, CliPlugin):
@staticmethod
def _split_tasks_string(s):
return [CocoTask[i.strip()] for i in s.split(',')]
@classmethod
def build_cmdline_parser(cls, **kwargs):
kwargs['description'] = """
Segmentation save modes:|n
- '{sm.guess.name}': guess the mode for each instance,|n
|s|suse 'is_crowd' attribute as a hint|n
- '{sm.polygons.name}': save polygons,|n
|s|smerge and convert masks, prefer polygons|n
- '{sm.mask.name}': save masks,|n
|s|smerge and convert polygons, prefer masks
""".format(sm=SegmentationMode)
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
parser.add_argument('--segmentation-mode',
choices=[m.name for m in SegmentationMode],
default=SegmentationMode.guess.name,
help="Save mode for instance segmentation (default: %(default)s)")
parser.add_argument('--crop-covered', action='store_true',
help="Crop covered segments so that background objects' "
"segmentation was more accurate (default: %(default)s)")
parser.add_argument('--allow-attributes',
type=str_to_bool, default=True,
help="Allow export of attributes (default: %(default)s)")
parser.add_argument('--tasks', type=cls._split_tasks_string,
default=None,
help="COCO task filter, comma-separated list of {%s} "
"(default: all)" % ', '.join([t.name for t in CocoTask]))
return parser
def __init__(self,
tasks=None, save_images=False, segmentation_mode=None,
crop_covered=False, allow_attributes=True):
super().__init__()
self._options = {
'tasks': tasks,
'save_images': save_images,
'segmentation_mode': segmentation_mode,
'crop_covered': crop_covered,
'allow_attributes': allow_attributes,
}
def __call__(self, extractor, save_dir):
converter = _Converter(extractor, save_dir, **self._options)
converter.convert()
class CocoInstancesConverter(CocoConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.instances
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class CocoImageInfoConverter(CocoConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.image_info
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class CocoPersonKeypointsConverter(CocoConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.person_keypoints
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class CocoCaptionsConverter(CocoConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.captions
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class CocoLabelsConverter(CocoConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = CocoTask.labels
super().__init__(**kwargs)
super().__init__(*args, **kwargs)

@ -3,25 +3,19 @@
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import logging as log
import os
import os.path as osp
from collections import OrderedDict
from xml.sax.saxutils import XMLGenerator
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.converter import Converter
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
from datumaro.util import cast
from datumaro.util.image import save_image
from datumaro.util import cast, pairwise
from .format import CvatPath
def pairwise(iterable):
    """Group the iterable into consecutive non-overlapping pairs.

    E.g. [a, b, c, d] -> (a, b), (c, d); a trailing odd element is dropped.
    """
    it = iter(iterable)
    return zip(it, it)
class XmlAnnotationWriter:
VERSION = '1.1'
@ -163,15 +157,6 @@ class _SubsetWriter:
self._writer.close_root()
def _save_image(self, item, filename):
image = item.image.data
if image is None:
log.warning("Item '%s' has no image" % item.id)
return ''
save_image(osp.join(self._context._images_dir, filename), image,
create_dir=True)
def _write_item(self, item, index):
image_info = OrderedDict([
("id", str(cast(item.attributes.get('frame'), int, index))),
@ -186,9 +171,10 @@ class _SubsetWriter:
image_info["height"] = str(h)
if self._context._save_images:
self._save_image(item, filename)
self._context._save_image(item,
osp.join(self._context._images_dir, filename))
else:
log.debug("Item '%s' has no image info" % item.id)
log.debug("Item '%s' has no image info", item.id)
self._writer.open_image(image_info)
for ann in item.annotations:
@ -321,15 +307,10 @@ class _SubsetWriter:
self._writer.close_tag()
class _Converter:
def __init__(self, extractor, save_dir, save_images=False):
self._extractor = extractor
self._save_dir = save_dir
self._save_images = save_images
def convert(self):
os.makedirs(self._save_dir, exist_ok=True)
class CvatConverter(Converter):
DEFAULT_IMAGE_EXT = CvatPath.IMAGE_EXT
def apply(self):
images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR)
os.makedirs(images_dir, exist_ok=True)
self._images_dir = images_dir
@ -348,22 +329,3 @@ class _Converter:
with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f:
writer = _SubsetWriter(f, subset_name, subset, self)
writer.write()
class CvatConverter(Converter, CliPlugin):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def __init__(self, save_images=False):
super().__init__()
self._options = {
'save_images': save_images,
}
def __call__(self, extractor, save_dir):
converter = _Converter(extractor, save_dir, **self._options)
converter.convert()

@ -17,9 +17,7 @@ from datumaro.components.extractor import (
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.util import cast
from datumaro.util.image import save_image
import pycocotools.mask as mask_utils
from datumaro.components.cli_plugin import CliPlugin
from .format import DatumaroPath
@ -213,13 +211,10 @@ class _SubsetWriter:
})
return converted
class _Converter:
def __init__(self, extractor, save_dir, save_images=False):
self._extractor = extractor
self._save_dir = save_dir
self._save_images = save_images
class DatumaroConverter(Converter):
DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT
def convert(self):
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
images_dir = osp.join(self._save_dir, DatumaroPath.IMAGES_DIR)
@ -230,9 +225,7 @@ class _Converter:
os.makedirs(annotations_dir, exist_ok=True)
self._annotations_dir = annotations_dir
subsets = self._extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
subsets = self._extractor.subsets() or [None]
subsets = [n or DEFAULT_SUBSET_NAME for n in subsets]
subsets = { name: _SubsetWriter(name, self) for name in subsets }
@ -248,55 +241,20 @@ class _Converter:
for subset, writer in subsets.items():
writer.write(annotations_dir)
def _save_image(self, item):
image = item.image.data
if image is None:
return ''
filename = item.id + DatumaroPath.IMAGE_EXT
image_path = osp.join(self._images_dir, filename)
save_image(image_path, image, create_dir=True)
return filename
class DatumaroConverter(Converter, CliPlugin):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def __init__(self, save_images=False):
super().__init__()
self._options = {
'save_images': save_images,
}
def __call__(self, extractor, save_dir):
converter = _Converter(extractor, save_dir, **self._options)
converter.convert()
def _save_image(self, item, path=None):
super()._save_image(item,
osp.join(self._images_dir, self._make_image_filename(item)))
class DatumaroProjectConverter(Converter):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def __init__(self, config=None, save_images=False):
self._config = config
self._save_images = save_images
def __call__(self, extractor, save_dir):
def convert(cls, extractor, save_dir, **kwargs):
os.makedirs(save_dir, exist_ok=True)
from datumaro.components.project import Project
project = Project.generate(save_dir, config=self._config)
project = Project.generate(save_dir,
config=kwargs.pop('project_config', None))
converter = project.env.make_converter('datumaro',
save_images=self._save_images)
converter(extractor, save_dir=osp.join(
project.config.project_dir, project.config.dataset_dir))
DatumaroConverter.convert(extractor,
save_dir=osp.join(
project.config.project_dir, project.config.dataset_dir),
**kwargs)

@ -3,12 +3,13 @@
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from datumaro.components.extractor import DatasetItem, SourceExtractor, Importer
from datumaro.components.converter import Converter
from datumaro.util.image import save_image
from datumaro.util.image import Image
class ImageDirImporter(Importer):
@ -32,8 +33,6 @@ class ImageDirImporter(Importer):
class ImageDirExtractor(SourceExtractor):
_SUPPORTED_FORMATS = ['.png', '.jpg']
def __init__(self, url):
super().__init__()
@ -43,11 +42,15 @@ class ImageDirExtractor(SourceExtractor):
for dirpath, _, filenames in os.walk(url):
for name in filenames:
path = osp.join(dirpath, name)
if not self._is_image(path):
try:
image = Image(path)
# force loading
image.data # pylint: disable=pointless-statement
except Exception:
continue
item_id = osp.relpath(osp.splitext(path)[0], url)
items.append(DatasetItem(id=item_id, image=path))
items.append(DatasetItem(id=item_id, image=image))
self._items = items
@ -58,20 +61,16 @@ class ImageDirExtractor(SourceExtractor):
def __len__(self):
return len(self._items)
def _is_image(self, path):
if not osp.isfile(path):
return False
for ext in self._SUPPORTED_FORMATS:
if path.endswith(ext):
return True
return False
class ImageDirConverter(Converter):
def __call__(self, extractor, save_dir):
os.makedirs(save_dir, exist_ok=True)
DEFAULT_IMAGE_EXT = '.jpg'
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
for item in extractor:
if item.has_image and item.image.has_data:
save_image(osp.join(save_dir, item.id + '.jpg'),
item.image.data, create_dir=True)
for item in self._extractor:
if item.has_image:
self._save_image(item,
osp.join(self._save_dir, self._make_image_filename(item)))
else:
log.debug("Item '%s' has no image info", item.id)

@ -15,7 +15,6 @@ from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME,
)
from datumaro.components.extractor import Importer
from datumaro.components.converter import Converter
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import Image, save_image
from datumaro.util.mask_tools import load_mask, find_mask_bbox
@ -254,8 +253,7 @@ class LabelMeImporter(Importer):
params.update(extra_params)
source_name = osp.splitext(osp.basename(subset_path))[0]
project.add_source(source_name,
{
project.add_source(source_name, {
'url': subset_path,
'format': self._EXTRACTOR_NAME,
'options': params,
@ -285,34 +283,18 @@ class LabelMeImporter(Importer):
return subset_paths
class LabelMeConverter(Converter, CliPlugin):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def __init__(self, save_images=False):
super().__init__()
self._save_images = save_images
def __call__(self, extractor, save_dir):
self._extractor = extractor
subsets = extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
class LabelMeConverter(Converter):
DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT
for subset_name in subsets:
def apply(self):
for subset_name in self._extractor.subsets() or [None]:
if subset_name:
subset = extractor.get_subset(subset_name)
subset = self._extractor.get_subset(subset_name)
else:
subset_name = DEFAULT_SUBSET_NAME
subset = extractor
subset = self._extractor
subset_dir = osp.join(save_dir, subset_name)
subset_dir = osp.join(self._save_dir, subset_name)
os.makedirs(subset_dir, exist_ok=True)
os.makedirs(osp.join(subset_dir, LabelMePath.MASKS_DIR),
exist_ok=True)
@ -335,13 +317,12 @@ class LabelMeConverter(Converter, CliPlugin):
raise Exception("Can't export item '%s': "
"LabelMe format only supports flat image layout" % item.id)
image_filename = item.id + LabelMePath.IMAGE_EXT
image_filename = self._make_image_filename(item)
if self._save_images:
if item.has_image and item.image.has_data:
save_image(osp.join(subset_dir, image_filename),
item.image.data, create_dir=True)
self._save_image(item, osp.join(subset_dir, image_filename))
else:
log.debug("Item '%s' has no image" % item.id)
log.debug("Item '%s' has no image", item.id)
root_elem = ET.Element('annotation')
ET.SubElement(root_elem, 'filename').text = image_filename

@ -19,7 +19,6 @@ from datumaro.components.extractor import (SourceExtractor,
)
from datumaro.components.extractor import Importer
from datumaro.components.converter import Converter
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util import cast
from datumaro.util.image import Image, save_image
@ -253,25 +252,17 @@ class MotSeqImporter(Importer):
subsets.append(p)
return subsets
class MotSeqGtConverter(Converter, CliPlugin):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().__init__(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def __init__(self, save_images=False):
super().__init__()
class MotSeqGtConverter(Converter):
DEFAULT_IMAGE_EXT = MotPath.IMAGE_EXT
self._save_images = save_images
def apply(self):
extractor = self._extractor
def __call__(self, extractor, save_dir):
images_dir = osp.join(save_dir, MotPath.IMAGE_DIR)
images_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR)
os.makedirs(images_dir, exist_ok=True)
self._images_dir = images_dir
anno_dir = osp.join(save_dir, 'gt')
anno_dir = osp.join(self._save_dir, 'gt')
os.makedirs(anno_dir, exist_ok=True)
anno_file = osp.join(anno_dir, MotPath.GT_FILENAME)
with open(anno_file, 'w', encoding="utf-8") as csv_file:
@ -291,6 +282,7 @@ class MotSeqGtConverter(Converter, CliPlugin):
if track_id not in track_id_mapping:
track_id_mapping[track_id] = len(track_id_mapping)
track_id = track_id_mapping[track_id]
writer.writerow({
'frame_id': frame_id,
'track_id': track_id,
@ -311,11 +303,10 @@ class MotSeqGtConverter(Converter, CliPlugin):
if self._save_images:
if item.has_image and item.image.has_data:
save_image(osp.join(self._images_dir,
'%06d%s' % (frame_id, MotPath.IMAGE_EXT)),
item.image.data)
self._save_image(item, osp.join(self._images_dir,
'%06d%s' % (frame_id, self._find_image_ext(item))))
else:
log.debug("Item '%s' has no image" % item.id)
log.debug("Item '%s' has no image", item.id)
labels_file = osp.join(anno_dir, MotPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:

@ -14,11 +14,10 @@ from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME,
LabelCategories
)
from datumaro.components.converter import Converter
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import encode_image
from datumaro.util.mask_tools import merge_masks
from datumaro.util.annotation_tools import (compute_bbox,
find_group_leader, find_instances)
from datumaro.util.mask_tools import merge_masks
from datumaro.util.tf_util import import_tf as _import_tf
from .format import DetectionApiPath
@ -45,26 +44,25 @@ def bytes_list_feature(value):
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
class TfDetectionApiConverter(Converter, CliPlugin):
class TfDetectionApiConverter(Converter):
DEFAULT_IMAGE_EXT = DetectionApiPath.DEFAULT_IMAGE_EXT
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
parser.add_argument('--save-masks', action='store_true',
help="Include instance masks (default: %(default)s)")
return parser
def __init__(self, save_images=False, save_masks=False):
super().__init__()
def __init__(self, extractor, save_dir, save_masks=False, **kwargs):
super().__init__(extractor, save_dir, **kwargs)
self._save_images = save_images
self._save_masks = save_masks
def __call__(self, extractor, save_dir):
os.makedirs(save_dir, exist_ok=True)
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
label_categories = extractor.categories().get(AnnotationType.label,
label_categories = self._extractor.categories().get(AnnotationType.label,
LabelCategories())
get_label = lambda label_id: label_categories.items[label_id].name \
if label_id is not None else ''
@ -74,18 +72,18 @@ class TfDetectionApiConverter(Converter, CliPlugin):
self._get_label = get_label
self._get_label_id = map_label_id
subsets = extractor.subsets()
subsets = self._extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
for subset_name in subsets:
if subset_name:
subset = extractor.get_subset(subset_name)
subset = self._extractor.get_subset(subset_name)
else:
subset_name = DEFAULT_SUBSET_NAME
subset = extractor
subset = self._extractor
labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE)
labelmap_path = osp.join(self._save_dir, DetectionApiPath.LABELMAP_FILE)
with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
for label, idx in label_ids.items():
f.write(
@ -95,7 +93,7 @@ class TfDetectionApiConverter(Converter, CliPlugin):
'}\n\n'
)
anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name))
anno_path = osp.join(self._save_dir, '%s.tfrecord' % (subset_name))
with tf.io.TFRecordWriter(anno_path) as writer:
for item in subset:
tf_example = self._make_tf_example(item)
@ -167,7 +165,7 @@ class TfDetectionApiConverter(Converter, CliPlugin):
),
}
filename = item.id + DetectionApiPath.IMAGE_EXT
filename = self._make_image_filename(item)
features['image/filename'] = bytes_feature(filename.encode('utf-8'))
if not item.has_image:
@ -186,8 +184,7 @@ class TfDetectionApiConverter(Converter, CliPlugin):
})
if self._save_images:
if item.has_image and item.image.has_data:
fmt = DetectionApiPath.IMAGE_FORMAT
buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT)
buffer, fmt = self._save_image(item, filename)
features.update({
'image/encoded': bytes_feature(buffer),
@ -204,3 +201,13 @@ class TfDetectionApiConverter(Converter, CliPlugin):
features=tf.train.Features(feature=features))
return tf_example
def _save_image(self, item, path=None):
    """Encode the item's image for embedding into a TFRecord entry.

    Returns a (buffer, fmt) pair: the encoded image bytes and the
    'image/format' string matching the destination extension, or None
    for fmt when the extension has no known format string.
    """
    # NOTE(review): path=None default would fail in osp.basename below —
    # callers appear to always pass a real path; confirm before relying on it.
    dst_ext = osp.splitext(osp.basename(path))[1]
    fmt = DetectionApiPath.IMAGE_EXT_FORMAT.get(dst_ext)
    if not fmt:
        # Use lazy logging args instead of eager %-formatting, matching
        # the logging style used elsewhere in these converters.
        log.warning("Item '%s': can't find format string for the '%s' "
            "image extension, the corresponding field will be empty.",
            item.id, dst_ext)
    buffer = encode_image(item.image.data, dst_ext)
    return buffer, fmt

@ -117,7 +117,6 @@ class TfDetectionApiExtractor(SourceExtractor):
frame_width = tf.cast(
parsed_record['image/width'], tf.int64).numpy().item()
frame_image = parsed_record['image/encoded'].numpy()
frame_format = parsed_record['image/format'].numpy().decode('utf-8')
xmins = tf.sparse.to_dense(
parsed_record['image/object/bbox/xmin']).numpy()
ymins = tf.sparse.to_dense(
@ -176,7 +175,7 @@ class TfDetectionApiExtractor(SourceExtractor):
image_size = (frame_height, frame_width)
image_params = {}
if frame_image and frame_format:
if frame_image:
image_params['data'] = lazy_image(frame_image, decode_image)
if frame_filename:
image_params['path'] = osp.join(images_dir, frame_filename)

@ -7,7 +7,7 @@ class DetectionApiPath:
IMAGES_DIR = 'images'
ANNOTATIONS_DIR = 'annotations'
IMAGE_EXT = '.jpg'
IMAGE_FORMAT = 'jpeg'
DEFAULT_IMAGE_EXT = '.jpg'
IMAGE_EXT_FORMAT = {'.jpg': 'jpeg', '.png': 'png'}
LABELMAP_FILE = 'label_map.pbtxt'

@ -3,20 +3,19 @@
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from collections import OrderedDict, defaultdict
from enum import Enum
from itertools import chain
import logging as log
from lxml import etree as ET
import os
import os.path as osp
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.converter import Converter
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
LabelCategories, CompiledMask,
)
from datumaro.util import str_to_bool, find
CompiledMask, LabelCategories)
from datumaro.util import find, str_to_bool
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, remap_mask
@ -50,10 +49,47 @@ def _write_xml_bbox(bbox, parent_elem):
LabelmapType = Enum('LabelmapType', ['voc', 'source', 'guess'])
class _Converter:
class VocConverter(Converter):
DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT
@staticmethod
def _split_tasks_string(s):
    """Parse a comma-separated list of VOC task names into VocTask values."""
    tasks = []
    for token in s.split(','):
        tasks.append(VocTask[token.strip()])
    return tasks
@staticmethod
def _get_labelmap(s):
    """Validate the --label-map CLI value.

    An existing file path is returned as-is; otherwise the value must
    name a LabelmapType member. Raises argparse.ArgumentTypeError for
    anything else, so argparse reports it as a bad argument.
    """
    if osp.isfile(s):
        return s
    try:
        return LabelmapType[s].name
    except KeyError:
        # argparse is imported locally: it is only needed on the CLI
        # error path, and this keeps the module import list lean.
        import argparse
        # An empty ArgumentTypeError gave the user no hint; include the
        # accepted values in the message.
        raise argparse.ArgumentTypeError(
            "Expected a labelmap file path or one of %s, got '%s'" % (
                ', '.join(t.name for t in LabelmapType), s))
@classmethod
def build_cmdline_parser(cls, **kwargs):
    """Extend the base converter CLI parser with VOC-specific options."""
    p = super().build_cmdline_parser(**kwargs)
    p.add_argument('--apply-colormap', type=str_to_bool, default=True,
        help="Use colormap for class and instance masks (default: %(default)s)")
    p.add_argument('--label-map', type=cls._get_labelmap, default=None,
        help="Labelmap file path or one of %s"
            % ', '.join(t.name for t in LabelmapType))
    p.add_argument('--allow-attributes', type=str_to_bool, default=True,
        help="Allow export of attributes (default: %(default)s)")
    p.add_argument('--tasks', type=cls._split_tasks_string,
        help="VOC task filter, comma-separated list of {%s} (default: all)"
            % ', '.join(t.name for t in VocTask))
    return p
def __init__(self, extractor, save_dir,
tasks=None, apply_colormap=True, save_images=False, label_map=None,
allow_attributes=True):
tasks=None, apply_colormap=True, label_map=None,
allow_attributes=True, **kwargs):
super().__init__(extractor, save_dir, **kwargs)
assert tasks is None or isinstance(tasks, (VocTask, list, set))
if tasks is None:
tasks = set(VocTask)
@ -63,20 +99,17 @@ class _Converter:
tasks = set(t if t in VocTask else VocTask[t] for t in tasks)
self._tasks = tasks
self._extractor = extractor
self._save_dir = save_dir
self._apply_colormap = apply_colormap
self._allow_attributes = allow_attributes
self._save_images = save_images
self._load_categories(label_map)
def convert(self):
self.init_dirs()
def apply(self):
self.make_dirs()
self.save_subsets()
self.save_label_map()
def init_dirs(self):
def make_dirs(self):
save_dir = self._save_dir
subsets_dir = osp.join(save_dir, VocPath.SUBSETS_DIR)
cls_subsets_dir = osp.join(subsets_dir,
@ -116,11 +149,7 @@ class _Converter:
categories()[AnnotationType.label].items[label_id].name
def save_subsets(self):
subsets = self._extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
for subset_name in subsets:
for subset_name in self._extractor.subsets() or [None]:
if subset_name:
subset = self._extractor.get_subset(subset_name)
else:
@ -136,13 +165,13 @@ class _Converter:
for item in subset:
log.debug("Converting item '%s'", item.id)
image_filename = item.id + VocPath.IMAGE_EXT
image_filename = self._make_image_filename(item)
if self._save_images:
if item.has_image and item.image.has_data:
save_image(osp.join(self._images_dir, image_filename),
item.image.data, create_dir=True)
self._save_image(item,
osp.join(self._images_dir, image_filename))
else:
log.debug("Item '%s' has no image" % item.id)
log.debug("Item '%s' has no image", item.id)
labels = []
bboxes = []
@ -269,9 +298,7 @@ class _Converter:
if len(attrs_elem):
obj_elem.append(attrs_elem)
if self._tasks & {None,
VocTask.detection,
VocTask.person_layout,
if self._tasks & {VocTask.detection, VocTask.person_layout,
VocTask.action_classification}:
ann_path = osp.join(self._ann_dir, item.id + '.xml')
os.makedirs(osp.dirname(ann_path), exist_ok=True)
@ -314,19 +341,16 @@ class _Converter:
action_list[item.id] = None
segm_list[item.id] = None
if self._tasks & {None,
VocTask.classification,
VocTask.detection,
VocTask.action_classification,
VocTask.person_layout}:
if self._tasks & {VocTask.classification, VocTask.detection,
VocTask.action_classification, VocTask.person_layout}:
self.save_clsdet_lists(subset_name, clsdet_list)
if self._tasks & {None, VocTask.classification}:
if self._tasks & {VocTask.classification}:
self.save_class_lists(subset_name, class_lists)
if self._tasks & {None, VocTask.action_classification}:
if self._tasks & {VocTask.action_classification}:
self.save_action_lists(subset_name, action_list)
if self._tasks & {None, VocTask.person_layout}:
if self._tasks & {VocTask.person_layout}:
self.save_layout_lists(subset_name, layout_list)
if self._tasks & {None, VocTask.segmentation}:
if self._tasks & {VocTask.segmentation}:
self.save_segm_lists(subset_name, segm_list)
def save_action_lists(self, subset_name, action_list):
@ -555,79 +579,27 @@ class _Converter:
def _remap_mask(self, mask):
return remap_mask(mask, self._label_id_mapping)
class VocConverter(Converter, CliPlugin):
@staticmethod
def _split_tasks_string(s):
return [VocTask[i.strip()] for i in s.split(',')]
@staticmethod
def _get_labelmap(s):
if osp.isfile(s):
return s
try:
return LabelmapType[s].name
except KeyError:
import argparse
raise argparse.ArgumentTypeError()
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
parser.add_argument('--apply-colormap', type=str_to_bool, default=True,
help="Use colormap for class and instance masks "
"(default: %(default)s)")
parser.add_argument('--label-map', type=cls._get_labelmap, default=None,
help="Labelmap file path or one of %s" % \
', '.join(t.name for t in LabelmapType))
parser.add_argument('--allow-attributes',
type=str_to_bool, default=True,
help="Allow export of attributes (default: %(default)s)")
parser.add_argument('--tasks', type=cls._split_tasks_string,
help="VOC task filter, comma-separated list of {%s} "
"(default: all)" % ', '.join(t.name for t in VocTask))
return parser
def __init__(self, tasks=None, save_images=False,
apply_colormap=False, label_map=None, allow_attributes=True):
super().__init__()
self._options = {
'tasks': tasks,
'save_images': save_images,
'apply_colormap': apply_colormap,
'label_map': label_map,
'allow_attributes': allow_attributes,
}
def __call__(self, extractor, save_dir):
converter = _Converter(extractor, save_dir, **self._options)
converter.convert()
class VocClassificationConverter(VocConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.classification
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class VocDetectionConverter(VocConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.detection
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class VocLayoutConverter(VocConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.person_layout
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class VocActionConverter(VocConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.action_classification
super().__init__(**kwargs)
super().__init__(*args, **kwargs)
class VocSegmentationConverter(VocConverter):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.segmentation
super().__init__(**kwargs)
super().__init__(*args, **kwargs)

@ -3,15 +3,13 @@
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import logging as log
import os
import os.path as osp
from collections import OrderedDict
from datumaro.components.converter import Converter
from datumaro.components.extractor import AnnotationType
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import save_image
from .format import YoloPath
@ -26,21 +24,14 @@ def _make_yolo_bbox(img_size, box):
h = (box[3] - box[1]) / img_size[1]
return x, y, w, h
class YoloConverter(Converter, CliPlugin):
class YoloConverter(Converter):
# https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects
DEFAULT_IMAGE_EXT = '.jpg'
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
return parser
def apply(self):
extractor = self._extractor
save_dir = self._save_dir
def __init__(self, save_images=False):
super().__init__()
self._save_images = save_images
def __call__(self, extractor, save_dir):
os.makedirs(save_dir, exist_ok=True)
label_categories = extractor.categories()[AnnotationType.label]
@ -50,13 +41,9 @@ class YoloConverter(Converter, CliPlugin):
f.writelines('%s\n' % l[0]
for l in sorted(label_ids.items(), key=lambda x: x[1]))
subsets = extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
subset_lists = OrderedDict()
for subset_name in subsets:
for subset_name in extractor.subsets() or [None]:
if subset_name and subset_name in YoloPath.SUBSET_NAMES:
subset = extractor.get_subset(subset_name)
elif not subset_name:
@ -80,11 +67,10 @@ class YoloConverter(Converter, CliPlugin):
"item has no image info" % item.id)
height, width = item.image.size
image_name = item.id + '.jpg'
image_name = self._make_image_filename(item)
if self._save_images:
if item.has_image and item.image.has_data:
save_image(osp.join(subset_dir, image_name),
item.image.data, create_dir=True)
self._save_image(item, osp.join(subset_dir, image_name))
else:
log.warning("Item '%s' has no image" % item.id)
image_paths[item.id] = osp.join('data',
@ -119,4 +105,4 @@ class YoloConverter(Converter, CliPlugin):
osp.join('data', subset_list_name)))
f.write('names = %s\n' % osp.join('data', 'obj.names'))
f.write('backup = backup/\n')
f.write('backup = backup/\n')

@ -62,6 +62,10 @@ def to_snake_case(s):
name.append(char)
return ''.join(name)
def pairwise(iterable):
    """Yield non-overlapping consecutive pairs from the input.

    Elements are consumed two at a time: [a, b, c, d] -> (a, b), (c, d).
    A trailing unpaired element is silently dropped, matching the
    behavior of zipping one iterator against itself.
    """
    it = iter(iterable)
    while True:
        try:
            first = next(it)
            second = next(it)
        except StopIteration:
            return
        yield first, second
def take_by(iterable, count):
"""
Returns elements from the input iterable by batches of N items.

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
import os.path as osp
@ -92,7 +93,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoCaptionsConverter(), test_dir)
CocoCaptionsConverter.convert, test_dir)
def test_can_save_and_load_instances(self):
label_categories = LabelCategories()
@ -186,7 +187,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoInstancesConverter(), test_dir,
CocoInstancesConverter.convert, test_dir,
target_dataset=DstExtractor())
def test_can_merge_polygons_on_loading(self):
@ -237,7 +238,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
CocoInstancesConverter(), test_dir,
CocoInstancesConverter.convert, test_dir,
importer_args={'merge_instance_polygons': True},
target_dataset=DstExtractor())
@ -295,8 +296,8 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
CocoInstancesConverter(crop_covered=True), test_dir,
target_dataset=DstTestExtractor())
partial(CocoInstancesConverter.convert, crop_covered=True),
test_dir, target_dataset=DstTestExtractor())
def test_can_convert_polygons_to_mask(self):
label_categories = LabelCategories()
@ -345,8 +346,8 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcTestExtractor(),
CocoInstancesConverter(segmentation_mode='mask'), test_dir,
target_dataset=DstTestExtractor())
partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
test_dir, target_dataset=DstTestExtractor())
def test_can_convert_masks_to_polygons(self):
label_categories = LabelCategories()
@ -395,8 +396,8 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
CocoInstancesConverter(segmentation_mode='polygons'), test_dir,
target_dataset=DstExtractor())
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
test_dir, target_dataset=DstExtractor())
def test_can_save_and_load_images(self):
class TestExtractor(Extractor):
@ -414,7 +415,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoImageInfoConverter(), test_dir)
CocoImageInfoConverter.convert, test_dir)
def test_can_save_and_load_labels(self):
class TestExtractor(Extractor):
@ -438,7 +439,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoLabelsConverter(), test_dir)
CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self):
label_categories = LabelCategories()
@ -517,7 +518,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoPersonKeypointsConverter(), test_dir,
CocoPersonKeypointsConverter.convert, test_dir,
target_dataset=DstTestExtractor())
def test_can_save_dataset_with_no_subsets(self):
@ -528,12 +529,9 @@ class CocoConverterTest(TestCase):
DatasetItem(id=2, attributes={'id': 2}),
])
def categories(self):
return { AnnotationType.label: LabelCategories() }
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(), test_dir)
CocoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self):
class TestExtractor(Extractor):
@ -545,7 +543,7 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info'), test_dir)
CocoImageInfoConverter.convert, test_dir)
def test_relative_paths(self):
class TestExtractor(Extractor):
@ -561,7 +559,8 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info', save_images=True), test_dir)
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
def test_preserve_coco_ids(self):
class TestExtractor(Extractor):
@ -573,7 +572,8 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(tasks='image_info', save_images=True), test_dir)
partial(CocoImageInfoConverter.convert, save_images=True),
test_dir)
def test_annotation_attributes(self):
class TestExtractor(Extractor):
@ -593,4 +593,4 @@ class CocoConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoConverter(), test_dir)
CocoConverter.convert, test_dir)

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
import os.path as osp
@ -250,7 +251,7 @@ class CvatConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
CvatConverter(save_images=True), test_dir,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
def test_relative_paths(self):
@ -281,7 +282,7 @@ class CvatConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
CvatConverter(save_images=True), test_dir,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=DstExtractor())
def test_preserve_frame_ids(self):
@ -297,4 +298,4 @@ class CvatConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CvatConverter(save_images=True), test_dir)
CvatConverter.convert, test_dir)

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
from unittest import TestCase
@ -90,11 +91,11 @@ class DatumaroConverterTest(TestCase):
def test_can_save_and_load(self):
with TestDir() as test_dir:
self._test_save_and_load(self.TestExtractor(),
DatumaroConverter(save_images=True), test_dir)
partial(DatumaroConverter.convert, save_images=True), test_dir)
def test_can_detect(self):
with TestDir() as test_dir:
DatumaroConverter()(self.TestExtractor(), save_dir=test_dir)
DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir)
self.assertTrue(DatumaroImporter.detect(test_dir))
@ -109,4 +110,4 @@ class DatumaroConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
DatumaroConverter(save_images=True), test_dir)
partial(DatumaroConverter.convert, save_images=True), test_dir)

@ -20,7 +20,7 @@ class ImageDirFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
ImageDirConverter()(source_dataset, save_dir=test_dir)
ImageDirConverter.convert(source_dataset, save_dir=test_dir)
project = Project.import_from(test_dir, 'image_dir')
parsed_dataset = project.make_dataset()
@ -39,7 +39,7 @@ class ImageDirFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
ImageDirConverter()(source_dataset, save_dir=test_dir)
ImageDirConverter.convert(source_dataset, save_dir=test_dir)
project = Project.import_from(test_dir, 'image_dir')
parsed_dataset = project.make_dataset()

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
import os.path as osp
@ -104,8 +105,8 @@ class LabelMeConverterTest(TestCase):
}
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), LabelMeConverter(save_images=True),
self._test_save_and_load(SrcExtractor(),
partial(LabelMeConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())
def test_cant_save_dataset_with_relative_paths(self):
@ -121,7 +122,8 @@ class LabelMeConverterTest(TestCase):
with self.assertRaisesRegex(Exception, r'only supports flat'):
with TestDir() as test_dir:
self._test_save_and_load(SrcExtractor(),
LabelMeConverter(save_images=True), test_dir)
partial(LabelMeConverter.convert, save_images=True),
test_dir)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
import os.path as osp
@ -114,8 +115,8 @@ class MotConverterTest(TestCase):
}
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), MotSeqGtConverter(save_images=True),
self._test_save_and_load(SrcExtractor(),
partial(MotSeqGtConverter.convert, save_images=True),
test_dir, target_dataset=DstExtractor())

@ -7,7 +7,6 @@ from unittest import TestCase
from datumaro.components.project import Project, Environment, Dataset
from datumaro.components.config_model import Source, Model
from datumaro.components.launcher import Launcher, ModelTransform
from datumaro.components.converter import Converter
from datumaro.components.extractor import (Extractor, DatasetItem,
Label, Mask, Points, Polygon, PolyLine, Bbox, Caption,
LabelCategories, AnnotationType
@ -181,12 +180,6 @@ class ProjectTest(TestCase):
for inp in inputs:
yield [ Label(inp[0, 0, 0]) ]
class TestConverter(Converter):
def __call__(self, extractor, save_dir):
for item in extractor:
with open(osp.join(save_dir, '%s.txt' % item.id), 'w') as f:
f.write(str(item.annotations[0].label) + '\n')
class TestExtractorDst(Extractor):
def __init__(self, url):
super().__init__()
@ -206,7 +199,6 @@ class ProjectTest(TestCase):
project = Project()
project.env.launchers.register(launcher_name, TestLauncher)
project.env.extractors.register(extractor_name, TestExtractorSrc)
project.env.converters.register(extractor_name, TestConverter)
project.add_model(model_name, { 'launcher': launcher_name })
project.add_source('source', { 'format': extractor_name })

@ -1,3 +1,4 @@
from functools import partial
import numpy as np
import os.path as osp
@ -69,8 +70,8 @@ class TfrecordConverterTest(TestCase):
}
with TestDir() as test_dir:
self._test_save_and_load(
TestExtractor(), TfDetectionApiConverter(save_images=True),
self._test_save_and_load(TestExtractor(),
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_masks(self):
@ -99,8 +100,8 @@ class TfrecordConverterTest(TestCase):
}
with TestDir() as test_dir:
self._test_save_and_load(
TestExtractor(), TfDetectionApiConverter(save_masks=True),
self._test_save_and_load(TestExtractor(),
partial(TfDetectionApiConverter.convert, save_masks=True),
test_dir)
def test_can_save_dataset_with_no_subsets(self):
@ -139,8 +140,8 @@ class TfrecordConverterTest(TestCase):
}
with TestDir() as test_dir:
self._test_save_and_load(
TestExtractor(), TfDetectionApiConverter(save_images=True),
self._test_save_and_load(TestExtractor(),
partial(TfDetectionApiConverter.convert, save_images=True),
test_dir)
def test_can_save_dataset_with_image_info(self):
@ -158,7 +159,7 @@ class TfrecordConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
TfDetectionApiConverter(), test_dir)
TfDetectionApiConverter.convert, test_dir)
def test_labelmap_parsing(self):
text = """

@ -1,4 +1,5 @@
from collections import OrderedDict
from functools import partial
import numpy as np
import os.path as osp
@ -158,7 +159,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocClassificationConverter(label_map='voc'), test_dir)
partial(VocClassificationConverter.convert, label_map='voc'),
test_dir)
def test_can_save_voc_det(self):
class TestExtractor(TestExtractorBase):
@ -213,8 +215,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocDetectionConverter(label_map='voc'), test_dir,
target_dataset=DstExtractor())
partial(VocDetectionConverter.convert, label_map='voc'),
test_dir, target_dataset=DstExtractor())
def test_can_save_voc_segm(self):
class TestExtractor(TestExtractorBase):
@ -247,8 +249,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocSegmentationConverter(label_map='voc'), test_dir,
target_dataset=DstExtractor())
partial(VocSegmentationConverter.convert, label_map='voc'),
test_dir, target_dataset=DstExtractor())
def test_can_save_voc_segm_unpainted(self):
class TestExtractor(TestExtractorBase):
@ -281,7 +283,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocSegmentationConverter(label_map='voc', apply_colormap=False),
partial(VocSegmentationConverter.convert,
label_map='voc', apply_colormap=False),
test_dir, target_dataset=DstExtractor())
def test_can_save_voc_segm_with_many_instances(self):
@ -316,8 +319,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocSegmentationConverter(label_map='voc'), test_dir,
target_dataset=DstExtractor())
partial(VocSegmentationConverter.convert, label_map='voc'),
test_dir, target_dataset=DstExtractor())
def test_can_save_voc_layout(self):
class TestExtractor(TestExtractorBase):
@ -341,7 +344,7 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocLayoutConverter(label_map='voc'), test_dir)
partial(VocLayoutConverter.convert, label_map='voc'), test_dir)
def test_can_save_voc_action(self):
class TestExtractor(TestExtractorBase):
@ -395,8 +398,9 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocActionConverter(label_map='voc', allow_attributes=False),
test_dir, target_dataset=DstExtractor())
partial(VocActionConverter.convert,
label_map='voc', allow_attributes=False), test_dir,
target_dataset=DstExtractor())
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(TestExtractorBase):
@ -414,7 +418,7 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc'), test_dir)
partial(VocConverter.convert, label_map='voc'), test_dir)
def test_can_save_dataset_with_images(self):
class TestExtractor(TestExtractorBase):
@ -428,7 +432,8 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc', save_images=True), test_dir)
partial(VocConverter.convert, label_map='voc', save_images=True),
test_dir)
def test_dataset_with_voc_labelmap(self):
class SrcExtractor(TestExtractorBase):
@ -463,8 +468,8 @@ class VocConverterTest(TestCase):
return VOC.make_voc_categories()
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), VocConverter(label_map='voc'),
self._test_save_and_load(SrcExtractor(),
partial(VocConverter.convert, label_map='voc'),
test_dir, target_dataset=DstExtractor())
def test_dataset_with_guessed_labelmap(self):
@ -510,8 +515,8 @@ class VocConverterTest(TestCase):
return VOC.make_voc_categories(label_map)
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), VocConverter(label_map='guess'),
self._test_save_and_load(SrcExtractor(),
partial(VocConverter.convert, label_map='guess'),
test_dir, target_dataset=DstExtractor())
def test_dataset_with_source_labelmap_undefined(self):
@ -557,8 +562,8 @@ class VocConverterTest(TestCase):
return VOC.make_voc_categories(label_map)
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), VocConverter(label_map='source'),
self._test_save_and_load(SrcExtractor(),
partial(VocConverter.convert, label_map='source'),
test_dir, target_dataset=DstExtractor())
def test_dataset_with_source_labelmap_defined(self):
@ -603,8 +608,8 @@ class VocConverterTest(TestCase):
return VOC.make_voc_categories(label_map)
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), VocConverter(label_map='source'),
self._test_save_and_load(SrcExtractor(),
partial(VocConverter.convert, label_map='source'),
test_dir, target_dataset=DstExtractor())
def test_dataset_with_fixed_labelmap(self):
@ -652,8 +657,8 @@ class VocConverterTest(TestCase):
return VOC.make_voc_categories(label_map)
with TestDir() as test_dir:
self._test_save_and_load(
SrcExtractor(), VocConverter(label_map=label_map),
self._test_save_and_load(SrcExtractor(),
partial(VocConverter.convert, label_map=label_map),
test_dir, target_dataset=DstExtractor())
def test_can_save_dataset_with_image_info(self):
@ -665,7 +670,7 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc'), test_dir)
partial(VocConverter.convert, label_map='voc'), test_dir)
def test_relative_paths(self):
class TestExtractor(TestExtractorBase):
@ -678,7 +683,9 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc', save_images=True), test_dir)
partial(VocConverter.convert,
label_map='voc', save_images=True),
test_dir)
def test_can_save_attributes(self):
class TestExtractor(TestExtractorBase):
@ -708,5 +715,5 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocDetectionConverter(label_map='voc'), test_dir,
target_dataset=DstExtractor())
partial(VocConverter.convert, label_map='voc'), test_dir,
target_dataset=DstExtractor())

@ -50,7 +50,7 @@ class YoloFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter(save_images=True)(source_dataset, test_dir)
YoloConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = YoloImporter()(test_dir).make_dataset()
compare_datasets(self, source_dataset, parsed_dataset)
@ -78,7 +78,7 @@ class YoloFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter()(source_dataset, test_dir)
YoloConverter.convert(source_dataset, test_dir)
save_image(osp.join(test_dir, 'obj_train_data', '1.jpg'),
np.ones((10, 15, 3))) # put the image for dataset
@ -109,7 +109,7 @@ class YoloFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter()(source_dataset, test_dir)
YoloConverter.convert(source_dataset, test_dir)
parsed_dataset = YoloImporter()(test_dir,
image_info={'1': (10, 15)}).make_dataset()
@ -136,8 +136,8 @@ class YoloFormatTest(TestCase):
with TestDir() as test_dir:
source_dataset = TestExtractor()
YoloConverter(save_images=save_images)(
source_dataset, test_dir)
YoloConverter.convert(source_dataset, test_dir,
save_images=save_images)
parsed_dataset = YoloImporter()(test_dir).make_dataset()
compare_datasets(self, source_dataset, parsed_dataset)

Loading…
Cancel
Save