[Datumaro] Convert command (#1837)

* Add convert command

* fixes

* update readme

* update changelog

Co-authored-by: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com>
main
zhiltsov-max 6 years ago committed by GitHub
parent 0b1c3a31dd
commit 575c93ff2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -54,6 +54,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added new tag annotation workspace (<https://github.com/opencv/cvat/pull/1570>)
- Appearance block in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- Keyframe navigations and some switchers in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- [Datumaro] Added `convert` command to convert datasets directly (<https://github.com/opencv/cvat/pull/1837>)
### Changed
- Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)

@@ -133,8 +133,8 @@ project = Project.load('directory')
```bash
# Download VOC dataset:
# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
datum project import --format voc --input-path <path/to/voc>
datum project export --format coco --filter '/item[annotation/label="cat"]'
datum convert --input-format voc --input-path <path/to/voc> \
--output-format coco --filter '/item[annotation/label="cat"]'
```
- Convert only non-occluded annotations from a CVAT-annotated project to TFrecord:
@@ -161,8 +161,8 @@ project = Project.load('directory')
- Annotate instance polygons in CVAT, export as masks in COCO:
```bash
datum project import --format cvat --input-path <path/to/cvat.xml>
datum project export --format coco -- --segmentation-mode masks
datum convert --input-format cvat --input-path <path/to/cvat.xml> \
--output-format coco -- --segmentation-mode masks
```
- Apply an OpenVINO detection model to some COCO-like dataset,

@@ -68,6 +68,7 @@ def make_parser():
('remove', commands.remove, "Remove source from project"),
('export', commands.export, "Export project"),
('explain', commands.explain, "Run Explainable AI algorithm for model"),
('convert', commands.convert, "Convert dataset"),
]
# Argparse doesn't support subparser groups:

@@ -3,4 +3,4 @@
#
# SPDX-License-Identifier: MIT
from . import add, create, explain, export, remove
from . import add, create, explain, export, remove, convert

@@ -0,0 +1,137 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
from datumaro.components.project import Environment
from ..contexts.project import FilterModes
from ..util import CliException, MultilineFormatter, make_file_name
from ..util.project import generate_next_dir_name
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Create the CLI parser for the standalone 'convert' command.

    parser_ctor lets the caller supply a subparser factory
    (e.g. subparsers.add_parser) instead of a bare ArgumentParser,
    which is why the 'help' keyword is passed to it.
    """
    # Advertise the formats registered in the default environment.
    import_formats = ', '.join(sorted(Environment().importers.items))
    export_formats = ', '.join(sorted(Environment().converters.items))

    parser = parser_ctor(help="Convert an existing dataset to another format",
        description="""
        Converts a dataset from one format to another.
        You can add your own formats using a project.|n
        |n
        Supported input formats: %s|n
        |n
        Supported output formats: %s|n
        |n
        Examples:|n
        - Export a dataset as a PASCAL VOC dataset, include images:|n
        |s|sconvert -i src/path -f voc -- --save-images|n
        |n
        - Export a dataset as a COCO dataset to a specific directory:|n
        |s|sconvert -i src/path -f coco -o path/I/like/
        """ % (import_formats, export_formats),
        formatter_class=MultilineFormatter)

    # Input and output selection.
    parser.add_argument('-i', '--input-path', default='.', dest='source',
        help="Path to look for a dataset")
    parser.add_argument('-if', '--input-format',
        help="Input dataset format. Will try to detect, if not specified.")
    parser.add_argument('-f', '--output-format', required=True,
        help="Output format")
    parser.add_argument('-o', '--output-dir', dest='dst_dir',
        help="Directory to save output (default: a subdir in the current one)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")

    # Optional dataset item filtering.
    parser.add_argument('-e', '--filter',
        help="Filter expression for dataset items")
    parser.add_argument('--filter-mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()) , '%(default)s'))

    # Everything after '--' is forwarded to the output format plugin.
    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
        help="Additional arguments for output format (pass '-- -h' for help)")

    parser.set_defaults(command=convert_command)

    return parser
def convert_command(args):
    """Convert a dataset from one format to another.

    Uses args.input_format when given, otherwise probes every registered
    importer against args.source to auto-detect the format. The converted
    dataset is written to args.dst_dir (or a generated subdirectory).

    Returns:
        0 on success, 1 when the input format could not be detected,
        2 when detection is ambiguous.

    Raises:
        CliException: unknown input/output format, or non-empty output
            directory without --overwrite.
    """
    env = Environment()

    # Resolve the output converter; unknown names are a user error.
    try:
        converter = env.converters.get(args.output_format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format)
    # Converters may parse their own trailing CLI args (after '--').
    if hasattr(converter, 'from_cmdline'):
        extra_args = converter.from_cmdline(args.extra_args)
        converter = converter(**extra_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    if not args.input_format:
        # Auto-detection: probe every known importer against the source.
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            importer = env.make_importer(format_name)
            try:
                match = importer.detect(args.source)
                if match:
                    log.debug("format matched")
                    matches.append((format_name, importer))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    format_name)

        if len(matches) == 0:
            log.error("Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2

        format_name, importer = matches[0]
        args.input_format = format_name
        log.info("Source dataset format detected as '%s'", args.input_format)
    else:
        try:
            importer = env.make_importer(args.input_format)
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.input_format)
        # Fix: the original also called importer.from_cmdline() here with no
        # arguments and discarded the result — dead code that could fail at
        # runtime. Importer-specific extra args are not supported by this
        # command; only the converter receives args.extra_args (see above).

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        # Refuse to clobber a non-empty directory unless --overwrite is set.
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # Default output: a fresh '<source-name>-<format>' subdirectory.
        dst_dir = generate_next_dir_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    project = importer(source)
    dataset = project.make_dataset()

    log.info("Exporting the dataset")
    dataset.export_project(
        save_dir=dst_dir,
        converter=converter,
        filter_expr=args.filter,
        **filter_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0
Loading…
Cancel
Save