[Datumaro] Convert command (#1837)

* Add convert command

* fixes

* update readme

* update changelog

Co-authored-by: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com>
main
zhiltsov-max 6 years ago committed by GitHub
parent 0b1c3a31dd
commit 575c93ff2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -54,6 +54,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added new tag annotation workspace (<https://github.com/opencv/cvat/pull/1570>) - Added new tag annotation workspace (<https://github.com/opencv/cvat/pull/1570>)
- Appearance block in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>) - Appearance block in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- Keyframe navigations and some switchers in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>) - Keyframe navigations and some switchers in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- [Datumaro] Added `convert` command to convert datasets directly (<https://github.com/opencv/cvat/pull/1837>)
### Changed ### Changed
- Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>) - Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)

@ -133,8 +133,8 @@ project = Project.load('directory')
```bash ```bash
# Download VOC dataset: # Download VOC dataset:
# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
datum project import --format voc --input-path <path/to/voc> datum convert --input-format voc --input-path <path/to/voc> \
datum project export --format coco --filter '/item[annotation/label="cat"]' --output-format coco --filter '/item[annotation/label="cat"]'
``` ```
- Convert only non-occluded annotations from a CVAT-annotated project to TFrecord: - Convert only non-occluded annotations from a CVAT-annotated project to TFrecord:
@ -161,8 +161,8 @@ project = Project.load('directory')
- Annotate instance polygons in CVAT, export as masks in COCO: - Annotate instance polygons in CVAT, export as masks in COCO:
```bash ```bash
datum project import --format cvat --input-path <path/to/cvat.xml> datum convert --input-format cvat --input-path <path/to/cvat.xml> \
datum project export --format coco -- --segmentation-mode masks --output-format coco -- --segmentation-mode masks
``` ```
- Apply an OpenVINO detection model to some COCO-like dataset, - Apply an OpenVINO detection model to some COCO-like dataset,

@ -68,6 +68,7 @@ def make_parser():
('remove', commands.remove, "Remove source from project"), ('remove', commands.remove, "Remove source from project"),
('export', commands.export, "Export project"), ('export', commands.export, "Export project"),
('explain', commands.explain, "Run Explainable AI algorithm for model"), ('explain', commands.explain, "Run Explainable AI algorithm for model"),
('convert', commands.convert, "Convert dataset"),
] ]
# Argparse doesn't support subparser groups: # Argparse doesn't support subparser groups:

@ -3,4 +3,4 @@
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from . import add, create, explain, export, remove from . import add, create, explain, export, remove, convert

@ -0,0 +1,137 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
from datumaro.components.project import Environment
from ..contexts.project import FilterModes
from ..util import CliException, MultilineFormatter, make_file_name
from ..util.project import generate_next_dir_name
def build_parser(parser_ctor=argparse.ArgumentParser):
builtin_importers = sorted(Environment().importers.items)
builtin_converters = sorted(Environment().converters.items)
parser = parser_ctor(help="Convert an existing dataset to another format",
description="""
Converts a dataset from one format to another.
You can add your own formats using a project.|n
|n
Supported input formats: %s|n
|n
Supported output formats: %s|n
|n
Examples:|n
- Export a dataset as a PASCAL VOC dataset, include images:|n
|s|sconvert -i src/path -f voc -- --save-images|n
|n
- Export a dataset as a COCO dataset to a specific directory:|n
|s|sconvert -i src/path -f coco -o path/I/like/
""" % (', '.join(builtin_importers), ', '.join(builtin_converters)),
formatter_class=MultilineFormatter)
parser.add_argument('-i', '--input-path', default='.', dest='source',
help="Path to look for a dataset")
parser.add_argument('-if', '--input-format',
help="Input dataset format. Will try to detect, if not specified.")
parser.add_argument('-f', '--output-format', required=True,
help="Output format")
parser.add_argument('-o', '--output-dir', dest='dst_dir',
help="Directory to save output (default: a subdir in the current one)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-e', '--filter',
help="Filter expression for dataset items")
parser.add_argument('--filter-mode', default=FilterModes.i.name,
type=FilterModes.parse,
help="Filter mode (options: %s; default: %s)" % \
(', '.join(FilterModes.list_options()) , '%(default)s'))
parser.add_argument('extra_args', nargs=argparse.REMAINDER,
help="Additional arguments for output format (pass '-- -h' for help)")
parser.set_defaults(command=convert_command)
return parser
def convert_command(args):
env = Environment()
try:
converter = env.converters.get(args.output_format)
except KeyError:
raise CliException("Converter for format '%s' is not found" % \
args.output_format)
if hasattr(converter, 'from_cmdline'):
extra_args = converter.from_cmdline(args.extra_args)
converter = converter(**extra_args)
filter_args = FilterModes.make_filter_args(args.filter_mode)
if not args.input_format:
matches = []
for format_name in env.importers.items:
log.debug("Checking '%s' format...", format_name)
importer = env.make_importer(format_name)
try:
match = importer.detect(args.source)
if match:
log.debug("format matched")
matches.append((format_name, importer))
except NotImplementedError:
log.debug("Format '%s' does not support auto detection.",
format_name)
if len(matches) == 0:
log.error("Failed to detect dataset format. "
"Try to specify format with '-if/--input-format' parameter.")
return 1
elif len(matches) != 1:
log.error("Multiple formats match the dataset: %s. "
"Try to specify format with '-if/--input-format' parameter.",
', '.join(m[0] for m in matches))
return 2
format_name, importer = matches[0]
args.input_format = format_name
log.info("Source dataset format detected as '%s'", args.input_format)
else:
try:
importer = env.make_importer(args.input_format)
if hasattr(importer, 'from_cmdline'):
extra_args = importer.from_cmdline()
except KeyError:
raise CliException("Importer for format '%s' is not found" % \
args.input_format)
source = osp.abspath(args.source)
dst_dir = args.dst_dir
if dst_dir:
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
raise CliException("Directory '%s' already exists "
"(pass --overwrite to overwrite)" % dst_dir)
else:
dst_dir = generate_next_dir_name('%s-%s' % \
(osp.basename(source), make_file_name(args.output_format)))
dst_dir = osp.abspath(dst_dir)
project = importer(source)
dataset = project.make_dataset()
log.info("Exporting the dataset")
dataset.export_project(
save_dir=dst_dir,
converter=converter,
filter_expr=args.filter,
**filter_args)
log.info("Dataset exported to '%s' as '%s'" % \
(dst_dir, args.output_format))
return 0
Loading…
Cancel
Save