diff --git a/.vscode/settings.json b/.vscode/settings.json index d9724300..d681cd03 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,5 +25,11 @@ } ], "python.linting.pylintEnabled": true, - "python.envFile": "${workspaceFolder}/.vscode/python.env" + "python.envFile": "${workspaceFolder}/.vscode/python.env", + "python.testing.unittestEnabled": true, + "python.testing.unittestArgs": [ + "-v", + "-s", + "./datumaro", + ], } diff --git a/README.md b/README.md index 5dcdf86a..27567e3b 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ CVAT is free, online, interactive video and image annotation tool for computer v - [Installation guide](cvat/apps/documentation/installation.md) - [User's guide](cvat/apps/documentation/user_guide.md) - [Django REST API documentation](#rest-api) +- [Datumaro dataset framework](datumaro/README.md) - [Command line interface](utils/cli/) - [XML annotation format](cvat/apps/documentation/xml_format.md) - [AWS Deployment Guide](cvat/apps/documentation/AWS-Deployment-Guide.md) @@ -34,6 +35,8 @@ CVAT is free, online, interactive video and image annotation tool for computer v ## Supported annotation formats Format selection is possible after clicking on the Upload annotation / Dump annotation button. +[Datumaro](datumaro/README.md) dataset framework allows additional dataset transformations +via its command line tool. 
| Annotation format | Dumper | Loader | | ---------------------------------------------------------------------------------- | ------ | ------ | diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index cc758fd0..2923f7dc 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -1,3 +1,8 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + from collections import OrderedDict import os import os.path as osp @@ -6,7 +11,7 @@ from django.db import transaction from cvat.apps.annotation.annotation import Annotation from cvat.apps.engine.annotation import TaskAnnotation -from cvat.apps.engine.models import Task, ShapeType +from cvat.apps.engine.models import Task, ShapeType, AttributeType import datumaro.components.extractor as datumaro from datumaro.util.image import lazy_image @@ -128,18 +133,33 @@ class CvatTaskExtractor(datumaro.Extractor): attrs = {} db_attributes = db_label.attributespec_set.all() for db_attr in db_attributes: - attrs[db_attr.name] = db_attr.default_value + attrs[db_attr.name] = db_attr label_attrs[db_label.name] = attrs map_label = lambda label_db_name: label_map[label_db_name] + def convert_attrs(label, cvat_attrs): + cvat_attrs = {a.name: a.value for a in cvat_attrs} + dm_attr = dict() + for attr_name, attr_spec in label_attrs[label].items(): + attr_value = cvat_attrs.get(attr_name, attr_spec.default_value) + try: + if attr_spec.input_type == AttributeType.NUMBER: + attr_value = float(attr_value) + elif attr_spec.input_type == AttributeType.CHECKBOX: + attr_value = attr_value.lower() == 'true' + dm_attr[attr_name] = attr_value + except Exception as e: + slogger.task[self._db_task.id].error( + "Failed to convert attribute '%s'='%s': %s" % \ + (attr_name, attr_value, e)) + return dm_attr + for tag_obj in cvat_anno.tags: anno_group = tag_obj.group if isinstance(anno_group, int): anno_group = anno_group anno_label = 
map_label(tag_obj.label) - anno_attr = dict(label_attrs[tag_obj.label]) - for attr in tag_obj.attributes: - anno_attr[attr.name] = attr.value + anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes) anno = datumaro.LabelObject(label=anno_label, attributes=anno_attr, group=anno_group) @@ -150,9 +170,7 @@ class CvatTaskExtractor(datumaro.Extractor): if isinstance(anno_group, int): anno_group = anno_group anno_label = map_label(shape_obj.label) - anno_attr = dict(label_attrs[shape_obj.label]) - for attr in shape_obj.attributes: - anno_attr[attr.name] = attr.value + anno_attr = convert_attrs(shape_obj.label, shape_obj.attributes) anno_points = shape_obj.points if shape_obj.type == ShapeType.POINTS: diff --git a/cvat/apps/dataset_manager/export_templates/README.md b/cvat/apps/dataset_manager/export_templates/README.md index 82067fa2..a375bbdc 100644 --- a/cvat/apps/dataset_manager/export_templates/README.md +++ b/cvat/apps/dataset_manager/export_templates/README.md @@ -6,17 +6,15 @@ python -m virtualenv .venv . .venv/bin/activate # install dependencies -sed -r "s/^(.*)#.*$/\1/g" datumaro/requirements.txt | xargs -n 1 -L 1 pip install +pip install -e datumaro/ pip install -r cvat/utils/cli/requirements.txt # set up environment PYTHONPATH=':' export PYTHONPATH -ln -s $PWD/datumaro/datum.py ./datum -chmod a+x datum # use Datumaro -./datum --help +datum --help ``` -Check Datumaro [QUICKSTART.md](datumaro/docs/quickstart.md) for further info. +Check Datumaro [docs](datumaro/README.md) for more info. 
diff --git a/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py index 28baafad..f6d5da6b 100644 --- a/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py +++ b/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py @@ -1,3 +1,8 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + from collections import OrderedDict import getpass import json @@ -27,7 +32,7 @@ class cvat_rest_api_task_images(datumaro.Extractor): def _image_local_path(self, item_id): task_id = self._config.task_id return osp.join(self._cache_dir, - 'task_{}_frame_{:06d}.jpg'.format(task_id, item_id)) + 'task_{}_frame_{:06d}.jpg'.format(task_id, int(item_id))) def _make_image_loader(self, item_id): return lazy_image(item_id, diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 5f0b422d..7c361a61 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -1,3 +1,8 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + from datetime import timedelta import json import os @@ -217,8 +222,9 @@ class TaskProject: if dst_format == EXPORT_FORMAT_DATUMARO_PROJECT: self._remote_export(save_dir=save_dir, server_url=server_url) else: - self._dataset.export_project(output_format=dst_format, - save_dir=save_dir, save_images=save_images) + converter = self._dataset.env.make_converter(dst_format, + save_images=save_images) + self._dataset.export_project(converter=converter, save_dir=save_dir) def _remote_image_converter(self, save_dir, server_url=None): os.makedirs(save_dir, exist_ok=True) @@ -246,7 +252,7 @@ class TaskProject: if db_video is not None: for i in range(self._db_task.size): frame_info = { - 'id': str(i), + 'id': i, 'width': db_video.width, 'height': db_video.height, } diff --git 
a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index 8ad9aabc..c18db840 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -1,3 +1,8 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + import inspect import os, os.path as osp import zipfile diff --git a/datumaro/CONTRIBUTING.md b/datumaro/CONTRIBUTING.md new file mode 100644 index 00000000..07b93548 --- /dev/null +++ b/datumaro/CONTRIBUTING.md @@ -0,0 +1,119 @@ +## Table of Contents + +- [Installation](#installation) +- [Usage](#usage) +- [Testing](#testing) +- [Design](#design-and-code-structure) + +## Installation + +### Prerequisites + +- Python (3.5+) +- OpenVINO (optional) + +``` bash +git clone https://github.com/opencv/cvat +``` + +Optionally, install a virtual environment: + +``` bash +python -m pip install virtualenv +python -m virtualenv venv +. venv/bin/activate +``` + +Then install all dependencies: + +``` bash +while read -r p; do pip install $p; done < requirements.txt +``` + +If you're working inside CVAT environment: +``` bash +. .env/bin/activate +while read -r p; do pip install $p; done < datumaro/requirements.txt +``` + +## Usage + +> The directory containing Datumaro should be in the `PYTHONPATH` +> environment variable or `cvat/datumaro/` should be the current directory. + +``` bash +datum --help +python -m datumaro --help +python datumaro/ --help +python datum.py --help +``` + +``` python +import datumaro +``` + +## Testing + +It is expected that all Datumaro functionality is covered and checked by +unit tests. Tests are placed in `tests/` directory. + +To run tests use: + +``` bash +python -m unittest discover -s tests +``` + +If you're working inside CVAT environment, you can also use: + +``` bash +python manage.py test datumaro/ +``` + +## Design and code structure + +- [Design document](docs/design.md) + +### Command-line + +Use [Docker](https://www.docker.com/) as an example. 
Basically, +the interface is divided on contexts and single commands. +Contexts are semantically grouped commands, +related to a single topic or target. Single commands are handy shorter +alternatives for the most used commands and also special commands, +which are hard to be put into any specific context. + +![cli-design-image](docs/images/cli_design.png) + +- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page) + +Model-View-ViewModel (MVVM) UI pattern is used. + +![mvvm-image](docs/images/mvvm.png) + +### Datumaro project and environment structure + + +``` +├── [datumaro module] +└── [project folder] + ├── .datumaro/ + | ├── config.yml + │   ├── .git/ + │   ├── importers/ + │   │   ├── custom_format_importer1.py + │   │   └── ... + │   ├── statistics/ + │   │   ├── custom_statistic1.py + │   │   └── ... + │   ├── visualizers/ + │   │ ├── custom_visualizer1.py + │   │ └── ... + │   └── extractors/ + │   ├── custom_extractor1.py + │   └── ... + ├── dataset/ + └── sources/ + ├── source1 + └── ... +``` + diff --git a/datumaro/README.md b/datumaro/README.md index e7b58f54..f7f1c46e 100644 --- a/datumaro/README.md +++ b/datumaro/README.md @@ -1,36 +1,176 @@ -# Dataset framework +# Dataset Framework (Datumaro) -A framework to prepare, manage, build, analyze datasets +A framework to build, transform, and analyze datasets. + + +``` +CVAT annotations -- ---> Annotation tool +... \ / +COCO-like dataset -----> Datumaro ---> dataset ------> Model training +... / \ +VOC-like dataset -- ---> Publication etc. +``` + + +## Contents + +- [Documentation](#documentation) +- [Features](#features) +- [Installation](#installation) +- [Usage](#usage) +- [Examples](#examples) +- [Contributing](#contributing) ## Documentation --[Quick start guide](docs/quickstart.md) +- [User manual](docs/user_manual.md) +- [Design document](docs/design.md) +- [Contributing](CONTRIBUTING.md) -## Installation +## Features -Python3.5+ is required. 
+- Dataset format conversions: + - COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*) + - [Format specification](http://cocodataset.org/#format-data) + - `labels` are our extension - like `instances` with only `category_id` + - PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`) + - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) + - YOLO (`bboxes`) + - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) + - TF Detection API (`bboxes`, `masks`) + - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) + - CVAT + - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) +- Dataset building operations: + - Merging multiple datasets into one + - Dataset filtering with custom conditions, for instance: + - remove all annotations except polygons of a certain class + - remove images without a specific class + - remove occluded annotations from images + - keep only vertically-oriented images + - remove small area bounding boxes from annotations + - Annotation conversions, for instance + - polygons to instance masks and vise-versa + - apply a custom colormap for mask annotations + - remap dataset labels +- Dataset comparison +- Model integration: + - Inference (OpenVINO and custom models) + - Explainable AI ([RISE algorithm](https://arxiv.org/abs/1806.07421)) -To install into a virtual environment do: +> Check the [design document](docs/design.md) for a full list of features + +## Installation + +Optionally, create a virtual environment: ``` bash python -m pip install virtualenv python -m virtualenv venv . 
venv/bin/activate -pip install -r requirements.txt ``` -## Execution - -The tool can be executed both as a script and as a module. +Install Datumaro package: ``` bash -PYTHONPATH="..." -python -m datumaro -python path/to/datum.py +pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro' ``` -## Testing +## Usage + +There are several options available: +- [A standalone command-line tool](#standalone-tool) +- [A python module](#python-module) + +### Standalone tool + + +``` + User + | + v ++------------------+ +| CVAT | ++--------v---------+ +------------------+ +--------------+ +| Datumaro module | ----> | Datumaro project | <---> | Datumaro CLI | <--- User ++------------------+ +------------------+ +--------------+ +``` + ``` bash -python -m unittest discover -s tests +datum --help +python -m datumaro --help ``` + +### Python module + +Datumaro can be used in custom scripts as a library in the following way: + +``` python +from datumaro.components.project import Project # project-related things +import datumaro.components.extractor # annotations and high-level interfaces +# etc. 
+project = Project.load('directory') +``` + +## Examples + + + + +- Convert [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#data) to COCO, keep only images with `cat` class presented: + ```bash + # Download VOC dataset: + # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar + datum project import --format voc --input-path + datum project export --format coco --filter '/item[annotation/label="cat"]' + ``` + +- Convert only non-occluded annotations from a CVAT-annotated project to TFrecord: + ```bash + # export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir + datum project extract --filter '/item/annotation[occluded="False"]' \ + --mode items+anno --output-dir not_occluded + datum project export --project not_occluded \ + --format tf_detection_api -- --save-images + ``` + +- Annotate COCO, extract image subset, re-annotate it in CVAT, update old dataset: + ```bash + # Download COCO dataset http://cocodataset.org/#download + # Put images to coco/images/ and annotations to coco/annotations/ + datum project import --format coco --input-path + datum project export --filter '/image[images_I_dont_like]' --format cvat \ + --output-dir reannotation + # import dataset and images to CVAT, re-annotate + # export Datumaro project, extract to 'reannotation-upd' + datum project project merge reannotation-upd + datum project export --format coco + ``` + +- Annotate instance polygons in CVAT, export as masks in COCO: + ```bash + datum project import --format cvat --input-path + datum project export --format coco -- --segmentation-mode masks + ``` + +- Apply an OpenVINO detection model to some COCO-like dataset, + then compare annotations with ground truth and visualize in TensorBoard: + ```bash + datum project import --format coco --input-path + # create model results interpretation script + datum model add mymodel openvino \ + --weights model.bin --description model.xml \ + --interpretation-script 
parse_results.py + datum model run --model mymodel --output-dir mymodel_inference/ + datum project diff mymodel_inference/ --format tensorboard --output-dir diff + ``` + + + + +## Contributing + +Feel free to [open an Issue](https://github.com/opencv/cvat/issues/new) if you +think something needs to be changed. You are welcome to participate in development, +development instructions are available in our [developer manual](CONTRIBUTING.md). diff --git a/datumaro/datum.py b/datumaro/datum.py index d6ae4d2c..12c150bd 100755 --- a/datumaro/datum.py +++ b/datumaro/datum.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys -from datumaro import main +from datumaro.cli.__main__ import main if __name__ == '__main__': diff --git a/datumaro/datumaro/__init__.py b/datumaro/datumaro/__init__.py index ea5ad68e..cd825f56 100644 --- a/datumaro/datumaro/__init__.py +++ b/datumaro/datumaro/__init__.py @@ -2,92 +2,3 @@ # Copyright (C) 2019 Intel Corporation # # SPDX-License-Identifier: MIT - -import argparse -import logging as log -import sys - -from .cli import ( - project as project_module, - source as source_module, - item as item_module, - model as model_module, - # inference as inference_module, - - create_command as create_command_module, - add_command as add_command_module, - remove_command as remove_command_module, - export_command as export_command_module, - # diff_command as diff_command_module, - # build_command as build_command_module, - stats_command as stats_command_module, - explain_command as explain_command_module, -) -from .version import VERSION - - -KNOWN_COMMANDS = { - # contexts - 'project': project_module.main, - 'source': source_module.main, - 'item': item_module.main, - 'model': model_module.main, - # 'inference': inference_module.main, - - # shortcuts - 'create': create_command_module.main, - 'add': add_command_module.main, - 'remove': remove_command_module.main, - 'export': export_command_module.main, - # 'diff': diff_command_module.main, - # 'build': 
build_command_module.main, - 'stats': stats_command_module.main, - 'explain': explain_command_module.main, -} - -def get_command(name, args=None): - return KNOWN_COMMANDS[name] - -def loglevel(name): - numeric = getattr(log, name.upper(), None) - if not isinstance(numeric, int): - raise ValueError('Invalid log level: %s' % name) - return numeric - -def parse_command(input_args): - parser = argparse.ArgumentParser() - parser.add_argument('command', choices=KNOWN_COMMANDS.keys(), - help='A command to execute') - parser.add_argument('args', nargs=argparse.REMAINDER) - parser.add_argument('--version', action='version', version=VERSION) - parser.add_argument('--loglevel', type=loglevel, default='info', - help="Logging level (default: %(default)s)") - - general_args = parser.parse_args(input_args) - command_name = general_args.command - command_args = general_args.args - return general_args, command_name, command_args - -def set_up_logger(general_args): - loglevel = general_args.loglevel - log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', - level=loglevel) - -def main(args=None): - if args is None: - args = sys.argv[1:] - - general_args, command_name, command_args = parse_command(args) - - set_up_logger(general_args) - - command = get_command(command_name, general_args) - try: - return command(command_args) - except Exception as e: - log.error(e) - raise - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/datumaro/datumaro/__main__.py b/datumaro/datumaro/__main__.py index 9a055fae..27148356 100644 --- a/datumaro/datumaro/__main__.py +++ b/datumaro/datumaro/__main__.py @@ -4,9 +4,9 @@ # SPDX-License-Identifier: MIT import sys -from . 
import main + +from datumaro.cli.__main__ import main if __name__ == '__main__': sys.exit(main()) - diff --git a/datumaro/datumaro/cli/__init__.py b/datumaro/datumaro/cli/__init__.py index a9773073..cd825f56 100644 --- a/datumaro/datumaro/cli/__init__.py +++ b/datumaro/datumaro/cli/__init__.py @@ -2,4 +2,3 @@ # Copyright (C) 2019 Intel Corporation # # SPDX-License-Identifier: MIT - diff --git a/datumaro/datumaro/cli/__main__.py b/datumaro/datumaro/cli/__main__.py new file mode 100644 index 00000000..0ed611d0 --- /dev/null +++ b/datumaro/datumaro/cli/__main__.py @@ -0,0 +1,109 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse +import logging as log +import sys + +from . import contexts, commands +from .util import CliException, add_subparser +from ..version import VERSION + + +_log_levels = { + 'debug': log.DEBUG, + 'info': log.INFO, + 'warning': log.WARNING, + 'error': log.ERROR, + 'critical': log.CRITICAL +} + +def loglevel(name): + return _log_levels[name] + +def _make_subcommands_help(commands, help_line_start=0): + desc = "" + for command_name, _, command_help in commands: + desc += (" %-" + str(max(0, help_line_start - 2 - 1)) + "s%s\n") % \ + (command_name, command_help) + return desc + +def make_parser(): + parser = argparse.ArgumentParser(prog="datumaro", + description="Dataset Framework", + formatter_class=argparse.RawDescriptionHelpFormatter) + + parser.add_argument('--version', action='version', version=VERSION) + parser.add_argument('--loglevel', type=loglevel, default='info', + help="Logging level (options: %s; default: %s)" % \ + (', '.join(_log_levels.keys()), "%(default)s")) + + known_contexts = [ + ('project', contexts.project, "Actions on projects (datasets)"), + ('source', contexts.source, "Actions on data sources"), + ('model', contexts.model, "Actions on models"), + ] + known_commands = [ + ('create', commands.create, "Create project"), + ('add', commands.add, "Add source to project"), + 
('remove', commands.remove, "Remove source from project"), + ('export', commands.export, "Export project"), + ('explain', commands.explain, "Run Explainable AI algorithm for model"), + ] + + # Argparse doesn't support subparser groups: + # https://stackoverflow.com/questions/32017020/grouping-argparse-subparser-arguments + help_line_start = max((len(e[0]) for e in known_contexts + known_commands), + default=0) + help_line_start = max((2 + help_line_start) // 4 + 1, 6) * 4 # align to tabs + subcommands_desc = "" + if known_contexts: + subcommands_desc += "Contexts:\n" + subcommands_desc += _make_subcommands_help(known_contexts, + help_line_start) + if known_commands: + if subcommands_desc: + subcommands_desc += "\n" + subcommands_desc += "Commands:\n" + subcommands_desc += _make_subcommands_help(known_commands, + help_line_start) + if subcommands_desc: + subcommands_desc += \ + "\nRun '%s COMMAND --help' for more information on a command." % \ + parser.prog + + subcommands = parser.add_subparsers(title=subcommands_desc, + description="", help=argparse.SUPPRESS) + for command_name, command, _ in known_contexts + known_commands: + add_subparser(subcommands, command_name, command.build_parser) + + return parser + +def set_up_logger(args): + log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', + level=args.loglevel) + +def main(args=None): + parser = make_parser() + args = parser.parse_args(args) + + set_up_logger(args) + + if 'command' not in args: + parser.print_help() + return 1 + + try: + return args.command(args) + except CliException as e: + log.error(e) + return 1 + except Exception as e: + log.error(e) + raise + + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/datumaro/datumaro/cli/add_command.py b/datumaro/datumaro/cli/add_command.py deleted file mode 100644 index 49113084..00000000 --- a/datumaro/datumaro/cli/add_command.py +++ /dev/null @@ -1,21 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# 
SPDX-License-Identifier: MIT - -import argparse - -from . import source as source_module - - -def build_parser(parser=argparse.ArgumentParser()): - source_module.build_add_parser(parser). \ - set_defaults(command=source_module.add_command) - - return parser - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - - return args.command(args) diff --git a/datumaro/datumaro/cli/commands/__init__.py b/datumaro/datumaro/cli/commands/__init__.py new file mode 100644 index 00000000..7656b7ef --- /dev/null +++ b/datumaro/datumaro/cli/commands/__init__.py @@ -0,0 +1,6 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from . import add, create, explain, export, remove \ No newline at end of file diff --git a/datumaro/datumaro/cli/commands/add.py b/datumaro/datumaro/cli/commands/add.py new file mode 100644 index 00000000..b2864039 --- /dev/null +++ b/datumaro/datumaro/cli/commands/add.py @@ -0,0 +1,8 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-import + +from ..contexts.source import build_add_parser as build_parser diff --git a/datumaro/datumaro/cli/commands/create.py b/datumaro/datumaro/cli/commands/create.py new file mode 100644 index 00000000..16f6737c --- /dev/null +++ b/datumaro/datumaro/cli/commands/create.py @@ -0,0 +1,8 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-import + +from ..contexts.project import build_create_parser as build_parser \ No newline at end of file diff --git a/datumaro/datumaro/cli/explain_command.py b/datumaro/datumaro/cli/commands/explain.py similarity index 85% rename from datumaro/datumaro/cli/explain_command.py rename to datumaro/datumaro/cli/commands/explain.py index 8a83f7da..9b5a6432 100644 --- a/datumaro/datumaro/cli/explain_command.py +++ b/datumaro/datumaro/cli/commands/explain.py @@ -9,25 +9,35 @@ import os import os.path as osp from 
datumaro.components.project import Project -from datumaro.components.algorithms.rise import RISE from datumaro.util.command_targets import (TargetKinds, target_selector, ProjectTarget, SourceTarget, ImageTarget, is_project_path) from datumaro.util.image import load_image, save_image -from .util.project import load_project +from ..util import MultilineFormatter +from ..util.project import load_project -def build_parser(parser=argparse.ArgumentParser()): +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Run Explainable AI algorithm", + description="Runs an explainable AI algorithm for a model.") + parser.add_argument('-m', '--model', required=True, help="Model to use for inference") parser.add_argument('-t', '--target', default=None, help="Inference target - image, source, project " "(default: current dir)") - parser.add_argument('-d', '--save-dir', default=None, + parser.add_argument('-o', '--output-dir', dest='save_dir', default=None, help="Directory to save output (default: display only)") method_sp = parser.add_subparsers(dest='algorithm') - rise_parser = method_sp.add_parser('rise') + rise_parser = method_sp.add_parser('rise', + description=""" + RISE: Randomized Input Sampling for + Explanation of Black-box Models algorithm|n + |n + See explanations at: https://arxiv.org/pdf/1806.07421.pdf + """, + formatter_class=MultilineFormatter) rise_parser.add_argument('-s', '--max-samples', default=None, type=int, help="Number of algorithm iterations (default: mask size ^ 2)") rise_parser.add_argument('--mw', '--mask-width', @@ -46,7 +56,7 @@ def build_parser(parser=argparse.ArgumentParser()): help="IoU match threshold in Non-maxima suppression (default: no NMS)") rise_parser.add_argument('--conf', '--det-conf-thresh', dest='det_conf_thresh', default=0.0, type=float, - help="Confidence threshold for detections (default: do not filter)") + help="Confidence threshold for detections (default: include all)") rise_parser.add_argument('-b', 
'--batch-size', default=1, type=int, help="Inference batch size (default: %(default)s)") rise_parser.add_argument('--progressive', action='store_true', @@ -59,6 +69,21 @@ def build_parser(parser=argparse.ArgumentParser()): return parser def explain_command(args): + project_path = args.project_dir + if is_project_path(project_path): + project = Project.load(project_path) + else: + project = None + args.target = target_selector( + ProjectTarget(is_default=True, project=project), + SourceTarget(project=project), + ImageTarget() + )(args.target) + if args.target[0] == TargetKinds.project: + if is_project_path(args.target[1]): + args.project_dir = osp.dirname(osp.abspath(args.target[1])) + + import cv2 from matplotlib import cm @@ -69,6 +94,7 @@ def explain_command(args): if str(args.algorithm).lower() != 'rise': raise NotImplementedError() + from datumaro.components.algorithms.rise import RISE rise = RISE(model, max_samples=args.max_samples, mask_width=args.mask_width, @@ -162,31 +188,3 @@ def explain_command(args): raise NotImplementedError() return 0 - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 - - project_path = args.project_dir - if is_project_path(project_path): - project = Project.load(project_path) - else: - project = None - try: - args.target = target_selector( - ProjectTarget(is_default=True, project=project), - SourceTarget(project=project), - ImageTarget() - )(args.target) - if args.target[0] == TargetKinds.project: - if is_project_path(args.target[1]): - args.project_dir = osp.dirname(osp.abspath(args.target[1])) - except argparse.ArgumentTypeError as e: - print(e) - parser.print_help() - return 1 - - return args.command(args) diff --git a/datumaro/datumaro/cli/commands/export.py b/datumaro/datumaro/cli/commands/export.py new file mode 100644 index 00000000..afeb73cd --- /dev/null +++ b/datumaro/datumaro/cli/commands/export.py @@ -0,0 +1,8 @@ + +# Copyright 
(C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-import + +from ..contexts.project import build_export_parser as build_parser \ No newline at end of file diff --git a/datumaro/datumaro/cli/commands/remove.py b/datumaro/datumaro/cli/commands/remove.py new file mode 100644 index 00000000..0e0d076f --- /dev/null +++ b/datumaro/datumaro/cli/commands/remove.py @@ -0,0 +1,8 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-import + +from ..contexts.source import build_remove_parser as build_parser \ No newline at end of file diff --git a/datumaro/datumaro/cli/contexts/__init__.py b/datumaro/datumaro/cli/contexts/__init__.py new file mode 100644 index 00000000..95019b7b --- /dev/null +++ b/datumaro/datumaro/cli/contexts/__init__.py @@ -0,0 +1,6 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from . import project, source, model, item \ No newline at end of file diff --git a/datumaro/datumaro/cli/contexts/item/__init__.py b/datumaro/datumaro/cli/contexts/item/__init__.py new file mode 100644 index 00000000..1df66809 --- /dev/null +++ b/datumaro/datumaro/cli/contexts/item/__init__.py @@ -0,0 +1,36 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse + +from ...util import add_subparser + + +def build_export_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + return parser + +def build_stats_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + return parser + +def build_diff_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + return parser + +def build_edit_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + return parser + +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'export', build_export_parser) + 
add_subparser(subparsers, 'stats', build_stats_parser) + add_subparser(subparsers, 'diff', build_diff_parser) + add_subparser(subparsers, 'edit', build_edit_parser) + + return parser diff --git a/datumaro/datumaro/cli/model/__init__.py b/datumaro/datumaro/cli/contexts/model/__init__.py similarity index 66% rename from datumaro/datumaro/cli/model/__init__.py rename to datumaro/datumaro/cli/contexts/model/__init__.py index b168248f..5f40bd38 100644 --- a/datumaro/datumaro/cli/model/__init__.py +++ b/datumaro/datumaro/cli/contexts/model/__init__.py @@ -9,9 +9,49 @@ import os import os.path as osp import shutil -from ..util.project import load_project +from datumaro.components.config import DEFAULT_FORMAT +from ...util import add_subparser +from ...util.project import load_project +def build_openvino_add_parser(parser=argparse.ArgumentParser()): + parser.add_argument('-d', '--description', required=True, + help="Path to the model description file (.xml)") + parser.add_argument('-w', '--weights', required=True, + help="Path to the model weights file (.bin)") + parser.add_argument('-i', '--interpretation-script', required=True, + help="Path to the network output interpretation script (.py)") + parser.add_argument('--plugins-path', default=None, + help="Path to the custom Inference Engine plugins directory") + parser.add_argument('--copy', action='store_true', + help="Copy the model data to the project") + + return parser + +def openvino_args_extractor(args): + my_args = argparse.Namespace() + my_args.description = args.description + my_args.weights = args.weights + my_args.interpretation_script = args.interpretation_script + my_args.plugins_path = args.plugins_path + return my_args + +def build_add_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + + parser.add_argument('name', + help="Name of the model to be added") + launchers_sp = parser.add_subparsers(dest='launcher') + + build_openvino_add_parser(launchers_sp.add_parser('openvino')) \ + 
.set_defaults(launcher_args_extractor=openvino_args_extractor) + + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=add_command) + + return parser + def add_command(args): project = load_project(args.project_dir) @@ -55,39 +95,16 @@ def add_command(args): return 0 -def build_openvino_add_parser(parser): - parser.add_argument('-d', '--description', required=True, - help="Path to the model description file (.xml)") - parser.add_argument('-w', '--weights', required=True, - help="Path to the model weights file (.bin)") - parser.add_argument('-i', '--interpretation-script', required=True, - help="Path to the network output interpretation script (.py)") - parser.add_argument('--plugins-path', default=None, - help="Path to the custom Inference Engine plugins directory") - parser.add_argument('--copy', action='store_true', - help="Copy the model data to the project") - return parser - -def openvino_args_extractor(args): - my_args = argparse.Namespace() - my_args.description = args.description - my_args.weights = args.weights - my_args.interpretation_script = args.interpretation_script - my_args.plugins_path = args.plugins_path - return my_args +def build_remove_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() -def build_add_parser(parser): parser.add_argument('name', - help="Name of the model to be added") - launchers_sp = parser.add_subparsers(dest='launcher') - - build_openvino_add_parser(launchers_sp.add_parser('openvino')) \ - .set_defaults(launcher_args_extractor=openvino_args_extractor) - + help="Name of the model to be removed") parser.add_argument('-p', '--project', dest='project_dir', default='.', help="Directory of the project to operate on (default: current dir)") - return parser + parser.set_defaults(command=remove_command) + return parser def remove_command(args): project = load_project(args.project_dir) @@ -97,31 
+114,39 @@ def remove_command(args): return 0 -def build_remove_parser(parser): - parser.add_argument('name', - help="Name of the model to be removed") +def build_run_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + + parser.add_argument('-o', '--output-dir', dest='dst_dir', required=True, + help="Directory to save output") + parser.add_argument('-m', '--model', dest='model_name', required=True, + help="Model to apply to the project") parser.add_argument('-p', '--project', dest='project_dir', default='.', help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=run_command) return parser +def run_command(args): + project = load_project(args.project_dir) + + dst_dir = osp.abspath(args.dst_dir) + os.makedirs(dst_dir, exist_ok=False) + project.make_dataset().apply_model( + save_dir=dst_dir, + model_name=args.model_name) -def build_parser(parser=argparse.ArgumentParser()): - command_parsers = parser.add_subparsers(dest='command_name') + log.info("Inference results have been saved to '%s'" % dst_dir) - build_add_parser(command_parsers.add_parser('add')) \ - .set_defaults(command=add_command) + return 0 - build_remove_parser(command_parsers.add_parser('remove')) \ - .set_defaults(command=remove_command) - return parser +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'add', build_add_parser) + add_subparser(subparsers, 'remove', build_remove_parser) + add_subparser(subparsers, 'run', build_run_parser) - return args.command(args) + return parser diff --git a/datumaro/datumaro/cli/contexts/project/__init__.py b/datumaro/datumaro/cli/contexts/project/__init__.py new file mode 100644 index 00000000..0ba03461 --- /dev/null +++ 
b/datumaro/datumaro/cli/contexts/project/__init__.py @@ -0,0 +1,647 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse +from enum import Enum +import logging as log +import os +import os.path as osp +import shutil + +from datumaro.components.project import Project +from datumaro.components.comparator import Comparator +from datumaro.components.dataset_filter import DatasetItemEncoder +from datumaro.components.extractor import AnnotationType +from .diff import DiffVisualizer +from ...util import add_subparser, CliException, MultilineFormatter +from ...util.project import make_project_path, load_project, \ + generate_next_dir_name + + +def build_create_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Create empty project", + description=""" + Create a new empty project.|n + |n + Examples:|n + - Create a project in the current directory:|n + |s|screate -n myproject|n + |n + - Create a project in other directory:|n + |s|screate -o path/I/like/ + """, + formatter_class=MultilineFormatter) + + parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir', + help="Save directory for the new project (default: current dir") + parser.add_argument('-n', '--name', default=None, + help="Name of the new project (default: same as project dir)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.set_defaults(command=create_command) + + return parser + +def create_command(args): + project_dir = osp.abspath(args.dst_dir) + project_path = make_project_path(project_dir) + + if osp.isdir(project_dir) and os.listdir(project_dir): + if not args.overwrite: + raise CliException("Directory '%s' already exists " + "(pass --overwrite to force creation)" % project_dir) + else: + shutil.rmtree(project_dir) + os.makedirs(project_dir, exist_ok=True) + + if not args.overwrite and osp.isfile(project_path): + raise CliException("Project file '%s' 
already exists " + "(pass --overwrite to force creation)" % project_path) + + project_name = args.name + if project_name is None: + project_name = osp.basename(project_dir) + + log.info("Creating project at '%s'" % project_dir) + + Project.generate(project_dir, { + 'project_name': project_name, + }) + + log.info("Project has been created at '%s'" % project_dir) + + return 0 + +def build_import_parser(parser_ctor=argparse.ArgumentParser): + import datumaro.components.importers as importers_module + builtin_importers = [name for name, cls in importers_module.items] + + parser = parser_ctor(help="Create project from existing dataset", + description=""" + Creates a project from an existing dataset. The source can be:|n + - a dataset in a supported format (check 'formats' section below)|n + - a Datumaro project|n + |n + Formats:|n + Datasets come in a wide variety of formats. Each dataset + format defines its own data structure and rules on how to + interpret the data. For example, the following data structure + is used in COCO format:|n + /dataset/|n + - /images/.jpg|n + - /annotations/|n + |n + In Datumaro dataset formats are supported by + Extractor-s and Importer-s. + An Extractor produces a list of dataset items corresponding + to the dataset. An Importer creates a project from the + data source location. + It is possible to add a custom Extractor and Importer. 
+ To do this, you need to put an Extractor and + Importer implementation scripts to + /.datumaro/extractors + and /.datumaro/importers.|n + |n + List of supported dataset formats: %s|n + |n + Examples:|n + - Create a project from VOC dataset in the current directory:|n + |s|simport -f voc -i path/to/voc|n + |n + - Create a project from COCO dataset in other directory:|n + |s|simport -f coco -i path/to/coco -o path/I/like/ + """ % ', '.join(builtin_importers), + formatter_class=MultilineFormatter) + + parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir', + help="Directory to save the new project to (default: current dir)") + parser.add_argument('-n', '--name', default=None, + help="Name of the new project (default: same as project dir)") + parser.add_argument('--copy', action='store_true', + help="Copy the dataset instead of saving source links") + parser.add_argument('--skip-check', action='store_true', + help="Skip source checking") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-i', '--input-path', required=True, dest='source', + help="Path to import project from") + parser.add_argument('-f', '--format', required=True, + help="Source project format") + # parser.add_argument('extra_args', nargs=argparse.REMAINDER, + # help="Additional arguments for importer (pass '-- -h' for help)") + parser.set_defaults(command=import_command) + + return parser + +def import_command(args): + project_dir = osp.abspath(args.dst_dir) + project_path = make_project_path(project_dir) + + if osp.isdir(project_dir) and os.listdir(project_dir): + if not args.overwrite: + raise CliException("Directory '%s' already exists " + "(pass --overwrite to force creation)" % project_dir) + else: + shutil.rmtree(project_dir) + os.makedirs(project_dir, exist_ok=True) + + if not args.overwrite and osp.isfile(project_path): + raise CliException("Project file '%s' already exists " + "(pass 
--overwrite to force creation)" % project_path) + + project_name = args.name + if project_name is None: + project_name = osp.basename(project_dir) + + log.info("Importing project from '%s' as '%s'" % \ + (args.source, args.format)) + + source = osp.abspath(args.source) + project = Project.import_from(source, args.format) + project.config.project_name = project_name + project.config.project_dir = project_dir + + if not args.skip_check or args.copy: + log.info("Checking the dataset...") + dataset = project.make_dataset() + if args.copy: + log.info("Cloning data...") + dataset.save(merge=True, save_images=True) + else: + project.save() + + log.info("Project has been created at '%s'" % project_dir) + + return 0 + + +class FilterModes(Enum): + # primary + items = 1 + annotations = 2 + items_annotations = 3 + + # shortcuts + i = 1 + a = 2 + i_a = 3 + a_i = 3 + annotations_items = 3 + + @staticmethod + def parse(s): + s = s.lower() + s = s.replace('+', '_') + return FilterModes[s] + + @classmethod + def make_filter_args(cls, mode): + if mode == cls.items: + return {} + elif mode == cls.annotations: + return { + 'filter_annotations': True + } + elif mode == cls.items_annotations: + return { + 'filter_annotations': True, + 'remove_empty': True, + } + else: + raise NotImplementedError() + + @classmethod + def list_options(cls): + return [m.name.replace('_', '+') for m in cls] + +def build_export_parser(parser_ctor=argparse.ArgumentParser): + import datumaro.components.converters as converters_module + builtin_converters = [name for name, cls in converters_module.items] + + parser = parser_ctor(help="Export project", + description=""" + Exports the project dataset in some format. Optionally, a filter + can be passed, check 'extract' command description for more info. + Each dataset format has its own options, which + are passed after '--' separator (see examples), pass '-- -h' + for more info. 
If not stated otherwise, by default + only annotations are exported, to include images pass + '--save-images' parameter.|n + |n + Formats:|n + In Datumaro dataset formats are supported by Converter-s. + A Converter produces a dataset of a specific format + from dataset items. It is possible to add a custom Converter. + To do this, you need to put a Converter + definition script to /.datumaro/converters.|n + |n + List of supported dataset formats: %s|n + |n + Examples:|n + - Export project as a VOC-like dataset, include images:|n + |s|sexport -f voc -- --save-images|n + |n + - Export project as a COCO-like dataset in other directory:|n + |s|sexport -f coco -o path/I/like/ + """ % ', '.join(builtin_converters), + formatter_class=MultilineFormatter) + + parser.add_argument('-e', '--filter', default=None, + help="Filter expression for dataset items") + parser.add_argument('--filter-mode', default=FilterModes.i.name, + type=FilterModes.parse, + help="Filter mode (options: %s; default: %s)" % \ + (', '.join(FilterModes.list_options()) , '%(default)s')) + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Directory to save output (default: a subdir in the current one)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.add_argument('-f', '--format', required=True, + help="Output format") + parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, + help="Additional arguments for converter (pass '-- -h' for help)") + parser.set_defaults(command=export_command) + + return parser + +def export_command(args): + project = load_project(args.project_dir) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " 
+ "(pass --overwrite to force creation)" % dst_dir) + else: + dst_dir = generate_next_dir_name('%s-export-%s' % \ + (project.config.project_name, args.format)) + dst_dir = osp.abspath(dst_dir) + + try: + converter = project.env.make_converter(args.format, + cmdline_args=args.extra_args) + except KeyError: + raise CliException("Converter for format '%s' is not found" % \ + args.format) + + filter_args = FilterModes.make_filter_args(args.filter_mode) + + log.info("Loading the project...") + dataset = project.make_dataset() + + log.info("Exporting the project...") + dataset.export_project( + save_dir=dst_dir, + converter=converter, + filter_expr=args.filter, + **filter_args) + log.info("Project exported to '%s' as '%s'" % \ + (dst_dir, args.format)) + + return 0 + +def build_extract_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Extract subproject", + description=""" + Extracts a subproject that contains only items matching filter. + A filter is an XPath expression, which is applied to XML + representation of a dataset item. Check '--dry-run' parameter + to see XML representations of the dataset items.|n + |n + To filter annotations use the mode ('-m') parameter.|n + Supported modes:|n + - 'i', 'items'|n + - 'a', 'annotations'|n + - 'i+a', 'a+i', 'items+annotations', 'annotations+items'|n + When filtering annotations, use the 'items+annotations' + mode to point that annotation-less dataset items should be + removed. 
To select an annotation, write an XPath that + returns 'annotation' elements (see examples).|n + |n + Examples:|n + - Filter images with width < height:|n + |s|sextract -e '/item[image/width < image/height]'|n + |n + - Filter images with large-area bboxes:|n + |s|sextract -e '/item[annotation/type="bbox" and + annotation/area>2000]'|n + |n + - Filter out all irrelevant annotations from items:|n + |s|sextract -m a -e '/item/annotation[label = "person"]'|n + |n + - Filter out all irrelevant annotations from items:|n + |s|sextract -m a -e '/item/annotation[label="cat" and + area > 99.5]'|n + |n + - Filter occluded annotations and items, if no annotations left:|n + |s|sextract -m i+a -e '/item/annotation[occluded="True"]' + """, + formatter_class=MultilineFormatter) + + parser.add_argument('-e', '--filter', default=None, + help="XML XPath filter expression for dataset items") + parser.add_argument('-m', '--mode', default=FilterModes.i.name, + type=FilterModes.parse, + help="Filter mode (options: %s; default: %s)" % \ + (', '.join(FilterModes.list_options()) , '%(default)s')) + parser.add_argument('--dry-run', action='store_true', + help="Print XML representations to be filtered and exit") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Output directory (default: update current project)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=extract_command) + + return parser + +def extract_command(args): + project = load_project(args.project_dir) + + if not args.dry_run: + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " + "(pass --overwrite to force creation)" % dst_dir) + else: + dst_dir = 
generate_next_dir_name('%s-filter' % \ + project.config.project_name) + dst_dir = osp.abspath(dst_dir) + + dataset = project.make_dataset() + + filter_args = FilterModes.make_filter_args(args.mode) + + if args.dry_run: + dataset = dataset.extract(filter_expr=args.filter, **filter_args) + for item in dataset: + encoded_item = DatasetItemEncoder.encode(item, dataset.categories()) + xml_item = DatasetItemEncoder.to_string(encoded_item) + print(xml_item) + return 0 + + if not args.filter: + raise CliException("Expected a filter expression ('-e' argument)") + + os.makedirs(dst_dir, exist_ok=False) + dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter, + **filter_args) + + log.info("Subproject has been extracted to '%s'" % dst_dir) + + return 0 + +def build_merge_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Merge projects", + description=""" + Updates items of the current project with items + from the other project.|n + |n + Examples:|n + - Update a project with items from other project:|n + |s|smerge -p path/to/first/project path/to/other/project + """, + formatter_class=MultilineFormatter) + + parser.add_argument('other_project_dir', + help="Directory of the project to get data updates from") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Output directory (default: current project's dir)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=merge_command) + + return parser + +def merge_command(args): + first_project = load_project(args.project_dir) + second_project = load_project(args.other_project_dir) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already 
exists " + "(pass --overwrite to force creation)" % dst_dir) + + first_dataset = first_project.make_dataset() + first_dataset.update(second_project.make_dataset()) + + first_dataset.save(save_dir=dst_dir) + + if dst_dir is None: + dst_dir = first_project.config.project_dir + dst_dir = osp.abspath(dst_dir) + log.info("Merge results have been saved to '%s'" % dst_dir) + + return 0 + +def build_diff_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Compare projects", + description=""" + Compares two projects.|n + |n + Examples:|n + - Compare two projects, consider bboxes matching if their IoU > 0.7,|n + |s|s|s|sprint results to Tensorboard: + |s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7 + """, + formatter_class=MultilineFormatter) + + parser.add_argument('other_project_dir', + help="Directory of the second project to be compared") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Directory to save comparison results (default: do not save)") + parser.add_argument('-f', '--format', + default=DiffVisualizer.DEFAULT_FORMAT, + choices=[f.name for f in DiffVisualizer.Format], + help="Output format (default: %(default)s)") + parser.add_argument('--iou-thresh', default=0.5, type=float, + help="IoU match threshold for detections (default: %(default)s)") + parser.add_argument('--conf-thresh', default=0.5, type=float, + help="Confidence threshold for detections (default: %(default)s)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the first project to be compared (default: current dir)") + parser.set_defaults(command=diff_command) + + return parser + +def diff_command(args): + first_project = load_project(args.project_dir) + second_project = load_project(args.other_project_dir) + + comparator = Comparator( + 
iou_threshold=args.iou_thresh, + conf_threshold=args.conf_thresh) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " + "(pass --overwrite to force creation)" % dst_dir) + else: + dst_dir = generate_next_dir_name('%s-%s-diff' % ( + first_project.config.project_name, + second_project.config.project_name) + ) + dst_dir = osp.abspath(dst_dir) + if dst_dir: + log.info("Saving diff to '%s'" % dst_dir) + + visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator, + output_format=args.format) + visualizer.save_dataset_diff( + first_project.make_dataset(), + second_project.make_dataset()) + + return 0 + +def build_transform_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Transform project", + description=""" + Applies some operation to dataset items in the project + and produces a new project. + + [NOT IMPLEMENTED YET] + """, + formatter_class=MultilineFormatter) + + parser.add_argument('-t', '--transform', required=True, + help="Transform to apply to the project") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Directory to save output (default: current dir)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=transform_command) + + return parser + +def transform_command(args): + raise NotImplementedError("Not implemented yet.") + + # project = load_project(args.project_dir) + + # dst_dir = args.dst_dir + # if dst_dir: + # if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + # raise CliException("Directory '%s' already exists " + # "(pass --overwrite to force creation)" % dst_dir) + # dst_dir = osp.abspath(args.dst_dir) + + # 
project.make_dataset().transform_project( + # method=args.transform, + # save_dir=dst_dir + # ) + + # log.info("Transform results saved to '%s'" % dst_dir) + + # return 0 + +def build_info_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Get project info", + description=""" + Outputs project info. + """, + formatter_class=MultilineFormatter) + + parser.add_argument('--all', action='store_true', + help="Print all information") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=info_command) + + return parser + +def info_command(args): + project = load_project(args.project_dir) + config = project.config + env = project.env + dataset = project.make_dataset() + + print("Project:") + print(" name:", config.project_name) + print(" location:", config.project_dir) + print("Plugins:") + print(" importers:", ', '.join(env.importers.items)) + print(" extractors:", ', '.join(env.extractors.items)) + print(" converters:", ', '.join(env.converters.items)) + print(" launchers:", ', '.join(env.launchers.items)) + + print("Sources:") + for source_name, source in config.sources.items(): + print(" source '%s':" % source_name) + print(" format:", source.format) + print(" url:", source.url) + print(" location:", project.local_source_dir(source_name)) + + def print_extractor_info(extractor, indent=''): + print("%slength:" % indent, len(extractor)) + + categories = extractor.categories() + print("%scategories:" % indent, ', '.join(c.name for c in categories)) + + for cat_type, cat in categories.items(): + print("%s %s:" % (indent, cat_type.name)) + if cat_type == AnnotationType.label: + print("%s count:" % indent, len(cat.items)) + + count_threshold = 10 + if args.all: + count_threshold = len(cat.items) + labels = ', '.join(c.name for c in cat.items[:count_threshold]) + if count_threshold < len(cat.items): + labels += " (and %s more)" % ( + 
len(cat.items) - count_threshold) + print("%s labels:" % indent, labels) + + print("Dataset:") + print_extractor_info(dataset, indent=" ") + + subsets = dataset.subsets() + print(" subsets:", ', '.join(subsets)) + for subset_name in subsets: + subset = dataset.get_subset(subset_name) + print(" subset '%s':" % subset_name) + print_extractor_info(subset, indent=" ") + + print("Models:") + for model_name, model in env.config.models.items(): + print(" model '%s':" % model_name) + print(" type:", model.launcher) + + return 0 + + +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor( + description=""" + Manipulate projects.|n + |n + By default, the project to be operated on is searched for + in the current directory. An additional '-p' argument can be + passed to specify project location. + """, + formatter_class=MultilineFormatter) + + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'create', build_create_parser) + add_subparser(subparsers, 'import', build_import_parser) + add_subparser(subparsers, 'export', build_export_parser) + add_subparser(subparsers, 'extract', build_extract_parser) + add_subparser(subparsers, 'merge', build_merge_parser) + add_subparser(subparsers, 'diff', build_diff_parser) + add_subparser(subparsers, 'transform', build_transform_parser) + add_subparser(subparsers, 'info', build_info_parser) + + return parser diff --git a/datumaro/datumaro/cli/project/diff.py b/datumaro/datumaro/cli/contexts/project/diff.py similarity index 100% rename from datumaro/datumaro/cli/project/diff.py rename to datumaro/datumaro/cli/contexts/project/diff.py diff --git a/datumaro/datumaro/cli/contexts/source/__init__.py b/datumaro/datumaro/cli/contexts/source/__init__.py new file mode 100644 index 00000000..b20be3de --- /dev/null +++ b/datumaro/datumaro/cli/contexts/source/__init__.py @@ -0,0 +1,247 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse +import logging as log +import 
os +import os.path as osp +import shutil + +from ...util import add_subparser, CliException, MultilineFormatter +from ...util.project import load_project + + +def build_add_parser(parser_ctor=argparse.ArgumentParser): + import datumaro.components.extractors as extractors_module + extractors_list = [name for name, cls in extractors_module.items] + + base_parser = argparse.ArgumentParser(add_help=False) + base_parser.add_argument('-n', '--name', default=None, + help="Name of the new source") + base_parser.add_argument('-f', '--format', required=True, + help="Source dataset format") + base_parser.add_argument('--skip-check', action='store_true', + help="Skip source checking") + base_parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + + parser = parser_ctor(help="Add data source to project", + description=""" + Adds a data source to a project. The source can be:|n + - a dataset in a supported format (check 'formats' section below)|n + - a Datumaro project|n + |n + The source can be either a local directory or a remote + git repository. Each source type has its own parameters, which can + be checked by:|n + '%s'.|n + |n + Formats:|n + Datasets come in a wide variety of formats. Each dataset + format defines its own data structure and rules on how to + interpret the data. For example, the following data structure + is used in COCO format:|n + /dataset/|n + - /images/.jpg|n + - /annotations/|n + |n + In Datumaro dataset formats are supported by Extractor-s. + An Extractor produces a list of dataset items corresponding + to the dataset. It is possible to add a custom Extractor. 
+ To do this, you need to put an Extractor + definition script to /.datumaro/extractors.|n + |n + List of supported source formats: %s|n + |n + Examples:|n + - Add a local directory with VOC-like dataset:|n + |s|sadd path path/to/voc -f voc_detection|n + - Add a local file with CVAT annotations, call it 'mysource'|n + |s|s|s|sto the project somewhere else:|n + |s|sadd path path/to/cvat.xml -f cvat -n mysource -p somewhere/else/ + """ % ('%(prog)s SOURCE_TYPE --help', ', '.join(extractors_list)), + formatter_class=MultilineFormatter, + add_help=False) + parser.set_defaults(command=add_command) + + sp = parser.add_subparsers(dest='source_type', metavar='SOURCE_TYPE', + help="The type of the data source " + "(call '%s SOURCE_TYPE --help' for more info)" % parser.prog) + + dir_parser = sp.add_parser('path', help="Add local path as source", + parents=[base_parser]) + dir_parser.add_argument('url', + help="Path to the source") + dir_parser.add_argument('--copy', action='store_true', + help="Copy the dataset instead of saving source links") + + repo_parser = sp.add_parser('git', help="Add git repository as source", + parents=[base_parser]) + repo_parser.add_argument('url', + help="URL of the source git repository") + repo_parser.add_argument('-b', '--branch', default='master', + help="Branch of the source repository (default: %(default)s)") + repo_parser.add_argument('--checkout', action='store_true', + help="Do branch checkout") + + # NOTE: add common parameters to the parent help output + # the other way could be to use parse_known_args() + display_parser = argparse.ArgumentParser( + parents=[base_parser, parser], + prog=parser.prog, usage="%(prog)s [-h] SOURCE_TYPE ...", + description=parser.description, formatter_class=MultilineFormatter) + class HelpAction(argparse._HelpAction): + def __call__(self, parser, namespace, values, option_string=None): + display_parser.print_help() + parser.exit() + + parser.add_argument('-h', '--help', action=HelpAction, + help='show this 
help message and exit') + + # TODO: needed distinction on how to add an extractor or a remote source + + return parser + +def add_command(args): + project = load_project(args.project_dir) + + if args.source_type == 'git': + name = args.name + if name is None: + name = osp.splitext(osp.basename(args.url))[0] + + if project.env.git.has_submodule(name): + raise CliException("Git submodule '%s' already exists" % name) + + try: + project.get_source(name) + raise CliException("Source '%s' already exists" % name) + except KeyError: + pass + + rel_local_dir = project.local_source_dir(name) + local_dir = osp.join(project.config.project_dir, rel_local_dir) + url = args.url + project.env.git.create_submodule(name, local_dir, + url=url, branch=args.branch, no_checkout=not args.checkout) + elif args.source_type == 'path': + url = osp.abspath(args.url) + if not osp.exists(url): + raise CliException("Source path '%s' does not exist" % url) + + name = args.name + if name is None: + name = osp.splitext(osp.basename(url))[0] + + if project.env.git.has_submodule(name): + raise CliException("Git submodule '%s' already exists" % name) + + try: + project.get_source(name) + raise CliException("Source '%s' already exists" % name) + except KeyError: + pass + + rel_local_dir = project.local_source_dir(name) + local_dir = osp.join(project.config.project_dir, rel_local_dir) + + if args.copy: + log.info("Copying from '%s' to '%s'" % (url, local_dir)) + if osp.isdir(url): + # copytree requires destination dir not to exist + shutil.copytree(url, local_dir) + url = rel_local_dir + elif osp.isfile(url): + os.makedirs(local_dir) + shutil.copy2(url, local_dir) + url = osp.join(rel_local_dir, osp.basename(url)) + else: + raise Exception("Expected file or directory") + else: + os.makedirs(local_dir) + + project.add_source(name, { 'url': url, 'format': args.format }) + + if not args.skip_check: + log.info("Checking the source...") + try: + project.make_source_project(name).make_dataset() + except 
Exception: + shutil.rmtree(local_dir, ignore_errors=True) + raise + + project.save() + + log.info("Source '%s' has been added to the project, location: '%s'" \ + % (name, rel_local_dir)) + + return 0 + +def build_remove_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Remove source from project", + description="Remove a source from a project.") + + parser.add_argument('-n', '--name', required=True, + help="Name of the source to be removed") + parser.add_argument('--force', action='store_true', + help="Ignore possible errors during removal") + parser.add_argument('--keep-data', action='store_true', + help="Do not remove source data") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=remove_command) + + return parser + +def remove_command(args): + project = load_project(args.project_dir) + + name = args.name + if not name: + raise CliException("Expected source name") + try: + project.get_source(name) + except KeyError: + if not args.force: + raise CliException("Source '%s' does not exist" % name) + + if project.env.git.has_submodule(name): + if args.force: + log.warning("Forcefully removing the '%s' source..." % name) + + project.env.git.remove_submodule(name, force=args.force) + + source_dir = osp.join(project.config.project_dir, + project.local_source_dir(name)) + project.remove_source(name) + project.save() + + if not args.keep_data: + shutil.rmtree(source_dir, ignore_errors=True) + + log.info("Source '%s' has been removed from the project" % name) + + return 0 + +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(description=""" + Manipulate data sources inside of a project.|n + |n + A data source is a source of data for a project. + The project combines multiple data sources into one dataset. 
+ The role of a data source is to provide dataset items - images + and/or annotations.|n + |n + By default, the project to be operated on is searched for + in the current directory. An additional '-p' argument can be + passed to specify project location. + """, + formatter_class=MultilineFormatter) + + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'add', build_add_parser) + add_subparser(subparsers, 'remove', build_remove_parser) + + return parser diff --git a/datumaro/datumaro/cli/create_command.py b/datumaro/datumaro/cli/create_command.py deleted file mode 100644 index eb52458b..00000000 --- a/datumaro/datumaro/cli/create_command.py +++ /dev/null @@ -1,21 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse - -from . import project as project_module - - -def build_parser(parser=argparse.ArgumentParser()): - project_module.build_create_parser(parser) \ - .set_defaults(command=project_module.create_command) - - return parser - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - - return args.command(args) diff --git a/datumaro/datumaro/cli/export_command.py b/datumaro/datumaro/cli/export_command.py deleted file mode 100644 index 3bd3efe6..00000000 --- a/datumaro/datumaro/cli/export_command.py +++ /dev/null @@ -1,69 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import os.path as osp - -from datumaro.components.project import Project -from datumaro.util.command_targets import (TargetKinds, target_selector, - ProjectTarget, SourceTarget, ImageTarget, ExternalDatasetTarget, - is_project_path -) - -from . import project as project_module -from . import source as source_module -from . 
import item as item_module - - -def export_external_dataset(target, params): - raise NotImplementedError() - -def build_parser(parser=argparse.ArgumentParser()): - parser.add_argument('target', nargs='?', default=None) - parser.add_argument('params', nargs=argparse.REMAINDER) - - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - - return parser - -def process_command(target, params, args): - project_dir = args.project_dir - target_kind, target_value = target - if target_kind == TargetKinds.project: - return project_module.main(['export', '-p', target_value] + params) - elif target_kind == TargetKinds.source: - return source_module.main(['export', '-p', project_dir, '-n', target_value] + params) - elif target_kind == TargetKinds.item: - return item_module.main(['export', '-p', project_dir, target_value] + params) - elif target_kind == TargetKinds.external_dataset: - return export_external_dataset(target_value, params) - return 1 - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - - project_path = args.project_dir - if is_project_path(project_path): - project = Project.load(project_path) - else: - project = None - try: - args.target = target_selector( - ProjectTarget(is_default=True, project=project), - SourceTarget(project=project), - ExternalDatasetTarget(), - ImageTarget() - )(args.target) - if args.target[0] == TargetKinds.project: - if is_project_path(args.target[1]): - args.project_dir = osp.dirname(osp.abspath(args.target[1])) - except argparse.ArgumentTypeError as e: - print(e) - parser.print_help() - return 1 - - return process_command(args.target, args.params, args) diff --git a/datumaro/datumaro/cli/inference/__init__.py b/datumaro/datumaro/cli/inference/__init__.py deleted file mode 100644 index f5d48b7c..00000000 --- a/datumaro/datumaro/cli/inference/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ - -# Copyright (C) 2019 Intel 
Corporation -# -# SPDX-License-Identifier: MIT - -import argparse - - -def run_command(args): - return 0 - -def build_run_parser(parser): - return parser - -def build_parser(parser=argparse.ArgumentParser()): - command_parsers = parser.add_subparsers(dest='command') - - build_run_parser(command_parsers.add_parser('run')). \ - set_defaults(command=run_command) - - return parser - -def process_command(command, args): - return 0 - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 - - return args.command(args) diff --git a/datumaro/datumaro/cli/item/__init__.py b/datumaro/datumaro/cli/item/__init__.py deleted file mode 100644 index 6082932a..00000000 --- a/datumaro/datumaro/cli/item/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse - - -def build_export_parser(parser): - return parser - -def build_stats_parser(parser): - return parser - -def build_diff_parser(parser): - return parser - -def build_edit_parser(parser): - return parser - -def build_parser(parser=argparse.ArgumentParser()): - command_parsers = parser.add_subparsers(dest='command_name') - - build_export_parser(command_parsers.add_parser('export')) - build_stats_parser(command_parsers.add_parser('stats')) - build_diff_parser(command_parsers.add_parser('diff')) - build_edit_parser(command_parsers.add_parser('edit')) - - return parser - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 - - return args.command(args) diff --git a/datumaro/datumaro/cli/project/__init__.py b/datumaro/datumaro/cli/project/__init__.py deleted file mode 100644 index 234f89d7..00000000 --- a/datumaro/datumaro/cli/project/__init__.py +++ /dev/null @@ -1,361 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import 
logging as log -import os -import os.path as osp -import shutil - -from datumaro.components.project import Project -from datumaro.components.comparator import Comparator -from datumaro.components.dataset_filter import DatasetItemEncoder -from .diff import DiffVisualizer -from ..util.project import make_project_path, load_project - - -def build_create_parser(parser): - parser.add_argument('-d', '--dest', default='.', dest='dst_dir', - help="Save directory for the new project (default: current dir") - parser.add_argument('-n', '--name', default=None, - help="Name of the new project (default: same as project dir)") - parser.add_argument('--overwrite', action='store_true', - help="Overwrite existing files in the save directory") - return parser - -def create_command(args): - project_dir = osp.abspath(args.dst_dir) - project_path = make_project_path(project_dir) - - if osp.isdir(project_dir) and os.listdir(project_dir): - if not args.overwrite: - log.error("Directory '%s' already exists " - "(pass --overwrite to force creation)" % project_dir) - return 1 - else: - shutil.rmtree(project_dir) - os.makedirs(project_dir, exist_ok=args.overwrite) - - if not args.overwrite and osp.isfile(project_path): - log.error("Project file '%s' already exists " - "(pass --overwrite to force creation)" % project_path) - return 1 - - project_name = args.name - if project_name is None: - project_name = osp.basename(project_dir) - - log.info("Creating project at '%s'" % (project_dir)) - - Project.generate(project_dir, { - 'project_name': project_name, - }) - - log.info("Project has been created at '%s'" % (project_dir)) - - return 0 - -def build_import_parser(parser): - import datumaro.components.importers as importers_module - importers_list = [name for name, cls in importers_module.items] - - parser.add_argument('-s', '--source', required=True, - help="Path to import a project from") - parser.add_argument('-f', '--format', required=True, - help="Source project format (options: %s)" % (', 
'.join(importers_list))) - parser.add_argument('-d', '--dest', default='.', dest='dst_dir', - help="Directory to save the new project to (default: current dir)") - parser.add_argument('-n', '--name', default=None, - help="Name of the new project (default: same as project dir)") - parser.add_argument('--overwrite', action='store_true', - help="Overwrite existing files in the save directory") - parser.add_argument('--copy', action='store_true', - help="Copy the dataset instead of saving source links") - parser.add_argument('--skip-check', action='store_true', - help="Skip source checking") - # parser.add_argument('extra_args', nargs=argparse.REMAINDER, - # help="Additional arguments for importer (pass '-- -h' for help)") - return parser - -def import_command(args): - project_dir = osp.abspath(args.dst_dir) - project_path = make_project_path(project_dir) - - if osp.isdir(project_dir) and os.listdir(project_dir): - if not args.overwrite: - log.error("Directory '%s' already exists " - "(pass --overwrite to force creation)" % project_dir) - return 1 - else: - shutil.rmtree(project_dir) - os.makedirs(project_dir, exist_ok=args.overwrite) - - if not args.overwrite and osp.isfile(project_path): - log.error("Project file '%s' already exists " - "(pass --overwrite to force creation)" % project_path) - return 1 - - project_name = args.name - if project_name is None: - project_name = osp.basename(project_dir) - - log.info("Importing project from '%s' as '%s'" % \ - (args.source, args.format)) - - source = osp.abspath(args.source) - project = Project.import_from(source, args.format) - project.config.project_name = project_name - project.config.project_dir = project_dir - - if not args.skip_check or args.copy: - log.info("Checking the dataset...") - dataset = project.make_dataset() - if args.copy: - log.info("Cloning data...") - dataset.save(merge=True, save_images=True) - else: - project.save() - - log.info("Project has been created at '%s'" % (project_dir)) - - return 0 - -def 
build_build_parser(parser): - return parser - -def build_export_parser(parser): - parser.add_argument('-e', '--filter', default=None, - help="Filter expression for dataset items. Examples: " - "extract images with width < height: " - "'/item[image/width < image/height]'; " - "extract images with large-area bboxes: " - "'/item[annotation/type=\"bbox\" and annotation/area>2000]'" - "filter out irrelevant annotations from items: " - "'/item/annotation[label = \"person\"]'" - ) - parser.add_argument('-a', '--filter-annotations', action='store_true', - help="Filter annotations instead of dataset " - "items (default: %(default)s)") - parser.add_argument('-d', '--dest', dest='dst_dir', required=True, - help="Directory to save output") - parser.add_argument('-f', '--output-format', required=True, - help="Output format") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - parser.add_argument('--overwrite', action='store_true', - help="Overwrite existing files in the save directory") - parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, - help="Additional arguments for converter (pass '-- -h' for help)") - return parser - -def export_command(args): - project = load_project(args.project_dir) - - dst_dir = osp.abspath(args.dst_dir) - if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): - log.error("Directory '%s' already exists " - "(pass --overwrite to force creation)" % dst_dir) - return 1 - os.makedirs(dst_dir, exist_ok=args.overwrite) - - log.info("Loading the project...") - dataset = project.make_dataset() - - log.info("Exporting the project...") - dataset.export_project( - save_dir=dst_dir, - output_format=args.output_format, - filter_expr=args.filter, - filter_annotations=args.filter_annotations, - cmdline_args=args.extra_args) - log.info("Project exported to '%s' as '%s'" % \ - (dst_dir, args.output_format)) - - return 0 - -def 
build_stats_parser(parser): - parser.add_argument('name') - return parser - -def build_docs_parser(parser): - return parser - -def build_extract_parser(parser): - parser.add_argument('-e', '--filter', default=None, - help="XML XPath filter expression for dataset items. Examples: " - "extract images with width < height: " - "'/item[image/width < image/height]'; " - "extract images with large-area bboxes: " - "'/item[annotation/type=\"bbox\" and annotation/area>2000]' " - "filter out irrelevant annotations from items: " - "'/item/annotation[label = \"person\"]'" - ) - parser.add_argument('-a', '--filter-annotations', action='store_true', - help="Filter annotations instead of dataset " - "items (default: %(default)s)") - parser.add_argument('--remove-empty', action='store_true', - help="Remove an item if there are no annotations left after filtration") - parser.add_argument('--dry-run', action='store_true', - help="Print XML representations to be filtered and exit") - parser.add_argument('-d', '--dest', dest='dst_dir', required=True, - help="Output directory") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - return parser - -def extract_command(args): - project = load_project(args.project_dir) - - dst_dir = osp.abspath(args.dst_dir) - if not args.dry_run: - os.makedirs(dst_dir, exist_ok=False) - - dataset = project.make_dataset() - - kwargs = {} - if args.filter_annotations: - kwargs['remove_empty'] = args.remove_empty - - if args.dry_run: - dataset = dataset.extract(filter_expr=args.filter, - filter_annotations=args.filter_annotations, **kwargs) - for item in dataset: - encoded_item = DatasetItemEncoder.encode(item, dataset.categories()) - xml_item = DatasetItemEncoder.to_string(encoded_item) - print(xml_item) - return 0 - - dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter, - filter_annotations=args.filter_annotations, **kwargs) - - log.info("Subproject 
extracted to '%s'" % (dst_dir)) - - return 0 - -def build_merge_parser(parser): - parser.add_argument('other_project_dir', - help="Directory of the project to get data updates from") - parser.add_argument('-d', '--dest', dest='dst_dir', default=None, - help="Output directory (default: current project's dir)") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - return parser - -def merge_command(args): - first_project = load_project(args.project_dir) - second_project = load_project(args.other_project_dir) - - first_dataset = first_project.make_dataset() - first_dataset.update(second_project.make_dataset()) - - dst_dir = args.dst_dir - first_dataset.save(save_dir=dst_dir) - - if dst_dir is None: - dst_dir = first_project.config.project_dir - dst_dir = osp.abspath(dst_dir) - log.info("Merge result saved to '%s'" % (dst_dir)) - - return 0 - -def build_diff_parser(parser): - parser.add_argument('other_project_dir', - help="Directory of the second project to be compared") - parser.add_argument('-d', '--dest', default=None, dest='dst_dir', - help="Directory to save comparison results (default: do not save)") - parser.add_argument('-f', '--output-format', - default=DiffVisualizer.DEFAULT_FORMAT, - choices=[f.name for f in DiffVisualizer.Format], - help="Output format (default: %(default)s)") - parser.add_argument('--iou-thresh', default=0.5, type=float, - help="IoU match threshold for detections (default: %(default)s)") - parser.add_argument('--conf-thresh', default=0.5, type=float, - help="Confidence threshold for detections (default: %(default)s)") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the first project to be compared (default: current dir)") - return parser - -def diff_command(args): - first_project = load_project(args.project_dir) - second_project = load_project(args.other_project_dir) - - comparator = Comparator( - 
iou_threshold=args.iou_thresh, - conf_threshold=args.conf_thresh) - - save_dir = args.dst_dir - if save_dir is not None: - log.info("Saving diff to '%s'" % save_dir) - os.makedirs(osp.abspath(save_dir)) - visualizer = DiffVisualizer(save_dir=save_dir, comparator=comparator, - output_format=args.output_format) - visualizer.save_dataset_diff( - first_project.make_dataset(), - second_project.make_dataset()) - - return 0 - -def build_transform_parser(parser): - parser.add_argument('-d', '--dest', dest='dst_dir', required=True, - help="Directory to save output") - parser.add_argument('-m', '--model', dest='model_name', required=True, - help="Model to apply to the project") - parser.add_argument('-f', '--output-format', required=True, - help="Output format") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - return parser - -def transform_command(args): - project = load_project(args.project_dir) - - dst_dir = osp.abspath(args.dst_dir) - os.makedirs(dst_dir, exist_ok=False) - project.make_dataset().apply_model( - save_dir=dst_dir, - model_name=args.model_name) - - log.info("Transform results saved to '%s'" % (dst_dir)) - - return 0 - - -def build_parser(parser=argparse.ArgumentParser()): - command_parsers = parser.add_subparsers(dest='command_name') - - build_create_parser(command_parsers.add_parser('create')) \ - .set_defaults(command=create_command) - - build_import_parser(command_parsers.add_parser('import')) \ - .set_defaults(command=import_command) - - build_export_parser(command_parsers.add_parser('export')) \ - .set_defaults(command=export_command) - - build_extract_parser(command_parsers.add_parser('extract')) \ - .set_defaults(command=extract_command) - - build_merge_parser(command_parsers.add_parser('merge')) \ - .set_defaults(command=merge_command) - - build_build_parser(command_parsers.add_parser('build')) - build_stats_parser(command_parsers.add_parser('stats')) - 
build_docs_parser(command_parsers.add_parser('docs')) - build_diff_parser(command_parsers.add_parser('diff')) \ - .set_defaults(command=diff_command) - - build_transform_parser(command_parsers.add_parser('transform')) \ - .set_defaults(command=transform_command) - - return parser - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 - - return args.command(args) diff --git a/datumaro/datumaro/cli/remove_command.py b/datumaro/datumaro/cli/remove_command.py deleted file mode 100644 index f419cd3a..00000000 --- a/datumaro/datumaro/cli/remove_command.py +++ /dev/null @@ -1,21 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse - -from . import source as source_module - - -def build_parser(parser=argparse.ArgumentParser()): - source_module.build_add_parser(parser). \ - set_defaults(command=source_module.remove_command) - - return parser - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - - return args.command(args) diff --git a/datumaro/datumaro/cli/source/__init__.py b/datumaro/datumaro/cli/source/__init__.py deleted file mode 100644 index 8fa3364b..00000000 --- a/datumaro/datumaro/cli/source/__init__.py +++ /dev/null @@ -1,254 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import logging as log -import os -import os.path as osp -import shutil - -from ..util.project import load_project - - -def build_create_parser(parser): - parser.add_argument('-n', '--name', required=True, - help="Name of the source to be created") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - return parser - -def create_command(args): - project = load_project(args.project_dir) - config = project.config - - name = args.name - - if project.env.git.has_submodule(name): - 
log.fatal("Submodule '%s' already exists" % (name)) - return 1 - - try: - project.get_source(name) - log.fatal("Source '%s' already exists" % (name)) - return 1 - except KeyError: - pass - - dst_dir = osp.join(config.project_dir, config.sources_dir, name) - project.env.git.init(dst_dir) - - project.add_source(name, { 'url': name }) - project.save() - - log.info("Source '%s' has been added to the project, location: '%s'" \ - % (name, dst_dir)) - - return 0 - -def build_import_parser(parser): - sp = parser.add_subparsers(dest='source_type') - - repo_parser = sp.add_parser('repo') - repo_parser.add_argument('url', - help="URL of the source git repository") - repo_parser.add_argument('-b', '--branch', default='master', - help="Branch of the source repository (default: %(default)s)") - repo_parser.add_argument('--checkout', action='store_true', - help="Do branch checkout") - - dir_parser = sp.add_parser('dir') - dir_parser.add_argument('url', - help="Path to the source directory") - dir_parser.add_argument('--copy', action='store_true', - help="Copy the dataset instead of saving source links") - - parser.add_argument('-n', '--name', default=None, - help="Name of the new source") - parser.add_argument('-f', '--format', default=None, - help="Name of the source dataset format (default: 'project')") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - parser.add_argument('--skip-check', action='store_true', - help="Skip source checking") - return parser - -def import_command(args): - project = load_project(args.project_dir) - - if args.source_type == 'repo': - name = args.name - if name is None: - name = osp.splitext(osp.basename(args.url))[0] - - if project.env.git.has_submodule(name): - log.fatal("Submodule '%s' already exists" % (name)) - return 1 - - try: - project.get_source(name) - log.fatal("Source '%s' already exists" % (name)) - return 1 - except KeyError: - pass - - 
dst_dir = project.local_source_dir(name) - project.env.git.create_submodule(name, dst_dir, - url=args.url, branch=args.branch, no_checkout=not args.checkout) - - source = { 'url': args.url } - if args.format: - source['format'] = args.format - project.add_source(name, source) - - if not args.skip_check: - log.info("Checking the source...") - project.make_source_project(name) - project.save() - - log.info("Source '%s' has been added to the project, location: '%s'" \ - % (name, dst_dir)) - elif args.source_type == 'dir': - url = osp.abspath(args.url) - if not osp.exists(url): - log.fatal("Source path '%s' does not exist" % url) - return 1 - - name = args.name - if name is None: - name = osp.splitext(osp.basename(url))[0] - - try: - project.get_source(name) - log.fatal("Source '%s' already exists" % (name)) - return 1 - except KeyError: - pass - - dst_dir = url - if args.copy: - dst_dir = project.local_source_dir(name) - log.info("Copying from '%s' to '%s'" % (url, dst_dir)) - shutil.copytree(url, dst_dir) - url = name - - source = { 'url': url } - if args.format: - source['format'] = args.format - project.add_source(name, source) - - if not args.skip_check: - log.info("Checking the source...") - project.make_source_project(name) - project.save() - - log.info("Source '%s' has been added to the project, location: '%s'" \ - % (name, dst_dir)) - - return 0 - -def build_remove_parser(parser): - parser.add_argument('-n', '--name', required=True, - help="Name of the source to be removed") - parser.add_argument('--force', action='store_true', - help="Ignore possible errors during removal") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - return parser - -def remove_command(args): - project = load_project(args.project_dir) - - name = args.name - if name is None: - log.fatal("Expected source name") - return - - if project.env.git.has_submodule(name): - if args.force: - 
log.warning("Forcefully removing the '%s' source..." % (name)) - - project.env.git.remove_submodule(name, force=args.force) - - project.remove_source(name) - project.save() - - log.info("Source '%s' has been removed from the project" % (name)) - - return 0 - -def build_export_parser(parser): - parser.add_argument('-n', '--name', required=True, - help="Source dataset to be extracted") - parser.add_argument('-e', '--filter', default=None, - help="Filter expression for dataset items. Examples: " - "extract images with width < height: " - "'/item[image/width < image/height]'; " - "extract images with large-area bboxes: " - "'/item[annotation/type=\"bbox\" and annotation/area>2000]'" - ) - parser.add_argument('-a', '--filter-annotations', action='store_true', - help="Filter annotations instead of dataset " - "items (default: %(default)s)") - parser.add_argument('-d', '--dest', dest='dst_dir', required=True, - help="Directory to save output") - parser.add_argument('-f', '--output-format', required=True, - help="Output format") - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - parser.add_argument('--overwrite', action='store_true', - help="Overwrite existing files in the save directory") - parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, - help="Additional arguments for converter (pass '-- -h' for help)") - return parser - -def export_command(args): - project = load_project(args.project_dir) - - dst_dir = osp.abspath(args.dst_dir) - if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): - log.error("Directory '%s' already exists " - "(pass --overwrite to force creation)" % dst_dir) - return 1 - os.makedirs(dst_dir, exist_ok=args.overwrite) - - log.info("Loading the project...") - source_project = project.make_source_project(args.name) - dataset = source_project.make_dataset() - - log.info("Exporting the project...") - 
dataset.export_project( - save_dir=dst_dir, - output_format=args.output_format, - filter_expr=args.filter, - filter_annotations=args.filter_annotations, - cmdline_args=args.extra_args) - log.info("Source '%s' exported to '%s' as '%s'" % \ - (args.name, dst_dir, args.output_format)) - - return 0 - -def build_parser(parser=argparse.ArgumentParser()): - command_parsers = parser.add_subparsers(dest='command_name') - - build_create_parser(command_parsers.add_parser('create')) \ - .set_defaults(command=create_command) - build_import_parser(command_parsers.add_parser('import')) \ - .set_defaults(command=import_command) - build_remove_parser(command_parsers.add_parser('remove')) \ - .set_defaults(command=remove_command) - build_export_parser(command_parsers.add_parser('export')) \ - .set_defaults(command=export_command) - - return parser - - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - if 'command' not in args: - parser.print_help() - return 1 - - return args.command(args) diff --git a/datumaro/datumaro/cli/stats_command.py b/datumaro/datumaro/cli/stats_command.py deleted file mode 100644 index 333883de..00000000 --- a/datumaro/datumaro/cli/stats_command.py +++ /dev/null @@ -1,69 +0,0 @@ - -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import os.path as osp - -from datumaro.components.project import Project -from datumaro.util.command_targets import (TargetKinds, target_selector, - ProjectTarget, SourceTarget, ExternalDatasetTarget, ImageTarget, - is_project_path -) - -from . import project as project_module -from . import source as source_module -from . 
import item as item_module - - -def compute_external_dataset_stats(target, params): - raise NotImplementedError() - -def build_parser(parser=argparse.ArgumentParser()): - parser.add_argument('target', nargs='?', default=None) - parser.add_argument('params', nargs=argparse.REMAINDER) - - parser.add_argument('-p', '--project', dest='project_dir', default='.', - help="Directory of the project to operate on (default: current dir)") - - return parser - -def process_command(target, params, args): - project_dir = args.project_dir - target_kind, target_value = target - if target_kind == TargetKinds.project: - return project_module.main(['stats', '-p', target_value] + params) - elif target_kind == TargetKinds.source: - return source_module.main(['stats', '-p', project_dir, target_value] + params) - elif target_kind == TargetKinds.item: - return item_module.main(['stats', '-p', project_dir, target_value] + params) - elif target_kind == TargetKinds.external_dataset: - return compute_external_dataset_stats(target_value, params) - return 1 - -def main(args=None): - parser = build_parser() - args = parser.parse_args(args) - - project_path = args.project_dir - if is_project_path(project_path): - project = Project.load(project_path) - else: - project = None - try: - args.target = target_selector( - ProjectTarget(is_default=True, project=project), - SourceTarget(project=project), - ExternalDatasetTarget(), - ImageTarget() - )(args.target) - if args.target[0] == TargetKinds.project: - if is_project_path(args.target[1]): - args.project_dir = osp.dirname(osp.abspath(args.target[1])) - except argparse.ArgumentTypeError as e: - print(e) - parser.print_help() - return 1 - - return process_command(args.target, args.params, args) diff --git a/datumaro/datumaro/cli/util/__init__.py b/datumaro/datumaro/cli/util/__init__.py index a9773073..49319983 100644 --- a/datumaro/datumaro/cli/util/__init__.py +++ b/datumaro/datumaro/cli/util/__init__.py @@ -3,3 +3,36 @@ # # SPDX-License-Identifier: MIT 
+import argparse +import textwrap + + +class CliException(Exception): pass + +def add_subparser(subparsers, name, builder): + return builder(lambda **kwargs: subparsers.add_parser(name, **kwargs)) + +class MultilineFormatter(argparse.HelpFormatter): + """ + Keeps line breaks introduced with '|n' separator + and spaces introduced with '|s'. + """ + + def __init__(self, keep_natural=False, **kwargs): + super().__init__(**kwargs) + self._keep_natural = keep_natural + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + text = text.replace('|s', ' ') + + paragraphs = text.split('|n ') + if self._keep_natural: + paragraphs = sum((p.split('\n ') for p in paragraphs), []) + + multiline_text = '' + for paragraph in paragraphs: + formatted_paragraph = textwrap.fill(paragraph, width, + initial_indent=indent, subsequent_indent=indent) + '\n' + multiline_text += formatted_paragraph + return multiline_text diff --git a/datumaro/datumaro/cli/util/project.py b/datumaro/datumaro/cli/util/project.py index 6e1f5e65..dde4531a 100644 --- a/datumaro/datumaro/cli/util/project.py +++ b/datumaro/datumaro/cli/util/project.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: MIT +import os import os.path as osp from datumaro.components.project import Project, \ @@ -17,4 +18,26 @@ def make_project_path(project_dir, project_filename=None): def load_project(project_dir, project_filename=None): if project_filename: project_dir = osp.join(project_dir, project_filename) - return Project.load(project_dir) \ No newline at end of file + return Project.load(project_dir) + +def generate_next_dir_name(dirname, basedir='.', sep='.'): + """ + If basedir does not contain dirname, returns dirname itself, + else generates a dirname by appending separator to the dirname + and the number, next to the last used number in the basedir for + files with dirname prefix. 
+ """ + + def _to_int(s): + try: + return int(s) + except Exception: + return 0 + sep_count = dirname.count(sep) + 2 + + files = [e for e in os.listdir(basedir) if e.startswith(dirname)] + if files: + files = [e.split(sep) for e in files] + files = [_to_int(e[-1]) for e in files if len(e) == sep_count] + dirname += '%s%s' % (sep, max(files, default=0) + 1) + return dirname \ No newline at end of file diff --git a/datumaro/datumaro/components/algorithms/rise.py b/datumaro/datumaro/components/algorithms/rise.py index 8e75f10a..277bedd2 100644 --- a/datumaro/datumaro/components/algorithms/rise.py +++ b/datumaro/datumaro/components/algorithms/rise.py @@ -8,7 +8,7 @@ import numpy as np from math import ceil -from datumaro.components.extractor import * +from datumaro.components.extractor import AnnotationType def flatmatvec(mat): diff --git a/datumaro/datumaro/components/converters/__init__.py b/datumaro/datumaro/components/converters/__init__.py index e2f5e3d8..0991ed29 100644 --- a/datumaro/datumaro/components/converters/__init__.py +++ b/datumaro/datumaro/components/converters/__init__.py @@ -5,7 +5,7 @@ from datumaro.components.converters.datumaro import DatumaroConverter -from datumaro.components.converters.ms_coco import ( +from datumaro.components.converters.coco import ( CocoConverter, CocoImageInfoConverter, CocoCaptionsConverter, diff --git a/datumaro/datumaro/components/converters/ms_coco.py b/datumaro/datumaro/components/converters/coco.py similarity index 95% rename from datumaro/datumaro/components/converters/ms_coco.py rename to datumaro/datumaro/components/converters/coco.py index e6a3b12a..f2017a19 100644 --- a/datumaro/datumaro/components/converters/ms_coco.py +++ b/datumaro/datumaro/components/converters/coco.py @@ -14,9 +14,9 @@ import pycocotools.mask as mask_utils from datumaro.components.converter import Converter from datumaro.components.extractor import ( - DEFAULT_SUBSET_NAME, AnnotationType, PointsObject, BboxObject, MaskObject + 
DEFAULT_SUBSET_NAME, AnnotationType, PointsObject, MaskObject ) -from datumaro.components.formats.ms_coco import CocoTask, CocoPath +from datumaro.components.formats.coco import CocoTask, CocoPath from datumaro.util import find from datumaro.util.image import save_image import datumaro.util.mask_tools as mask_tools @@ -139,7 +139,10 @@ class _CaptionsConverter(_TaskConverter): 'caption': ann.caption, } if 'score' in ann.attributes: - elem['score'] = float(ann.attributes['score']) + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Failed to convert attribute 'score': %e" % e) self.annotations.append(elem) @@ -202,7 +205,7 @@ class _InstancesConverter(_TaskConverter): polygons = [p.get_polygon() for p in polygons] if self._context._segmentation_mode == SegmentationMode.guess: - use_masks = leader.attributes.get('is_crowd', + use_masks = True == leader.attributes.get('is_crowd', find(masks, lambda x: x.label == leader.label) is not None) elif self._context._segmentation_mode == SegmentationMode.polygons: use_masks = False @@ -342,7 +345,10 @@ class _InstancesConverter(_TaskConverter): 'iscrowd': int(is_crowd), } if 'score' in ann.attributes: - elem['score'] = float(ann.attributes['score']) + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Failed to convert attribute 'score': %e" % e) return elem @@ -448,7 +454,10 @@ class _LabelsConverter(_TaskConverter): 'category_id': int(ann.label) + 1, } if 'score' in ann.attributes: - elem['score'] = float(ann.attributes['score']) + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Failed to convert attribute 'score': %e" % e) self.annotations.append(elem) @@ -570,7 +579,7 @@ class CocoConverter(Converter): def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='coco') 
parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git a/datumaro/datumaro/components/converters/cvat.py b/datumaro/datumaro/components/converters/cvat.py index 242af837..475bc0b9 100644 --- a/datumaro/datumaro/components/converters/cvat.py +++ b/datumaro/datumaro/components/converters/cvat.py @@ -14,6 +14,14 @@ from datumaro.components.formats.cvat import CvatPath from datumaro.util.image import save_image +def _cast(value, type_conv, default=None): + if value is None: + return default + try: + return type_conv(value) + except Exception: + return default + def pairwise(iterable): a = iter(iterable) return zip(a, a) @@ -261,6 +269,8 @@ class _SubsetWriter: raise NotImplementedError("unknown shape type") for attr_name, attr_value in shape.attributes.items(): + if isinstance(attr_value, bool): + attr_value = 'true' if attr_value else 'false' if attr_name in self._get_label(shape.label).attributes: self._writer.add_attribute(OrderedDict([ ("name", str(attr_name)), @@ -325,7 +335,7 @@ class CvatConverter(Converter): def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='cvat') parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git a/datumaro/datumaro/components/converters/datumaro.py b/datumaro/datumaro/components/converters/datumaro.py index 246d1911..635817d4 100644 --- a/datumaro/datumaro/components/converters/datumaro.py +++ b/datumaro/datumaro/components/converters/datumaro.py @@ -287,7 +287,7 @@ class DatumaroConverter(Converter): def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='datumaro') parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git a/datumaro/datumaro/components/converters/tfrecord.py 
b/datumaro/datumaro/components/converters/tfrecord.py index 7d6c5c19..72b9c95c 100644 --- a/datumaro/datumaro/components/converters/tfrecord.py +++ b/datumaro/datumaro/components/converters/tfrecord.py @@ -10,6 +10,7 @@ import os.path as osp import string from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME +from datumaro.components.converter import Converter from datumaro.components.formats.tfrecord import DetectionApiPath from datumaro.util.image import encode_image from datumaro.util.tf_util import import_tf as _import_tf @@ -97,7 +98,7 @@ def _make_tf_example(item, get_label_id, get_label, save_images=False): return tf_example -class DetectionApiConverter: +class DetectionApiConverter(Converter): def __init__(self, save_images=False, cmdline_args=None): super().__init__() @@ -113,7 +114,7 @@ class DetectionApiConverter: def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='tf_detection_api') parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git a/datumaro/datumaro/components/converters/voc.py b/datumaro/datumaro/components/converters/voc.py index 18c99783..0d1eec2a 100644 --- a/datumaro/datumaro/components/converters/voc.py +++ b/datumaro/datumaro/components/converters/voc.py @@ -23,6 +23,19 @@ from datumaro.util.image import save_image from datumaro.util.mask_tools import apply_colormap, remap_mask +def _convert_attr(name, attributes, type_conv, default=None, warn=True): + d = object() + value = attributes.get(name, d) + if value is d: + return default + + try: + return type_conv(value) + except Exception as e: + log.warning("Failed to convert attribute '%s'='%s': %s" % \ + (name, value, e)) + return default + def _write_xml_bbox(bbox, parent_elem): x, y, w, h = bbox bbox_elem = ET.SubElement(parent_elem, 'bndbox') @@ -185,26 +198,17 @@ class _Converter: obj_label = 
self.get_label(obj.label) ET.SubElement(obj_elem, 'name').text = obj_label - pose = attr.get('pose') - if pose is not None: - pose = VocPose[pose] - else: - pose = VocPose.Unspecified + pose = _convert_attr('pose', attr, lambda v: VocPose[v], + VocPose.Unspecified) ET.SubElement(obj_elem, 'pose').text = pose.name - truncated = attr.get('truncated') - if truncated is not None: - truncated = int(truncated) - else: - truncated = 0 - ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated + truncated = _convert_attr('truncated', attr, int, 0) + ET.SubElement(obj_elem, 'truncated').text = \ + '%d' % truncated - difficult = attr.get('difficult') - if difficult is not None: - difficult = int(difficult) - else: - difficult = 0 - ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult + difficult = _convert_attr('difficult', attr, int, 0) + ET.SubElement(obj_elem, 'difficult').text = \ + '%d' % difficult bbox = obj.get_bbox() if bbox is not None: @@ -219,16 +223,16 @@ class _Converter: objects_with_parts.append(new_obj_id) - actions = {k: v for k, v in obj.attributes.items() - if self._is_action(obj_label, k)} + label_actions = self._get_actions(obj_label) actions_elem = ET.Element('actions') - for action in self._get_actions(obj_label): - presented = action in actions and actions[action] + for action in label_actions: + presented = _convert_attr(action, attr, + lambda v: int(v == True), 0) ET.SubElement(actions_elem, action).text = \ '%d' % presented objects_with_actions[new_obj_id][action] = presented - if len(actions) != 0: + if len(actions_elem) != 0: obj_elem.append(actions_elem) if set(self._tasks) & set([None, @@ -502,7 +506,7 @@ class VocConverter(Converter): def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='voc') parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git 
a/datumaro/datumaro/components/converters/yolo.py b/datumaro/datumaro/components/converters/yolo.py index cf0d1db7..a25c7b04 100644 --- a/datumaro/datumaro/components/converters/yolo.py +++ b/datumaro/datumaro/components/converters/yolo.py @@ -41,7 +41,7 @@ class YoloConverter(Converter): def build_cmdline_parser(cls, parser=None): import argparse if not parser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='yolo') parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") diff --git a/datumaro/datumaro/components/dataset_filter.py b/datumaro/datumaro/components/dataset_filter.py index a32b5df6..73c7ce81 100644 --- a/datumaro/datumaro/components/dataset_filter.py +++ b/datumaro/datumaro/components/dataset_filter.py @@ -57,6 +57,8 @@ class DatasetItemEncoder: @staticmethod def _get_label(label_id, categories): label = '' + if label_id is None: + return '' if categories is not None: label_cat = categories.get(AnnotationType.label) if label_cat is not None: diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index 8c07cfe3..afc221ac 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -213,7 +213,7 @@ class MaskObject(Annotation): (self.label == other.label) and \ (self.z_order == other.z_order) and \ (self.image is not None and other.image is not None and \ - np.all(self.image == other.image)) + np.array_equal(self.image, other.image)) class RleMask(MaskObject): # pylint: disable=redefined-builtin @@ -546,7 +546,7 @@ class DatasetItem: (self.annotations == other.annotations) and \ (self.path == other.path) and \ (self.has_image == other.has_image) and \ - (self.has_image and np.all(self.image == other.image) or \ + (self.has_image and np.array_equal(self.image, other.image) or \ not self.has_image) class IExtractor: diff --git a/datumaro/datumaro/components/extractors/__init__.py 
b/datumaro/datumaro/components/extractors/__init__.py index 6e5f323b..0b7a1947 100644 --- a/datumaro/datumaro/components/extractors/__init__.py +++ b/datumaro/datumaro/components/extractors/__init__.py @@ -5,7 +5,7 @@ from datumaro.components.extractors.datumaro import DatumaroExtractor -from datumaro.components.extractors.ms_coco import ( +from datumaro.components.extractors.coco import ( CocoImageInfoExtractor, CocoCaptionsExtractor, CocoInstancesExtractor, @@ -29,6 +29,7 @@ from datumaro.components.extractors.voc import ( from datumaro.components.extractors.yolo import YoloExtractor from datumaro.components.extractors.tfrecord import DetectionApiExtractor from datumaro.components.extractors.cvat import CvatExtractor +from datumaro.components.extractors.image_dir import ImageDirExtractor items = [ ('datumaro', DatumaroExtractor), @@ -56,4 +57,6 @@ items = [ ('tf_detection_api', DetectionApiExtractor), ('cvat', CvatExtractor), + + ('image_dir', ImageDirExtractor), ] \ No newline at end of file diff --git a/datumaro/datumaro/components/extractors/ms_coco.py b/datumaro/datumaro/components/extractors/coco.py similarity index 98% rename from datumaro/datumaro/components/extractors/ms_coco.py rename to datumaro/datumaro/components/extractors/coco.py index f6d1f9e1..05404f21 100644 --- a/datumaro/datumaro/components/extractors/ms_coco.py +++ b/datumaro/datumaro/components/extractors/coco.py @@ -15,7 +15,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem, BboxObject, CaptionObject, LabelCategories, PointsCategories ) -from datumaro.components.formats.ms_coco import CocoTask, CocoPath +from datumaro.components.formats.coco import CocoTask, CocoPath from datumaro.util.image import lazy_image diff --git a/datumaro/datumaro/components/extractors/cvat.py b/datumaro/datumaro/components/extractors/cvat.py index 200fe88e..e3c869c4 100644 --- a/datumaro/datumaro/components/extractors/cvat.py +++ b/datumaro/datumaro/components/extractors/cvat.py @@ -91,7 +91,15 
@@ class CvatExtractor(Extractor): shape.update(image) elif ev == 'end': if el.tag == 'attribute' and shape is not None: - shape['attributes'][el.attrib['name']] = el.text + attr_value = el.text + if el.text in ['true', 'false']: + attr_value = attr_value == 'true' + else: + try: + attr_value = float(attr_value) + except Exception: + pass + shape['attributes'][el.attrib['name']] = attr_value elif el.tag in cls._SUPPORTED_SHAPES: if track is not None: shape['frame'] = el.attrib['frame'] diff --git a/datumaro/datumaro/components/extractors/image_dir.py b/datumaro/datumaro/components/extractors/image_dir.py new file mode 100644 index 00000000..561fa9d8 --- /dev/null +++ b/datumaro/datumaro/components/extractors/image_dir.py @@ -0,0 +1,55 @@ + +# Copyright (C) 2018 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import os +import os.path as osp + +from datumaro.components.extractor import DatasetItem, Extractor +from datumaro.util.image import lazy_image + + +class ImageDirExtractor(Extractor): + _SUPPORTED_FORMATS = ['.png', '.jpg'] + + def __init__(self, url): + super().__init__() + + assert osp.isdir(url) + + items = [] + for name in os.listdir(url): + path = osp.join(url, name) + if self._is_image(path): + item_id = osp.splitext(name)[0] + item = DatasetItem(id=item_id, image=lazy_image(path)) + items.append((item.id, item)) + + items = sorted(items, key=lambda e: e[0]) + items = OrderedDict(items) + self._items = items + + self._subsets = None + + def __iter__(self): + for item in self._items.values(): + yield item + + def __len__(self): + return len(self._items) + + def subsets(self): + return self._subsets + + def get(self, item_id, subset=None, path=None): + if path or subset: + raise KeyError() + return self._items[item_id] + + def _is_image(self, path): + for ext in self._SUPPORTED_FORMATS: + if osp.isfile(path) and path.endswith(ext): + return True + return False diff --git 
a/datumaro/datumaro/components/extractors/voc.py b/datumaro/datumaro/components/extractors/voc.py index f1ad0712..086649f5 100644 --- a/datumaro/datumaro/components/extractors/voc.py +++ b/datumaro/datumaro/components/extractors/voc.py @@ -230,6 +230,8 @@ class VocExtractor(Extractor): if self._task is not VocTask.person_layout: break + if bbox is None: + continue item_annotations.append(BboxObject( *bbox, label=part_label_id, group=obj_id)) @@ -247,16 +249,16 @@ class VocExtractor(Extractor): @staticmethod def _parse_bbox(object_elem): - try: - bbox_elem = object_elem.find('bndbox') - xmin = int(bbox_elem.find('xmin').text) - xmax = int(bbox_elem.find('xmax').text) - ymin = int(bbox_elem.find('ymin').text) - ymax = int(bbox_elem.find('ymax').text) - return [xmin, ymin, xmax - xmin, ymax - ymin] - except Exception: + bbox_elem = object_elem.find('bndbox') + if bbox_elem is None: return None + xmin = float(bbox_elem.find('xmin').text) + xmax = float(bbox_elem.find('xmax').text) + ymin = float(bbox_elem.find('ymin').text) + ymax = float(bbox_elem.find('ymax').text) + return [xmin, ymin, xmax - xmin, ymax - ymin] + class VocClassificationExtractor(VocExtractor): def __init__(self, path): super().__init__(path, task=VocTask.classification) diff --git a/datumaro/datumaro/components/formats/ms_coco.py b/datumaro/datumaro/components/formats/coco.py similarity index 100% rename from datumaro/datumaro/components/formats/ms_coco.py rename to datumaro/datumaro/components/formats/coco.py diff --git a/datumaro/datumaro/components/importers/__init__.py b/datumaro/datumaro/components/importers/__init__.py index 7c952d2c..cc009dbf 100644 --- a/datumaro/datumaro/components/importers/__init__.py +++ b/datumaro/datumaro/components/importers/__init__.py @@ -4,22 +4,18 @@ # SPDX-License-Identifier: MIT from datumaro.components.importers.datumaro import DatumaroImporter -from datumaro.components.importers.ms_coco import CocoImporter - -from datumaro.components.importers.voc import ( - 
VocImporter, - VocResultsImporter, -) - +from datumaro.components.importers.coco import CocoImporter +from datumaro.components.importers.voc import VocImporter, VocResultsImporter from datumaro.components.importers.tfrecord import DetectionApiImporter from datumaro.components.importers.yolo import YoloImporter from datumaro.components.importers.cvat import CvatImporter +from datumaro.components.importers.image_dir import ImageDirImporter items = [ ('datumaro', DatumaroImporter), - ('ms_coco', CocoImporter), + ('coco', CocoImporter), ('voc', VocImporter), ('voc_results', VocResultsImporter), @@ -29,4 +25,6 @@ items = [ ('tf_detection_api', DetectionApiImporter), ('cvat', CvatImporter), + + ('image_dir', ImageDirImporter), ] \ No newline at end of file diff --git a/datumaro/datumaro/components/importers/ms_coco.py b/datumaro/datumaro/components/importers/coco.py similarity index 96% rename from datumaro/datumaro/components/importers/ms_coco.py rename to datumaro/datumaro/components/importers/coco.py index cb0fb838..9e3d38e6 100644 --- a/datumaro/datumaro/components/importers/ms_coco.py +++ b/datumaro/datumaro/components/importers/coco.py @@ -8,7 +8,7 @@ from glob import glob import logging as log import os.path as osp -from datumaro.components.formats.ms_coco import CocoTask, CocoPath +from datumaro.components.formats.coco import CocoTask, CocoPath class CocoImporter: diff --git a/datumaro/datumaro/components/importers/image_dir.py b/datumaro/datumaro/components/importers/image_dir.py new file mode 100644 index 00000000..ef2cdd43 --- /dev/null +++ b/datumaro/datumaro/components/importers/image_dir.py @@ -0,0 +1,26 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp + + +class ImageDirImporter: + EXTRACTOR_NAME = 'image_dir' + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + if not osp.isdir(path): + raise Exception("Can't find a 
directory at '%s'" % path) + + source_name = osp.basename(osp.normpath(path)) + project.add_source(source_name, { + 'url': source_name, + 'format': self.EXTRACTOR_NAME, + 'options': dict(extra_params), + }) + + return project diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index a1e9645d..34acf41f 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -105,15 +105,17 @@ class GitWrapper: def _git_dir(base_path): return osp.join(base_path, '.git') - def init(self, path): - spawn = not osp.isdir(GitWrapper._git_dir(path)) - self.repo = git.Repo.init(path=path) + @classmethod + def spawn(cls, path): + spawn = not osp.isdir(cls._git_dir(path)) + repo = git.Repo.init(path=path) if spawn: author = git.Actor("Nobody", "nobody@example.com") - self.repo.index.commit('Initial commit', author=author) - return self.repo + repo.index.commit('Initial commit', author=author) + return repo - def get_repo(self): + def init(self, path): + self.repo = self.spawn(path) return self.repo def is_initialized(self): @@ -316,7 +318,9 @@ class Dataset(Extractor): categories.update(source.categories()) for source in sources: for cat_type, source_cat in source.categories().items(): - assert categories[cat_type] == source_cat + if not categories[cat_type] == source_cat: + raise NotImplementedError( + "Merging different categories is not implemented yet") dataset = Dataset(categories=categories) # merge items @@ -395,11 +399,12 @@ class Dataset(Extractor): return item - def extract(self, filter_expr, filter_annotations=False, **kwargs): + def extract(self, filter_expr, filter_annotations=False, remove_empty=False): if filter_annotations: - return self.transform(XPathAnnotationsFilter, filter_expr, **kwargs) + return self.transform(XPathAnnotationsFilter, filter_expr, + remove_empty) else: - return self.transform(XPathDatasetFilter, filter_expr, **kwargs) + return self.transform(XPathDatasetFilter, 
filter_expr) def update(self, items): for item in items: @@ -468,7 +473,9 @@ class ProjectDataset(Dataset): categories.update(source.categories()) for source in self._sources.values(): for cat_type, source_cat in source.categories().items(): - assert categories[cat_type] == source_cat + if not categories[cat_type] == source_cat: + raise NotImplementedError( + "Merging different categories is not implemented yet") if own_source is not None and len(own_source) != 0: categories.update(own_source.categories()) self._categories = categories @@ -651,17 +658,18 @@ class ProjectDataset(Dataset): launcher = self._project.make_executable_model(model_name) self.transform_project(InferenceWrapper, launcher, save_dir=save_dir) - def export_project(self, save_dir, output_format, - filter_expr=None, filter_annotations=False, **converter_kwargs): + def export_project(self, save_dir, converter, + filter_expr=None, filter_annotations=False, remove_empty=False): # NOTE: probably this function should be in the ViewModel layer save_dir = osp.abspath(save_dir) os.makedirs(save_dir, exist_ok=True) dataset = self if filter_expr: - dataset = dataset.extract(filter_expr, filter_annotations) + dataset = dataset.extract(filter_expr, + filter_annotations=filter_annotations, + remove_empty=remove_empty) - converter = self.env.make_converter(output_format, **converter_kwargs) converter(dataset, save_dir) def extract_project(self, filter_expr, filter_annotations=False, diff --git a/datumaro/datumaro/util/test_utils.py b/datumaro/datumaro/util/test_utils.py index 9219f5cf..e855fad0 100644 --- a/datumaro/datumaro/util/test_utils.py +++ b/datumaro/datumaro/util/test_utils.py @@ -7,6 +7,7 @@ import inspect import os import os.path as osp import shutil +import tempfile def current_function_name(depth=1): @@ -32,8 +33,22 @@ class FileRemover: class TestDir(FileRemover): def __init__(self, path=None, ignore_errors=False): if path is None: - path = osp.abspath('temp_%s' % current_function_name(2)) - - 
os.makedirs(path, exist_ok=ignore_errors) - - super().__init__(path, is_dir=True, ignore_errors=ignore_errors) \ No newline at end of file + path = osp.abspath('temp_%s-' % current_function_name(2)) + path = tempfile.mkdtemp(dir=os.getcwd(), prefix=path) + else: + os.makedirs(path, exist_ok=ignore_errors) + + super().__init__(path, is_dir=True, ignore_errors=ignore_errors) + +def ann_to_str(ann): + return vars(ann) + +def item_to_str(item): + return '\n'.join( + [ + '%s' % vars(item) + ] + [ + 'ann[%s]: %s' % (i, ann_to_str(a)) + for i, a in enumerate(item.annotations) + ] + ) \ No newline at end of file diff --git a/datumaro/docs/cli_design.mm b/datumaro/docs/cli_design.mm index 4c7b188c..0ff17cb2 100644 --- a/datumaro/docs/cli_design.mm +++ b/datumaro/docs/cli_design.mm @@ -2,146 +2,64 @@ - - + - - + - - + - - + - - + - - + - - + - - - - - - - + - - - - - - - - - - - - - + + - - + - - - - - - - - + - - + - - - + - - - - - - - - - - - - + - - - + + - - + - - - - - - + + - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - diff --git a/datumaro/docs/design.md b/datumaro/docs/design.md index 69d4d198..7d89e8eb 100644 --- a/datumaro/docs/design.md +++ b/datumaro/docs/design.md @@ -5,7 +5,6 @@ ## Table of contents - [Concept](#concept) -- [Design](#design) - [RC 1 vision](#rc-1-vision) ## Concept @@ -70,53 +69,6 @@ Datumaro is: - guidance for key frame selection for tracking ([paper](https://arxiv.org/abs/1903.11779)) Use case: more effective annotation, better predictions -## Design - -### Command-line - -Use Docker as an example. Basically, the interface is partitioned -on contexts and shortcuts. Contexts are semantically grouped commands, -related to a single topic or target. Shortcuts are handy shorter -alternatives for the most used commands and also special commands, -which are hard to be put into specific context. 
- -![cli-design-image](images/cli_design.png) - -- [FreeMind tool link](http://freemind.sourceforge.net/wiki/index.php/Main_Page) - -### High-level architecture - -- Using MVVM UI pattern - -![mvvm-image](images/mvvm.png) - -### Datumaro project and environment structure - - -``` -├── [datumaro module] -└── [project folder] - ├── .datumaro/ - │   ├── config.yml - │   ├── .git/ - │   ├── importers/ - │   │   ├── custom_format_importer1.py - │   │   └── ... - │   ├── statistics/ - │   │   ├── custom_statistic1.py - │   │   └── ... - │   ├── visualizers/ - │   │ ├── custom_visualizer1.py - │   │ └── ... - │   └── extractors/ - │   ├── custom_extractor1.py - │   └── ... - └── sources/ - ├── source1 - └── ... -``` - - ## RC 1 vision In the first version Datumaro should be a project manager for CVAT. @@ -139,18 +91,20 @@ can be downloaded by user to be operated on with Datumaro CLI. ### Interfaces - [x] Python API for user code - - [ ] Installation as a package + - [x] Installation as a package - [x] A command-line tool for dataset manipulations ### Features -- Dataset format support (reading, exporting) +- Dataset format support (reading, writing) - [x] Own format + - [x] CVAT - [x] COCO - [x] PASCAL VOC + - [x] YOLO + - [x] TF Detection API - [ ] Cityscapes - [ ] ImageNet - - [ ] CVAT - Dataset visualization (`show`) - [ ] Ability to visualize a dataset @@ -199,6 +153,7 @@ can be downloaded by user to be operated on with Datumaro CLI. 
- export the task - convert to a training format - train a DL model + - [x] Use case "annotate - reannotate problematic images - merge" - [ ] Use case "annotate and estimate quality" - create a task - annotate diff --git a/datumaro/docs/images/cli_design.png b/datumaro/docs/images/cli_design.png index 702728c4..f83b1430 100644 Binary files a/datumaro/docs/images/cli_design.png and b/datumaro/docs/images/cli_design.png differ diff --git a/datumaro/docs/quickstart.md b/datumaro/docs/quickstart.md deleted file mode 100644 index d5fb98a6..00000000 --- a/datumaro/docs/quickstart.md +++ /dev/null @@ -1,325 +0,0 @@ -# Quick start guide - -## Installation - -### Prerequisites - -- Python (3.5+) -- OpenVINO (optional) - -### Installation steps - -Download the project to any directory. - -Set up a virtual environment: - -``` bash -python -m pip install virtualenv -python -m virtualenv venv -. venv/bin/activate -while read -r p; do pip install $p; done < requirements.txt -``` - -## Usage - -The directory containing the project should be in the -`PYTHONPATH` environment variable. The other way is to invoke -commands from that directory. - -As a python module: - -``` bash -python -m datumaro --help -``` - -As a standalone python script: - -``` bash -python datum.py --help -``` - -As a python library: - -``` python -import datumaro -``` - -## Workflow - -> **Note**: command invocation **syntax is subject to change, refer to --help output** - -The key object is the project. It can be created or imported with -`project create` and `project import` commands. The project is a combination of -dataset and environment. - -If you want to interact with models, you should add them to project first. 
- -Implemented commands ([CLI design doc](images/cli_design.png)): -- project create -- project import -- project diff -- project transform -- source add -- explain - -### Create a project - -Usage: - -``` bash -python datum.py project create --help - -python datum.py project create \ - -d -``` - -Example: - -``` bash -python datum.py project create -d /home/my_dataset -``` - -### Import a project - -This command creates a project from an existing dataset. Supported formats: -- MS COCO -- Custom formats via custom `importers` and `extractors` - -Usage: - -``` bash -python -m datumaro project import --help - -python -m datumaro project import \ - \ - -d \ - -t -``` - -Example: - -``` bash -python -m datumaro project import \ - /home/coco_dir \ - -d /home/project_dir \ - -t ms_coco -``` - -An _MS COCO_-like dataset should have the following directory structure: - - -``` -COCO/ -├── annotations/ -│   ├── instances_val2017.json -│   ├── instances_train2017.json -├── images/ -│   ├── val2017 -│   ├── train2017 -``` - - -Everything after the last `_` is considered as a subset name. - -### Register a model - -Supported models: -- OpenVINO -- Custom models via custom `launchers` - -Usage: - -``` bash -python -m datumaro model add --help -``` - -Example: register OpenVINO model - -A model consists of a graph description and weights. There is also a script -used to convert model outputs to internal data structures. - -``` bash -python -m datumaro model add \ - openvino \ - -d -w -i -``` - -Interpretation script for an OpenVINO detection model (`convert.py`): - -``` python -from datumaro.components.extractor import * - -max_det = 10 -conf_thresh = 0.1 - -def process_outputs(inputs, outputs): - # inputs = model input, array or images, shape = (N, C, H, W) - # outputs = model output, shape = (N, 1, K, 7) - # results = conversion result, [ [ Annotation, ... ], ... 
] - results = [] - for input, output in zip(inputs, outputs): - input_height, input_width = input.shape[:2] - detections = output[0] - image_results = [] - for i, det in enumerate(detections): - label = int(det[1]) - conf = det[2] - if conf <= conf_thresh: - continue - - x = max(int(det[3] * input_width), 0) - y = max(int(det[4] * input_height), 0) - w = min(int(det[5] * input_width - x), input_width) - h = min(int(det[6] * input_height - y), input_height) - image_results.append(BboxObject(x, y, w, h, - label=label, attributes={'score': conf} )) - - results.append(image_results[:max_det]) - - return results - -def get_categories(): - # Optionally, provide output categories - label map etc. - # Example: - label_categories = LabelCategories() - label_categories.add('person') - label_categories.add('car') - return { AnnotationType.label: label_categories } -``` - -### Run a model inference - -This command сreates a new project from the current project. The new -one annotations are the model outputs. - -Usage: - -``` bash -python -m datumaro project transform --help - -python -m datumaro project transform \ - -m \ - -d -``` - -Example: - -``` bash -python -m datumaro project import <...> -python -m datumaro model add mymodel <...> -python -m datumaro project transform -m mymodel -d ../mymodel_inference -``` - -### Compare datasets - -The command compares two datasets and saves the results in the -specified directory. The current project is considered to be -"ground truth". 
- -``` bash -python -m datumaro project diff --help - -python -m datumaro project diff -d -``` - -Example: compare a dataset with model inference - -``` bash -python -m datumaro project import <...> -python -m datumaro model add mymodel <...> -python -m datumaro project transform <...> -d ../inference -python -m datumaro project diff ../inference -d ../diff -``` - -### Run inference explanation - -Usage: - -``` bash -python -m datumaro explain --help - -python -m datumaro explain \ - -m \ - -d \ - -t \ - \ - -``` - -Example: run inference explanation on a single image with visualization - -``` bash -python -m datumaro project create <...> -python -m datumaro model add mymodel <...> -python -m datumaro explain \ - -m mymodel \ - -t 'image.png' \ - rise \ - -s 1000 --progressive -``` - -### Extract data subset based on filter - -This command allows to create a subprject form a project, which -would include only items satisfying some condition. XPath is used as a query -format. - -Usage: - -``` bash -python -m datumaro project extract --help - -python -m datumaro project extract \ - -p \ - -d \ - -f '' -``` - -Example: - -``` bash -python -m datumaro project extract \ - -p ../test_project \ - -d ../test_project-extract \ - -f '/item[image/width < image/height]' -``` - -Item representation: - -``` xml - - 290768 - minival2014 - - 612 - 612 - 3 - - - 80154 - bbox - 39 - 264.59 - 150.25 - 11.199999999999989 - 42.31 - 473.87199999999956 - - - 669839 - bbox - 41 - 163.58 - 191.75 - 76.98999999999998 - 73.63 - 5668.773699999998 - - ... 
- -``` - -## Links -- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) -- [How to convert model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html) -- [Model convert script for this model](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23) diff --git a/datumaro/docs/user_manual.md b/datumaro/docs/user_manual.md new file mode 100644 index 00000000..0b61c11a --- /dev/null +++ b/datumaro/docs/user_manual.md @@ -0,0 +1,563 @@ +# Quick start guide + +## Contents + +- [Installation](#installation) +- [Interfaces](#interfaces) +- [Supported dataset formats and annotations](#formats-support) +- [Command line workflow](#command-line-workflow) + - [Create a project](#create-project) + - [Add and remove data](#add-and-remove-data) + - [Import a project](#import-project) + - [Extract a subproject](#extract-subproject) + - [Merge projects](#merge-project) + - [Export a project](#export-project) + - [Compare projects](#compare-projects) + - [Get project info](#get-project-info) + - [Register a model](#register-model) + - [Run inference](#run-inference) + - [Run inference explanation](#explain-inference) +- [Links](#links) + +## Installation + +### Prerequisites + +- Python (3.5+) +- OpenVINO (optional) + +### Installation steps + +Optionally, set up a virtual environment: + +``` bash +python -m pip install virtualenv +python -m virtualenv venv +. venv/bin/activate +``` + +Install Datumaro: +``` bash +pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro' +``` + +> You can change the installation branch with `.../cvat@#egg...` +> Also note `--force-reinstall` parameter in this case. 
+ +## Interfaces + +As a standalone tool: + +``` bash +datum --help +``` + +As a python module: +> The directory containing Datumaro should be in the `PYTHONPATH` +> environment variable or `cvat/datumaro/` should be the current directory. + +``` bash +python -m datumaro --help +python datumaro/ --help +python datum.py --help +``` + +As a python library: + +``` python +import datumaro +``` + +## Formats support + +List of supported formats: +- COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*) + - [Format specification](http://cocodataset.org/#format-data) + - `labels` are our extension - like `instances` with only `category_id` +- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`) + - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) +- YOLO (`bboxes`) + - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) +- TF Detection API (`bboxes`, `masks`) + - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) +- CVAT + - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) + +List of supported annotation types: +- Labels +- Bounding boxes +- Polygons +- Polylines +- (Key-)Points +- Captions +- Masks + +## Command line workflow + +> **Note**: command invocation syntax is subject to change, +> **always refer to command --help output** + +The key object is the Project. The Project is a combination of +a Project's own dataset, a number of external data sources and an environment. +An empty Project can be created by `project create` command, +an existing dataset can be imported with `project import` command. 
+A typical way to obtain projects is to export tasks in CVAT UI.
+
+Available CLI commands:
+![CLI design doc](images/cli_design.png)
+
+If you want to interact with models, you need to add them to the project first.
+
+### Import project
+
+This command creates a Project from an existing dataset.
+
+Supported formats are listed in the command help.
+In Datumaro dataset formats are supported by Extractors and Importers.
+An Extractor produces a list of dataset items corresponding
+to the dataset. An Importer creates a Project from the
+data source location. It is possible to add a custom Extractor and Importer.
+To do this, you need to put Extractor and Importer implementation scripts in
+`/.datumaro/extractors` and `/.datumaro/importers`.
+
+Usage:
+
+``` bash
+datum project import --help
+
+datum project import \
+    -i \
+    -o \
+    -f
+```
+
+Example: create a project from a COCO-like dataset
+
+``` bash
+datum project import \
+    -i /home/coco_dir \
+    -o /home/project_dir \
+    -f coco
+```
+
+An _MS COCO_-like dataset should have the following directory structure:
+
+
+```
+COCO/
+├── annotations/
+│   ├── instances_val2017.json
+│   ├── instances_train2017.json
+├── images/
+│   ├── val2017
+│   ├── train2017
+```
+
+
+Everything after the last `_` is considered a subset name in the COCO format.
+
+### Create project
+
+The command creates an empty project. Once a Project is created, there are
+a few options to interact with it.
+
+Usage:
+
+``` bash
+datum project create --help
+
+datum project create \
+    -o
+```
+
+Example: create an empty project `my_dataset`
+
+``` bash
+datum project create -o my_dataset/
+```
+
+### Add and remove data
+
+A Project can be attached to a number of external Data Sources. Each Source
+describes a way to produce dataset items. A Project combines dataset items from
+all the sources and its own dataset into one composite dataset. You can manage
+project sources by commands in the `source` command line context.
+
+Datasets come in a wide variety of formats. Each dataset
+format defines its own data structure and rules on how to
+interpret the data. For example, the following data structure
+is used in COCO format:
+
+```
+/dataset/
+- /images/.jpg
+- /annotations/
+```
+
+
+In Datumaro dataset formats are supported by Extractors.
+An Extractor produces a list of dataset items corresponding
+to the dataset. It is possible to add a custom Extractor.
+To do this, you need to put an Extractor
+definition script in `/.datumaro/extractors`.
+
+Usage:
+
+``` bash
+datum source add --help
+datum source remove --help
+
+datum source add \
+    path \
+    -p \
+    -n
+
+datum source remove \
+    -p \
+    -n
+```
+
+Example: create a project from a bunch of different annotations and images,
+and generate a TFRecord for TF Detection API for model training
+
+``` bash
+datum project create
+# 'default' is the name of the subset below
+datum source add path -f coco_instances
+datum source add path -f cvat
+datum source add path -f voc_detection
+datum source add path -f datumaro
+datum source add path -f image_dir
+datum project export -f tf_detection_api
+```
+
+### Extract subproject
+
+This command allows you to create a sub-Project from a Project. The new project
+includes only items satisfying some condition. [XPath](https://devhints.io/xpath)
+is used as query format.
+
+There are several filtering modes available ('-m/--mode' parameter).
+Supported modes:
+- 'i', 'items'
+- 'a', 'annotations'
+- 'i+a', 'a+i', 'items+annotations', 'annotations+items'
+
+When filtering annotations, use the 'items+annotations'
+mode to indicate that annotation-less dataset items should be
+removed. To select an annotation, write an XPath that
+returns 'annotation' elements (see examples).
+ +Usage: + +``` bash +datum project extract --help + +datum project extract \ + -p \ + -o \ + -e '' +``` + +Example: extract a dataset with only images which width < height + +``` bash +datum project extract \ + -p test_project \ + -o test_project-extract \ + -e '/item[image/width < image/height]' +``` + +Example: extract a dataset with only large annotations of class `cat` and any non-`persons` + +``` bash +datum project extract \ + -p test_project \ + -o test_project-extract \ + --mode annotations -e '/item/annotation[(label="cat" and area > 999.5) or label!="person"]' +``` + +Example: extract a dataset with only occluded annotations, remove empty images + +``` bash +datum project extract \ + -p test_project \ + -o test_project-extract \ + -m i+a -e '/item/annotation[occluded="True"]' +``` + +Item representations are available with `--dry-run` parameter: + +``` xml + + 290768 + minival2014 + + 612 + 612 + 3 + + + 80154 + bbox + 39 + 264.59 + 150.25 + 11.199999999999989 + 42.31 + 473.87199999999956 + + + 669839 + bbox + 41 + 163.58 + 191.75 + 76.98999999999998 + 73.63 + 5668.773699999998 + + ... + +``` + +### Merge projects + +This command combines multiple Projects into one. + +Usage: + +``` bash +datum project merge --help + +datum project merge \ + -p \ + -o \ + +``` + +Example: update annotations in the `first_project` with annotations +from the `second_project` and save the result as `merged_project` + +``` bash +datum project merge \ + -p first_project \ + -o merged_project \ + second_project +``` + +### Export project + +This command exports a Project in some format. + +Supported formats are listed in the command help. +In Datumaro dataset formats are supported by Converters. +A Converter produces a dataset of a specific format +from dataset items. It is possible to add a custom Converter. +To do this, you need to put a Converter +definition script to /.datumaro/converters. 
+ +Usage: + +``` bash +datum project export --help + +datum project export \ + -p \ + -o \ + -f \ + [-- ] +``` + +Example: save project as VOC-like dataset, include images + +``` bash +datum project export \ + -p test_project \ + -o test_project-export \ + -f voc \ + -- --save-images +``` + +### Get project info + +This command outputs project status information. + +Usage: + +``` bash +datum project info --help + +datum project info \ + -p +``` + +Example: + +``` bash +datum project info -p /test_project + +Project: + name: test_project2 + location: /test_project +Sources: + source 'instances_minival2014': + format: coco_instances + url: /coco_like/annotations/instances_minival2014.json +Dataset: + length: 5000 + categories: label + label: + count: 80 + labels: person, bicycle, car, motorcycle (and 76 more) + subsets: minival2014 + subset 'minival2014': + length: 5000 + categories: label + label: + count: 80 + labels: person, bicycle, car, motorcycle (and 76 more) +``` + +### Register model + +Supported models: +- OpenVINO +- Custom models via custom `launchers` + +Usage: + +``` bash +datum model add --help +``` + +Example: register an OpenVINO model + +A model consists of a graph description and weights. There is also a script +used to convert model outputs to internal data structures. + +``` bash +datum project create +datum model add \ + -n openvino \ + -d -w -i +``` + +Interpretation script for an OpenVINO detection model (`convert.py`): + +``` python +from datumaro.components.extractor import * + +max_det = 10 +conf_thresh = 0.1 + +def process_outputs(inputs, outputs): + # inputs = model input, array or images, shape = (N, C, H, W) + # outputs = model output, shape = (N, 1, K, 7) + # results = conversion result, [ [ Annotation, ... ], ... 
] + results = [] + for input, output in zip(inputs, outputs): + input_height, input_width = input.shape[:2] + detections = output[0] + image_results = [] + for i, det in enumerate(detections): + label = int(det[1]) + conf = det[2] + if conf <= conf_thresh: + continue + + x = max(int(det[3] * input_width), 0) + y = max(int(det[4] * input_height), 0) + w = min(int(det[5] * input_width - x), input_width) + h = min(int(det[6] * input_height - y), input_height) + image_results.append(BboxObject(x, y, w, h, + label=label, attributes={'score': conf} )) + + results.append(image_results[:max_det]) + + return results + +def get_categories(): + # Optionally, provide output categories - label map etc. + # Example: + label_categories = LabelCategories() + label_categories.add('person') + label_categories.add('car') + return { AnnotationType.label: label_categories } +``` + +### Run model + +This command applies model to dataset images and produces a new project. + +Usage: + +``` bash +datum model run --help + +datum model run \ + -p \ + -m \ + -o +``` + +Example: launch inference on a dataset + +``` bash +datum project import <...> +datum model add mymodel <...> +datum model run -m mymodel -o inference +``` + +### Compare projects + +The command compares two datasets and saves the results in the +specified directory. The current project is considered to be +"ground truth". 
+ +``` bash +datum project diff --help + +datum project diff -o +``` + +Example: compare a dataset with model inference + +``` bash +datum project import <...> +datum model add mymodel <...> +datum project transform <...> -o inference +datum project diff inference -o diff +``` + +### Explain inference + +Usage: + +``` bash +datum explain --help + +datum explain \ + -m \ + -o \ + -t \ + \ + +``` + +Example: run inference explanation on a single image with visualization + +``` bash +datum project create <...> +datum model add mymodel <...> +datum explain \ + -m mymodel \ + -t 'image.png' \ + rise \ + -s 1000 --progressive +``` + +## Links +- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) +- [How to convert model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html) +- [Model conversion script example](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23) diff --git a/datumaro/setup.py b/datumaro/setup.py index 6f3e02d7..7880e644 100644 --- a/datumaro/setup.py +++ b/datumaro/setup.py @@ -62,7 +62,7 @@ setuptools.setup( ], entry_points={ 'console_scripts': [ - 'datum=datumaro:main', + 'datum=datumaro.cli.__main__:main', ], }, ) diff --git a/datumaro/test.py b/datumaro/test.py deleted file mode 100644 index 184bbff5..00000000 --- a/datumaro/test.py +++ /dev/null @@ -1,5 +0,0 @@ -import unittest - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py index 1631434e..e32303b6 100644 --- a/datumaro/tests/test_coco_format.py +++ b/datumaro/tests/test_coco_format.py @@ -12,7 +12,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem, BboxObject, CaptionObject, LabelCategories, PointsCategories ) -from 
datumaro.components.converters.ms_coco import ( +from datumaro.components.converters.coco import ( CocoConverter, CocoImageInfoConverter, CocoCaptionsConverter, @@ -112,7 +112,7 @@ class CocoImporterTest(TestCase): def test_can_import(self): with TestDir() as temp_dir: self.COCO_dataset_generate(temp_dir.path) - project = Project.import_from(temp_dir.path, 'ms_coco') + project = Project.import_from(temp_dir.path, 'coco') dataset = project.make_dataset() self.assertListEqual(['val'], sorted(dataset.subsets())) @@ -142,7 +142,7 @@ class CocoConverterTest(TestCase): if not importer_params: importer_params = {} - project = Project.import_from(test_dir.path, 'ms_coco', + project = Project.import_from(test_dir.path, 'coco', **importer_params) parsed_dataset = project.make_dataset() diff --git a/datumaro/tests/test_cvat_format.py b/datumaro/tests/test_cvat_format.py index 1cbdb743..8a4c95ad 100644 --- a/datumaro/tests/test_cvat_format.py +++ b/datumaro/tests/test_cvat_format.py @@ -14,7 +14,7 @@ from datumaro.components.converters.cvat import CvatConverter from datumaro.components.project import Project import datumaro.components.formats.cvat as Cvat from datumaro.util.image import save_image -from datumaro.util.test_utils import TestDir +from datumaro.util.test_utils import TestDir, item_to_str class CvatExtractorTest(TestCase): @@ -108,7 +108,7 @@ class CvatExtractorTest(TestCase): BboxObject(0, 2, 4, 2, label=0, attributes={ 'occluded': True, 'z_order': 1, - 'a1': 'true', 'a2': 'v3' + 'a1': True, 'a2': 'v3' }), PolyLineObject([1, 2, 3, 4, 5, 6, 7, 8], attributes={'occluded': False, 'z_order': 0}), @@ -175,7 +175,8 @@ class CvatConverterTest(TestCase): self.assertEqual(len(source_subset), len(parsed_subset)) for idx, (item_a, item_b) in enumerate( zip(source_subset, parsed_subset)): - self.assertEqual(item_a, item_b, str(idx)) + self.assertEqual(item_a, item_b, '%s:\n%s\nvs.\n%s\n' % \ + (idx, item_to_str(item_a), item_to_str(item_b))) def test_can_save_and_load(self): 
label_categories = LabelCategories() @@ -209,12 +210,12 @@ class CvatConverterTest(TestCase): ] ), - DatasetItem(id=0, subset='s2', image=np.zeros((5, 10, 3)), + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), annotations=[ PolygonObject([0, 0, 4, 0, 4, 4], label=3, group=4, attributes={ 'z_order': 1, 'occluded': False }), - PolyLineObject([5, 0, 9, 0, 5, 5]), # will be skipped + PolyLineObject([5, 0, 9, 0, 5, 5]), # will be skipped as no label ] ), ]) @@ -236,7 +237,7 @@ class CvatConverterTest(TestCase): PointsObject([1, 1, 3, 2, 2, 3], label=2, attributes={ 'z_order': 0, 'occluded': False, - 'a1': 'x', 'a2': '42' }), + 'a1': 'x', 'a2': 42 }), ] ), DatasetItem(id=1, subset='s1', @@ -250,7 +251,7 @@ class CvatConverterTest(TestCase): ] ), - DatasetItem(id=0, subset='s2', image=np.zeros((5, 10, 3)), + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), annotations=[ PolygonObject([0, 0, 4, 0, 4, 4], label=3, group=4, diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py index 3a83c424..77b1b1c0 100644 --- a/datumaro/tests/test_datumaro_format.py +++ b/datumaro/tests/test_datumaro_format.py @@ -9,7 +9,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem, LabelCategories, MaskCategories, PointsCategories ) from datumaro.components.converters.datumaro import DatumaroConverter -from datumaro.util.test_utils import TestDir +from datumaro.util.test_utils import TestDir, item_to_str from datumaro.util.mask_tools import generate_colormap @@ -26,7 +26,7 @@ class DatumaroConverterTest(TestCase): 'y': '2', }), BboxObject(1, 2, 3, 4, label=4, id=4, attributes={ - 'score': 10.0, + 'score': 1.0, }), BboxObject(5, 6, 7, 8, id=5, group=5), PointsObject([1, 2, 2, 0, 1, 1], label=0, id=5), @@ -92,7 +92,8 @@ class DatumaroConverterTest(TestCase): self.assertEqual(len(source_subset), len(parsed_subset)) for idx, (item_a, item_b) in enumerate( zip(source_subset, parsed_subset)): - self.assertEqual(item_a, item_b, 
str(idx)) + self.assertEqual(item_a, item_b, '%s:\n%s\nvs.\n%s\n' % \ + (idx, item_to_str(item_a), item_to_str(item_b))) self.assertEqual( source_dataset.categories(), diff --git a/datumaro/tests/test_image.py b/datumaro/tests/test_image.py index 424fd9c8..1e1ed5c7 100644 --- a/datumaro/tests/test_image.py +++ b/datumaro/tests/test_image.py @@ -31,7 +31,7 @@ class ImageTest(TestCase): image_module._IMAGE_BACKEND = load_backend dst_image = image_module.load_image(path) - self.assertTrue(np.all(src_image == dst_image), + self.assertTrue(np.array_equal(src_image, dst_image), 'save: %s, load: %s' % (save_backend, load_backend)) def test_encode_and_decode_backends(self): @@ -48,5 +48,5 @@ class ImageTest(TestCase): image_module._IMAGE_BACKEND = load_backend dst_image = image_module.decode_image(buffer) - self.assertTrue(np.all(src_image == dst_image), + self.assertTrue(np.array_equal(src_image, dst_image), 'save: %s, load: %s' % (save_backend, load_backend)) \ No newline at end of file diff --git a/datumaro/tests/test_image_dir_format.py b/datumaro/tests/test_image_dir_format.py new file mode 100644 index 00000000..27568d55 --- /dev/null +++ b/datumaro/tests/test_image_dir_format.py @@ -0,0 +1,48 @@ +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.components.project import Project +from datumaro.components.extractor import Extractor, DatasetItem +from datumaro.util.test_utils import TestDir +from datumaro.util.image import save_image + + +class ImageDirFormatTest(TestCase): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, image=np.ones((10, 6, 3))), + DatasetItem(id=2, image=np.ones((5, 4, 3))), + ]) + + def test_can_load(self): + with TestDir() as test_dir: + source_dataset = self.TestExtractor() + + for item in source_dataset: + save_image(osp.join(test_dir.path, '%s.jpg' % item.id), + item.image) + + project = Project.import_from(test_dir.path, 'image_dir') + parsed_dataset = 
project.make_dataset() + + self.assertListEqual( + sorted(source_dataset.subsets()), + sorted(parsed_dataset.subsets()), + ) + + self.assertEqual(len(source_dataset), len(parsed_dataset)) + + for subset_name in source_dataset.subsets(): + source_subset = source_dataset.get_subset(subset_name) + parsed_subset = parsed_dataset.get_subset(subset_name) + self.assertEqual(len(source_subset), len(parsed_subset)) + for idx, (item_a, item_b) in enumerate( + zip(source_subset, parsed_subset)): + self.assertEqual(item_a, item_b, str(idx)) + + self.assertEqual( + source_dataset.categories(), + parsed_dataset.categories()) \ No newline at end of file diff --git a/datumaro/tests/test_project.py b/datumaro/tests/test_project.py index c30a570c..93a2aad4 100644 --- a/datumaro/tests/test_project.py +++ b/datumaro/tests/test_project.py @@ -353,6 +353,7 @@ class DatasetFilterTest(TestCase): BboxObject(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }), BboxObject(5, 6, 7, 8, id=5, group=5), PointsObject([1, 2, 2, 0, 1, 1], label=0, id=5), + MaskObject(id=5, image=np.ones((3, 2))), MaskObject(label=3, id=5, image=np.ones((2, 3))), PolyLineObject([1, 2, 3, 4, 5, 6, 7, 8], id=11), PolygonObject([1, 2, 3, 4, 5, 6, 7, 8]), diff --git a/datumaro/tests/test_voc_format.py b/datumaro/tests/test_voc_format.py index 0c9c8eea..de58ce40 100644 --- a/datumaro/tests/test_voc_format.py +++ b/datumaro/tests/test_voc_format.py @@ -141,9 +141,9 @@ def generate_dummy_voc(path): obj2head_elem = ET.SubElement(obj2_elem, 'part') ET.SubElement(obj2head_elem, 'name').text = VOC.VocBodyPart(1).name obj2headbb_elem = ET.SubElement(obj2head_elem, 'bndbox') - ET.SubElement(obj2headbb_elem, 'xmin').text = '5' + ET.SubElement(obj2headbb_elem, 'xmin').text = '5.5' ET.SubElement(obj2headbb_elem, 'ymin').text = '6' - ET.SubElement(obj2headbb_elem, 'xmax').text = '7' + ET.SubElement(obj2headbb_elem, 'xmax').text = '7.5' ET.SubElement(obj2headbb_elem, 'ymax').text = '8' obj2act_elem = ET.SubElement(obj2_elem, 
'actions') for act in VOC.VocAction: @@ -328,7 +328,7 @@ class VocExtractorTest(TestCase): lambda x: x.type == AnnotationType.bbox and \ get_label(extractor, x.label) == VOC.VocBodyPart(1).name) self.assertTrue(obj2.id == obj2head.group) - self.assertListEqual([5, 6, 2, 2], obj2head.get_bbox()) + self.assertListEqual([5.5, 6, 2, 2], obj2head.get_bbox()) self.assertEqual(2, len(item.annotations)) diff --git a/utils/cli/requirements.txt b/utils/cli/requirements.txt index 55fe4f56..14cc33a6 100644 --- a/utils/cli/requirements.txt +++ b/utils/cli/requirements.txt @@ -1,2 +1,2 @@ -Pillow==6.2.0 -requests==2.20.1 +Pillow>=6.2.0 +requests>=2.20.1