[Datumaro] CLI updates + better documentation (#1057)
parent
095d6d4611
commit
93b3c091f5
@@ -0,0 +1,119 @@

## Table of Contents

- [Installation](#installation)
- [Usage](#usage)
- [Testing](#testing)
- [Design](#design-and-code-structure)

## Installation

### Prerequisites

- Python (3.5+)
- OpenVINO (optional)

``` bash
git clone https://github.com/opencv/cvat
```

Optionally, install a virtual environment:

``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```

Then install all dependencies:

``` bash
# install the packages one by one, in file order
while read -r p; do pip install $p; done < requirements.txt
```

If you're working inside the CVAT environment:

``` bash
. .env/bin/activate
while read -r p; do pip install $p; done < datumaro/requirements.txt
```

## Usage

> The directory containing Datumaro should be in the `PYTHONPATH`
> environment variable or `cvat/datumaro/` should be the current directory.

``` bash
datum --help
python -m datumaro --help
python datumaro/ --help
python datum.py --help
```

``` python
import datumaro
```
## Testing

It is expected that all Datumaro functionality is covered and checked by
unit tests. Tests are placed in the `tests/` directory.

To run the tests, use:

``` bash
python -m unittest discover -s tests
```

If you're working inside the CVAT environment, you can also use:

``` bash
python manage.py test datumaro/
```

## Design and code structure

- [Design document](docs/design.md)
### Command-line

Take [Docker](https://www.docker.com/) as an example. Basically,
the interface is divided into contexts and single commands.
Contexts are semantically grouped commands related to a single
topic or target. Single commands are handy shortcuts for the most
frequently used commands, as well as special commands that are
hard to fit into any specific context.
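For example, project export can be invoked both as a command inside the
`project` context and as a single-command shortcut (both forms come from the
CLI definition in this patch):

``` bash
datum project export -f coco    # full form: 'export' inside the 'project' context
datum export -f coco            # single-command shortcut for the same action
```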

|
||||
|
||||
- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page)
|
||||
|
||||
Model-View-ViewModel (MVVM) UI pattern is used.
|
||||
|
||||

|
||||
|
||||
### Datumaro project and environment structure

<!--lint disable fenced-code-flag-->
```
├── [datumaro module]
└── [project folder]
    ├── .datumaro/
    │   ├── config.yml
    │   ├── .git/
    │   ├── importers/
    │   │   ├── custom_format_importer1.py
    │   │   └── ...
    │   ├── statistics/
    │   │   ├── custom_statistic1.py
    │   │   └── ...
    │   ├── visualizers/
    │   │   ├── custom_visualizer1.py
    │   │   └── ...
    │   └── extractors/
    │       ├── custom_extractor1.py
    │       └── ...
    ├── dataset/
    └── sources/
        ├── source1
        └── ...
```
<!--lint enable fenced-code-flag-->
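The `importers/`, `statistics/`, `visualizers/`, and `extractors/` directories
above hold optional plugin scripts. As a very rough sketch of what such a
script contains (hypothetical class and file names; the actual base-class
interface in `datumaro.components.extractor` may differ):

``` python
# .datumaro/extractors/custom_extractor1.py -- a hypothetical sketch
from datumaro.components.extractor import DatasetItem, Extractor

class CustomExtractor(Extractor):
    def __init__(self, url):
        super().__init__()
        # a real plugin would parse the data found at 'url' here
        self._items = [DatasetItem(id='sample1')]

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)
```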
@@ -1,36 +1,176 @@

# Dataset Framework (Datumaro)

A framework to build, transform, and analyze datasets.

<!--lint disable fenced-code-flag-->
```
CVAT annotations --                 ---> Annotation tool
       ...          \              /
COCO-like dataset -----> Datumaro ---> dataset ------> Model training
       ...          /              \
VOC-like dataset --                 ---> Publication etc.
```
<!--lint enable fenced-code-flag-->

## Contents

- [Documentation](#documentation)
- [Features](#features)
- [Installation](#installation)
- [Usage](#usage)
- [Examples](#examples)
- [Contributing](#contributing)

## Documentation

- [Quick start guide](docs/quickstart.md)
- [User manual](docs/user_manual.md)
- [Design document](docs/design.md)
- [Contributing](CONTRIBUTING.md)

## Features

- Dataset format conversions:
  - COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
    - [Format specification](http://cocodataset.org/#format-data)
    - `labels` is our extension: like `instances`, but with only `category_id`
  - PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
    - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
  - YOLO (`bboxes`)
    - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
  - TF Detection API (`bboxes`, `masks`)
    - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
  - CVAT
    - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- Dataset building operations:
  - Merging multiple datasets into one
  - Dataset filtering with custom conditions, for instance:
    - remove all annotations except polygons of a certain class
    - remove images without a specific class
    - remove occluded annotations from images
    - keep only vertically-oriented images
    - remove small-area bounding boxes from annotations
- Annotation conversions, for instance:
  - polygons to instance masks and vice versa
  - apply a custom colormap for mask annotations
  - remap dataset labels
- Dataset comparison
- Model integration:
  - Inference (OpenVINO and custom models)
  - Explainable AI ([RISE algorithm](https://arxiv.org/abs/1806.07421))

> Check the [design document](docs/design.md) for a full list of features

## Installation

Optionally, create a virtual environment:

``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```

Install Datumaro package:

``` bash
pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
```
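
After installation, a quick check that the CLI is available (assuming the
`datum` entry point ends up on `PATH`):

``` bash
datum --version
datum --help
```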

## Usage

There are several options available:
- [A standalone command-line tool](#standalone-tool)
- [A python module](#python-module)

### Standalone tool

<!--lint disable fenced-code-flag-->
```
        User
          |
          v
+------------------+
|       CVAT       |
+--------v---------+       +------------------+       +--------------+
| Datumaro module  | ----> | Datumaro project | <---> | Datumaro CLI | <--- User
+------------------+       +------------------+       +--------------+
```
<!--lint enable fenced-code-flag-->

``` bash
datum --help
python -m datumaro --help
```

### Python module

Datumaro can be used in custom scripts as a library in the following way:

``` python
from datumaro.components.project import Project # project-related things
import datumaro.components.extractor # annotations and high-level interfaces
# etc.
project = Project.load('directory')
```
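
Continuing the snippet above, a project can be flattened into a single dataset
and iterated; `make_dataset()` is the same call the CLI uses internally (treat
the printed item fields as illustrative):

``` python
from datumaro.components.project import Project

project = Project.load('directory')
dataset = project.make_dataset() # merge all project sources into one dataset

for item in dataset: # dataset items carry an id and a list of annotations
    print(item.id, len(item.annotations))
```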

## Examples

<!--lint disable list-item-indent-->
<!--lint disable list-item-bullet-indent-->

- Convert [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#data) to COCO, keep only images that contain the `cat` class:
  ```bash
  # Download VOC dataset:
  # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
  datum project import --format voc --input-path <path/to/voc>
  datum project export --format coco --filter '/item[annotation/label="cat"]'
  ```

- Convert only non-occluded annotations from a CVAT-annotated project to TFRecord:
  ```bash
  # export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir
  datum project extract --filter '/item/annotation[occluded="False"]' \
    --mode items+anno --output-dir not_occluded
  datum project export --project not_occluded \
    --format tf_detection_api -- --save-images
  ```

- Annotate COCO, extract an image subset, re-annotate it in CVAT, update the old dataset:
  ```bash
  # Download COCO dataset http://cocodataset.org/#download
  # Put images to coco/images/ and annotations to coco/annotations/
  datum project import --format coco --input-path <path/to/coco>
  datum project export --filter '/image[images_I_dont_like]' --format cvat \
    --output-dir reannotation
  # import dataset and images to CVAT, re-annotate
  # export Datumaro project, extract to 'reannotation-upd'
  datum project merge reannotation-upd
  datum project export --format coco
  ```

- Annotate instance polygons in CVAT, export as masks in COCO:
  ```bash
  datum project import --format cvat --input-path <path/to/cvat.xml>
  datum project export --format coco -- --segmentation-mode masks
  ```

- Apply an OpenVINO detection model to some COCO-like dataset,
  then compare annotations with ground truth and visualize in TensorBoard:
  ```bash
  datum project import --format coco --input-path <path/to/coco>
  # create model results interpretation script
  datum model add mymodel openvino \
    --weights model.bin --description model.xml \
    --interpretation-script parse_results.py
  datum model run --model mymodel --output-dir mymodel_inference/
  datum project diff mymodel_inference/ --format tensorboard --output-dir diff
  ```

<!--lint enable list-item-bullet-indent-->
<!--lint enable list-item-indent-->

## Contributing

Feel free to [open an Issue](https://github.com/opencv/cvat/issues/new) if you
think something needs to be changed. You are welcome to participate in
development; instructions are available in our [developer manual](CONTRIBUTING.md).
@@ -0,0 +1,109 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import sys

from . import contexts, commands
from .util import CliException, add_subparser
from ..version import VERSION


_log_levels = {
    'debug': log.DEBUG,
    'info': log.INFO,
    'warning': log.WARNING,
    'error': log.ERROR,
    'critical': log.CRITICAL
}

def loglevel(name):
    return _log_levels[name]

def _make_subcommands_help(commands, help_line_start=0):
    desc = ""
    for command_name, _, command_help in commands:
        # pad the command name so that all help texts start at the same column
        desc += ("  %-" + str(max(0, help_line_start - 2 - 1)) + "s%s\n") % \
            (command_name, command_help)
    return desc

def make_parser():
    parser = argparse.ArgumentParser(prog="datumaro",
        description="Dataset Framework",
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('--version', action='version', version=VERSION)
    parser.add_argument('--loglevel', type=loglevel, default='info',
        help="Logging level (options: %s; default: %s)" % \
            (', '.join(_log_levels.keys()), "%(default)s"))

    known_contexts = [
        ('project', contexts.project, "Actions on projects (datasets)"),
        ('source', contexts.source, "Actions on data sources"),
        ('model', contexts.model, "Actions on models"),
    ]
    known_commands = [
        ('create', commands.create, "Create project"),
        ('add', commands.add, "Add source to project"),
        ('remove', commands.remove, "Remove source from project"),
        ('export', commands.export, "Export project"),
        ('explain', commands.explain, "Run Explainable AI algorithm for model"),
    ]

    # Argparse doesn't support subparser groups:
    # https://stackoverflow.com/questions/32017020/grouping-argparse-subparser-arguments
    help_line_start = max((len(e[0]) for e in known_contexts + known_commands),
        default=0)
    help_line_start = max((2 + help_line_start) // 4 + 1, 6) * 4 # align to tabs
    subcommands_desc = ""
    if known_contexts:
        subcommands_desc += "Contexts:\n"
        subcommands_desc += _make_subcommands_help(known_contexts,
            help_line_start)
    if known_commands:
        if subcommands_desc:
            subcommands_desc += "\n"
        subcommands_desc += "Commands:\n"
        subcommands_desc += _make_subcommands_help(known_commands,
            help_line_start)
    if subcommands_desc:
        subcommands_desc += \
            "\nRun '%s COMMAND --help' for more information on a command." % \
                parser.prog

    subcommands = parser.add_subparsers(title=subcommands_desc,
        description="", help=argparse.SUPPRESS)
    for command_name, command, _ in known_contexts + known_commands:
        add_subparser(subcommands, command_name, command.build_parser)

    return parser

def set_up_logger(args):
    log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
        level=args.loglevel)

def main(args=None):
    parser = make_parser()
    args = parser.parse_args(args)

    set_up_logger(args)

    if 'command' not in args:
        # no subcommand was given
        parser.print_help()
        return 1

    try:
        return args.command(args)
    except CliException as e:
        log.error(e)
        return 1
    except Exception as e:
        log.error(e)
        raise


if __name__ == '__main__':
    sys.exit(main())
@@ -1,21 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse

from . import source as source_module


def build_parser(parser=argparse.ArgumentParser()):
    source_module.build_add_parser(parser). \
        set_defaults(command=source_module.add_command)

    return parser

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)

    return args.command(args)
@@ -0,0 +1,6 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

from . import add, create, explain, export, remove
@@ -0,0 +1,8 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

# pylint: disable=unused-import

from ..contexts.source import build_add_parser as build_parser
@@ -0,0 +1,8 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

# pylint: disable=unused-import

from ..contexts.project import build_create_parser as build_parser
@@ -0,0 +1,8 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

# pylint: disable=unused-import

from ..contexts.project import build_export_parser as build_parser
@@ -0,0 +1,8 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

# pylint: disable=unused-import

from ..contexts.source import build_remove_parser as build_parser
@@ -0,0 +1,6 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

from . import project, source, model, item
@@ -0,0 +1,36 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse

from ...util import add_subparser


def build_export_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor()
    return parser

def build_stats_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor()
    return parser

def build_diff_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor()
    return parser

def build_edit_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor()
    return parser

def build_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor()

    subparsers = parser.add_subparsers()
    add_subparser(subparsers, 'export', build_export_parser)
    add_subparser(subparsers, 'stats', build_stats_parser)
    add_subparser(subparsers, 'diff', build_diff_parser)
    add_subparser(subparsers, 'edit', build_edit_parser)

    return parser
@@ -0,0 +1,647 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from enum import Enum
import logging as log
import os
import os.path as osp
import shutil

from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from datumaro.components.extractor import AnnotationType
from .diff import DiffVisualizer
from ...util import add_subparser, CliException, MultilineFormatter
from ...util.project import make_project_path, load_project, \
    generate_next_dir_name


def build_create_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Create empty project",
        description="""
            Create a new empty project.|n
            |n
            Examples:|n
            - Create a project in the current directory:|n
            |s|screate -n myproject|n
            |n
            - Create a project in other directory:|n
            |s|screate -o path/I/like/
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
        help="Save directory for the new project (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.set_defaults(command=create_command)

    return parser

def create_command(args):
    project_dir = osp.abspath(args.dst_dir)
    project_path = make_project_path(project_dir)

    if osp.isdir(project_dir) and os.listdir(project_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % project_dir)
        else:
            shutil.rmtree(project_dir)
    os.makedirs(project_dir, exist_ok=True)

    if not args.overwrite and osp.isfile(project_path):
        raise CliException("Project file '%s' already exists "
            "(pass --overwrite to force creation)" % project_path)

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Creating project at '%s'" % project_dir)

    Project.generate(project_dir, {
        'project_name': project_name,
    })

    log.info("Project has been created at '%s'" % project_dir)

    return 0
def build_import_parser(parser_ctor=argparse.ArgumentParser):
    import datumaro.components.importers as importers_module
    builtin_importers = [name for name, cls in importers_module.items]

    parser = parser_ctor(help="Create project from existing dataset",
        description="""
            Creates a project from an existing dataset. The source can be:|n
            - a dataset in a supported format (check 'formats' section below)|n
            - a Datumaro project|n
            |n
            Formats:|n
            Datasets come in a wide variety of formats. Each dataset
            format defines its own data structure and rules on how to
            interpret the data. For example, the following data structure
            is used in COCO format:|n
            /dataset/|n
            - /images/<id>.jpg|n
            - /annotations/|n
            |n
            In Datumaro dataset formats are supported by
            Extractor-s and Importer-s.
            An Extractor produces a list of dataset items corresponding
            to the dataset. An Importer creates a project from the
            data source location.
            It is possible to add a custom Extractor and Importer.
            To do this, you need to put an Extractor and
            Importer implementation scripts to
            <project_dir>/.datumaro/extractors
            and <project_dir>/.datumaro/importers.|n
            |n
            List of supported dataset formats: %s|n
            |n
            Examples:|n
            - Create a project from VOC dataset in the current directory:|n
            |s|simport -f voc -i path/to/voc|n
            |n
            - Create a project from COCO dataset in other directory:|n
            |s|simport -f coco -i path/to/coco -o path/I/like/
        """ % ', '.join(builtin_importers),
        formatter_class=MultilineFormatter)

    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
        help="Directory to save the new project to (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-i', '--input-path', required=True, dest='source',
        help="Path to import project from")
    parser.add_argument('-f', '--format', required=True,
        help="Source project format")
    # parser.add_argument('extra_args', nargs=argparse.REMAINDER,
    #     help="Additional arguments for importer (pass '-- -h' for help)")
    parser.set_defaults(command=import_command)

    return parser

def import_command(args):
    project_dir = osp.abspath(args.dst_dir)
    project_path = make_project_path(project_dir)

    if osp.isdir(project_dir) and os.listdir(project_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % project_dir)
        else:
            shutil.rmtree(project_dir)
    os.makedirs(project_dir, exist_ok=True)

    if not args.overwrite and osp.isfile(project_path):
        raise CliException("Project file '%s' already exists "
            "(pass --overwrite to force creation)" % project_path)

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Importing project from '%s' as '%s'" % \
        (args.source, args.format))

    source = osp.abspath(args.source)
    project = Project.import_from(source, args.format)
    project.config.project_name = project_name
    project.config.project_dir = project_dir

    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % project_dir)

    return 0

class FilterModes(Enum):
    # primary
    items = 1
    annotations = 2
    items_annotations = 3

    # shortcuts
    i = 1
    a = 2
    i_a = 3
    a_i = 3
    annotations_items = 3

    @staticmethod
    def parse(s):
        s = s.lower()
        s = s.replace('+', '_')
        return FilterModes[s]

    @classmethod
    def make_filter_args(cls, mode):
        if mode == cls.items:
            return {}
        elif mode == cls.annotations:
            return {
                'filter_annotations': True
            }
        elif mode == cls.items_annotations:
            return {
                'filter_annotations': True,
                'remove_empty': True,
            }
        else:
            raise NotImplementedError()

    @classmethod
    def list_options(cls):
        return [m.name.replace('_', '+') for m in cls]

def build_export_parser(parser_ctor=argparse.ArgumentParser):
    import datumaro.components.converters as converters_module
    builtin_converters = [name for name, cls in converters_module.items]

    parser = parser_ctor(help="Export project",
        description="""
            Exports the project dataset in some format. Optionally, a filter
            can be passed; check the 'extract' command description for more
            info. Each dataset format has its own options, which
            are passed after the '--' separator (see examples); pass '-- -h'
            for more info. If not stated otherwise, by default
            only annotations are exported; to include images, pass the
            '--save-images' parameter.|n
            |n
            Formats:|n
            In Datumaro dataset formats are supported by Converter-s.
            A Converter produces a dataset of a specific format
            from dataset items. It is possible to add a custom Converter.
            To do this, you need to put a Converter
            definition script to <project_dir>/.datumaro/converters.|n
            |n
            List of supported dataset formats: %s|n
            |n
            Examples:|n
            - Export project as a VOC-like dataset, include images:|n
            |s|sexport -f voc -- --save-images|n
            |n
            - Export project as a COCO-like dataset in other directory:|n
            |s|sexport -f coco -o path/I/like/
        """ % ', '.join(builtin_converters),
        formatter_class=MultilineFormatter)

    parser.add_argument('-e', '--filter', default=None,
        help="Filter expression for dataset items")
    parser.add_argument('--filter-mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()), '%(default)s'))
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save output (default: a subdir in the current one)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('-f', '--format', required=True,
        help="Output format")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    parser.set_defaults(command=export_command)

    return parser

def export_command(args):
    project = load_project(args.project_dir)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % dst_dir)
    else:
        dst_dir = generate_next_dir_name('%s-export-%s' % \
            (project.config.project_name, args.format))
    dst_dir = osp.abspath(dst_dir)

    try:
        converter = project.env.make_converter(args.format,
            cmdline_args=args.extra_args)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    log.info("Loading the project...")
    dataset = project.make_dataset()

    log.info("Exporting the project...")
    dataset.export_project(
        save_dir=dst_dir,
        converter=converter,
        filter_expr=args.filter,
        **filter_args)
    log.info("Project exported to '%s' as '%s'" % \
        (dst_dir, args.format))

    return 0

def build_extract_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Extract subproject",
        description="""
            Extracts a subproject that contains only items matching filter.
            A filter is an XPath expression, which is applied to the XML
            representation of a dataset item. Check the '--dry-run' parameter
            to see XML representations of the dataset items.|n
            |n
            To filter annotations use the mode ('-m') parameter.|n
            Supported modes:|n
            - 'i', 'items'|n
            - 'a', 'annotations'|n
            - 'i+a', 'a+i', 'items+annotations', 'annotations+items'|n
            When filtering annotations, use the 'items+annotations'
            mode to indicate that annotation-less dataset items should be
            removed. To select an annotation, write an XPath that
            returns 'annotation' elements (see examples).|n
            |n
            Examples:|n
            - Filter images with width < height:|n
            |s|sextract -e '/item[image/width < image/height]'|n
            |n
            - Filter images with large-area bboxes:|n
            |s|sextract -e '/item[annotation/type="bbox" and
                annotation/area>2000]'|n
            |n
            - Filter out all irrelevant annotations from items:|n
            |s|sextract -m a -e '/item/annotation[label = "person"]'|n
            |n
            - Filter out all irrelevant annotations from items:|n
            |s|sextract -m a -e '/item/annotation[label="cat" and
                area > 99.5]'|n
            |n
            - Filter occluded annotations and items, if no annotations left:|n
            |s|sextract -m i+a -e '/item/annotation[occluded="True"]'
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('-e', '--filter', default=None,
        help="XML XPath filter expression for dataset items")
    parser.add_argument('-m', '--mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()), '%(default)s'))
    parser.add_argument('--dry-run', action='store_true',
        help="Print XML representations to be filtered and exit")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: update current project)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=extract_command)

    return parser

def extract_command(args):
    project = load_project(args.project_dir)

    if not args.dry_run:
        dst_dir = args.dst_dir
        if dst_dir:
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
                raise CliException("Directory '%s' already exists "
                    "(pass --overwrite to force creation)" % dst_dir)
        else:
            dst_dir = generate_next_dir_name('%s-filter' % \
                project.config.project_name)
        dst_dir = osp.abspath(dst_dir)

    dataset = project.make_dataset()

    filter_args = FilterModes.make_filter_args(args.mode)

    if args.dry_run:
        dataset = dataset.extract(filter_expr=args.filter, **filter_args)
        for item in dataset:
            encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
            xml_item = DatasetItemEncoder.to_string(encoded_item)
            print(xml_item)
        return 0

    if not args.filter:
        raise CliException("Expected a filter expression ('-e' argument)")

    os.makedirs(dst_dir, exist_ok=False)
    dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
        **filter_args)

    log.info("Subproject has been extracted to '%s'" % dst_dir)

    return 0

def build_merge_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Merge projects",
        description="""
            Updates items of the current project with items
            from the other project.|n
            |n
            Examples:|n
            - Update a project with items from other project:|n
            |s|smerge -p path/to/first/project path/to/other/project
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('other_project_dir',
        help="Directory of the project to get data updates from")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=merge_command)

    return parser

def merge_command(args):
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % dst_dir)

    first_dataset = first_project.make_dataset()
    first_dataset.update(second_project.make_dataset())

    first_dataset.save(save_dir=dst_dir)

    if dst_dir is None:
        dst_dir = first_project.config.project_dir
    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)

    return 0

def build_diff_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Compare projects",
        description="""
            Compares two projects.|n
            |n
            Examples:|n
            - Compare two projects, consider bboxes matching if their IoU > 0.7,|n
            |s|s|s|sprint results to Tensorboard:|n
            |s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('other_project_dir',
        help="Directory of the second project to be compared")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save comparison results (default: do not save)")
    parser.add_argument('-f', '--format',
        default=DiffVisualizer.DEFAULT_FORMAT,
        choices=[f.name for f in DiffVisualizer.Format],
        help="Output format (default: %(default)s)")
    parser.add_argument('--iou-thresh', default=0.5, type=float,
        help="IoU match threshold for detections (default: %(default)s)")
    parser.add_argument('--conf-thresh', default=0.5, type=float,
        help="Confidence threshold for detections (default: %(default)s)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the first project to be compared (default: current dir)")
    parser.set_defaults(command=diff_command)

    return parser

def diff_command(args):
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)

    comparator = Comparator(
        iou_threshold=args.iou_thresh,
        conf_threshold=args.conf_thresh)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % dst_dir)
    else:
        dst_dir = generate_next_dir_name('%s-%s-diff' % (
            first_project.config.project_name,
            second_project.config.project_name)
        )
    dst_dir = osp.abspath(dst_dir)
    if dst_dir:
        log.info("Saving diff to '%s'" % dst_dir)

    visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
        output_format=args.format)
    visualizer.save_dataset_diff(
        first_project.make_dataset(),
        second_project.make_dataset())

    return 0

def build_transform_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Transform project",
        description="""
            Applies some operation to dataset items in the project
            and produces a new project.

            [NOT IMPLEMENTED YET]
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('-t', '--transform', required=True,
        help="Transform to apply to the project")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save output (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=transform_command)

    return parser

def transform_command(args):
    raise NotImplementedError("Not implemented yet.")

    # project = load_project(args.project_dir)

    # dst_dir = args.dst_dir
    # if dst_dir:
    #     if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
    #         raise CliException("Directory '%s' already exists "
    #             "(pass --overwrite to force creation)" % dst_dir)
    # dst_dir = osp.abspath(args.dst_dir)

    # project.make_dataset().transform_project(
    #     method=args.transform,
    #     save_dir=dst_dir
    # )

    # log.info("Transform results saved to '%s'" % dst_dir)

    # return 0

def build_info_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Get project info",
        description="""
            Outputs project info.
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('--all', action='store_true',
        help="Print all information")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=info_command)

    return parser

def info_command(args):
    project = load_project(args.project_dir)
    config = project.config
    env = project.env
    dataset = project.make_dataset()

    print("Project:")
    print("  name:", config.project_name)
    print("  location:", config.project_dir)
    print("Plugins:")
    print("  importers:", ', '.join(env.importers.items))
    print("  extractors:", ', '.join(env.extractors.items))
    print("  converters:", ', '.join(env.converters.items))
    print("  launchers:", ', '.join(env.launchers.items))

    print("Sources:")
    for source_name, source in config.sources.items():
        print("  source '%s':" % source_name)
        print("    format:", source.format)
        print("    url:", source.url)
        print("    location:", project.local_source_dir(source_name))

    def print_extractor_info(extractor, indent=''):
        print("%slength:" % indent, len(extractor))

        categories = extractor.categories()
        print("%scategories:" % indent, ', '.join(c.name for c in categories))

        for cat_type, cat in categories.items():
            print("%s  %s:" % (indent, cat_type.name))
            if cat_type == AnnotationType.label:
                print("%s    count:" % indent, len(cat.items))

                count_threshold = 10
                if args.all:
                    count_threshold = len(cat.items)
                labels = ', '.join(c.name for c in cat.items[:count_threshold])
                if count_threshold < len(cat.items):
                    labels += " (and %s more)" % (
                        len(cat.items) - count_threshold)
                print("%s    labels:" % indent, labels)

    print("Dataset:")
    print_extractor_info(dataset, indent="  ")

    subsets = dataset.subsets()
    print("  subsets:", ', '.join(subsets))
    for subset_name in subsets:
        subset = dataset.get_subset(subset_name)
        print("  subset '%s':" % subset_name)
        print_extractor_info(subset, indent="    ")

    print("Models:")
    for model_name, model in env.config.models.items():
        print("  model '%s':" % model_name)
        print("    type:", model.launcher)

    return 0


def build_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(
        description="""
            Manipulate projects.|n
            |n
            By default, the project to be operated on is searched for
            in the current directory. An additional '-p' argument can be
            passed to specify project location.
        """,
        formatter_class=MultilineFormatter)

    subparsers = parser.add_subparsers()
    add_subparser(subparsers, 'create', build_create_parser)
    add_subparser(subparsers, 'import', build_import_parser)
    add_subparser(subparsers, 'export', build_export_parser)
    add_subparser(subparsers, 'extract', build_extract_parser)
    add_subparser(subparsers, 'merge', build_merge_parser)
    add_subparser(subparsers, 'diff', build_diff_parser)
    add_subparser(subparsers, 'transform', build_transform_parser)
    add_subparser(subparsers, 'info', build_info_parser)

    return parser
@@ -0,0 +1,247 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp
import shutil

from ...util import add_subparser, CliException, MultilineFormatter
from ...util.project import load_project


def build_add_parser(parser_ctor=argparse.ArgumentParser):
    import datumaro.components.extractors as extractors_module
    extractors_list = [name for name, cls in extractors_module.items]

    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-n', '--name', default=None,
        help="Name of the new source")
    base_parser.add_argument('-f', '--format', required=True,
        help="Source dataset format")
    base_parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    base_parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")

    parser = parser_ctor(help="Add data source to project",
        description="""
            Adds a data source to a project. The source can be:|n
            - a dataset in a supported format (check 'formats' section below)|n
            - a Datumaro project|n
            |n
            The source can be either a local directory or a remote
            git repository. Each source type has its own parameters, which can
            be checked by:|n
            '%s'.|n
            |n
            Formats:|n
            Datasets come in a wide variety of formats. Each dataset
            format defines its own data structure and rules on how to
            interpret the data. For example, the following data structure
            is used in COCO format:|n
            /dataset/|n
            - /images/<id>.jpg|n
            - /annotations/|n
            |n
            In Datumaro dataset formats are supported by Extractor-s.
            An Extractor produces a list of dataset items corresponding
            to the dataset. It is possible to add a custom Extractor.
            To do this, you need to put an Extractor
            definition script to <project_dir>/.datumaro/extractors.|n
            |n
            List of supported source formats: %s|n
            |n
            Examples:|n
            - Add a local directory with VOC-like dataset:|n
            |s|sadd path path/to/voc -f voc_detection|n
            - Add a local file with CVAT annotations, call it 'mysource'|n
            |s|s|s|sto the project somewhere else:|n
            |s|sadd path path/to/cvat.xml -f cvat -n mysource -p somewhere/else/
        """ % ('%(prog)s SOURCE_TYPE --help', ', '.join(extractors_list)),
        formatter_class=MultilineFormatter,
        add_help=False)
    parser.set_defaults(command=add_command)

    sp = parser.add_subparsers(dest='source_type', metavar='SOURCE_TYPE',
        help="The type of the data source "
            "(call '%s SOURCE_TYPE --help' for more info)" % parser.prog)

    dir_parser = sp.add_parser('path', help="Add local path as source",
        parents=[base_parser])
    dir_parser.add_argument('url',
        help="Path to the source")
    dir_parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")

    repo_parser = sp.add_parser('git', help="Add git repository as source",
        parents=[base_parser])
    repo_parser.add_argument('url',
        help="URL of the source git repository")
    repo_parser.add_argument('-b', '--branch', default='master',
        help="Branch of the source repository (default: %(default)s)")
    repo_parser.add_argument('--checkout', action='store_true',
        help="Do branch checkout")

    # NOTE: add common parameters to the parent help output
    # the other way could be to use parse_known_args()
    display_parser = argparse.ArgumentParser(
        parents=[base_parser, parser],
        prog=parser.prog, usage="%(prog)s [-h] SOURCE_TYPE ...",
        description=parser.description, formatter_class=MultilineFormatter)
    class HelpAction(argparse._HelpAction):
        def __call__(self, parser, namespace, values, option_string=None):
            display_parser.print_help()
            parser.exit()

    parser.add_argument('-h', '--help', action=HelpAction,
        help='show this help message and exit')

    # TODO: needed distinction on how to add an extractor or a remote source

    return parser

def add_command(args):
    project = load_project(args.project_dir)

    if args.source_type == 'git':
        name = args.name
        if name is None:
            name = osp.splitext(osp.basename(args.url))[0]

        if project.env.git.has_submodule(name):
            raise CliException("Git submodule '%s' already exists" % name)

        try:
            project.get_source(name)
            raise CliException("Source '%s' already exists" % name)
        except KeyError:
            pass

        rel_local_dir = project.local_source_dir(name)
        local_dir = osp.join(project.config.project_dir, rel_local_dir)
        url = args.url
        project.env.git.create_submodule(name, local_dir,
            url=url, branch=args.branch, no_checkout=not args.checkout)
    elif args.source_type == 'path':
        url = osp.abspath(args.url)
        if not osp.exists(url):
            raise CliException("Source path '%s' does not exist" % url)

        name = args.name
        if name is None:
            name = osp.splitext(osp.basename(url))[0]

        if project.env.git.has_submodule(name):
            raise CliException("Git submodule '%s' already exists" % name)

        try:
            project.get_source(name)
            raise CliException("Source '%s' already exists" % name)
        except KeyError:
            pass

        rel_local_dir = project.local_source_dir(name)
        local_dir = osp.join(project.config.project_dir, rel_local_dir)

        if args.copy:
            log.info("Copying from '%s' to '%s'" % (url, local_dir))
            if osp.isdir(url):
                # copytree requires the destination dir not to exist
                shutil.copytree(url, local_dir)
                url = rel_local_dir
            elif osp.isfile(url):
                os.makedirs(local_dir)
                shutil.copy2(url, local_dir)
                url = osp.join(rel_local_dir, osp.basename(url))
            else:
                raise Exception("Expected file or directory")
        else:
            os.makedirs(local_dir)

    project.add_source(name, { 'url': url, 'format': args.format })

    if not args.skip_check:
        log.info("Checking the source...")
        try:
            project.make_source_project(name).make_dataset()
        except Exception:
            shutil.rmtree(local_dir, ignore_errors=True)
            raise

    project.save()

    log.info("Source '%s' has been added to the project, location: '%s'" \
        % (name, rel_local_dir))

    return 0

def build_remove_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Remove source from project",
        description="Remove a source from a project.")

    parser.add_argument('-n', '--name', required=True,
        help="Name of the source to be removed")
    parser.add_argument('--force', action='store_true',
        help="Ignore possible errors during removal")
    parser.add_argument('--keep-data', action='store_true',
        help="Do not remove source data")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=remove_command)

    return parser

def remove_command(args):
    project = load_project(args.project_dir)

    name = args.name
    if not name:
        raise CliException("Expected source name")
    try:
        project.get_source(name)
    except KeyError:
        if not args.force:
            raise CliException("Source '%s' does not exist" % name)

    if project.env.git.has_submodule(name):
        if args.force:
            log.warning("Forcefully removing the '%s' source..." % name)

        project.env.git.remove_submodule(name, force=args.force)

    source_dir = osp.join(project.config.project_dir,
        project.local_source_dir(name))
    project.remove_source(name)
    project.save()

    if not args.keep_data:
        shutil.rmtree(source_dir, ignore_errors=True)

    log.info("Source '%s' has been removed from the project" % name)

    return 0

def build_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(description="""
            Manipulate data sources inside of a project.|n
            |n
            A data source is a source of data for a project.
            The project combines multiple data sources into one dataset.
            The role of a data source is to provide dataset items - images
            and/or annotations.|n
            |n
            By default, the project to be operated on is searched for
            in the current directory. An additional '-p' argument can be
            passed to specify project location.
        """,
        formatter_class=MultilineFormatter)

    subparsers = parser.add_subparsers()
    add_subparser(subparsers, 'add', build_add_parser)
    add_subparser(subparsers, 'remove', build_remove_parser)

    return parser
@@ -1,21 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse

from . import project as project_module


def build_parser(parser=argparse.ArgumentParser()):
    project_module.build_create_parser(parser) \
        .set_defaults(command=project_module.create_command)

    return parser

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)

    return args.command(args)
@@ -1,69 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import os.path as osp

from datumaro.components.project import Project
from datumaro.util.command_targets import (TargetKinds, target_selector,
    ProjectTarget, SourceTarget, ImageTarget, ExternalDatasetTarget,
    is_project_path
)

from . import project as project_module
from . import source as source_module
from . import item as item_module


def export_external_dataset(target, params):
    raise NotImplementedError()

def build_parser(parser=argparse.ArgumentParser()):
    parser.add_argument('target', nargs='?', default=None)
    parser.add_argument('params', nargs=argparse.REMAINDER)

    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")

    return parser

def process_command(target, params, args):
    project_dir = args.project_dir
    target_kind, target_value = target
    if target_kind == TargetKinds.project:
        return project_module.main(['export', '-p', target_value] + params)
    elif target_kind == TargetKinds.source:
        return source_module.main(['export', '-p', project_dir, '-n', target_value] + params)
    elif target_kind == TargetKinds.item:
        return item_module.main(['export', '-p', project_dir, target_value] + params)
    elif target_kind == TargetKinds.external_dataset:
        return export_external_dataset(target_value, params)
    return 1

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)

    project_path = args.project_dir
    if is_project_path(project_path):
        project = Project.load(project_path)
    else:
        project = None
    try:
        args.target = target_selector(
            ProjectTarget(is_default=True, project=project),
            SourceTarget(project=project),
            ExternalDatasetTarget(),
            ImageTarget()
        )(args.target)
        if args.target[0] == TargetKinds.project:
            if is_project_path(args.target[1]):
                args.project_dir = osp.dirname(osp.abspath(args.target[1]))
    except argparse.ArgumentTypeError as e:
        print(e)
        parser.print_help()
        return 1

    return process_command(args.target, args.params, args)
@@ -1,33 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse


def run_command(args):
    return 0

def build_run_parser(parser):
    return parser

def build_parser(parser=argparse.ArgumentParser()):
    command_parsers = parser.add_subparsers(dest='command')

    build_run_parser(command_parsers.add_parser('run')). \
        set_defaults(command=run_command)

    return parser

def process_command(command, args):
    return 0

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)
    if 'command' not in args:
        parser.print_help()
        return 1

    return args.command(args)
@@ -1,38 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse


def build_export_parser(parser):
    return parser

def build_stats_parser(parser):
    return parser

def build_diff_parser(parser):
    return parser

def build_edit_parser(parser):
    return parser

def build_parser(parser=argparse.ArgumentParser()):
    command_parsers = parser.add_subparsers(dest='command_name')

    build_export_parser(command_parsers.add_parser('export'))
    build_stats_parser(command_parsers.add_parser('stats'))
    build_diff_parser(command_parsers.add_parser('diff'))
    build_edit_parser(command_parsers.add_parser('edit'))

    return parser

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)
    if 'command' not in args:
        parser.print_help()
        return 1

    return args.command(args)
@ -1,361 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp
import shutil

from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from .diff import DiffVisualizer
from ..util.project import make_project_path, load_project


def build_create_parser(parser):
    parser.add_argument('-d', '--dest', default='.', dest='dst_dir',
        help="Save directory for the new project (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    return parser

def create_command(args):
    project_dir = osp.abspath(args.dst_dir)
    project_path = make_project_path(project_dir)

    if osp.isdir(project_dir) and os.listdir(project_dir):
        if not args.overwrite:
            log.error("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % project_dir)
            return 1
        else:
            shutil.rmtree(project_dir)
    os.makedirs(project_dir, exist_ok=args.overwrite)

    if not args.overwrite and osp.isfile(project_path):
        log.error("Project file '%s' already exists "
            "(pass --overwrite to force creation)" % project_path)
        return 1

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Creating project at '%s'" % (project_dir))

    Project.generate(project_dir, {
        'project_name': project_name,
    })

    log.info("Project has been created at '%s'" % (project_dir))

    return 0

def build_import_parser(parser):
    import datumaro.components.importers as importers_module
    importers_list = [name for name, cls in importers_module.items]

    parser.add_argument('-s', '--source', required=True,
        help="Path to import a project from")
    parser.add_argument('-f', '--format', required=True,
        help="Source project format (options: %s)" % (', '.join(importers_list)))
    parser.add_argument('-d', '--dest', default='.', dest='dst_dir',
        help="Directory to save the new project to (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    # parser.add_argument('extra_args', nargs=argparse.REMAINDER,
    #     help="Additional arguments for importer (pass '-- -h' for help)")
    return parser

def import_command(args):
    project_dir = osp.abspath(args.dst_dir)
    project_path = make_project_path(project_dir)

    if osp.isdir(project_dir) and os.listdir(project_dir):
        if not args.overwrite:
            log.error("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % project_dir)
            return 1
        else:
            shutil.rmtree(project_dir)
    os.makedirs(project_dir, exist_ok=args.overwrite)

    if not args.overwrite and osp.isfile(project_path):
        log.error("Project file '%s' already exists "
            "(pass --overwrite to force creation)" % project_path)
        return 1

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Importing project from '%s' as '%s'" % \
        (args.source, args.format))

    source = osp.abspath(args.source)
    project = Project.import_from(source, args.format)
    project.config.project_name = project_name
    project.config.project_dir = project_dir

    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % (project_dir))

    return 0

def build_build_parser(parser):
    return parser

def build_export_parser(parser):
    parser.add_argument('-e', '--filter', default=None,
        help="Filter expression for dataset items. Examples: "
            "extract images with width < height: "
            "'/item[image/width < image/height]'; "
            "extract images with large-area bboxes: "
            "'/item[annotation/type=\"bbox\" and annotation/area>2000]'; "
            "filter out irrelevant annotations from items: "
            "'/item/annotation[label = \"person\"]'"
    )
    parser.add_argument('-a', '--filter-annotations', action='store_true',
        help="Filter annotations instead of dataset "
            "items (default: %(default)s)")
    parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
        help="Directory to save output")
    parser.add_argument('-f', '--output-format', required=True,
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser

def export_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        log.error("Directory '%s' already exists "
            "(pass --overwrite to force creation)" % dst_dir)
        return 1
    os.makedirs(dst_dir, exist_ok=args.overwrite)

    log.info("Loading the project...")
    dataset = project.make_dataset()

    log.info("Exporting the project...")
    dataset.export_project(
        save_dir=dst_dir,
        output_format=args.output_format,
        filter_expr=args.filter,
        filter_annotations=args.filter_annotations,
        cmdline_args=args.extra_args)
    log.info("Project exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0

def build_stats_parser(parser):
    parser.add_argument('name')
    return parser

def build_docs_parser(parser):
    return parser

def build_extract_parser(parser):
    parser.add_argument('-e', '--filter', default=None,
        help="XML XPath filter expression for dataset items. Examples: "
            "extract images with width < height: "
            "'/item[image/width < image/height]'; "
            "extract images with large-area bboxes: "
            "'/item[annotation/type=\"bbox\" and annotation/area>2000]'; "
            "filter out irrelevant annotations from items: "
            "'/item/annotation[label = \"person\"]'"
    )
    parser.add_argument('-a', '--filter-annotations', action='store_true',
        help="Filter annotations instead of dataset "
            "items (default: %(default)s)")
    parser.add_argument('--remove-empty', action='store_true',
        help="Remove an item if there are no annotations left after filtration")
    parser.add_argument('--dry-run', action='store_true',
        help="Print XML representations to be filtered and exit")
    parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
        help="Output directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    return parser

def extract_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
    if not args.dry_run:
        os.makedirs(dst_dir, exist_ok=False)

    dataset = project.make_dataset()

    kwargs = {}
    if args.filter_annotations:
        kwargs['remove_empty'] = args.remove_empty

    if args.dry_run:
        dataset = dataset.extract(filter_expr=args.filter,
            filter_annotations=args.filter_annotations, **kwargs)
        for item in dataset:
            encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
            xml_item = DatasetItemEncoder.to_string(encoded_item)
            print(xml_item)
        return 0

    dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
        filter_annotations=args.filter_annotations, **kwargs)

    log.info("Subproject extracted to '%s'" % (dst_dir))

    return 0

def build_merge_parser(parser):
    parser.add_argument('other_project_dir',
        help="Directory of the project to get data updates from")
    parser.add_argument('-d', '--dest', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    return parser

def merge_command(args):
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)

    first_dataset = first_project.make_dataset()
    first_dataset.update(second_project.make_dataset())

    dst_dir = args.dst_dir
    first_dataset.save(save_dir=dst_dir)

    if dst_dir is None:
        dst_dir = first_project.config.project_dir
    dst_dir = osp.abspath(dst_dir)
    log.info("Merge result saved to '%s'" % (dst_dir))

    return 0

def build_diff_parser(parser):
    parser.add_argument('other_project_dir',
        help="Directory of the second project to be compared")
    parser.add_argument('-d', '--dest', default=None, dest='dst_dir',
        help="Directory to save comparison results (default: do not save)")
    parser.add_argument('-f', '--output-format',
        default=DiffVisualizer.DEFAULT_FORMAT,
        choices=[f.name for f in DiffVisualizer.Format],
        help="Output format (default: %(default)s)")
    parser.add_argument('--iou-thresh', default=0.5, type=float,
        help="IoU match threshold for detections (default: %(default)s)")
    parser.add_argument('--conf-thresh', default=0.5, type=float,
        help="Confidence threshold for detections (default: %(default)s)")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the first project to be compared (default: current dir)")
    return parser

def diff_command(args):
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)

    comparator = Comparator(
        iou_threshold=args.iou_thresh,
        conf_threshold=args.conf_thresh)

    save_dir = args.dst_dir
    if save_dir is not None:
        log.info("Saving diff to '%s'" % save_dir)
        os.makedirs(osp.abspath(save_dir))
    visualizer = DiffVisualizer(save_dir=save_dir, comparator=comparator,
        output_format=args.output_format)
    visualizer.save_dataset_diff(
        first_project.make_dataset(),
        second_project.make_dataset())

    return 0

def build_transform_parser(parser):
    parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
        help="Directory to save output")
    parser.add_argument('-m', '--model', dest='model_name', required=True,
        help="Model to apply to the project")
    parser.add_argument('-f', '--output-format', required=True,
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    return parser

def transform_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
    os.makedirs(dst_dir, exist_ok=False)
    project.make_dataset().apply_model(
        save_dir=dst_dir,
        model_name=args.model_name)

    log.info("Transform results saved to '%s'" % (dst_dir))

    return 0


def build_parser(parser=argparse.ArgumentParser()):
    command_parsers = parser.add_subparsers(dest='command_name')

    build_create_parser(command_parsers.add_parser('create')) \
        .set_defaults(command=create_command)

    build_import_parser(command_parsers.add_parser('import')) \
        .set_defaults(command=import_command)

    build_export_parser(command_parsers.add_parser('export')) \
        .set_defaults(command=export_command)

    build_extract_parser(command_parsers.add_parser('extract')) \
        .set_defaults(command=extract_command)

    build_merge_parser(command_parsers.add_parser('merge')) \
        .set_defaults(command=merge_command)

    build_build_parser(command_parsers.add_parser('build'))
    build_stats_parser(command_parsers.add_parser('stats'))
    build_docs_parser(command_parsers.add_parser('docs'))
    build_diff_parser(command_parsers.add_parser('diff')) \
        .set_defaults(command=diff_command)

    build_transform_parser(command_parsers.add_parser('transform')) \
        .set_defaults(command=transform_command)

    return parser

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)
    if 'command' not in args:
        parser.print_help()
        return 1

    return args.command(args)
@ -1,21 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse

from . import source as source_module


def build_parser(parser=argparse.ArgumentParser()):
    source_module.build_add_parser(parser) \
        .set_defaults(command=source_module.remove_command)

    return parser

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)

    return args.command(args)
@ -1,254 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp
import shutil

from ..util.project import load_project


def build_create_parser(parser):
    parser.add_argument('-n', '--name', required=True,
        help="Name of the source to be created")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    return parser

def create_command(args):
    project = load_project(args.project_dir)
    config = project.config

    name = args.name

    if project.env.git.has_submodule(name):
        log.fatal("Submodule '%s' already exists" % (name))
        return 1

    try:
        project.get_source(name)
        log.fatal("Source '%s' already exists" % (name))
        return 1
    except KeyError:
        pass

    dst_dir = osp.join(config.project_dir, config.sources_dir, name)
    project.env.git.init(dst_dir)

    project.add_source(name, { 'url': name })
    project.save()

    log.info("Source '%s' has been added to the project, location: '%s'" \
        % (name, dst_dir))

    return 0

def build_import_parser(parser):
    sp = parser.add_subparsers(dest='source_type')

    repo_parser = sp.add_parser('repo')
    repo_parser.add_argument('url',
        help="URL of the source git repository")
    repo_parser.add_argument('-b', '--branch', default='master',
        help="Branch of the source repository (default: %(default)s)")
    repo_parser.add_argument('--checkout', action='store_true',
        help="Do branch checkout")

    dir_parser = sp.add_parser('dir')
    dir_parser.add_argument('url',
        help="Path to the source directory")
    dir_parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")

    parser.add_argument('-n', '--name', default=None,
        help="Name of the new source")
    parser.add_argument('-f', '--format', default=None,
        help="Name of the source dataset format (default: 'project')")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    return parser

def import_command(args):
    project = load_project(args.project_dir)

    if args.source_type == 'repo':
        name = args.name
        if name is None:
            name = osp.splitext(osp.basename(args.url))[0]

        if project.env.git.has_submodule(name):
            log.fatal("Submodule '%s' already exists" % (name))
            return 1

        try:
            project.get_source(name)
            log.fatal("Source '%s' already exists" % (name))
            return 1
        except KeyError:
            pass

        dst_dir = project.local_source_dir(name)
        project.env.git.create_submodule(name, dst_dir,
            url=args.url, branch=args.branch, no_checkout=not args.checkout)

        source = { 'url': args.url }
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)

        if not args.skip_check:
            log.info("Checking the source...")
            project.make_source_project(name)
        project.save()

        log.info("Source '%s' has been added to the project, location: '%s'" \
            % (name, dst_dir))
    elif args.source_type == 'dir':
        url = osp.abspath(args.url)
        if not osp.exists(url):
            log.fatal("Source path '%s' does not exist" % url)
            return 1

        name = args.name
        if name is None:
            name = osp.splitext(osp.basename(url))[0]

        try:
            project.get_source(name)
            log.fatal("Source '%s' already exists" % (name))
            return 1
        except KeyError:
            pass

        dst_dir = url
        if args.copy:
            dst_dir = project.local_source_dir(name)
            log.info("Copying from '%s' to '%s'" % (url, dst_dir))
            shutil.copytree(url, dst_dir)
            url = name

        source = { 'url': url }
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)

        if not args.skip_check:
            log.info("Checking the source...")
            project.make_source_project(name)
        project.save()

        log.info("Source '%s' has been added to the project, location: '%s'" \
            % (name, dst_dir))

    return 0

def build_remove_parser(parser):
    parser.add_argument('-n', '--name', required=True,
        help="Name of the source to be removed")
    parser.add_argument('--force', action='store_true',
        help="Ignore possible errors during removal")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    return parser

def remove_command(args):
    project = load_project(args.project_dir)

    name = args.name
    if name is None:
        log.fatal("Expected source name")
        return 1

    if project.env.git.has_submodule(name):
        if args.force:
            log.warning("Forcefully removing the '%s' source..." % (name))

        project.env.git.remove_submodule(name, force=args.force)

    project.remove_source(name)
    project.save()

    log.info("Source '%s' has been removed from the project" % (name))

    return 0

def build_export_parser(parser):
    parser.add_argument('-n', '--name', required=True,
        help="Source dataset to be extracted")
    parser.add_argument('-e', '--filter', default=None,
        help="Filter expression for dataset items. Examples: "
            "extract images with width < height: "
            "'/item[image/width < image/height]'; "
            "extract images with large-area bboxes: "
            "'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
    )
    parser.add_argument('-a', '--filter-annotations', action='store_true',
        help="Filter annotations instead of dataset "
            "items (default: %(default)s)")
    parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
        help="Directory to save output")
    parser.add_argument('-f', '--output-format', required=True,
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser

def export_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        log.error("Directory '%s' already exists "
            "(pass --overwrite to force creation)" % dst_dir)
        return 1
    os.makedirs(dst_dir, exist_ok=args.overwrite)

    log.info("Loading the project...")
    source_project = project.make_source_project(args.name)
    dataset = source_project.make_dataset()

    log.info("Exporting the project...")
    dataset.export_project(
        save_dir=dst_dir,
        output_format=args.output_format,
        filter_expr=args.filter,
        filter_annotations=args.filter_annotations,
        cmdline_args=args.extra_args)
    log.info("Source '%s' exported to '%s' as '%s'" % \
        (args.name, dst_dir, args.output_format))

    return 0

def build_parser(parser=argparse.ArgumentParser()):
    command_parsers = parser.add_subparsers(dest='command_name')

    build_create_parser(command_parsers.add_parser('create')) \
        .set_defaults(command=create_command)
    build_import_parser(command_parsers.add_parser('import')) \
        .set_defaults(command=import_command)
    build_remove_parser(command_parsers.add_parser('remove')) \
        .set_defaults(command=remove_command)
    build_export_parser(command_parsers.add_parser('export')) \
        .set_defaults(command=export_command)

    return parser


def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)
    if 'command' not in args:
        parser.print_help()
        return 1

    return args.command(args)
@ -1,69 +0,0 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import os.path as osp

from datumaro.components.project import Project
from datumaro.util.command_targets import (TargetKinds, target_selector,
    ProjectTarget, SourceTarget, ExternalDatasetTarget, ImageTarget,
    is_project_path
)

from . import project as project_module
from . import source as source_module
from . import item as item_module


def compute_external_dataset_stats(target, params):
    raise NotImplementedError()

def build_parser(parser=argparse.ArgumentParser()):
    parser.add_argument('target', nargs='?', default=None)
    parser.add_argument('params', nargs=argparse.REMAINDER)

    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")

    return parser

def process_command(target, params, args):
    project_dir = args.project_dir
    target_kind, target_value = target
    if target_kind == TargetKinds.project:
        return project_module.main(['stats', '-p', target_value] + params)
    elif target_kind == TargetKinds.source:
        return source_module.main(['stats', '-p', project_dir, target_value] + params)
    elif target_kind == TargetKinds.item:
        return item_module.main(['stats', '-p', project_dir, target_value] + params)
    elif target_kind == TargetKinds.external_dataset:
        return compute_external_dataset_stats(target_value, params)
    return 1

def main(args=None):
    parser = build_parser()
    args = parser.parse_args(args)

    project_path = args.project_dir
    if is_project_path(project_path):
        project = Project.load(project_path)
    else:
        project = None
    try:
        args.target = target_selector(
            ProjectTarget(is_default=True, project=project),
            SourceTarget(project=project),
            ExternalDatasetTarget(),
            ImageTarget()
        )(args.target)
        if args.target[0] == TargetKinds.project:
            if is_project_path(args.target[1]):
                args.project_dir = osp.dirname(osp.abspath(args.target[1]))
    except argparse.ArgumentTypeError as e:
        print(e)
        parser.print_help()
        return 1

    return process_command(args.target, args.params, args)
@ -0,0 +1,55 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from collections import OrderedDict
import os
import os.path as osp

from datumaro.components.extractor import DatasetItem, Extractor
from datumaro.util.image import lazy_image


class ImageDirExtractor(Extractor):
    _SUPPORTED_FORMATS = ['.png', '.jpg']

    def __init__(self, url):
        super().__init__()

        assert osp.isdir(url)

        # Collect all supported images from the directory; the file name
        # (without the extension) becomes the item id.
        items = []
        for name in os.listdir(url):
            path = osp.join(url, name)
            if self._is_image(path):
                item_id = osp.splitext(name)[0]
                item = DatasetItem(id=item_id, image=lazy_image(path))
                items.append((item.id, item))

        items = sorted(items, key=lambda e: e[0])
        items = OrderedDict(items)
        self._items = items

        self._subsets = None

    def __iter__(self):
        for item in self._items.values():
            yield item

    def __len__(self):
        return len(self._items)

    def subsets(self):
        return self._subsets

    def get(self, item_id, subset=None, path=None):
        if path or subset:
            raise KeyError()
        return self._items[item_id]

    def _is_image(self, path):
        for ext in self._SUPPORTED_FORMATS:
            if osp.isfile(path) and path.endswith(ext):
                return True
        return False
@ -0,0 +1,26 @@

# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os.path as osp


class ImageDirImporter:
    EXTRACTOR_NAME = 'image_dir'

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        # Register the directory as a project source handled by
        # the 'image_dir' extractor.
        source_name = osp.basename(osp.normpath(path))
        project.add_source(source_name, {
            'url': source_name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
Binary file not shown. (Size before: 91 KiB, after: 35 KiB)
@ -0,0 +1,563 @@

# Quick start guide

## Contents

- [Installation](#installation)
- [Interfaces](#interfaces)
- [Supported dataset formats and annotations](#formats-support)
- [Command line workflow](#command-line-workflow)
  - [Create a project](#create-project)
  - [Add and remove data](#add-and-remove-data)
  - [Import a project](#import-project)
  - [Extract a subproject](#extract-subproject)
  - [Merge projects](#merge-project)
  - [Export a project](#export-project)
  - [Compare projects](#compare-projects)
  - [Get project info](#get-project-info)
  - [Register a model](#register-model)
  - [Run inference](#run-inference)
  - [Run inference explanation](#explain-inference)
- [Links](#links)

## Installation

### Prerequisites

- Python (3.5+)
- OpenVINO (optional)

### Installation steps

Optionally, set up a virtual environment:

``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```

Install Datumaro:

``` bash
pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
```

> You can change the installation branch with `.../cvat@<branch_name>#egg...`
> Note that the `--force-reinstall` parameter may be needed in this case.

## Interfaces

As a standalone tool:

``` bash
datum --help
```

As a python module:

> The directory containing Datumaro should be in the `PYTHONPATH`
> environment variable, or `cvat/datumaro/` should be the current directory.

``` bash
python -m datumaro --help
python datumaro/ --help
python datum.py --help
```

As a python library:

``` python
import datumaro
```
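
For instance, the library API can be used to load an existing project and
inspect its items programmatically. The snippet below is a minimal sketch
built from the classes used elsewhere in this guide (`Project`,
`make_dataset()`); the `test_project` path is just a placeholder:

``` python
from datumaro.components.project import Project

# Load a project that was previously created or imported on disk
project = Project.load('test_project')

# Combine the project's sources into a single dataset
dataset = project.make_dataset()

# Iterate over the dataset items and their annotations
for item in dataset:
    print(item.id, len(item.annotations))
```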
## Formats support

List of supported formats:

- COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
  - [Format specification](http://cocodataset.org/#format-data)
  - `labels` are our extension - like `instances`, but with only `category_id`
- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
  - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
- YOLO (`bboxes`)
  - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
- TF Detection API (`bboxes`, `masks`)
  - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
- CVAT
  - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)

List of supported annotation types:

- Labels
- Bounding boxes
- Polygons
- Polylines
- (Key-)Points
- Captions
- Masks

## Command line workflow

> **Note**: command invocation syntax is subject to change,
> **always refer to the command's --help output**

The key object is the Project. A Project is a combination of
a Project's own dataset, a number of external data sources, and an environment.
An empty Project can be created with the `project create` command,
and an existing dataset can be imported with the `project import` command.
A typical way to obtain projects is to export tasks in the CVAT UI.

Available CLI commands:
![CLI design doc](images/cli_design.png)

If you want to interact with models, you need to add them to the project first.

### Import project

This command creates a Project from an existing dataset.

Supported formats are listed in the command help.
In Datumaro, dataset formats are supported by Extractors and Importers.
An Extractor produces a list of dataset items corresponding
to the dataset. An Importer creates a Project from a
data source location. It is possible to add custom Extractors and Importers.
To do this, put the Extractor and Importer implementation scripts in
`<project_dir>/.datumaro/extractors` and `<project_dir>/.datumaro/importers`.
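
As a reference for the Importer interface, the `ImageDirImporter` added in
this change is a callable that receives a path and returns a configured
Project. A custom importer sketch following the same shape (the format name
`my_format` is hypothetical and must match the custom Extractor's name):

``` python
import os.path as osp


class MyFormatImporter:
    EXTRACTOR_NAME = 'my_format' # the name of the matching custom Extractor

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        # Register the dataset location as a project source
        source_name = osp.basename(osp.normpath(path))
        project.add_source(source_name, {
            'url': source_name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
```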
Usage:

``` bash
datum project import --help

datum project import \
    -i <dataset_path> \
    -o <project_dir> \
    -f <format>
```

Example: create a project from a COCO-like dataset

``` bash
datum project import \
    -i /home/coco_dir \
    -o /home/project_dir \
    -f coco
```

An _MS COCO_-like dataset should have the following directory structure:

<!--lint disable fenced-code-flag-->
```
COCO/
├── annotations/
│   ├── instances_val2017.json
│   ├── instances_train2017.json
├── images/
│   ├── val2017
│   ├── train2017
```
<!--lint enable fenced-code-flag-->

Everything after the last `_` in an annotation file name is considered a subset
name in the COCO format, e.g. `instances_val2017.json` describes the `val2017` subset.

### Create project

The command creates an empty project. Once a Project is created, there are
a few options to interact with it.

Usage:

``` bash
datum project create --help

datum project create \
    -o <project_dir>
```

Example: create an empty project `my_dataset`

``` bash
datum project create -o my_dataset/
```

### Add and remove data

A Project can be attached to a number of external Data Sources. Each Source
describes a way to produce dataset items. A Project combines dataset items from
all the sources and its own dataset into one composite dataset. You can manage
project sources with the commands in the `source` command line context.

Datasets come in a wide variety of formats. Each dataset
format defines its own data structure and rules on how to
interpret the data. For example, the following data structure
is used in the COCO format:
<!--lint disable fenced-code-flag-->
```
/dataset/
- /images/<id>.jpg
- /annotations/
```
<!--lint enable fenced-code-flag-->

In Datumaro, dataset formats are supported by Extractors.
An Extractor produces a list of dataset items corresponding
to the dataset. It is possible to add a custom Extractor
(see the sketch below). To do this, put an Extractor
definition script in `<project_dir>/.datumaro/extractors`.
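
A custom Extractor only needs to produce `DatasetItem`s. A minimal sketch,
modeled on the `ImageDirExtractor` added in this change (the class name and
the `.jpg`-only filter are hypothetical):

``` python
import os
import os.path as osp

from datumaro.components.extractor import DatasetItem, Extractor
from datumaro.util.image import lazy_image


class MyFormatExtractor(Extractor):
    def __init__(self, url):
        super().__init__()

        # Produce one item per image file; images are loaded lazily
        self._items = [
            DatasetItem(id=osp.splitext(name)[0],
                image=lazy_image(osp.join(url, name)))
            for name in sorted(os.listdir(url))
            if name.endswith('.jpg')
        ]

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)
```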
Usage:

``` bash
datum source add --help
datum source remove --help

datum source add \
    path <path> \
    -p <project dir> \
    -n <name>

datum source remove \
    -p <project dir> \
    -n <name>
```

Example: create a project from a bunch of different annotations and images,
and generate a TFRecord dataset for the TF Detection API for model training

``` bash
datum project create
# 'default' is the name of the subset below
datum source add path <path/to/coco/instances_default.json> -f coco_instances
datum source add path <path/to/cvat/default.xml> -f cvat
datum source add path <path/to/voc> -f voc_detection
datum source add path <path/to/datumaro/default.json> -f datumaro
datum source add path <path/to/images/dir> -f image_dir
datum project export -f tf_detection_api
```

### Extract subproject

This command creates a sub-Project from a Project. The new project
includes only the items satisfying some condition.
[XPath](https://devhints.io/xpath) is used as the query format.

There are several filtering modes available (the `-m/--mode` parameter).
Supported modes:
- 'i', 'items'
- 'a', 'annotations'
- 'i+a', 'a+i', 'items+annotations', 'annotations+items'

When filtering annotations, use the 'items+annotations'
mode to indicate that annotation-less dataset items should be
removed. To select an annotation, write an XPath that
returns 'annotation' elements (see the examples below).

Usage:

``` bash
datum project extract --help

datum project extract \
    -p <project dir> \
    -o <output dir> \
    -e '<xpath filter expression>'
```

Example: extract a dataset with only images whose width < height

``` bash
datum project extract \
    -p test_project \
    -o test_project-extract \
    -e '/item[image/width < image/height]'
```

Example: extract a dataset with only large annotations of class `cat` and any non-`person` ones

``` bash
datum project extract \
    -p test_project \
    -o test_project-extract \
    --mode annotations -e '/item/annotation[(label="cat" and area > 999.5) or label!="person"]'
```

Example: extract a dataset with only occluded annotations, remove empty images

``` bash
datum project extract \
    -p test_project \
    -o test_project-extract \
    -m i+a -e '/item/annotation[occluded="True"]'
```

Item representations are available with the `--dry-run` parameter:

``` xml
<item>
  <id>290768</id>
  <subset>minival2014</subset>
  <image>
    <width>612</width>
    <height>612</height>
    <depth>3</depth>
  </image>
  <annotation>
    <id>80154</id>
    <type>bbox</type>
    <label_id>39</label_id>
    <x>264.59</x>
    <y>150.25</y>
    <w>11.199999999999989</w>
    <h>42.31</h>
    <area>473.87199999999956</area>
  </annotation>
  <annotation>
    <id>669839</id>
    <type>bbox</type>
    <label_id>41</label_id>
    <x>163.58</x>
    <y>191.75</y>
    <w>76.98999999999998</w>
    <h>73.63</h>
    <area>5668.773699999998</area>
  </annotation>
  ...
</item>
```

### Merge projects

This command combines multiple Projects into one.

Usage:

``` bash
datum project merge --help

datum project merge \
    -p <project dir> \
    -o <output dir> \
    <other project dir>
```

Example: update annotations in the `first_project` with annotations
from the `second_project` and save the result as `merged_project`

``` bash
datum project merge \
    -p first_project \
    -o merged_project \
    second_project
```

### Export project

This command exports a Project in a specified format.

Supported formats are listed in the command help.
In Datumaro, dataset formats are supported by Converters.
A Converter produces a dataset of a specific format
from dataset items. It is possible to add a custom Converter.
To do this, put a Converter
definition script in `<project_dir>/.datumaro/converters`.
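
A minimal custom Converter sketch, assuming a Converter is a callable that
receives the dataset (an Extractor) and an output directory - check the
built-in converters in `datumaro.components.converters` for the exact
interface; the text format written below is hypothetical:

``` python
import os
import os.path as osp


class MyFormatConverter:
    def __call__(self, extractor, save_dir):
        os.makedirs(save_dir, exist_ok=True)

        # Write one annotation file per dataset item in a custom text format
        for item in extractor:
            with open(osp.join(save_dir, '%s.txt' % item.id), 'w') as f:
                for ann in item.annotations:
                    f.write('%s\n' % ann.type.name)
```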
Usage:

``` bash
datum project export --help

datum project export \
    -p <project dir> \
    -o <output dir> \
    -f <format> \
    [-- <additional format parameters>]
```

Example: save the project as a VOC-like dataset, including images

``` bash
datum project export \
    -p test_project \
    -o test_project-export \
    -f voc \
    -- --save-images
```

### Get project info

This command outputs project status information.

Usage:

``` bash
datum project info --help

datum project info \
    -p <project dir>
```

Example:

``` bash
datum project info -p /test_project

Project:
  name: test_project2
  location: /test_project
Sources:
  source 'instances_minival2014':
    format: coco_instances
    url: /coco_like/annotations/instances_minival2014.json
Dataset:
  length: 5000
  categories: label
    label:
      count: 80
      labels: person, bicycle, car, motorcycle (and 76 more)
  subsets: minival2014
    subset 'minival2014':
      length: 5000
      categories: label
        label:
          count: 80
          labels: person, bicycle, car, motorcycle (and 76 more)
```

### Register model

Supported models:
- OpenVINO
- Custom models via custom `launchers`

Usage:

``` bash
datum model add --help
```

Example: register an OpenVINO model

A model consists of a graph description and weights. There is also a script
used to convert the model outputs to internal data structures.

``` bash
datum project create
datum model add \
    -n <model_name> openvino \
    -d <path_to_xml> -w <path_to_bin> -i <path_to_interpretation_script>
```

Interpretation script for an OpenVINO detection model (`convert.py`):

``` python
from datumaro.components.extractor import *

max_det = 10
conf_thresh = 0.1

def process_outputs(inputs, outputs):
    # inputs = model input, an array of images, shape = (N, C, H, W)
    # outputs = model output, shape = (N, 1, K, 7)
    # results = conversion result, [ [ Annotation, ... ], ... ]
    results = []
    for input, output in zip(inputs, outputs):
        input_height, input_width = input.shape[:2]
        detections = output[0]
        image_results = []
        for det in detections:
            label = int(det[1])
            conf = det[2]
            if conf <= conf_thresh:
                continue

            x = max(int(det[3] * input_width), 0)
            y = max(int(det[4] * input_height), 0)
            w = min(int(det[5] * input_width - x), input_width)
            h = min(int(det[6] * input_height - y), input_height)
            image_results.append(BboxObject(x, y, w, h,
                label=label, attributes={'score': conf}))

        results.append(image_results[:max_det])

    return results

def get_categories():
    # Optionally, provide output categories - a label map etc.
    # Example:
    label_categories = LabelCategories()
    label_categories.add('person')
    label_categories.add('car')
    return { AnnotationType.label: label_categories }
```

### Run model

This command applies a model to dataset images and produces a new project.

Usage:

``` bash
datum model run --help

datum model run \
    -p <project dir> \
    -m <model_name> \
    -o <save_dir>
```

Example: launch inference on a dataset

``` bash
datum project import <...>
datum model add mymodel <...>
datum model run -m mymodel -o inference
```

### Compare projects

The command compares two datasets and saves the results in the
specified directory. The current project is considered to be
"ground truth".

``` bash
datum project diff --help

datum project diff <other_project_dir> -o <save_dir>
```

Example: compare a dataset with model inference

``` bash
datum project import <...>
datum model add mymodel <...>
datum project transform <...> -o inference
datum project diff inference -o diff
```

### Explain inference

Usage:

``` bash
datum explain --help

datum explain \
    -m <model_name> \
    -o <save_dir> \
    -t <target> \
    <method> \
    <method_params>
```

Example: run inference explanation on a single image with visualization

``` bash
datum project create <...>
datum model add mymodel <...>
datum explain \
    -m mymodel \
    -t 'image.png' \
    rise \
    -s 1000 --progressive
```

## Links
- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)
- [How to convert a model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html)
- [Model conversion script example](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23)
@ -1,5 +0,0 @@

import unittest


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,48 @@

import numpy as np
import os.path as osp

from unittest import TestCase

from datumaro.components.project import Project
from datumaro.components.extractor import Extractor, DatasetItem
from datumaro.util.test_utils import TestDir
from datumaro.util.image import save_image


class ImageDirFormatTest(TestCase):
    class TestExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, image=np.ones((10, 6, 3))),
                DatasetItem(id=2, image=np.ones((5, 4, 3))),
            ])

    def test_can_load(self):
        with TestDir() as test_dir:
            source_dataset = self.TestExtractor()

            # Dump the test items as images into a temporary directory
            for item in source_dataset:
                save_image(osp.join(test_dir.path, '%s.jpg' % item.id),
                    item.image)

            # Import the directory back and compare with the source
            project = Project.import_from(test_dir.path, 'image_dir')
            parsed_dataset = project.make_dataset()

            self.assertListEqual(
                sorted(source_dataset.subsets()),
                sorted(parsed_dataset.subsets()),
            )

            self.assertEqual(len(source_dataset), len(parsed_dataset))

            for subset_name in source_dataset.subsets():
                source_subset = source_dataset.get_subset(subset_name)
                parsed_subset = parsed_dataset.get_subset(subset_name)
                self.assertEqual(len(source_subset), len(parsed_subset))
                for idx, (item_a, item_b) in enumerate(
                        zip(source_subset, parsed_subset)):
                    self.assertEqual(item_a, item_b, str(idx))

            self.assertEqual(
                source_dataset.categories(),
                parsed_dataset.categories())
@ -1,2 +1,2 @@
Pillow==6.2.0
requests==2.20.1
Pillow>=6.2.0
requests>=2.20.1