Install Datumaro as package (#2163)

* Remove Datumaro sources

* Install Datumaro as package

* Update Datumaro links

* fix

* remove unnecessary dependencies

* Update travis

* update coverage config
main
Maxim Zhiltsov 6 years ago committed by GitHub
parent c2df460da6
commit 09a9509278
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -3,13 +3,10 @@ branch = true
# relative_files = true # does not work?
source =
datumaro/datumaro/
cvat/apps/
utils/cli/
omit =
datumaro/datumaro/__main__.py
datumaro/datumaro/version.py
cvat/settings/*
*/tests/*
*/test_*

@ -1,5 +1,3 @@
sudo: required
language: python
python:
@ -36,7 +34,7 @@ before_script:
script:
# FIXME: Git package and application name conflict in PATH and try to leave only one python test execution
- docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'coverage run -a manage.py test cvat/apps && coverage run -a manage.py test --pattern="_test*.py" cvat/apps/dataset_manager/tests cvat/apps/engine/tests utils/cli && coverage run -a manage.py test datumaro/ && mv .coverage ${CONTAINER_COVERAGE_DATA_DIR}'
- docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'coverage run -a manage.py test cvat/apps && coverage run -a manage.py test --pattern="_test*.py" cvat/apps/dataset_manager/tests cvat/apps/engine/tests utils/cli && mv .coverage ${CONTAINER_COVERAGE_DATA_DIR}'
- docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'cd cvat-data && npm install && cd ../cvat-core && npm install && npm run test && coveralls-lcov -v -n ./reports/coverage/lcov.info > ${CONTAINER_COVERAGE_DATA_DIR}/coverage.json'
# Up all containers
- docker-compose up -d

@ -1 +0,0 @@
PYTHONPATH="datumaro/:$PYTHONPATH"

@ -25,13 +25,7 @@
}
],
"python.linting.pylintEnabled": true,
"python.envFile": "${workspaceFolder}/.vscode/python.env",
"python.testing.unittestEnabled": true,
"python.testing.unittestArgs": [
"-v",
"-s",
"./datumaro",
],
"licenser.license": "Custom",
"licenser.customHeader": "Copyright (C) @YEAR@ Intel Corporation\n\nSPDX-License-Identifier: MIT",
"files.trimTrailingWhitespace": true

@ -73,8 +73,6 @@ COPY components /tmp/components
COPY cvat/requirements/ /tmp/requirements/
COPY supervisord.conf mod_wsgi.conf wait-for-it.sh manage.py ${HOME}/
RUN python3 -m pip install --no-cache-dir -r /tmp/requirements/${DJANGO_CONFIGURATION}.txt
# pycocotools package is impossible to install with its dependencies by one pip install command
RUN python3 -m pip install --no-cache-dir pycocotools==2.0.0
ARG CLAM_AV
ENV CLAM_AV=${CLAM_AV}
@ -95,9 +93,6 @@ COPY cvat/ ${HOME}/cvat
COPY cvat-core/ ${HOME}/cvat-core
COPY cvat-data/ ${HOME}/cvat-data
COPY tests ${HOME}/tests
COPY datumaro/ ${HOME}/datumaro
RUN python3 -m pip install --no-cache-dir -r ${HOME}/datumaro/requirements.txt
RUN chown -R ${USER}:${USER} .

@ -9,7 +9,7 @@ RUN apt-get update && \
apt-utils \
build-essential \
python3-dev \
ruby \
ruby \
&& \
curl https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' | tee /etc/apt/sources.list.d/google-chrome.list && \

@ -41,7 +41,8 @@ annotation team. Try it online [cvat.org](https://cvat.org).
## Supported annotation formats
Format selection is possible after clicking on the Upload annotation
and Dump annotation buttons. [Datumaro](datumaro/README.md) dataset
and Dump annotation buttons.
[Datumaro](https://github.com/openvinotoolkit/datumaro) dataset
framework allows additional dataset transformations
via its command line tool and Python library.
@ -49,7 +50,7 @@ via its command line tool and Python library.
| ------------------------------------------------------------------------------------------ | ------ | ------ |
| [CVAT for images](cvat/apps/documentation/xml_format.md#annotation) | X | X |
| [CVAT for a video](cvat/apps/documentation/xml_format.md#interpolation) | X | X |
| [Datumaro](datumaro/README.md) | | X |
| [Datumaro](https://github.com/openvinotoolkit/datumaro) | | X |
| [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X |
| Segmentation masks from [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X |
| [YOLO](https://pjreddie.com/darknet/yolo/) | X | X |

@ -11,7 +11,7 @@ from tempfile import TemporaryDirectory
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.settings.base import BASE_DIR, DATUMARO_PATH
from cvat.settings.base import BASE_DIR
from datumaro.components.project import Project
from ..registry import dm_env, exporter
@ -79,14 +79,7 @@ class DatumaroProjectExporter:
osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'),
osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'))
# Make Datumaro and CVAT CLI modules available to the user
shutil.copytree(DATUMARO_PATH, osp.join(save_dir, 'datumaro'),
ignore=lambda src, names: ['__pycache__'] + [
n for n in names
if sum([int(n.endswith(ext)) for ext in
['.pyx', '.pyo', '.pyd', '.pyc']])
])
# Make CVAT CLI module available to the user
cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils')
os.makedirs(cvat_utils_dst_dir)
shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'),

@ -6,7 +6,7 @@ python -m virtualenv .venv
. .venv/bin/activate
# install dependencies
pip install -e datumaro/
pip install 'git+https://github.com/openvinotoolkit/datumaro'
pip install -r cvat/utils/cli/requirements.txt
# set up environment
@ -17,4 +17,4 @@ export PYTHONPATH
datum --help
```
Check Datumaro [docs](datumaro/README.md) for more info.
Check [Datumaro docs](https://github.com/openvinotoolkit/datumaro) for more info.

@ -44,4 +44,5 @@ tensorflow==2.2.0 # Optional requirement of Datumaro
# The package is used by pyunpack as a command line tool to support multiple
# archives. Don't use as a python module because it has GPL license.
patool==1.12
diskcache==5.0.2
diskcache==5.0.2
git+https://github.com/openvinotoolkit/datumaro@v0.1.0

@ -409,9 +409,6 @@ DATA_UPLOAD_MAX_NUMBER_FIELDS = None # this django check disabled
LOCAL_LOAD_MAX_FILES_COUNT = 500
LOCAL_LOAD_MAX_FILES_SIZE = 512 * 1024 * 1024 # 512 MB
DATUMARO_PATH = os.path.join(BASE_DIR, 'datumaro')
sys.path.append(DATUMARO_PATH)
RESTRICTIONS = {
'user_agreements': [],

@ -1 +0,0 @@
/datumaro.egg-info

@ -1,75 +0,0 @@
## Table of Contents
- [Installation](#installation)
- [Usage](#usage)
- [Testing](#testing)
- [Design](#design-and-code-structure)
## Installation
### Prerequisites
- Python (3.5+)
- OpenVINO (optional)
``` bash
git clone https://github.com/opencv/cvat
```
Optionally, install a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```
Then install all dependencies:
``` bash
while read -r p; do pip install $p; done < requirements.txt
```
If you're working inside CVAT environment:
``` bash
. .env/bin/activate
while read -r p; do pip install $p; done < datumaro/requirements.txt
```
## Usage
> The directory containing Datumaro should be in the `PYTHONPATH`
> environment variable or `cvat/datumaro/` should be the current directory.
``` bash
datum --help
python -m datumaro --help
python datumaro/ --help
python datum.py --help
```
``` python
import datumaro
```
## Testing
It is expected that all Datumaro functionality is covered and checked by
unit tests. Tests are placed in `tests/` directory.
To run tests use:
``` bash
python -m unittest discover -s tests
```
If you're working inside CVAT environment, you can also use:
``` bash
python manage.py test datumaro/
```
## Design and code structure
- [Design document](docs/design.md)
- [Developer guide](docs/developer_guide.md)

@ -1,22 +0,0 @@
MIT License
Copyright (C) 2019-2020 Intel Corporation
 
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
 

@ -1,205 +0,0 @@
# Dataset Management Framework (Datumaro)
A framework to build, transform, and analyze datasets.
<!--lint disable fenced-code-flag-->
```
CVAT annotations -- ---> Annotation tool
\ /
COCO-like dataset -----> Datumaro ---> dataset ------> Model training
/ \
VOC-like dataset -- ---> Publication etc.
```
<!--lint enable fenced-code-flag-->
## Contents
- [Documentation](#documentation)
- [Features](#features)
- [Installation](#installation)
- [Usage](#usage)
- [Examples](#examples)
- [Contributing](#contributing)
## Documentation
- [User manual](docs/user_manual.md)
- [Design document](docs/design.md)
- [Contributing](CONTRIBUTING.md)
## Features
- Dataset format conversions:
- COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
- [Format specification](http://cocodataset.org/#format-data)
- [Dataset example](tests/assets/coco_dataset)
- `labels` are our extension - like `instances` with only `category_id`
- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
- [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
- [Dataset example](tests/assets/voc_dataset)
- YOLO (`bboxes`)
- [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
- [Dataset example](tests/assets/yolo_dataset)
- TF Detection API (`bboxes`, `masks`)
- Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
- [Dataset example](tests/assets/tf_detection_api_dataset)
- MOT sequences
- [Format specification](https://arxiv.org/pdf/1906.04567.pdf)
- [Dataset example](tests/assets/mot_dataset)
- CVAT
- [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [Dataset example](tests/assets/cvat_dataset)
- LabelMe
- [Format specification](http://labelme.csail.mit.edu/Release3.0)
- [Dataset example](tests/assets/labelme_dataset)
- Dataset building operations:
- Merging multiple datasets into one
- Dataset filtering with custom conditions, for instance:
- remove polygons of a certain class
- remove images without a specific class
- remove `occluded` annotations from images
- keep only vertically-oriented images
- remove small area bounding boxes from annotations
- Annotation conversions, for instance:
- polygons to instance masks and vice versa
- apply a custom colormap for mask annotations
- rename or remove dataset labels
- Dataset comparison
- Model integration:
- Inference (OpenVINO and custom models)
- Explainable AI ([RISE algorithm](https://arxiv.org/abs/1806.07421))
> Check the [design document](docs/design.md) for a full list of features
## Installation
Optionally, create a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```
Install Datumaro package:
``` bash
pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
```
## Usage
There are several options available:
- [A standalone command-line tool](#standalone-tool)
- [A python module](#python-module)
### Standalone tool
<!--lint disable fenced-code-flag-->
```
User
|
v
+------------------+
| CVAT |
+--------v---------+ +------------------+ +--------------+
| Datumaro module | ----> | Datumaro project | <---> | Datumaro CLI | <--- User
+------------------+ +------------------+ +--------------+
```
<!--lint enable fenced-code-flag-->
``` bash
datum --help
python -m datumaro --help
```
### Python module
Datumaro can be used in custom scripts as a library in the following way:
``` python
from datumaro.components.project import Project # project-related things
import datumaro.components.extractor # annotations and high-level interfaces
# etc.
project = Project.load('directory')
```
## Examples
<!--lint disable list-item-indent-->
<!--lint disable list-item-bullet-indent-->
- Convert [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#data) to COCO, keep only images with `cat` class presented:
```bash
# Download VOC dataset:
# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
datum convert --input-format voc --input-path <path/to/voc> \
--output-format coco --filter '/item[annotation/label="cat"]'
```
- Convert only non-occluded annotations from a CVAT-annotated project to TFrecord:
```bash
# export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir
datum project extract --filter '/item/annotation[occluded="False"]' \
--mode items+anno --output-dir not_occluded
datum project export --project not_occluded \
--format tf_detection_api -- --save-images
```
- Annotate COCO, extract image subset, re-annotate it in CVAT, update old dataset:
```bash
# Download COCO dataset http://cocodataset.org/#download
# Put images to coco/images/ and annotations to coco/annotations/
datum project import --format coco --input-path <path/to/coco>
datum project export --filter '/image[images_I_dont_like]' --format cvat \
--output-dir reannotation
# import dataset and images to CVAT, re-annotate
# export Datumaro project, extract to 'reannotation-upd'
datum project merge reannotation-upd
datum project export --format coco
```
- Annotate instance polygons in CVAT, export as masks in COCO:
```bash
datum convert --input-format cvat --input-path <path/to/cvat.xml> \
--output-format coco -- --segmentation-mode masks
```
- Apply an OpenVINO detection model to some COCO-like dataset,
then compare annotations with ground truth and visualize in TensorBoard:
```bash
datum project import --format coco --input-path <path/to/coco>
# create model results interpretation script
datum model add mymodel openvino \
--weights model.bin --description model.xml \
--interpretation-script parse_results.py
datum model run --model mymodel --output-dir mymodel_inference/
datum project diff mymodel_inference/ --format tensorboard --output-dir diff
```
- Change colors in PASCAL VOC-like `.png` masks:
```bash
datum project import --format voc --input-path <path/to/voc/dataset>
# Create a color map file with desired colors:
#
# label : color_rgb : parts : actions
# cat:0,0,255::
# dog:255,0,0::
#
# Save as mycolormap.txt
datum project export --format voc_segmentation -- --label-map mycolormap.txt
# add "--apply-colormap=0" to save grayscale (indexed) masks
# check "--help" option for more info
# use "datum --loglevel debug" for extra conversion info
```
<!--lint enable list-item-bullet-indent-->
<!--lint enable list-item-indent-->
## Contributing
Feel free to [open an Issue](https://github.com/opencv/cvat/issues/new) if you
think something needs to be changed. You are welcome to participate in development;
development instructions are available in our [developer manual](CONTRIBUTING.md).

@ -1,8 +0,0 @@
#!/usr/bin/env python

# Thin launcher for the Datumaro command line:
# running `./datum.py` is equivalent to `python -m datumaro`.

import sys

from datumaro.cli.__main__ import main

if __name__ == '__main__':
    # Forward the CLI's integer return code to the shell.
    sys.exit(main())

@ -1,4 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -1,12 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# Package entry point for `python -m datumaro`: delegates to the CLI main().

import sys

from datumaro.cli.__main__ import main

if __name__ == '__main__':
    # Exit with the status code returned by the CLI.
    sys.exit(main())

@ -1,4 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -1,125 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import sys
from . import contexts, commands
from .util import CliException, add_subparser
from ..version import VERSION
# User-facing level names, in increasing order of severity. The numeric
# constants are looked up from the logging module by upper-cased name.
_LEVEL_NAMES = ('debug', 'info', 'warning', 'error', 'critical')
_log_levels = {name: getattr(log, name.upper()) for name in _LEVEL_NAMES}

def loglevel(name):
    """Translate a level name (e.g. 'debug') into its numeric constant.

    Raises KeyError for unknown names; argparse turns that into a
    command-line error when used as an argument `type`.
    """
    return _log_levels[name]
class _LogManager:
    """Configures the root logger from command-line arguments."""

    @classmethod
    def init_logger(cls, args=None):
        # A throwaway pre-parser: only --loglevel is needed before the real
        # parser exists, so every other argument is ignored here.
        pre_parser = argparse.ArgumentParser(add_help=False)
        cls._define_loglevel_option(pre_parser)
        known, _ = pre_parser.parse_known_args(args)

        log.basicConfig(
            format='%(asctime)s %(levelname)s: %(message)s',
            level=known.loglevel)

    @staticmethod
    def _define_loglevel_option(parser):
        # Shared by the pre-parser above and the main CLI parser.
        parser.add_argument('--loglevel', type=loglevel, default='info',
            help="Logging level (options: %s; default: %s)" % \
                (', '.join(_log_levels.keys()), "%(default)s"))
        return parser
def _make_subcommands_help(commands, help_line_start=0):
    """Format a plain-text listing of subcommands.

    commands: iterable of (name, module, help_text) triples.
    help_line_start: column at which the help text should begin.
    """
    # Name-column width; the offset mirrors the leading indent and the
    # separator assumed by the caller's column computation.
    name_width = max(0, help_line_start - 2 - 1)
    line_fmt = " %-" + str(name_width) + "s%s\n"
    return "".join(line_fmt % (name, help_text)
        for name, _, help_text in commands)
def make_parser():
    """Build the top-level argument parser of the `datumaro` CLI.

    Exposes --version and --loglevel plus one subcommand for every known
    context (project/source/model) and standalone command.
    """
    parser = argparse.ArgumentParser(prog="datumaro",
        description="Dataset Framework",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version=VERSION)
    _LogManager._define_loglevel_option(parser)

    # (name, module providing build_parser(), help text) triples.
    known_contexts = [
        ('project', contexts.project, "Actions on projects (datasets)"),
        ('source', contexts.source, "Actions on data sources"),
        ('model', contexts.model, "Actions on models"),
    ]
    known_commands = [
        ('create', commands.create, "Create project"),
        ('add', commands.add, "Add source to project"),
        ('remove', commands.remove, "Remove source from project"),
        ('export', commands.export, "Export project"),
        ('explain', commands.explain, "Run Explainable AI algorithm for model"),
        ('merge', commands.merge, "Merge datasets"),
        ('convert', commands.convert, "Convert dataset"),
    ]

    # Argparse doesn't support subparser groups:
    # https://stackoverflow.com/questions/32017020/grouping-argparse-subparser-arguments
    # so the "Contexts:"/"Commands:" listing is rendered by hand and passed
    # as the subparsers title, while per-entry help is suppressed.
    help_line_start = max((len(e[0]) for e in known_contexts + known_commands),
        default=0)
    help_line_start = max((2 + help_line_start) // 4 + 1, 6) * 4 # align to tabs
    subcommands_desc = ""
    if known_contexts:
        subcommands_desc += "Contexts:\n"
        subcommands_desc += _make_subcommands_help(known_contexts,
            help_line_start)
    if known_commands:
        if subcommands_desc:
            subcommands_desc += "\n"
        subcommands_desc += "Commands:\n"
        subcommands_desc += _make_subcommands_help(known_commands,
            help_line_start)
    if subcommands_desc:
        subcommands_desc += \
            "\nRun '%s COMMAND --help' for more information on a command." % \
            parser.prog

    subcommands = parser.add_subparsers(title=subcommands_desc,
        description="", help=argparse.SUPPRESS)
    for command_name, command, _ in known_contexts + known_commands:
        # Each context/command module registers itself via build_parser().
        add_subparser(subcommands, command_name, command.build_parser)

    return parser
def main(args=None):
    """CLI entry point.

    Initializes logging, parses the command line and dispatches to the
    selected subcommand. Returns a process exit code.
    """
    _LogManager.init_logger(args)
    parser = make_parser()
    parsed = parser.parse_args(args)

    # No subcommand selected: show usage and signal failure.
    if 'command' not in parsed:
        parser.print_help()
        return 1

    try:
        return parsed.command(parsed)
    except CliException as e:
        # Expected, user-facing errors: log and exit non-zero.
        log.error(e)
        return 1
    except Exception as e:
        # Unexpected errors: log, then re-raise to keep the traceback.
        log.error(e)
        raise


if __name__ == '__main__':
    sys.exit(main())

@ -1,6 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from . import add, create, explain, export, remove, merge, convert

@ -1,8 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.source import build_add_parser as build_parser

@ -1,137 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
from datumaro.components.project import Environment
from ..contexts.project import FilterModes
from ..util import CliException, MultilineFormatter, make_file_name
from ..util.project import generate_next_file_name
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argument parser for the `datum convert` command.

    parser_ctor lets the CLI pass a subparser factory instead of a plain
    ArgumentParser when attaching this command.
    """
    # Interpolated into the help text below.
    builtin_importers = sorted(Environment().importers.items)
    builtin_converters = sorted(Environment().converters.items)

    # |n and |s are line-break / indent markers handled by MultilineFormatter.
    parser = parser_ctor(help="Convert an existing dataset to another format",
        description="""
        Converts a dataset from one format to another.
        You can add your own formats using a project.|n
        |n
        Supported input formats: %s|n
        |n
        Supported output formats: %s|n
        |n
        Examples:|n
        - Export a dataset as a PASCAL VOC dataset, include images:|n
        |s|sconvert -i src/path -f voc -- --save-images|n
        |n
        - Export a dataset as a COCO dataset to a specific directory:|n
        |s|sconvert -i src/path -f coco -o path/I/like/
        """ % (', '.join(builtin_importers), ', '.join(builtin_converters)),
        formatter_class=MultilineFormatter)

    parser.add_argument('-i', '--input-path', default='.', dest='source',
        help="Path to look for a dataset")
    parser.add_argument('-if', '--input-format',
        help="Input dataset format. Will try to detect, if not specified.")
    parser.add_argument('-f', '--output-format', required=True,
        help="Output format")
    parser.add_argument('-o', '--output-dir', dest='dst_dir',
        help="Directory to save output (default: a subdir in the current one)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-e', '--filter',
        help="Filter expression for dataset items")
    parser.add_argument('--filter-mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()) , '%(default)s'))
    # Everything after `--` is forwarded to the output format's own parser.
    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
        help="Additional arguments for output format (pass '-- -h' for help)")
    parser.set_defaults(command=convert_command)

    return parser
def convert_command(args):
    """Implements `datum convert`.

    Loads a dataset from args.source (auto-detecting its format unless
    -if/--input-format was given), applies the optional item filter and
    exports it in args.output_format to a target directory.

    Returns a process exit code: 0 on success, 1 when format detection
    fails, 2 when detection is ambiguous.
    """
    env = Environment()

    try:
        converter = env.converters.get(args.output_format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format)
    # FIX: keep converter options in a dedicated variable. The previous code
    # stored them in 'extra_args' and later reassigned that name in the
    # importer branch below; because converter_proxy reads the variable
    # late (at call time), importer options silently clobbered converter
    # options.
    converter_args = converter.from_cmdline(args.extra_args)
    def converter_proxy(extractor, save_dir):
        return converter.convert(extractor, save_dir, **converter_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    if not args.input_format:
        # Probe every registered importer to detect the source format.
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            importer = env.make_importer(format_name)
            try:
                match = importer.detect(args.source)
                if match:
                    log.debug("format matched")
                    matches.append((format_name, importer))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    format_name)

        if len(matches) == 0:
            log.error("Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2

        format_name, importer = matches[0]
        args.input_format = format_name
        log.info("Source dataset format detected as '%s'", args.input_format)
    else:
        try:
            importer = env.make_importer(args.input_format)
            if hasattr(importer, 'from_cmdline'):
                # Parsed for side effects / validation only; importer
                # options are not forwarded anywhere at the moment.
                importer_args = importer.from_cmdline()
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.input_format)

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        # Refuse to write into a non-empty directory unless --overwrite.
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # e.g. "mydataset-coco", with a numeric suffix if it already exists.
        dst_dir = generate_next_file_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    project = importer(source)
    dataset = project.make_dataset()

    log.info("Exporting the dataset")
    dataset.export_project(
        save_dir=dst_dir,
        converter=converter_proxy,
        filter_expr=args.filter,
        **filter_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0

@ -1,8 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.project import build_create_parser as build_parser

@ -1,183 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
from datumaro.components.project import Project
from datumaro.util.command_targets import (TargetKinds, target_selector,
ProjectTarget, SourceTarget, ImageTarget, is_project_path)
from datumaro.util.image import load_image, save_image
from ..util import MultilineFormatter
from ..util.project import load_project
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argument parser for the `datum explain` command.

    The algorithm is chosen via a subparser; currently only 'rise' is
    defined.
    """
    parser = parser_ctor(help="Run Explainable AI algorithm",
        description="Runs an explainable AI algorithm for a model.")

    parser.add_argument('-m', '--model', required=True,
        help="Model to use for inference")
    parser.add_argument('-t', '--target', default=None,
        help="Inference target - image, source, project "
            "(default: current dir)")
    parser.add_argument('-o', '--output-dir', dest='save_dir', default=None,
        help="Directory to save output (default: display only)")

    method_sp = parser.add_subparsers(dest='algorithm')

    rise_parser = method_sp.add_parser('rise',
        description="""
        RISE: Randomized Input Sampling for
        Explanation of Black-box Models algorithm|n
        |n
        See explanations at: https://arxiv.org/pdf/1806.07421.pdf
        """,
        formatter_class=MultilineFormatter)
    rise_parser.add_argument('-s', '--max-samples', default=None, type=int,
        help="Number of algorithm iterations (default: mask size ^ 2)")
    rise_parser.add_argument('--mw', '--mask-width',
        dest='mask_width', default=7, type=int,
        help="Mask width (default: %(default)s)")
    rise_parser.add_argument('--mh', '--mask-height',
        dest='mask_height', default=7, type=int,
        help="Mask height (default: %(default)s)")
    rise_parser.add_argument('--prob', default=0.5, type=float,
        help="Mask pixel inclusion probability (default: %(default)s)")
    rise_parser.add_argument('--iou', '--iou-thresh',
        dest='iou_thresh', default=0.9, type=float,
        help="IoU match threshold for detections (default: %(default)s)")
    rise_parser.add_argument('--nms', '--nms-iou-thresh',
        dest='nms_iou_thresh', default=0.0, type=float,
        help="IoU match threshold in Non-maxima suppression (default: no NMS)")
    rise_parser.add_argument('--conf', '--det-conf-thresh',
        dest='det_conf_thresh', default=0.0, type=float,
        help="Confidence threshold for detections (default: include all)")
    rise_parser.add_argument('-b', '--batch-size', default=1, type=int,
        help="Inference batch size (default: %(default)s)")
    rise_parser.add_argument('--display', action='store_true',
        help="Visualize results during computations")

    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=explain_command)

    return parser
def explain_command(args):
    """Implements `datum explain`.

    Runs an XAI algorithm (currently only RISE) for a model over an image,
    a data source, or a whole project, and displays or saves the resulting
    saliency heatmaps. Returns 0 on success.
    """
    project_path = args.project_dir
    if is_project_path(project_path):
        project = Project.load(project_path)
    else:
        project = None
    # Resolve the free-form --target string into a (kind, value) pair.
    args.target = target_selector(
        ProjectTarget(is_default=True, project=project),
        SourceTarget(project=project),
        ImageTarget()
    )(args.target)

    if args.target[0] == TargetKinds.project:
        if is_project_path(args.target[1]):
            args.project_dir = osp.dirname(osp.abspath(args.target[1]))

    # Heavy imports are deferred so unrelated CLI commands stay fast.
    import cv2
    from matplotlib import cm

    project = load_project(args.project_dir)
    model = project.make_executable_model(args.model)

    if str(args.algorithm).lower() != 'rise':
        raise NotImplementedError()

    from datumaro.components.algorithms.rise import RISE
    rise = RISE(model,
        max_samples=args.max_samples,
        mask_width=args.mask_width,
        mask_height=args.mask_height,
        prob=args.prob,
        iou_thresh=args.iou_thresh,
        nms_thresh=args.nms_iou_thresh,
        det_conf_thresh=args.det_conf_thresh,
        batch_size=args.batch_size)

    if args.target[0] == TargetKinds.image:
        image_path = args.target[1]
        image = load_image(image_path)

        log.info("Running inference explanation for '%s'" % image_path)
        heatmap_iter = rise.apply(image, progressive=args.display)

        image = image / 255.0
        file_name = osp.splitext(osp.basename(image_path))[0]
        if args.display:
            # Progressive mode: refresh the display after each iteration.
            for i, heatmaps in enumerate(heatmap_iter):
                for j, heatmap in enumerate(heatmaps):
                    hm_painted = cm.jet(heatmap)[:, :, 2::-1]
                    disp = (image + hm_painted) / 2
                    cv2.imshow('heatmap-%s' % j, hm_painted)
                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
                cv2.waitKey(10)
                print("Iter", i, "of", args.max_samples, end='\r')
        else:
            heatmaps = next(heatmap_iter)

        if args.save_dir is not None:
            log.info("Saving inference heatmaps at '%s'" % args.save_dir)
            os.makedirs(args.save_dir, exist_ok=True)

            for j, heatmap in enumerate(heatmaps):
                save_path = osp.join(args.save_dir,
                    file_name + '-heatmap-%s.png' % j)
                save_image(save_path, heatmap * 255.0)
        else:
            for j, heatmap in enumerate(heatmaps):
                disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                cv2.imshow(file_name + '-heatmap-%s' % j, disp)
            cv2.waitKey(0)
    elif args.target[0] == TargetKinds.source or \
            args.target[0] == TargetKinds.project:
        if args.target[0] == TargetKinds.source:
            source_name = args.target[1]
            dataset = project.make_source_project(source_name).make_dataset()
            log.info("Running inference explanation for '%s'" % source_name)
        else:
            project_name = project.config.project_name
            dataset = project.make_dataset()
            log.info("Running inference explanation for '%s'" % project_name)

        for item in dataset:
            image = item.image.data
            if image is None:
                # FIX: logging.warn() is a deprecated alias of warning().
                log.warning(
                    "Dataset item %s does not have image data. Skipping." % \
                    (item.id))
                continue

            heatmap_iter = rise.apply(image)

            image = image / 255.0
            heatmaps = next(heatmap_iter)

            if args.save_dir is not None:
                log.info("Saving inference heatmaps to '%s'" % args.save_dir)
                os.makedirs(args.save_dir, exist_ok=True)

                for j, heatmap in enumerate(heatmaps):
                    save_image(osp.join(args.save_dir,
                            item.id + '-heatmap-%s.png' % j),
                        heatmap * 255.0, create_dir=True)

            if not args.save_dir or args.display:
                for j, heatmap in enumerate(heatmaps):
                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                    cv2.imshow(item.id + '-heatmap-%s' % j, disp)
                cv2.waitKey(0)
    else:
        raise NotImplementedError()

    return 0

@ -1,8 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.project import build_export_parser as build_parser

@ -1,124 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import json
import logging as log
import os.path as osp
from collections import OrderedDict
from datumaro.components.project import Project
from datumaro.components.operations import (IntersectMerge,
QualityError, MergeError)
from ..util import at_least, MultilineFormatter, CliException
from ..util.project import generate_next_file_name, load_project
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argument parser for the `datum merge` command.

    Fixes several user-facing help-string defects: 'consising' ->
    'consisting', 'consistence' -> 'consistency', a missing space that
    rendered 'label tomake', and a missing comma in the label list.
    """
    parser = parser_ctor(help="Merge few projects",
        description="""
        Merges multiple datasets into one. This can be useful if you
        have few annotations and wish to merge them,
        taking into consideration potential overlaps and conflicts.
        This command can try to find a common ground by voting or
        return a list of conflicts.|n
        |n
        Examples:|n
        - Merge annotations from 3 (or more) annotators:|n
        |s|smerge project1/ project2/ project3/|n
        - Check groups of the merged dataset for consistency:|n
        |s|s|slook for groups consisting of 'person', 'hand', 'head', 'foot'|n
        |s|smerge project1/ project2/ -g 'person,hand?,head,foot?'
        """,
        formatter_class=MultilineFormatter)

    def _group(s):
        # Comma-separated label list -> list of label names.
        return s.split(',')

    parser.add_argument('project', nargs='+', action=at_least(2),
        help="Path to a project (repeatable)")
    parser.add_argument('-iou', '--iou-thresh', default=0.25, type=float,
        help="IoU match threshold for segments (default: %(default)s)")
    parser.add_argument('-oconf', '--output-conf-thresh',
        default=0.0, type=float,
        help="Confidence threshold for output "
            "annotations (default: %(default)s)")
    parser.add_argument('--quorum', default=0, type=int,
        help="Minimum count for a label and attribute voting "
            "results to be counted (default: %(default)s)")
    # NOTE: with action='append' argparse does not copy the default list;
    # this is fine for a single parse, which is how the CLI uses it.
    parser.add_argument('-g', '--groups', action='append', type=_group,
        default=[],
        help="A comma-separated list of labels in "
            "annotation groups to check. '?' postfix can be added to a label "
            "to make it optional in the group (repeatable)")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.set_defaults(command=merge_command)

    return parser
def merge_command(args):
    """Merge several projects into a new dataset and write a conflict report.

    Returns 0 on success. Raises CliException when the output directory
    already exists and --overwrite was not given.
    """
    # BUG FIX: this module imports only 'os.path as osp', so the bare
    # 'os' name used below was undefined (NameError on os.listdir).
    import os
    source_projects = [load_project(p) for p in args.project]
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # pick a fresh 'merged', 'merged-1', ... directory name
        dst_dir = generate_next_file_name('merged')
    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())
    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups,
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)
    # wrap the merged dataset into a fresh project before saving
    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)
    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)
    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)
    return 0
def save_merge_report(merger, path):
    """Write a JSON summary of merge errors to *path*.

    The report counts errors per dataset item and per source, and lists
    the string form of every error.
    """
    item_errors = OrderedDict()
    source_errors = OrderedDict()
    all_errors = []

    def _bump(counter, key):
        # increment a per-key error counter, starting at zero
        counter[key] = counter.get(key, 0) + 1

    for error in merger.errors:
        if isinstance(error, QualityError):
            _bump(item_errors, str(error.item_id))
        elif isinstance(error, MergeError):
            for src in error.sources:
                _bump(source_errors, src)
            _bump(item_errors, str(error.item_id))
        all_errors.append(str(error))

    report = OrderedDict([
        ('Item errors', item_errors),
        ('Source errors', source_errors),
        ('All errors', all_errors),
    ])
    with open(path, 'w') as f:
        json.dump(report, f, indent=4)

@ -1,8 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.source import build_remove_parser as build_parser

@ -1,6 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from . import project, source, model, item

@ -1,36 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from ...util import add_subparser
def build_export_parser(parser_ctor=argparse.ArgumentParser):
    """Create an (empty, placeholder) parser for the item 'export' command."""
    return parser_ctor()
def build_stats_parser(parser_ctor=argparse.ArgumentParser):
    """Create an (empty, placeholder) parser for the item 'stats' command."""
    return parser_ctor()
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
    """Create an (empty, placeholder) parser for the item 'diff' command."""
    return parser_ctor()
def build_edit_parser(parser_ctor=argparse.ArgumentParser):
    """Create an (empty, placeholder) parser for the item 'edit' command."""
    return parser_ctor()
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Create the 'item' command parser and register its subcommands."""
    parser = parser_ctor()
    subcommands = parser.add_subparsers()
    # table-driven registration keeps the subcommand list in one place
    for name, builder in (
        ('export', build_export_parser),
        ('stats', build_stats_parser),
        ('diff', build_diff_parser),
        ('edit', build_edit_parser),
    ):
        add_subparser(subcommands, name, builder)
    return parser

@ -1,183 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
import re
from datumaro.components.config import DEFAULT_FORMAT
from datumaro.components.project import Environment
from ...util import CliException, MultilineFormatter, add_subparser
from ...util.project import load_project, \
generate_next_name, generate_next_file_name
def build_add_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'model add' command."""
    # queried at parser-build time so the help text lists current launchers
    builtins = sorted(Environment().launchers.items)
    parser = parser_ctor(help="Add model to project",
        description="""
        Registers an executable model into a project. A model requires
        a launcher to be executed. Each launcher has its own options, which
        are passed after '--' separator, pass '-- -h' for more info.
        |n
        List of builtin launchers: %s
        """ % ', '.join(builtins),
        formatter_class=MultilineFormatter)
    parser.add_argument('-l', '--launcher', required=True,
        help="Model launcher")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    parser.add_argument('--copy', action='store_true',
        help="Copy the model to the project")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the model to be added (default: generate automatically)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite if exists")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=add_command)
    return parser
def add_command(args):
    """Register an executable model in the project.

    The model name comes from --name or is generated automatically;
    with --copy, the model files are copied into the project directory.
    """
    project = load_project(args.project_dir)
    if args.name:
        if not args.overwrite and args.name in project.config.models:
            raise CliException("Model '%s' already exists "
                "(pass --overwrite to overwrite)" % args.name)
    else:
        args.name = generate_next_name(
            project.config.models, 'model', '-', default=0)
        assert args.name not in project.config.models, args.name
    try:
        launcher = project.env.launchers.get(args.launcher)
    except KeyError:
        raise CliException("Launcher '%s' is not found" % args.launcher)
    # launchers may expose a dedicated CLI plugin; fall back to the launcher
    cli_plugin = getattr(launcher, 'cli_plugin', launcher)
    model_args = cli_plugin.from_cmdline(args.extra_args)
    if args.copy:
        log.info("Copying model data")
        model_dir = project.local_model_dir(args.name)
        os.makedirs(model_dir, exist_ok=False)
        try:
            cli_plugin.copy_model(model_dir, model_args)
        except (AttributeError, NotImplementedError):
            # best-effort: not every launcher supports copying; keep the
            # registration going with the original model location
            log.error("Can't copy: copying is not available for '%s' models" % \
                args.launcher)
    log.info("Checking the model")
    project.add_model(args.name, {
        'launcher': args.launcher,
        'options': model_args,
    })
    project.make_executable_model(args.name)
    project.save()
    log.info("Model '%s' with launcher '%s' has been added to project '%s'" % \
        (args.name, args.launcher, project.config.project_name))
    return 0
def build_remove_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'model remove' command."""
    parser = parser_ctor()
    parser.add_argument('name',
        help="Name of the model to be removed")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=remove_command)
    return parser
def remove_command(args):
    """Delete the named model from the project and persist the change."""
    proj = load_project(args.project_dir)
    proj.remove_model(args.name)
    proj.save()
    return 0
def build_run_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'model run' (inference) command."""
    parser = parser_ctor()
    parser.add_argument('-o', '--output-dir', dest='dst_dir',
        help="Directory to save output")
    parser.add_argument('-m', '--model', dest='model_name', required=True,
        help="Model to apply to the project")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite if exists")
    parser.set_defaults(command=run_command)
    return parser
def run_command(args):
    """Apply a registered model to the project dataset and save the results."""
    project = load_project(args.project_dir)
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            # BUG FIX: the message read "(pass --overwrite overwrite)";
            # now matches the wording used by every other command.
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-inference' % \
            project.config.project_name)
    project.make_dataset().apply_model(
        save_dir=osp.abspath(dst_dir),
        model=args.model_name)
    log.info("Inference results have been saved to '%s'" % dst_dir)
    return 0
def build_info_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'model info' command."""
    parser = parser_ctor()
    parser.add_argument('-n', '--name',
        help="Model name")
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Show details")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=info_command)
    return parser
def info_command(args):
    """Print information about one model, or list all models in the project."""
    project = load_project(args.project_dir)
    if args.name:
        model = project.get_model(args.name)
        print(model)
    else:
        for name, conf in project.config.models.items():
            print(name)
            if args.verbose:
                print(dict(conf))
    # Consistency fix: return an explicit success code like the other
    # command handlers in this module (previously returned None).
    return 0
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Create the 'model' command parser and register its subcommands."""
    parser = parser_ctor()
    subcommands = parser.add_subparsers()
    # table-driven registration keeps the subcommand list in one place
    for name, builder in (
        ('add', build_add_parser),
        ('remove', build_remove_parser),
        ('run', build_run_parser),
        ('info', build_info_parser),
    ):
        add_subparser(subcommands, name, builder)
    return parser

@ -1,826 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import json
import logging as log
import os
import os.path as osp
import shutil
from enum import Enum
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.dataset_filter import DatasetItemEncoder
from datumaro.components.extractor import AnnotationType
from datumaro.components.operations import (DistanceComparator,
ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
from datumaro.components.project import \
PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
from datumaro.components.project import Environment, Project
from ...util import (CliException, MultilineFormatter, add_subparser,
make_file_name)
from ...util.project import generate_next_file_name, load_project
from .diff import DiffVisualizer
def build_create_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'project create' command."""
    parser = parser_ctor(help="Create empty project",
        description="""
        Create a new empty project.|n
        |n
        Examples:|n
        - Create a project in the current directory:|n
        |s|screate -n myproject|n
        |n
        - Create a project in other directory:|n
        |s|screate -o path/I/like/
        """,
        formatter_class=MultilineFormatter)
    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
        # BUG FIX: help string was missing its closing parenthesis
        help="Save directory for the new project (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.set_defaults(command=create_command)
    return parser
def create_command(args):
    """Generate a new empty project at the requested directory.

    Refuses to overwrite a non-empty project env or dataset directory
    unless --overwrite was given.
    """
    project_dir = osp.abspath(args.dst_dir)
    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % project_env_dir)
        else:
            shutil.rmtree(project_env_dir, ignore_errors=True)
    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % own_dataset_dir)
        else:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(own_dataset_dir)
    project_name = args.name
    if project_name is None:
        # default the project name to the directory name
        project_name = osp.basename(project_dir)
    log.info("Creating project at '%s'" % project_dir)
    Project.generate(project_dir, {
        'project_name': project_name,
    })
    log.info("Project has been created at '%s'" % project_dir)
    return 0
def build_import_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'project import' command."""
    # queried at parser-build time so the help text lists current importers
    builtins = sorted(Environment().importers.items)
    parser = parser_ctor(help="Create project from existing dataset",
        description="""
        Creates a project from an existing dataset. The source can be:|n
        - a dataset in a supported format (check 'formats' section below)|n
        - a Datumaro project|n
        |n
        Formats:|n
        Datasets come in a wide variety of formats. Each dataset
        format defines its own data structure and rules on how to
        interpret the data. For example, the following data structure
        is used in COCO format:|n
        /dataset/|n
        - /images/<id>.jpg|n
        - /annotations/|n
        |n
        In Datumaro dataset formats are supported by
        Extractor-s and Importer-s.
        An Extractor produces a list of dataset items corresponding
        to the dataset. An Importer creates a project from the
        data source location.
        It is possible to add a custom Extractor and Importer.
        To do this, you need to put an Extractor and
        Importer implementation scripts to
        <project_dir>/.datumaro/extractors
        and <project_dir>/.datumaro/importers.|n
        |n
        List of builtin dataset formats: %s|n
        |n
        Examples:|n
        - Create a project from VOC dataset in the current directory:|n
        |s|simport -f voc -i path/to/voc|n
        |n
        - Create a project from COCO dataset in other directory:|n
        |s|simport -f coco -i path/to/coco -o path/I/like/
        """ % ', '.join(builtins),
        formatter_class=MultilineFormatter)
    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
        help="Directory to save the new project to (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-i', '--input-path', required=True, dest='source',
        help="Path to import project from")
    parser.add_argument('-f', '--format',
        help="Source project format. Will try to detect, if not specified.")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
        help="Additional arguments for importer (pass '-- -h' for help)")
    parser.set_defaults(command=import_command)
    return parser
def import_command(args):
    """Create a project from an existing dataset.

    When no format is given, tries each registered importer's auto
    detection; fails if zero or multiple formats match. Returns 0 on
    success, a non-zero code on detection failure.
    """
    project_dir = osp.abspath(args.dst_dir)
    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % project_env_dir)
        else:
            shutil.rmtree(project_env_dir, ignore_errors=True)
    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % own_dataset_dir)
        else:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(own_dataset_dir)
    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)
    env = Environment()
    log.info("Importing project from '%s'" % args.source)
    extra_args = {}
    if not args.format:
        # extra args are importer-specific, so they need a known format
        if args.extra_args:
            raise CliException("Extra args can not be used without format")
        log.info("Trying to detect dataset format...")
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            importer = env.make_importer(format_name)
            try:
                match = importer.detect(args.source)
                if match:
                    log.debug("format matched")
                    matches.append((format_name, importer))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    format_name)
        if len(matches) == 0:
            log.error("Failed to detect dataset format automatically. "
                "Try to specify format with '-f/--format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify format with '-f/--format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2
        format_name, importer = matches[0]
        args.format = format_name
    else:
        try:
            importer = env.make_importer(args.format)
            if hasattr(importer, 'from_cmdline'):
                extra_args = importer.from_cmdline(args.extra_args)
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.format)
    log.info("Importing project as '%s'" % args.format)
    source = osp.abspath(args.source)
    project = importer(source, **extra_args)
    project.config.project_name = project_name
    project.config.project_dir = project_dir
    if not args.skip_check or args.copy:
        # --copy needs the dataset materialized anyway
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()
    log.info("Project has been created at '%s'" % project_dir)
    return 0
class FilterModes(Enum):
    """Filtering targets for dataset filter operations.

    Members sharing a value are aliases (e.g. 'i' for 'items').
    """
    # primary modes
    items = 1
    annotations = 2
    items_annotations = 3
    # shortcut aliases
    i = 1
    a = 2
    i_a = 3
    a_i = 3
    annotations_items = 3

    @staticmethod
    def parse(s):
        """Parse a user-supplied mode string such as 'i+a' or 'items'."""
        # '+' in user input corresponds to '_' in member names
        return FilterModes[s.lower().replace('+', '_')]

    @classmethod
    def make_filter_args(cls, mode):
        """Translate a mode into keyword arguments for dataset filtering."""
        if mode == cls.items:
            return {}
        if mode == cls.annotations:
            return {
                'filter_annotations': True
            }
        if mode == cls.items_annotations:
            return {
                'filter_annotations': True,
                'remove_empty': True,
            }
        raise NotImplementedError()

    @classmethod
    def list_options(cls):
        """Human-readable mode names, with '_' rendered as '+'."""
        return [member.name.replace('_', '+') for member in cls]
def build_export_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'project export' command."""
    # queried at parser-build time so the help text lists current converters
    builtins = sorted(Environment().converters.items)
    parser = parser_ctor(help="Export project",
        description="""
        Exports the project dataset in some format. Optionally, a filter
        can be passed, check 'filter' command description for more info.
        Each dataset format has its own options, which
        are passed after '--' separator (see examples), pass '-- -h'
        for more info. If not stated otherwise, by default
        only annotations are exported, to include images pass
        '--save-images' parameter.|n
        |n
        Formats:|n
        In Datumaro dataset formats are supported by Converter-s.
        A Converter produces a dataset of a specific format
        from dataset items. It is possible to add a custom Converter.
        To do this, you need to put a Converter
        definition script to <project_dir>/.datumaro/converters.|n
        |n
        List of builtin dataset formats: %s|n
        |n
        Examples:|n
        - Export project as a VOC-like dataset, include images:|n
        |s|sexport -f voc -- --save-images|n
        |n
        - Export project as a COCO-like dataset in other directory:|n
        |s|sexport -f coco -o path/I/like/
        """ % ', '.join(builtins),
        formatter_class=MultilineFormatter)
    parser.add_argument('-e', '--filter', default=None,
        help="Filter expression for dataset items")
    parser.add_argument('--filter-mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()) , '%(default)s'))
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save output (default: a subdir in the current one)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('-f', '--format', required=True,
        help="Output format")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    parser.set_defaults(command=export_command)
    return parser
def export_command(args):
    """Export the project dataset in the requested format, with optional
    item/annotation filtering.
    """
    project = load_project(args.project_dir)
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-%s' % \
            (project.config.project_name, make_file_name(args.format)))
    dst_dir = osp.abspath(dst_dir)
    try:
        converter = project.env.converters.get(args.format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)
    extra_args = converter.from_cmdline(args.extra_args)
    def converter_proxy(extractor, save_dir):
        # binds the parsed converter-specific options into the call
        return converter.convert(extractor, save_dir, **extra_args)
    filter_args = FilterModes.make_filter_args(args.filter_mode)
    log.info("Loading the project...")
    dataset = project.make_dataset()
    log.info("Exporting the project...")
    dataset.export_project(
        save_dir=dst_dir,
        converter=converter_proxy,
        filter_expr=args.filter,
        **filter_args)
    log.info("Project exported to '%s' as '%s'" % \
        (dst_dir, args.format))
    return 0
def build_filter_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'project filter' (extract) command."""
    parser = parser_ctor(help="Extract subproject",
        description="""
        Extracts a subproject that contains only items matching filter.
        A filter is an XPath expression, which is applied to XML
        representation of a dataset item. Check '--dry-run' parameter
        to see XML representations of the dataset items.|n
        |n
        To filter annotations use the mode ('-m') parameter.|n
        Supported modes:|n
        - 'i', 'items'|n
        - 'a', 'annotations'|n
        - 'i+a', 'a+i', 'items+annotations', 'annotations+items'|n
        When filtering annotations, use the 'items+annotations'
        mode to point that annotation-less dataset items should be
        removed. To select an annotation, write an XPath that
        returns 'annotation' elements (see examples).|n
        |n
        Examples:|n
        - Filter images with width < height:|n
        |s|sextract -e '/item[image/width < image/height]'|n
        |n
        - Filter images with large-area bboxes:|n
        |s|sextract -e '/item[annotation/type="bbox" and
            annotation/area>2000]'|n
        |n
        - Filter out all irrelevant annotations from items:|n
        |s|sextract -m a -e '/item/annotation[label = "person"]'|n
        |n
        - Filter out all irrelevant annotations from items:|n
        |s|sextract -m a -e '/item/annotation[label="cat" and
        area > 99.5]'|n
        |n
        - Filter occluded annotations and items, if no annotations left:|n
        |s|sextract -m i+a -e '/item/annotation[occluded="True"]'
        """,
        formatter_class=MultilineFormatter)
    parser.add_argument('-e', '--filter', default=None,
        help="XML XPath filter expression for dataset items")
    parser.add_argument('-m', '--mode', default=FilterModes.i.name,
        type=FilterModes.parse,
        help="Filter mode (options: %s; default: %s)" % \
            (', '.join(FilterModes.list_options()) , '%(default)s'))
    parser.add_argument('--dry-run', action='store_true',
        help="Print XML representations to be filtered and exit")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: update current project)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=filter_command)
    return parser
def filter_command(args):
    """Extract a filtered subproject, or print item XML in --dry-run mode."""
    project = load_project(args.project_dir)
    if not args.dry_run:
        # IMPROVEMENT: fail fast on a missing '-e' expression; previously
        # this was checked only after the (potentially slow) dataset build.
        if not args.filter:
            raise CliException("Expected a filter expression ('-e' argument)")
        dst_dir = args.dst_dir
        if dst_dir:
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
                raise CliException("Directory '%s' already exists "
                    "(pass --overwrite to overwrite)" % dst_dir)
        else:
            dst_dir = generate_next_file_name('%s-filter' % \
                project.config.project_name)
        dst_dir = osp.abspath(dst_dir)
    dataset = project.make_dataset()
    filter_args = FilterModes.make_filter_args(args.mode)
    if args.dry_run:
        # show the XML each item would be matched against, then exit
        dataset = dataset.filter(expr=args.filter, **filter_args)
        for item in dataset:
            encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
            xml_item = DatasetItemEncoder.to_string(encoded_item)
            print(xml_item)
        return 0
    dataset.filter_project(save_dir=dst_dir, expr=args.filter, **filter_args)
    log.info("Subproject has been extracted to '%s'" % dst_dir)
    return 0
def build_merge_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the two-project 'merge' (update) command."""
    parser = parser_ctor(help="Merge two projects",
        description="""
        Updates items of the current project with items
        from other project.|n
        |n
        Examples:|n
        - Update a project with items from other project:|n
        |s|smerge -p path/to/first/project path/to/other/project
        """,
        formatter_class=MultilineFormatter)
    parser.add_argument('other_project_dir',
        help="Path to a project")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=merge_command)
    return parser
def merge_command(args):
    """Update the first project's items with items from the second one."""
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    first_dataset = first_project.make_dataset()
    second_dataset = second_project.make_dataset()
    # the second dataset's items take precedence on id collisions
    first_dataset.update(second_dataset)
    first_dataset.save(save_dir=dst_dir)
    if dst_dir is None:
        # saved in place: report the current project's own directory
        dst_dir = first_project.config.project_dir
    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    return 0
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the distance-based 'diff' command."""
    parser = parser_ctor(help="Compare projects",
        description="""
        Compares two projects, match annotations by distance.|n
        |n
        Examples:|n
        - Compare two projects, match boxes if IoU > 0.7,|n
        |s|s|s|sprint results to Tensorboard:
        |s|sdiff path/to/other/project -o diff/ -v tensorboard --iou-thresh 0.7
        """,
        formatter_class=MultilineFormatter)
    parser.add_argument('other_project_dir',
        help="Directory of the second project to be compared")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save comparison results (default: do not save)")
    parser.add_argument('-v', '--visualizer',
        default=DiffVisualizer.DEFAULT_FORMAT,
        choices=[f.name for f in DiffVisualizer.Format],
        help="Output format (default: %(default)s)")
    parser.add_argument('--iou-thresh', default=0.5, type=float,
        help="IoU match threshold for detections (default: %(default)s)")
    parser.add_argument('--conf-thresh', default=0.5, type=float,
        help="Confidence threshold for detections (default: %(default)s)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the first project to be compared (default: current dir)")
    parser.set_defaults(command=diff_command)
    return parser
def diff_command(args):
    """Compare two projects by annotation distance and save a visual diff."""
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)
    comparator = DistanceComparator(iou_threshold=args.iou_thresh)
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-%s-diff' % (
            first_project.config.project_name,
            second_project.config.project_name)
        )
    dst_dir = osp.abspath(dst_dir)
    log.info("Saving diff to '%s'" % dst_dir)
    # remember whether we created dst_dir, so failures don't wipe
    # a pre-existing directory
    dst_dir_existed = osp.exists(dst_dir)
    try:
        visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
            output_format=args.visualizer)
        visualizer.save_dataset_diff(
            first_project.make_dataset(),
            second_project.make_dataset())
    except BaseException:
        if not dst_dir_existed and osp.isdir(dst_dir):
            shutil.rmtree(dst_dir, ignore_errors=True)
        raise
    return 0
def build_ediff_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the exact-comparison 'ediff' command."""
    parser = parser_ctor(help="Compare projects for equality",
        description="""
        Compares two projects for equality.|n
        |n
        Examples:|n
        - Compare two projects, exclude annotation group |n
        |s|s|sand the 'is_crowd' attribute from comparison:|n
        |s|sediff other/project/ -if group -ia is_crowd
        """,
        formatter_class=MultilineFormatter)
    parser.add_argument('other_project_dir',
        help="Directory of the second project to be compared")
    parser.add_argument('-iia', '--ignore-item-attr', action='append',
        help="Ignore item attribute (repeatable)")
    parser.add_argument('-ia', '--ignore-attr', action='append',
        help="Ignore annotation attribute (repeatable)")
    parser.add_argument('-if', '--ignore-field',
        action='append', default=['id', 'group'],
        help="Ignore annotation field (repeatable, default: %(default)s)")
    parser.add_argument('--match-images', action='store_true',
        help='Match dataset items by images instead of ids')
    parser.add_argument('--all', action='store_true',
        help="Include matches in the output")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the first project to be compared (default: current dir)")
    parser.set_defaults(command=ediff_command)
    return parser
def ediff_command(args):
    """Compare two projects exactly, print a summary and dump a JSON report."""
    first_project = load_project(args.project_dir)
    second_project = load_project(args.other_project_dir)
    comparator = ExactComparator(
        match_images=args.match_images,
        ignored_fields=args.ignore_field,
        ignored_attrs=args.ignore_attr,
        ignored_item_attrs=args.ignore_item_attr)
    matches, mismatches, a_extra, b_extra, errors = \
        comparator.compare_datasets(
            first_project.make_dataset(), second_project.make_dataset())
    output = {
        "mismatches": mismatches,
        "a_extra_items": sorted(a_extra),
        "b_extra_items": sorted(b_extra),
        "errors": errors,
    }
    if args.all:
        # matches can be large, so they are included only on request
        output["matches"] = matches
    output_file = generate_next_file_name('diff', ext='.json')
    with open(output_file, 'w') as f:
        json.dump(output, f, indent=4, sort_keys=True)
    print("Found:")
    print("The first project has %s unmatched items" % len(a_extra))
    print("The second project has %s unmatched items" % len(b_extra))
    print("%s item conflicts" % len(errors))
    print("%s matching annotations" % len(matches))
    print("%s mismatching annotations" % len(mismatches))
    log.info("Output has been saved to '%s'" % output_file)
    return 0
def build_transform_parser(parser_ctor=argparse.ArgumentParser):
    """Build the CLI parser for the 'project transform' command."""
    # queried at parser-build time so the help text lists current transforms
    builtins = sorted(Environment().transforms.items)
    parser = parser_ctor(help="Transform project",
        description="""
        Applies some operation to dataset items in the project
        and produces a new project.|n
        |n
        Builtin transforms: %s|n
        |n
        Examples:|n
        - Convert instance polygons to masks:|n
        |s|stransform -t polygons_to_masks
        """ % ', '.join(builtins),
        formatter_class=MultilineFormatter)
    parser.add_argument('-t', '--transform', required=True,
        help="Transform to apply to the project")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save output (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for transformation (pass '-- -h' for help)")
    parser.set_defaults(command=transform_command)
    return parser
def transform_command(args):
    """Apply a registered transform to the project and save a new project."""
    project = load_project(args.project_dir)
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-%s' % \
            (project.config.project_name, make_file_name(args.transform)))
    dst_dir = osp.abspath(dst_dir)
    try:
        transform = project.env.transforms.get(args.transform)
    except KeyError:
        raise CliException("Transform '%s' is not found" % args.transform)
    extra_args = {}
    if hasattr(transform, 'from_cmdline'):
        # transform-specific options were collected after the '--' separator
        extra_args = transform.from_cmdline(args.extra_args)
    log.info("Loading the project...")
    dataset = project.make_dataset()
    log.info("Transforming the project...")
    dataset.transform_project(
        method=transform,
        save_dir=dst_dir,
        **extra_args
    )
    log.info("Transform results have been saved to '%s'" % dst_dir)
    return 0
def build_stats_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argparse parser for the 'stats' command."""
    parser = parser_ctor(help="Get project statistics",
        description="""
            Outputs various project statistics like image mean and std,
            annotations count etc.
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=stats_command)

    return parser
def stats_command(args):
    """Handle the 'stats' command: compute image and annotation
    statistics for the project and dump them to a generated JSON file."""
    dataset = load_project(args.project_dir).make_dataset()

    statistics = {}
    statistics.update(compute_image_statistics(dataset))
    statistics.update(compute_ann_statistics(dataset))

    out_file = generate_next_file_name('statistics', ext='.json')
    log.info("Writing project statistics to '%s'" % out_file)
    with open(out_file, 'w') as out:
        json.dump(statistics, out, indent=4, sort_keys=True)
def build_info_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argparse parser for the project 'info' command."""
    parser = parser_ctor(help="Get project info",
        description="""
            Outputs project info.
        """,
        formatter_class=MultilineFormatter)

    parser.add_argument('--all', action='store_true',
        help="Print all information")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=info_command)

    return parser
def info_command(args):
    """Handle the 'info' command: print a human-readable summary of
    the project, its plugins, sources, dataset contents and models."""
    project = load_project(args.project_dir)
    config = project.config
    env = project.env
    dataset = project.make_dataset()

    print("Project:")
    print("  name:", config.project_name)
    print("  location:", config.project_dir)
    print("Plugins:")
    print("  importers:", ', '.join(env.importers.items))
    print("  extractors:", ', '.join(env.extractors.items))
    print("  converters:", ', '.join(env.converters.items))
    print("  launchers:", ', '.join(env.launchers.items))

    print("Sources:")
    for source_name, source in config.sources.items():
        print("  source '%s':" % source_name)
        print("    format:", source.format)
        print("    url:", source.url)
        print("    location:", project.local_source_dir(source_name))

    def print_extractor_info(extractor, indent=''):
        # Prints the size and category details of a dataset or subset.
        print("%slength:" % indent, len(extractor))

        categories = extractor.categories()
        # iterating a categories dict yields AnnotationType keys
        print("%scategories:" % indent, ', '.join(c.name for c in categories))

        for cat_type, cat in categories.items():
            print("%s  %s:" % (indent, cat_type.name))
            if cat_type == AnnotationType.label:
                print("%s    count:" % indent, len(cat.items))

                # Abbreviate long label lists unless --all was passed.
                count_threshold = 10
                if args.all:
                    count_threshold = len(cat.items)
                labels = ', '.join(c.name for c in cat.items[:count_threshold])
                if count_threshold < len(cat.items):
                    labels += " (and %s more)" % (
                        len(cat.items) - count_threshold)
                print("%s    labels:" % indent, labels)

    print("Dataset:")
    print_extractor_info(dataset, indent="  ")

    subsets = dataset.subsets()
    print("  subsets:", ', '.join(subsets))
    for subset_name in subsets:
        subset = dataset.get_subset(subset_name)
        print("  subset '%s':" % subset_name)
        print_extractor_info(subset, indent="    ")

    print("Models:")
    for model_name, model in config.models.items():
        print("  model '%s':" % model_name)
        print("    type:", model.launcher)

    return 0
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the top-level 'project' command parser with all of its
    subcommands attached."""
    parser = parser_ctor(
        description="""
            Manipulate projects.|n
            |n
            By default, the project to be operated on is searched for
            in the current directory. An additional '-p' argument can be
            passed to specify project location.
        """,
        formatter_class=MultilineFormatter)

    subparsers = parser.add_subparsers()
    add_subparser(subparsers, 'create', build_create_parser)
    add_subparser(subparsers, 'import', build_import_parser)
    add_subparser(subparsers, 'export', build_export_parser)
    add_subparser(subparsers, 'filter', build_filter_parser)
    add_subparser(subparsers, 'merge', build_merge_parser)
    add_subparser(subparsers, 'diff', build_diff_parser)
    add_subparser(subparsers, 'ediff', build_ediff_parser)
    add_subparser(subparsers, 'transform', build_transform_parser)
    add_subparser(subparsers, 'info', build_info_parser)
    add_subparser(subparsers, 'stats', build_stats_parser)

    return parser

@ -1,290 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import Counter
from enum import Enum
import numpy as np
import os
import os.path as osp
_formats = ['simple']
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import tensorboardX as tb
_formats.append('tensorboard')
from datumaro.components.extractor import AnnotationType
from datumaro.util.image import save_image
Format = Enum('Formats', _formats)
class DiffVisualizer:
    """Renders dataset comparison results (label/bbox diffs and
    confusion matrices) either as plain files or TensorBoard logs.
    """
    Format = Format
    DEFAULT_FORMAT = Format.simple

    _UNMATCHED_LABEL = -1  # pseudo-label id for unmatched annotations

    def __init__(self, comparator, save_dir, output_format=DEFAULT_FORMAT):
        """
        comparator: object providing compare_dataset_labels(),
            compare_item_labels() and compare_item_bboxes()
        save_dir: directory to write comparison outputs to
        output_format: a Format value or its string name
        """
        self.comparator = comparator

        if isinstance(output_format, str):
            output_format = Format[output_format]
        assert output_format in Format
        self.output_format = output_format

        self.save_dir = save_dir
        if output_format is Format.tensorboard:
            logdir = osp.join(self.save_dir, 'logs', 'diff')
            self.file_writer = tb.SummaryWriter(logdir)
        if output_format is Format.simple:
            self.label_diff_writer = None  # opened lazily on first use

        self.categories = {}

        self.label_confusion_matrix = Counter()
        self.bbox_confusion_matrix = Counter()

    def save_dataset_diff(self, extractor_a, extractor_b):
        """Compare two datasets item by item and write the results."""
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)

        if len(extractor_a) != len(extractor_b):
            print("Datasets have different lengths: %s vs %s" % \
                (len(extractor_a), len(extractor_b)))

        self.categories = {}

        label_mismatch = self.comparator. \
            compare_dataset_labels(extractor_a, extractor_b)
        if label_mismatch is None:
            print("Datasets have no label information")
        elif len(label_mismatch) != 0:
            print("Datasets have mismatching labels:")
            for a_label, b_label in label_mismatch:
                if a_label is None:
                    print("  > %s" % b_label.name)
                elif b_label is None:
                    print("  < %s" % a_label.name)
                else:
                    print("  %s != %s" % (a_label.name, b_label.name))
        else:
            self.categories.update(extractor_a.categories())
            self.categories.update(extractor_b.categories())

        self.label_confusion_matrix = Counter()
        self.bbox_confusion_matrix = Counter()

        if self.output_format is Format.tensorboard:
            self.file_writer.reopen()

        # Only items present in both datasets are compared.
        ids_a = set((item.id, item.subset) for item in extractor_a)
        ids_b = set((item.id, item.subset) for item in extractor_b)
        ids = ids_a & ids_b

        if len(ids) != len(ids_a):
            print("Unmatched items in the first dataset: ")
            print(ids_a - ids)
        if len(ids) != len(ids_b):
            print("Unmatched items in the second dataset: ")
            print(ids_b - ids)

        for item_id, item_subset in ids:
            item_a = extractor_a.get(item_id, item_subset)
            # BUG FIX: this used to read from extractor_a as well, so
            # every item was compared against itself and the diff was
            # always empty.
            item_b = extractor_b.get(item_id, item_subset)

            label_diff = self.comparator.compare_item_labels(item_a, item_b)
            self.update_label_confusion(label_diff)

            bbox_diff = self.comparator.compare_item_bboxes(item_a, item_b)
            self.update_bbox_confusion(bbox_diff)

            self.save_item_label_diff(item_a, item_b, label_diff)
            self.save_item_bbox_diff(item_a, item_b, bbox_diff)

        if len(self.label_confusion_matrix) != 0:
            self.save_conf_matrix(self.label_confusion_matrix,
                'labels_confusion.png')
        if len(self.bbox_confusion_matrix) != 0:
            self.save_conf_matrix(self.bbox_confusion_matrix,
                'bbox_confusion.png')

        if self.output_format is Format.tensorboard:
            self.file_writer.flush()
            self.file_writer.close()
        elif self.output_format is Format.simple:
            if self.label_diff_writer:
                self.label_diff_writer.flush()
                self.label_diff_writer.close()

    def update_label_confusion(self, label_diff):
        """Accumulate one item's label diff into the confusion matrix."""
        matches, a_unmatched, b_unmatched = label_diff
        for label in matches:
            self.label_confusion_matrix[(label, label)] += 1
        for a_label in a_unmatched:
            self.label_confusion_matrix[(a_label, self._UNMATCHED_LABEL)] += 1
        for b_label in b_unmatched:
            self.label_confusion_matrix[(self._UNMATCHED_LABEL, b_label)] += 1

    def update_bbox_confusion(self, bbox_diff):
        """Accumulate one item's bbox diff into the confusion matrix."""
        matches, mispred, a_unmatched, b_unmatched = bbox_diff
        for a_bbox, b_bbox in matches:
            self.bbox_confusion_matrix[(a_bbox.label, b_bbox.label)] += 1
        for a_bbox, b_bbox in mispred:
            self.bbox_confusion_matrix[(a_bbox.label, b_bbox.label)] += 1
        for a_bbox in a_unmatched:
            self.bbox_confusion_matrix[(a_bbox.label, self._UNMATCHED_LABEL)] += 1
        for b_bbox in b_unmatched:
            self.bbox_confusion_matrix[(self._UNMATCHED_LABEL, b_bbox.label)] += 1

    @classmethod
    def draw_text_with_background(cls, frame, text, origin,
            font=None, scale=1.0,
            color=(0, 0, 0), thickness=1, bgcolor=(1, 1, 1)):
        """Draw text over a filled background rectangle on the frame.
        Returns the (text_size, baseline) pair from cv2.getTextSize."""
        import cv2

        if not font:
            font = cv2.FONT_HERSHEY_SIMPLEX
        text_size, baseline = cv2.getTextSize(text, font, scale, thickness)
        cv2.rectangle(frame,
            tuple((origin + (0, baseline)).astype(int)),
            tuple((origin + (text_size[0], -text_size[1])).astype(int)),
            bgcolor, cv2.FILLED)
        cv2.putText(frame, text,
            tuple(origin.astype(int)),
            font, scale, color, thickness)
        return text_size, baseline

    def draw_detection_roi(self, frame, x, y, w, h, label, conf, color):
        """Draw a detection rectangle with a '<label> <conf>%' caption."""
        import cv2

        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)

        text = '%s %.2f%%' % (label, 100.0 * conf)
        text_scale = 0.5
        font = cv2.FONT_HERSHEY_SIMPLEX
        text_size = cv2.getTextSize(text, font, text_scale, 1)
        line_height = np.array([0, text_size[0][1]])
        self.draw_text_with_background(frame, text,
            np.array([x, y]) - line_height * 0.5,
            font, scale=text_scale, color=[255 - c for c in color])

    def get_label(self, label_id):
        """Resolve a label id to its name; fall back to the raw id."""
        cat = self.categories.get(AnnotationType.label)
        if cat is None:
            return str(label_id)
        return cat.items[label_id].name

    def draw_bbox(self, img, shape, color):
        """Draw one annotation bbox with its label and score."""
        x, y, w, h = shape.get_bbox()
        self.draw_detection_roi(img, int(x), int(y), int(w), int(h),
            self.get_label(shape.label), shape.attributes.get('score', 1),
            color)

    def get_label_diff_file(self):
        """Lazily open 'label_diff.txt' in the save directory."""
        if self.label_diff_writer is None:
            self.label_diff_writer = \
                open(osp.join(self.save_dir, 'label_diff.txt'), 'w')
        return self.label_diff_writer

    def save_item_label_diff(self, item_a, item_b, diff):
        """Record unmatched labels for one item pair, if any."""
        _, a_unmatched, b_unmatched = diff

        if 0 < len(a_unmatched) + len(b_unmatched):
            if self.output_format is Format.simple:
                f = self.get_label_diff_file()
                f.write(item_a.id + '\n')
                for a_label in a_unmatched:
                    f.write('  >%s\n' % self.get_label(a_label))
                for b_label in b_unmatched:
                    f.write('  <%s\n' % self.get_label(b_label))
            elif self.output_format is Format.tensorboard:
                tag = item_a.id
                for a_label in a_unmatched:
                    self.file_writer.add_text(tag,
                        '>%s\n' % self.get_label(a_label))
                for b_label in b_unmatched:
                    self.file_writer.add_text(tag,
                        '<%s\n' % self.get_label(b_label))

    def save_item_bbox_diff(self, item_a, item_b, diff):
        """Render side-by-side images with mismatching bboxes drawn."""
        _, mispred, a_unmatched, b_unmatched = diff

        if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
            # NOTE(review): both sides are drawn over item_a's image;
            # confirm that item_b's image is never needed here.
            img_a = item_a.image.data.copy()
            img_b = img_a.copy()
            for a_bbox, b_bbox in mispred:
                self.draw_bbox(img_a, a_bbox, (0, 255, 0))
                self.draw_bbox(img_b, b_bbox, (0, 0, 255))
            for a_bbox in a_unmatched:
                self.draw_bbox(img_a, a_bbox, (255, 255, 0))
            for b_bbox in b_unmatched:
                self.draw_bbox(img_b, b_bbox, (255, 255, 0))

            img = np.hstack([img_a, img_b])
            path = osp.join(self.save_dir, item_a.id)

            if self.output_format is Format.simple:
                save_image(path + '.png', img, create_dir=True)
            elif self.output_format is Format.tensorboard:
                self.save_as_tensorboard(img, path)

    def save_as_tensorboard(self, img, name):
        """Log an image to TensorBoard, converting BGR HWC -> RGB CHW."""
        img = img[:, :, ::-1] # to RGB
        img = np.transpose(img, (2, 0, 1)) # to (C, H, W)
        img = img.astype(dtype=np.uint8)
        self.file_writer.add_image(name, img)

    def save_conf_matrix(self, conf_matrix, filename):
        """Render a confusion matrix (Counter keyed by (a, b) label
        pairs) as a colored table image via matplotlib."""
        import matplotlib.pyplot as plt

        classes = None
        label_categories = self.categories.get(AnnotationType.label)
        if label_categories is not None:
            classes = { id: c.name for id, c in enumerate(label_categories.items) }
        if classes is None:
            # No label info available: derive names from the matrix keys
            classes = { c: 'label_%s' % c for c, _ in conf_matrix }
        classes[self._UNMATCHED_LABEL] = 'unmatched'

        class_idx = { id: i for i, id in enumerate(classes.keys()) }
        matrix = np.zeros((len(classes), len(classes)), dtype=int)
        for idx_pair in conf_matrix:
            index = (class_idx[idx_pair[0]], class_idx[idx_pair[1]])
            matrix[index] = conf_matrix[idx_pair]

        labels = [label for id, label in classes.items()]

        fig = plt.figure()
        fig.add_subplot(111)

        table = plt.table(
            cellText=matrix,
            colLabels=labels,
            rowLabels=labels,
            loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(8)
        table.scale(3, 3)
        # Removing ticks and spines enables you to get the figure only with table
        plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
        for pos in ['right', 'top', 'bottom', 'left']:
            plt.gca().spines[pos].set_visible(False)

        for idx_pair in conf_matrix:
            i = class_idx[idx_pair[0]]
            j = class_idx[idx_pair[1]]
            if conf_matrix[idx_pair] != 0:
                # Red for mismatches, green for the diagonal (matches).
                # NOTE: relies on the private matplotlib Table._cells API.
                if i != j:
                    table._cells[(i + 1, j)].set_facecolor('#FF0000')
                else:
                    table._cells[(i + 1, j)].set_facecolor('#00FF00')

        plt.savefig(osp.join(self.save_dir, filename),
            bbox_inches='tight', pad_inches=0.05)

@ -1,273 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
import shutil
from datumaro.components.project import Environment
from ...util import add_subparser, CliException, MultilineFormatter
from ...util.project import load_project
def build_add_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argparse parser for the 'source add' command.

    The command has two source types ('path' and 'git') registered as
    subparsers; options common to both live on a hidden base parser.
    """
    # Only used to list the available extractors in the help text.
    builtins = sorted(Environment().extractors.items)

    # Options shared by all source types; also reused below to build
    # the combined help output (see display_parser).
    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-n', '--name', default=None,
        help="Name of the new source")
    base_parser.add_argument('-f', '--format', required=True,
        help="Source dataset format")
    base_parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    base_parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")

    parser = parser_ctor(help="Add data source to project",
        description="""
            Adds a data source to a project. The source can be:|n
            - a dataset in a supported format (check 'formats' section below)|n
            - a Datumaro project|n
            |n
            The source can be either a local directory or a remote
            git repository. Each source type has its own parameters, which can
            be checked by:|n
            '%s'.|n
            |n
            Formats:|n
            Datasets come in a wide variety of formats. Each dataset
            format defines its own data structure and rules on how to
            interpret the data. For example, the following data structure
            is used in COCO format:|n
            /dataset/|n
            - /images/<id>.jpg|n
            - /annotations/|n
            |n
            In Datumaro dataset formats are supported by Extractor-s.
            An Extractor produces a list of dataset items corresponding
            to the dataset. It is possible to add a custom Extractor.
            To do this, you need to put an Extractor
            definition script to <project_dir>/.datumaro/extractors.|n
            |n
            List of builtin source formats: %s|n
            |n
            Examples:|n
            - Add a local directory with VOC-like dataset:|n
            |s|sadd path path/to/voc -f voc_detection|n
            - Add a local file with CVAT annotations, call it 'mysource'|n
            |s|s|s|sto the project somewhere else:|n
            |s|sadd path path/to/cvat.xml -f cvat -n mysource -p somewhere/else/
        """ % ('%(prog)s SOURCE_TYPE --help', ', '.join(builtins)),
        formatter_class=MultilineFormatter,
        add_help=False)
    parser.set_defaults(command=add_command)

    sp = parser.add_subparsers(dest='source_type', metavar='SOURCE_TYPE',
        help="The type of the data source "
            "(call '%s SOURCE_TYPE --help' for more info)" % parser.prog)

    dir_parser = sp.add_parser('path', help="Add local path as source",
        parents=[base_parser])
    dir_parser.add_argument('url',
        help="Path to the source")
    dir_parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")

    repo_parser = sp.add_parser('git', help="Add git repository as source",
        parents=[base_parser])
    repo_parser.add_argument('url',
        help="URL of the source git repository")
    repo_parser.add_argument('-b', '--branch', default='master',
        help="Branch of the source repository (default: %(default)s)")
    repo_parser.add_argument('--checkout', action='store_true',
        help="Do branch checkout")

    # NOTE: add common parameters to the parent help output
    # the other way could be to use parse_known_args()
    display_parser = argparse.ArgumentParser(
        parents=[base_parser, parser],
        prog=parser.prog, usage="%(prog)s [-h] SOURCE_TYPE ...",
        description=parser.description, formatter_class=MultilineFormatter)
    class HelpAction(argparse._HelpAction):
        # Show the combined (common + subcommand) help instead of the
        # bare parser's own help.
        def __call__(self, parser, namespace, values, option_string=None):
            display_parser.print_help()
            parser.exit()
    parser.add_argument('-h', '--help', action=HelpAction,
        help='show this help message and exit')

    # TODO: needed distinction on how to add an extractor or a remote source

    return parser
def _resolve_source_name(explicit_name, url):
    """Return the source name: the explicit one, or the URL basename
    without its extension."""
    name = explicit_name
    if name is None:
        name = osp.splitext(osp.basename(url))[0]
    return name

def _check_name_is_free(project, name):
    """Raise CliException if a git submodule or a source with this
    name is already registered in the project."""
    if project.env.git.has_submodule(name):
        raise CliException("Git submodule '%s' already exists" % name)
    try:
        project.get_source(name)
        # not caught below: only KeyError is, so this propagates
        raise CliException("Source '%s' already exists" % name)
    except KeyError:
        pass

def _copy_source_data(url, local_dir, rel_local_dir):
    """Copy a source file or directory into the project; return the
    new (project-relative) source URL."""
    log.info("Copying from '%s' to '%s'" % (url, local_dir))
    if osp.isdir(url):
        # copytree requires destination dir not to exist
        shutil.copytree(url, local_dir)
        return rel_local_dir
    elif osp.isfile(url):
        os.makedirs(local_dir)
        shutil.copy2(url, local_dir)
        return osp.join(rel_local_dir, osp.basename(url))
    else:
        raise Exception("Expected file or directory")

def add_command(args):
    """Handle the 'source add' command: register a local path or a git
    repository as a data source of the project."""
    project = load_project(args.project_dir)

    if args.source_type == 'git':
        url = args.url
        name = _resolve_source_name(args.name, url)
        _check_name_is_free(project, name)

        rel_local_dir = project.local_source_dir(name)
        local_dir = osp.join(project.config.project_dir, rel_local_dir)
        project.env.git.create_submodule(name, local_dir,
            url=url, branch=args.branch, no_checkout=not args.checkout)
    elif args.source_type == 'path':
        url = osp.abspath(args.url)
        if not osp.exists(url):
            raise CliException("Source path '%s' does not exist" % url)

        name = _resolve_source_name(args.name, url)
        _check_name_is_free(project, name)

        rel_local_dir = project.local_source_dir(name)
        local_dir = osp.join(project.config.project_dir, rel_local_dir)

        if args.copy:
            url = _copy_source_data(url, local_dir, rel_local_dir)
        else:
            os.makedirs(local_dir)

    project.add_source(name, { 'url': url, 'format': args.format })

    if not args.skip_check:
        log.info("Checking the source...")
        try:
            project.make_source_project(name).make_dataset()
        except Exception:
            # roll back the data directory on a failed check
            shutil.rmtree(local_dir, ignore_errors=True)
            raise

    project.save()

    log.info("Source '%s' has been added to the project, location: '%s'" \
        % (name, rel_local_dir))

    return 0
def build_remove_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argparse parser for the 'source remove' command."""
    parser = parser_ctor(help="Remove source from project",
        description="Remove a source from a project.")

    parser.add_argument('-n', '--name', required=True,
        help="Name of the source to be removed")
    parser.add_argument('--force', action='store_true',
        help="Ignore possible errors during removal")
    parser.add_argument('--keep-data', action='store_true',
        help="Do not remove source data")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=remove_command)

    return parser
def remove_command(args):
    """Handle the 'source remove' command: unregister a source from
    the project and optionally delete its local data."""
    proj = load_project(args.project_dir)

    source_name = args.name
    if not source_name:
        raise CliException("Expected source name")

    # Ensure the source is known, unless removal is forced.
    try:
        proj.get_source(source_name)
    except KeyError:
        if not args.force:
            raise CliException("Source '%s' does not exist" % source_name)

    if proj.env.git.has_submodule(source_name):
        if args.force:
            log.warning("Forcefully removing the '%s' source..." % source_name)
        proj.env.git.remove_submodule(source_name, force=args.force)

    data_dir = osp.join(proj.config.project_dir,
        proj.local_source_dir(source_name))
    proj.remove_source(source_name)
    proj.save()

    if not args.keep_data:
        shutil.rmtree(data_dir, ignore_errors=True)

    log.info("Source '%s' has been removed from the project" % source_name)

    return 0
def build_info_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argparse parser for the 'source info' command."""
    parser = parser_ctor()

    parser.add_argument('-n', '--name',
        help="Source name")
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Show details")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.set_defaults(command=info_command)

    return parser
def info_command(args):
    """Handle the 'source info' command: print one source's config,
    or list every source registered in the project."""
    proj = load_project(args.project_dir)

    if not args.name:
        for source_name, source_conf in proj.config.sources.items():
            print(source_name)
            if args.verbose:
                print(dict(source_conf))
    else:
        print(proj.get_source(args.name))
def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the top-level 'source' command parser with all of its
    subcommands attached."""
    parser = parser_ctor(description="""
            Manipulate data sources inside of a project.|n
            |n
            A data source is a source of data for a project.
            The project combines multiple data sources into one dataset.
            The role of a data source is to provide dataset items - images
            and/or annotations.|n
            |n
            By default, the project to be operated on is searched for
            in the current directory. An additional '-p' argument can be
            passed to specify project location.
        """,
        formatter_class=MultilineFormatter)

    subparsers = parser.add_subparsers()
    add_subparser(subparsers, 'add', build_add_parser)
    add_subparser(subparsers, 'remove', build_remove_parser)
    add_subparser(subparsers, 'info', build_info_parser)

    return parser

@ -1,74 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import textwrap
class CliException(Exception):
    """Base class for errors that are reported to the CLI user."""
def add_subparser(subparsers, name, builder):
    """Attach a subcommand named `name`, built by `builder`.

    The builder receives a parser factory that forwards its keyword
    arguments to subparsers.add_parser(name, ...), and returns the
    configured parser.
    """
    def make_parser(**kwargs):
        return subparsers.add_parser(name, **kwargs)
    return builder(make_parser)
class MultilineFormatter(argparse.HelpFormatter):
    """
    Keeps line breaks introduced with '|n' separator
    and spaces introduced with '|s'.
    """

    def __init__(self, keep_natural=False, **kwargs):
        super().__init__(**kwargs)
        self._keep_natural = keep_natural

    def _fill_text(self, text, width, indent):
        # Collapse all natural whitespace first, then re-introduce the
        # explicit markers: '|s' becomes a space, '|n ' a line break.
        text = self._whitespace_matcher.sub(' ', text).strip()
        text = text.replace('|s', ' ')

        paragraphs = text.split('|n ')
        if self._keep_natural:
            paragraphs = sum((p.split('\n ') for p in paragraphs), [])

        wrapped = [
            textwrap.fill(paragraph, width,
                initial_indent=indent, subsequent_indent=indent) + '\n'
            for paragraph in paragraphs
        ]
        return ''.join(wrapped)
def required_count(nmin=0, nmax=0):
    """Return an argparse Action class that validates the number of
    values supplied for an argument.

    nmin / nmax are inclusive bounds; a bound of 0 means "unbounded"
    on that side. At least one bound must be positive.

    Raises AssertionError for negative bounds or when both are 0.
    """
    # BUG FIX: the original check was
    #   0 <= nmin and 0 <= nmax and nmin or nmax
    # where 'and' binds tighter than 'or', so a negative bound slipped
    # through whenever the other bound was nonzero. Parenthesize.
    assert 0 <= nmin and 0 <= nmax and (nmin or nmax)

    class RequiredCount(argparse.Action):
        def __call__(self, parser, args, values, option_string=None):
            k = len(values)
            # each bound is only enforced when it is nonzero
            if not ((nmin and (nmin <= k) or not nmin) and \
                    (nmax and (k <= nmax) or not nmax)):
                msg = "Argument '%s' requires" % self.dest
                if nmin and nmax:
                    msg += " from %s to %s arguments" % (nmin, nmax)
                elif nmin:
                    msg += " at least %s arguments" % nmin
                else:
                    msg += " no more %s arguments" % nmax
                raise argparse.ArgumentTypeError(msg)

            setattr(args, self.dest, values)
    return RequiredCount
def at_least(n):
    """Shortcut for required_count() with only a lower bound."""
    return required_count(nmin=n, nmax=0)
def make_file_name(s):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.
    """
    # adapted from
    # https://docs.djangoproject.com/en/2.1/_modules/django/utils/text/#slugify
    import unicodedata, re
    normalized = unicodedata.normalize('NFKD', s) \
        .encode('ascii', 'ignore').decode()
    normalized = re.sub(r'[^\w\s-]', '', normalized).strip().lower()
    return re.sub(r'[-\s]+', '-', normalized)

@ -1,39 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os
import re
from datumaro.components.project import Project
from datumaro.util import cast
def load_project(project_dir):
    """Load and return a datumaro Project from the given directory."""
    return Project.load(project_dir)
def generate_next_file_name(basename, basedir='.', sep='.', ext=''):
    """
    If basedir does not contain basename, returns basename,
    otherwise generates a name by appending sep to the basename
    and the number, next to the last used number in the basedir for
    files with basename prefix. Optionally, appends ext.
    """
    existing_names = os.listdir(basedir)
    return generate_next_name(existing_names, basename, sep, ext)
def generate_next_name(names, basename, sep='.', suffix='', default=None):
    """Pick the next free indexed name among `names`.

    Matches entries of the form '<basename>' or '<basename><sep><N>'
    (N an integer, with optional `suffix`) and returns basename plus
    sep plus (max N + 1). When nothing matches: basename + sep +
    default if `default` is given, otherwise just basename (+ suffix).
    """
    pattern = re.compile(r'%s(?:%s(\d+))?%s' % \
        tuple(map(re.escape, [basename, sep, suffix])))
    matches = [match for match in (pattern.match(n) for n in names) if match]
    # cast() converts the captured index to int, yielding 0 when the
    # name had no numeric part (a bare '<basename>' match)
    max_idx = max([cast(match[1], int, 0) for match in matches], default=None)
    if max_idx is None:
        if default is not None:
            idx = sep + str(default)
        else:
            idx = ''
    else:
        idx = sep + str(max_idx + 1)
    return basename + idx + suffix

@ -1,5 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -1,5 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -1,203 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-variable
import numpy as np
from math import ceil
from datumaro.components.extractor import AnnotationType
from datumaro.util.annotation_util import nms
def flatmatvec(mat):
    """Flatten each element of a batch: (N, ...) -> (N, prod(...))."""
    flat_shape = (len(mat), -1)
    return np.reshape(mat, flat_shape)
def expand(array, axis=None):
    """Insert a new axis; by default it is appended as the last one."""
    if axis is None:
        axis = len(array.shape)
    return np.expand_dims(array, axis=axis)
class RISE:
    """
    Implements RISE: Randomized Input Sampling for
    Explanation of Black-box Models algorithm
    See explanations at: https://arxiv.org/pdf/1806.07421.pdf
    """
    def __init__(self, model,
            max_samples=None, mask_width=7, mask_height=7, prob=0.5,
            iou_thresh=0.9, nms_thresh=0.0, det_conf_thresh=0.0,
            batch_size=1):
        # model: launcher-like object; model.launch(batch) yields, per
        #   input, a list of annotations (labels and/or bboxes)
        # max_samples: cap on the number of random masks (default:
        #   one per image pixel)
        # mask_width/mask_height: size of the low-resolution mask grid
        # prob: probability that a mask cell is "on"
        # iou_thresh: min IoU to credit a detection to a predicted bbox
        # nms_thresh/det_conf_thresh: detection pre-filtering; 0 disables
        self.model = model
        self.max_samples = max_samples
        self.mask_height = mask_height
        self.mask_width = mask_width
        self.prob = prob
        self.iou_thresh = iou_thresh
        self.nms_thresh = nms_thresh
        self.det_conf_thresh = det_conf_thresh
        self.batch_size = batch_size

    @staticmethod
    def split_outputs(annotations):
        """Split model output annotations into (labels, bboxes) lists;
        other annotation types are dropped."""
        labels = []
        bboxes = []
        for r in annotations:
            if r.type is AnnotationType.label:
                labels.append(r)
            elif r.type is AnnotationType.bbox:
                bboxes.append(r)
        return labels, bboxes

    def normalize_hmaps(self, heatmaps, counts):
        """Normalize each heatmap to [0, 1]: divide by the expected
        mask coverage (counts * prob), then min-max scale per map."""
        eps = np.finfo(heatmaps.dtype).eps
        mhmaps = flatmatvec(heatmaps)
        mhmaps /= expand(counts * self.prob + eps)
        mhmaps -= expand(np.min(mhmaps, axis=1))
        mhmaps /= expand(np.max(mhmaps, axis=1) + eps)
        return np.reshape(mhmaps, heatmaps.shape)

    def apply(self, image, progressive=False):
        """Generator producing saliency heatmaps for `image`.

        Yields the final normalized heatmap stack (one map per
        predicted label, then one per predicted bbox); when
        `progressive` is True, also yields an intermediate copy after
        every processed batch.
        """
        import cv2

        assert len(image.shape) in [2, 3], \
            "Expected an input image in (H, W, C) format"
        if len(image.shape) == 3:
            assert image.shape[2] in [3, 4], "Expected BGR or BGRA input"
        # NOTE(review): this slicing assumes 3 dims; a 2-D (H, W) input
        # would fail here despite the assert above -- confirm intent
        image = image[:, :, :3].astype(np.float32)

        model = self.model
        iou_thresh = self.iou_thresh

        image_size = np.array((image.shape[:2]))
        mask_size = np.array((self.mask_height, self.mask_width))
        cell_size = np.ceil(image_size / mask_size)
        upsampled_size = np.ceil((mask_size + 1) * cell_size)
        # always called with an explicit shape below, so the None
        # default is never exercised
        rng = lambda shape=None: np.random.rand(*shape)
        samples = np.prod(image_size)
        if self.max_samples is not None:
            samples = min(self.max_samples, samples)
        batch_size = self.batch_size

        # Run the model once on the original image to fix the set of
        # predictions that the heatmaps will be attributed to.
        result = next(iter(model.launch(expand(image, 0))))
        result_labels, result_bboxes = self.split_outputs(result)
        if 0 < self.det_conf_thresh:
            result_bboxes = [b for b in result_bboxes \
                if self.det_conf_thresh <= b.attributes['score']]
        if 0 < self.nms_thresh:
            result_bboxes = nms(result_bboxes, self.nms_thresh)

        predicted_labels = set()
        if len(result_labels) != 0:
            # keep only the top-scoring classification label
            predicted_label = max(result_labels,
                key=lambda r: r.attributes['score']).label
            predicted_labels.add(predicted_label)
        if len(result_bboxes) != 0:
            for bbox in result_bboxes:
                predicted_labels.add(bbox.label)
        # map label id -> heatmap slot index
        predicted_labels = { label: idx \
            for idx, label in enumerate(predicted_labels) }

        predicted_bboxes = result_bboxes

        # One heatmap per predicted label, followed by one per bbox;
        # the *_heatmaps / *_counts / *_confs variables below are views
        # into these shared arrays.
        heatmaps_count = len(predicted_labels) + len(predicted_bboxes)
        heatmaps = np.zeros((heatmaps_count, *image_size), dtype=np.float32)
        total_counts = np.zeros(heatmaps_count, dtype=np.int32)
        confs = np.zeros(heatmaps_count, dtype=np.float32)

        heatmap_id = 0

        label_heatmaps = None
        label_total_counts = None
        label_confs = None
        if len(predicted_labels) != 0:
            step = len(predicted_labels)
            label_heatmaps = heatmaps[heatmap_id : heatmap_id + step]
            label_total_counts = total_counts[heatmap_id : heatmap_id + step]
            label_confs = confs[heatmap_id : heatmap_id + step]
            heatmap_id += step

        bbox_heatmaps = None
        bbox_total_counts = None
        bbox_confs = None
        if len(predicted_bboxes) != 0:
            step = len(predicted_bboxes)
            bbox_heatmaps = heatmaps[heatmap_id : heatmap_id + step]
            bbox_total_counts = total_counts[heatmap_id : heatmap_id + step]
            bbox_confs = confs[heatmap_id : heatmap_id + step]
            heatmap_id += step

        # Reusable buffers for mask generation and batched inference
        ups_mask = np.empty(upsampled_size.astype(int), dtype=np.float32)
        masks = np.empty((batch_size, *image_size), dtype=np.float32)
        full_batch_inputs = np.empty((batch_size, *image.shape), dtype=np.float32)
        current_heatmaps = np.empty_like(heatmaps)
        for b in range(ceil(samples / batch_size)):
            batch_pos = b * batch_size
            current_batch_size = min(samples - batch_pos, batch_size)

            batch_masks = masks[: current_batch_size]
            for i in range(current_batch_size):
                # Random low-res binary mask, upsampled and randomly
                # shifted by up to one grid cell (per the RISE paper)
                mask = (rng(mask_size) < self.prob).astype(np.float32)
                cv2.resize(mask, (int(upsampled_size[1]), int(upsampled_size[0])),
                    ups_mask)

                offsets = np.round(rng((2,)) * cell_size)
                mask = ups_mask[
                    int(offsets[0]):int(image_size[0] + offsets[0]),
                    int(offsets[1]):int(image_size[1] + offsets[1]) ]
                batch_masks[i] = mask

            batch_inputs = full_batch_inputs[:current_batch_size]
            np.multiply(expand(batch_masks), expand(image, 0), out=batch_inputs)

            results = model.launch(batch_inputs)
            for mask, result in zip(batch_masks, results):
                result_labels, result_bboxes = self.split_outputs(result)

                confs.fill(0)
                if len(predicted_labels) != 0:
                    # Credit classification scores (from both label and
                    # bbox outputs) to the matching label heatmaps
                    for r in result_labels:
                        idx = predicted_labels.get(r.label, None)
                        if idx is not None:
                            label_total_counts[idx] += 1
                            label_confs[idx] += r.attributes['score']
                    for r in result_bboxes:
                        idx = predicted_labels.get(r.label, None)
                        if idx is not None:
                            label_total_counts[idx] += 1
                            label_confs[idx] += r.attributes['score']

                if len(predicted_bboxes) != 0 and len(result_bboxes) != 0:
                    # Same pre-filtering as for the reference prediction
                    if 0 < self.det_conf_thresh:
                        result_bboxes = [b for b in result_bboxes \
                            if self.det_conf_thresh <= b.attributes['score']]
                    if 0 < self.nms_thresh:
                        result_bboxes = nms(result_bboxes, self.nms_thresh)

                    # Credit each detection to the reference bboxes it
                    # overlaps enough with (same label, IoU >= thresh)
                    for detection in result_bboxes:
                        for pred_idx, pred in enumerate(predicted_bboxes):
                            if pred.label != detection.label:
                                continue

                            iou = pred.iou(detection)
                            assert iou == -1 or 0 <= iou and iou <= 1
                            if iou < iou_thresh:
                                continue

                            bbox_total_counts[pred_idx] += 1

                            conf = detection.attributes['score']
                            bbox_confs[pred_idx] += conf

                # Weight the mask by each prediction's confidence and
                # accumulate into the running heatmaps
                np.multiply.outer(confs, mask, out=current_heatmaps)
                heatmaps += current_heatmaps

            if progressive:
                yield self.normalize_hmaps(heatmaps.copy(), total_counts)

        yield self.normalize_hmaps(heatmaps, total_counts)

@ -1,44 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from datumaro.cli.util import MultilineFormatter
from datumaro.util import to_snake_case
class CliPlugin:
    """Mixin giving plugins a command-line interface: a generated
    argument parser and construction of kwargs from CLI arguments."""

    @staticmethod
    def _get_name(cls):
        # a NAME attribute overrides the name derived from the class name
        return getattr(cls, 'NAME',
            remove_plugin_type(to_snake_case(cls.__name__)))

    @staticmethod
    def _get_doc(cls):
        # NOTE: __doc__ always exists (possibly None), so the ""
        # default is effectively never used here
        return getattr(cls, '__doc__', "")

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        """Create an ArgumentParser for this plugin; kwargs override
        the generated defaults (prog, description, formatter)."""
        args = {
            'prog': cls._get_name(cls),
            'description': cls._get_doc(cls),
            'formatter_class': MultilineFormatter,
        }
        args.update(kwargs)

        return argparse.ArgumentParser(**args)

    @classmethod
    def from_cmdline(cls, args=None):
        """Parse plugin-specific CLI arguments into a kwargs dict.
        A leading '--' separator (from REMAINDER capture) is skipped."""
        if args and args[0] == '--':
            args = args[1:]
        parser = cls.build_cmdline_parser()
        args = parser.parse_args(args)
        return vars(args)
def remove_plugin_type(s):
    """Strip known plugin-kind suffixes ('_transform', '_extractor',
    etc.) from a plugin name."""
    plugin_kinds = {'transform', 'extractor', 'converter',
        'launcher', 'importer'}
    for kind in plugin_kinds:
        s = s.replace('_' + kind, '')
    return s

@ -1,237 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import yaml
class Schema:
    """A name -> Item mapping describing valid config keys, with an
    optional fallback schema consulted for keys not defined here."""

    class Item:
        """One schema entry: a value constructor plus an 'internal'
        flag (internal keys are meant to be hidden from export)."""

        def __init__(self, ctor, internal=False):
            self.ctor = ctor  # callable producing a default/converted value
            self.internal = internal

        def __call__(self, *args, **kwargs):
            return self.ctor(*args, **kwargs)

    def __init__(self, items=None, fallback=None):
        self._items = {}
        if items is not None:
            self._items.update(items)
        self._fallback = fallback

    def _get_items(self, allow_fallback=True):
        """Merge own items over the fallback's (own entries win)."""
        all_items = {}

        if allow_fallback and self._fallback is not None:
            all_items.update(self._fallback)
        all_items.update(self._items)

        return all_items

    def items(self, allow_fallback=True):
        return self._get_items(allow_fallback=allow_fallback).items()

    def keys(self, allow_fallback=True):
        return self._get_items(allow_fallback=allow_fallback).keys()

    def values(self, allow_fallback=True):
        return self._get_items(allow_fallback=allow_fallback).values()

    def __contains__(self, key):
        return key in self.keys()

    def __len__(self):
        return len(self._get_items())

    def __iter__(self):
        return iter(self._get_items())

    def __getitem__(self, key):
        # a private sentinel lets None be a legitimate stored value
        default = object()
        value = self.get(key, default=default)
        if value is default:
            raise KeyError('Key "%s" does not exist' % (key))
        return value

    def get(self, key, default=None):
        found = self._items.get(key, default)
        if found is not default:
            return found

        if self._fallback is not None:
            return self._fallback.get(key, default)
        # BUG FIX: previously the function fell off the end here and
        # returned None, ignoring the caller-supplied default when
        # there is no fallback schema.
        return default
class SchemaBuilder:
    """Fluent builder for Schema objects."""
    def __init__(self):
        self._items = {}

    def add(self, name, ctor=str, internal=False):
        """Register an entry; returns self to allow chaining."""
        if name in self._items:
            raise KeyError('Key "%s" already exists' % (name))

        self._items[name] = Schema.Item(ctor, internal=internal)
        return self

    def build(self):
        return Schema(self._items)
class Config:
    """A dict-like configuration container with optional schema validation,
    fallback-provided defaults and mutability control.

    Attribute access is forwarded to item access: cfg.key == cfg['key'].
    Internal state is kept via self.__dict__ writes to avoid triggering
    __setattr__, which would store into the config itself.
    """

    def __init__(self, config=None, fallback=None, schema=None, mutable=True):
        # schema should be established first, because set() consults it
        self.__dict__['_schema'] = schema
        # stay mutable while copying initial values, even if the final
        # object is requested to be immutable
        self.__dict__['_mutable'] = True
        self.__dict__['_config'] = {}
        if fallback is not None:
            for k, v in fallback.items(allow_fallback=False):
                self.set(k, v)
        if config is not None:
            self.update(config)
        self.__dict__['_mutable'] = mutable

    def _items(self, allow_fallback=True, allow_internal=True):
        # Schema defaults first; explicitly set values override them.
        all_config = {}
        if allow_fallback and self._schema is not None:
            for key, item in self._schema.items():
                all_config[key] = item()
        all_config.update(self._config)
        if not allow_internal and self._schema is not None:
            for key, item in self._schema.items():
                if item.internal:
                    all_config.pop(key)
        return all_config

    def items(self, allow_fallback=True, allow_internal=True):
        return self._items(
            allow_fallback=allow_fallback,
            allow_internal=allow_internal
        ).items()

    def keys(self, allow_fallback=True, allow_internal=True):
        return self._items(
            allow_fallback=allow_fallback,
            allow_internal=allow_internal
        ).keys()

    def values(self, allow_fallback=True, allow_internal=True):
        return self._items(
            allow_fallback=allow_fallback,
            allow_internal=allow_internal
        ).values()

    def __contains__(self, key):
        return key in self.keys()

    def __len__(self):
        return len(self.items())

    def __iter__(self):
        return iter(self.keys())

    def __getitem__(self, key):
        default = object()  # sentinel, so stored None values are not "missing"
        value = self.get(key, default=default)
        if value is default:
            raise KeyError('Key "%s" does not exist' % (key))
        return value

    def __setitem__(self, key, value):
        return self.set(key, value)

    def __getattr__(self, key):
        return self.get(key)

    def __setattr__(self, key, value):
        return self.set(key, value)

    def __eq__(self, other):
        # Compared by externally visible (non-internal) entries only.
        try:
            for k, my_v in self.items(allow_internal=False):
                other_v = other[k]
                if my_v != other_v:
                    return False
            return True
        except Exception:
            return False

    def update(self, other):
        for k, v in other.items():
            self.set(k, v)

    def remove(self, key):
        """Delete a key; missing keys are ignored."""
        if not self._mutable:
            # Fix: the message previously said "Cannot set value of
            # immutable object", copied from set().
            raise Exception("Cannot remove value from immutable object")

        self._config.pop(key, None)

    def get(self, key, default=None):
        found = self._config.get(key, default)
        if found is not default:
            return found

        if self._schema is not None:
            found = self._schema.get(key, default)
            if found is not default:
                # ignore mutability: schema defaults are instantiated
                # and cached on first read, even on immutable configs
                found = found()
                self._config[key] = found
                return found
        return found

    def set(self, key, value):
        if not self._mutable:
            raise Exception("Cannot set value of immutable object")

        if self._schema is not None:
            if key not in self._schema:
                raise Exception("Can not set key '%s' - schema mismatch" % (key))

            schema_entry = self._schema[key]
            schema_entry_instance = schema_entry()
            if not isinstance(value, type(schema_entry_instance)):
                # plain dicts are accepted for Config-typed entries
                if isinstance(value, dict) and \
                        isinstance(schema_entry_instance, Config):
                    schema_entry_instance.update(value)
                    value = schema_entry_instance
                else:
                    raise Exception("Can not set key '%s' - schema mismatch" % (key))

        self._config[key] = value
        return value

    @staticmethod
    def parse(path):
        """Load a Config from a YAML file."""
        with open(path, 'r') as f:
            return Config(yaml.safe_load(f))

    @staticmethod
    def yaml_representer(dumper, value):
        # Serialize only public, explicitly set entries.
        return dumper.represent_data(
            value._items(allow_internal=False, allow_fallback=False))

    def dump(self, path):
        """Write this Config to a YAML file."""
        with open(path, 'w+') as f:
            yaml.dump(self, f)
# Let PyYAML serialize Config objects (and subclasses) via the custom representer.
yaml.add_multi_representer(Config, Config.yaml_representer)

class DefaultConfig(Config):
    """A Config whose unknown keys are wrapped by a factory ('default')
    on assignment, instead of being rejected."""
    def __init__(self, default=None):
        super().__init__()
        # bypass __setattr__, which writes into the config itself
        self.__dict__['_default'] = default

    def set(self, key, value):
        if key not in self.keys(allow_fallback=False):
            # first assignment of a new key: coerce through the factory
            value = self._default(value)
            return super().set(key, value)
        else:
            return super().set(key, value)

# Name of the native dataset format.
DEFAULT_FORMAT = 'datumaro'

@ -1,63 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from datumaro.components.config import Config, \
DefaultConfig as _DefaultConfig, \
SchemaBuilder as _SchemaBuilder
# Schema of a project data source entry.
SOURCE_SCHEMA = _SchemaBuilder() \
    .add('url', str) \
    .add('format', str) \
    .add('options', dict) \
    .build()

class Source(Config):
    """Configuration of a single project data source."""
    def __init__(self, config=None):
        super().__init__(config, schema=SOURCE_SCHEMA)

# Schema of a project model entry.
MODEL_SCHEMA = _SchemaBuilder() \
    .add('launcher', str) \
    .add('options', dict) \
    .build()

class Model(Config):
    """Configuration of a single project model."""
    def __init__(self, config=None):
        super().__init__(config, schema=MODEL_SCHEMA)

# Schema of the whole project config; 'internal' entries are not serialized.
PROJECT_SCHEMA = _SchemaBuilder() \
    .add('project_name', str) \
    .add('format_version', int) \
    \
    .add('subsets', list) \
    .add('sources', lambda: _DefaultConfig(
        lambda v=None: Source(v))) \
    .add('models', lambda: _DefaultConfig(
        lambda v=None: Model(v))) \
    \
    .add('models_dir', str, internal=True) \
    .add('plugins_dir', str, internal=True) \
    .add('sources_dir', str, internal=True) \
    .add('dataset_dir', str, internal=True) \
    .add('project_filename', str, internal=True) \
    .add('project_dir', str, internal=True) \
    .add('env_dir', str, internal=True) \
    .build()

# Immutable defaults used as a fallback for new projects.
PROJECT_DEFAULT_CONFIG = Config({
    'project_name': 'undefined',
    'format_version': 1,

    'sources_dir': 'sources',
    'dataset_dir': 'dataset',
    'models_dir': 'models',
    'plugins_dir': 'plugins',

    'project_filename': 'config.yaml',
    'project_dir': '',
    'env_dir': '.datumaro',
}, mutable=False, schema=PROJECT_SCHEMA)

@ -1,79 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
import shutil
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import save_image
class IConverter:
    """Interface of dataset converters (format writers)."""
    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Write the items of *extractor* to *save_dir* in this format."""
        raise NotImplementedError("Should be implemented in a subclass")
class Converter(IConverter, CliPlugin):
    """Base class for dataset format writers with a common CLI and
    image-saving helpers."""
    # default image file extension; subclasses that save images must set it
    DEFAULT_IMAGE_EXT = None

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--save-images', action='store_true',
            help="Save images (default: %(default)s)")
        parser.add_argument('--image-ext', default=None,
            help="Image extension (default: keep or use format default%s)" % \
                (' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else ''))

        return parser

    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Instantiate the converter and run it."""
        converter = cls(extractor, save_dir, **options)
        return converter.apply()

    def apply(self):
        """Perform the actual conversion; implemented by subclasses."""
        raise NotImplementedError("Should be implemented in a subclass")

    def __init__(self, extractor, save_dir, save_images=False,
            image_ext=None, default_image_ext=None):
        default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT
        assert default_image_ext
        self._default_image_ext = default_image_ext

        self._save_images = save_images
        self._image_ext = image_ext

        self._extractor = extractor
        self._save_dir = save_dir

    def _find_image_ext(self, item):
        # Priority: explicit option > source file extension > format default.
        src_ext = None
        if item.has_image:
            src_ext = osp.splitext(osp.basename(item.image.path))[1]

        return self._image_ext or src_ext or self._default_image_ext

    def _make_image_filename(self, item):
        return item.id + self._find_image_ext(item)

    def _save_image(self, item, path=None):
        """Save the item's image to *path*, copying the source file when
        the extension matches, otherwise re-encoding the pixel data."""
        image = item.image.data
        if image is None:
            log.warning("Item '%s' has no image", item.id)
            return item.image.path

        path = path or self._make_image_filename(item)

        src_ext = osp.splitext(osp.basename(item.image.path))[1]
        dst_ext = osp.splitext(osp.basename(path))[1]

        # NOTE(review): a bare filename makes osp.dirname(path) empty, and
        # os.makedirs('') raises - callers appear to pass full paths; confirm.
        os.makedirs(osp.dirname(path), exist_ok=True)
        if src_ext == dst_ext and osp.isfile(item.image.path):
            shutil.copyfile(item.image.path, path)
        else:
            save_image(path, image)

@ -1,261 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
from lxml import etree as ET # lxml has proper XPath implementation
from datumaro.components.extractor import (Transform,
Annotation, AnnotationType,
Label, Mask, Points, Polygon, PolyLine, Bbox, Caption,
)
class DatasetItemEncoder:
    """Encodes dataset items and their annotations as XML trees, which are
    then matched against user-provided XPath expressions.

    Refactored: the label/bbox/point encoding that was repeated verbatim in
    five encoders is extracted into private helpers; the public classmethod
    interface and the produced XML are unchanged.
    """

    @classmethod
    def encode(cls, item, categories=None):
        """Return an <item> element describing the item, its image and
        all of its annotations."""
        item_elem = ET.Element('item')
        ET.SubElement(item_elem, 'id').text = str(item.id)
        ET.SubElement(item_elem, 'subset').text = str(item.subset)
        ET.SubElement(item_elem, 'path').text = str('/'.join(item.path))

        image = item.image
        if image is not None:
            item_elem.append(cls.encode_image(image))

        for ann in item.annotations:
            item_elem.append(cls.encode_annotation(ann, categories))

        return item_elem

    @classmethod
    def encode_image(cls, image):
        image_elem = ET.Element('image')

        size = image.size
        if size is not None:
            h, w = size
        else:
            h = 'unknown'
            w = h
        ET.SubElement(image_elem, 'width').text = str(w)
        ET.SubElement(image_elem, 'height').text = str(h)

        ET.SubElement(image_elem, 'has_data').text = '%d' % int(image.has_data)
        ET.SubElement(image_elem, 'path').text = image.path

        return image_elem

    @classmethod
    def encode_annotation_base(cls, annotation):
        assert isinstance(annotation, Annotation)
        ann_elem = ET.Element('annotation')
        ET.SubElement(ann_elem, 'id').text = str(annotation.id)
        ET.SubElement(ann_elem, 'type').text = str(annotation.type.name)

        # attribute names may contain spaces, which are invalid in XML tags
        for k, v in annotation.attributes.items():
            ET.SubElement(ann_elem, k.replace(' ', '-')).text = str(v)

        ET.SubElement(ann_elem, 'group').text = str(annotation.group)

        return ann_elem

    @staticmethod
    def _get_label(label_id, categories):
        label = ''
        if label_id is None:
            return ''
        if categories is not None:
            label_cat = categories.get(AnnotationType.label)
            if label_cat is not None:
                label = label_cat.items[label_id].name
        return label

    @classmethod
    def _encode_label_info(cls, ann_elem, obj, categories):
        # Shared <label>/<label_id> pair for labeled annotations.
        ET.SubElement(ann_elem, 'label').text = \
            str(cls._get_label(obj.label, categories))
        ET.SubElement(ann_elem, 'label_id').text = str(obj.label)

    @staticmethod
    def _encode_bbox_info(ann_elem, obj):
        # Shared nested <bbox> block for point-based shapes.
        x, y, w, h = obj.get_bbox()
        bbox_elem = ET.SubElement(ann_elem, 'bbox')
        ET.SubElement(bbox_elem, 'x').text = str(x)
        ET.SubElement(bbox_elem, 'y').text = str(y)
        ET.SubElement(bbox_elem, 'w').text = str(w)
        ET.SubElement(bbox_elem, 'h').text = str(h)
        ET.SubElement(bbox_elem, 'area').text = str(w * h)

    @staticmethod
    def _encode_points(ann_elem, obj, visibility=False):
        # Shared flat <point> list; optionally includes visibility state
        # (Points annotations only).
        points = obj.points
        for i in range(0, len(points), 2):
            point_elem = ET.SubElement(ann_elem, 'point')
            ET.SubElement(point_elem, 'x').text = str(points[i])
            ET.SubElement(point_elem, 'y').text = str(points[i + 1])
            if visibility:
                ET.SubElement(point_elem, 'visible').text = \
                    str(obj.visibility[i // 2].name)

    @classmethod
    def encode_label_object(cls, obj, categories):
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        return ann_elem

    @classmethod
    def encode_mask_object(cls, obj, categories):
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        return ann_elem

    @classmethod
    def encode_bbox_object(cls, obj, categories):
        # NOTE: bboxes are encoded flat (x/y/w/h/area directly on the
        # annotation element), unlike other shapes, which use a nested <bbox>.
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        ET.SubElement(ann_elem, 'x').text = str(obj.x)
        ET.SubElement(ann_elem, 'y').text = str(obj.y)
        ET.SubElement(ann_elem, 'w').text = str(obj.w)
        ET.SubElement(ann_elem, 'h').text = str(obj.h)
        ET.SubElement(ann_elem, 'area').text = str(obj.get_area())
        return ann_elem

    @classmethod
    def encode_points_object(cls, obj, categories):
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        cls._encode_bbox_info(ann_elem, obj)
        cls._encode_points(ann_elem, obj, visibility=True)
        return ann_elem

    @classmethod
    def encode_polygon_object(cls, obj, categories):
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        cls._encode_bbox_info(ann_elem, obj)
        cls._encode_points(ann_elem, obj)
        return ann_elem

    @classmethod
    def encode_polyline_object(cls, obj, categories):
        ann_elem = cls.encode_annotation_base(obj)
        cls._encode_label_info(ann_elem, obj, categories)
        cls._encode_bbox_info(ann_elem, obj)
        cls._encode_points(ann_elem, obj)
        return ann_elem

    @classmethod
    def encode_caption_object(cls, obj):
        ann_elem = cls.encode_annotation_base(obj)
        ET.SubElement(ann_elem, 'caption').text = str(obj.caption)
        return ann_elem

    @classmethod
    def encode_annotation(cls, o, categories=None):
        """Dispatch to the encoder matching the annotation's type."""
        if isinstance(o, Label):
            return cls.encode_label_object(o, categories)
        if isinstance(o, Mask):
            return cls.encode_mask_object(o, categories)
        if isinstance(o, Bbox):
            return cls.encode_bbox_object(o, categories)
        if isinstance(o, Points):
            return cls.encode_points_object(o, categories)
        if isinstance(o, PolyLine):
            return cls.encode_polyline_object(o, categories)
        if isinstance(o, Polygon):
            return cls.encode_polygon_object(o, categories)
        if isinstance(o, Caption):
            return cls.encode_caption_object(o)
        raise NotImplementedError("Unexpected annotation object passed: %s" % o)

    @staticmethod
    def to_string(encoded_item):
        return ET.tostring(encoded_item, encoding='unicode', pretty_print=True)
def XPathDatasetFilter(extractor, xpath=None):
    """Return a view of *extractor* keeping only items whose XML encoding
    matches the given XPath expression; no expression keeps everything."""
    if xpath is None:
        return extractor

    try:
        compiled = ET.XPath(xpath)
    except Exception:
        log.error("Failed to create XPath from expression '%s'", xpath)
        raise

    def _matches(item):
        # Non-empty XPath result means the item passes the filter.
        return bool(compiled(
            DatasetItemEncoder.encode(item, extractor.categories())))

    return extractor.select(_matches)
class XPathAnnotationsFilter(Transform):
    """Keeps only the annotations of each item that match an XPath
    expression, evaluated against the item's XML encoding."""
    def __init__(self, extractor, xpath=None, remove_empty=False):
        super().__init__(extractor)

        if xpath is not None:
            try:
                xpath = ET.XPath(xpath)
            except Exception:
                log.error("Failed to create XPath from expression '%s'", xpath)
                raise
        self._filter = xpath

        # when True, items left with no matching annotations are dropped
        self._remove_empty = remove_empty

    def __iter__(self):
        for item in self._extractor:
            item = self.transform_item(item)
            if item is not None:
                yield item

    def transform_item(self, item):
        if self._filter is None:
            return item

        encoded = DatasetItemEncoder.encode(item, self._extractor.categories())
        filtered = self._filter(encoded)
        filtered = [elem for elem in filtered if elem.tag == 'annotation']

        # map the matched XML elements back to the original annotations
        # by their position in the encoded document
        encoded = encoded.findall('annotation')
        annotations = [item.annotations[encoded.index(e)] for e in filtered]

        if self._remove_empty and len(annotations) == 0:
            return None
        return self.wrap_item(item, annotations=annotations)

@ -1,621 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import namedtuple
from enum import Enum
import numpy as np
import attr
from attr import attrs, attrib
from datumaro.util.image import Image
from datumaro.util.attrs_util import not_empty, default_if_none
# Closed set of supported annotation kinds.
AnnotationType = Enum('AnnotationType',
    [
        'label',
        'mask',
        'points',
        'polygon',
        'polyline',
        'bbox',
        'caption',
    ])

# Shape coordinates are rounded to this many decimal digits on creation.
_COORDINATE_ROUNDING_DIGITS = 2
@attrs(kw_only=True)
class Annotation:
    """Base class of all annotation kinds (labels, shapes, masks, captions)."""
    # unique-per-item annotation id
    id = attrib(default=0, validator=default_if_none(int))
    # arbitrary user attributes
    attributes = attrib(factory=dict, validator=default_if_none(dict))
    # annotations with the same nonzero group belong to one object
    group = attrib(default=0, validator=default_if_none(int))

    def __attrs_post_init__(self):
        assert isinstance(self.type, AnnotationType)

    @property
    def type(self) -> AnnotationType:
        return self._type # must be set in subclasses

    def wrap(self, **kwargs):
        """Return a copy of this annotation with the given fields replaced."""
        return attr.evolve(self, **kwargs)
@attrs(kw_only=True)
class Categories:
    """Base class for metadata describing a kind of annotations."""
    # names of custom attributes applicable to this annotation kind;
    # excluded from equality comparisons (eq=False)
    attributes = attrib(factory=set, validator=default_if_none(set), eq=False)
@attrs
class LabelCategories(Categories):
    """Metadata for label annotations: an ordered list of named categories
    with a name -> index lookup table."""
    @attrs(repr_ns='LabelCategories')
    class Category:
        name = attrib(converter=str, validator=not_empty)
        parent = attrib(default='', validator=default_if_none(str))
        attributes = attrib(factory=set, validator=default_if_none(set))

    items = attrib(factory=list, validator=default_if_none(list))
    # name -> index lookup; rebuilt from 'items', excluded from equality
    _indices = attrib(factory=dict, init=False, eq=False)

    @classmethod
    def from_iterable(cls, iterable):
        """Generation of LabelCategories from iterable object

        Args:
            iterable ([type]): This iterable object can be:
                1) simple str - will generate one Category with str as name
                2) list of str - will be interpreted as list of Category names
                3) list of positional arguments - will generate Categories
                    with these arguments

        Returns:
            LabelCategories: LabelCategories object
        """
        temp_categories = cls()

        if isinstance(iterable, str):
            iterable = [[iterable]]

        for category in iterable:
            if isinstance(category, str):
                category = [category]
            temp_categories.add(*category)

        return temp_categories

    def __attrs_post_init__(self):
        self._reindex()

    def _reindex(self):
        indices = {}
        for index, item in enumerate(self.items):
            # Fix: duplicates were checked against self._indices, which is
            # always empty at this point, so they were never detected; check
            # the index being built instead.
            assert item.name not in indices, item.name
            indices[item.name] = index
        self._indices = indices

    def add(self, name: str, parent: str = None, attributes: dict = None):
        assert name not in self._indices, name

        index = len(self.items)
        self.items.append(self.Category(name, parent, attributes))
        self._indices[name] = index
        return index

    def find(self, name: str):
        """Return (index, Category) for *name*, or (None, None) if absent."""
        index = self._indices.get(name)
        if index is not None:
            return index, self.items[index]
        return index, None
@attrs
class Label(Annotation):
    """A whole-item class label."""
    _type = AnnotationType.label
    # index into LabelCategories.items
    label = attrib(converter=int)
@attrs(eq=False)
class MaskCategories(Categories):
    """Metadata for masks: a mapping of label id -> color."""
    colormap = attrib(factory=dict, validator=default_if_none(dict))
    # computed lazily from 'colormap' on first access
    _inverse_colormap = attrib(default=None,
        validator=attr.validators.optional(dict))

    @property
    def inverse_colormap(self):
        from datumaro.util.mask_tools import invert_colormap
        if self._inverse_colormap is None:
            if self.colormap is not None:
                self._inverse_colormap = invert_colormap(self.colormap)
        return self._inverse_colormap

    def __eq__(self, other):
        # eq=False above: colormap values need array-aware comparison.
        if not super().__eq__(other):
            return False
        if not isinstance(other, __class__):
            return False
        # Fix: compare key sets first - previously a colormap that was a
        # strict subset of the other compared equal (asymmetric equality).
        if set(self.colormap) != set(other.colormap):
            return False
        for label_id, my_color in self.colormap.items():
            other_color = other.colormap.get(label_id)
            if not np.array_equal(my_color, other_color):
                return False
        return True
@attrs(eq=False)
class Mask(Annotation):
    """A 2d binary mask annotation; the pixel data may be loaded lazily."""
    _type = AnnotationType.mask
    # an array or a callable returning one (lazy loading)
    _image = attrib()
    label = attrib(converter=attr.converters.optional(int),
        default=None, kw_only=True)
    z_order = attrib(default=0, validator=default_if_none(int), kw_only=True)

    @property
    def image(self):
        # resolve the lazy loader, if any
        if callable(self._image):
            return self._image()
        return self._image

    def as_class_mask(self, label_id=None):
        """Binary mask scaled by the class id."""
        if label_id is None:
            label_id = self.label
        return self.image * label_id

    def as_instance_mask(self, instance_id):
        """Binary mask scaled by the instance id."""
        return self.image * instance_id

    def get_area(self):
        return np.count_nonzero(self.image)

    def get_bbox(self):
        # [x, y, w, h] of the mask's nonzero region
        from datumaro.util.mask_tools import find_mask_bbox
        return find_mask_bbox(self.image)

    def paint(self, colormap):
        """Render the class mask as a color image using *colormap*."""
        from datumaro.util.mask_tools import paint_mask
        return paint_mask(self.as_class_mask(), colormap)

    def __eq__(self, other):
        # eq=False above: the image field needs array-aware comparison
        if not super().__eq__(other):
            return False
        if not isinstance(other, __class__):
            return False
        return \
            (self.label == other.label) and \
            (self.z_order == other.z_order) and \
            (np.array_equal(self.image, other.image))
@attrs(eq=False)
class RleMask(Mask):
    """A mask stored in COCO run-length encoding; decoded lazily."""
    # RLE-encoded payload, as produced by pycocotools
    rle = attrib()
    _image = attrib(default=attr.Factory(
        lambda self: self._lazy_decode(self.rle),
        takes_self=True), init=False)

    @staticmethod
    def _lazy_decode(rle):
        from pycocotools import mask as mask_utils
        # Fix: 'np.bool' is a deprecated alias of the builtin 'bool' and was
        # removed in NumPy 1.24; use the builtin directly (same dtype).
        return lambda: mask_utils.decode(rle).astype(bool)

    def get_area(self):
        from pycocotools import mask as mask_utils
        return mask_utils.area(self.rle)

    def get_bbox(self):
        from pycocotools import mask as mask_utils
        return mask_utils.toBbox(self.rle)

    def __eq__(self, other):
        # fast path: compare RLE payloads directly for two RleMasks;
        # fall back to pixel comparison otherwise
        if not isinstance(other, __class__):
            return super().__eq__(other)
        return self.rle == other.rle
class CompiledMask:
    """A pair of composite masks for a whole image - a class mask and an
    instance mask - each possibly loaded lazily."""
    @staticmethod
    def from_instance_masks(instance_masks,
            instance_ids=None, instance_labels=None):
        """Merge individual instance masks into one CompiledMask.

        Masks are merged in z_order, so later (higher) instances overwrite
        earlier ones; missing ids default to 1-based instance indices.
        """
        from datumaro.util.mask_tools import merge_masks

        if instance_ids is not None:
            assert len(instance_ids) == len(instance_masks)
        else:
            instance_ids = [None] * len(instance_masks)

        if instance_labels is not None:
            assert len(instance_labels) == len(instance_masks)
        else:
            instance_labels = [None] * len(instance_masks)

        instance_masks = sorted(
            zip(instance_masks, instance_ids, instance_labels),
            key=lambda m: m[0].z_order)

        instance_mask = [m.as_instance_mask(id if id is not None else 1 + idx)
            for idx, (m, id, _) in enumerate(instance_masks)]
        instance_mask = merge_masks(instance_mask)

        cls_mask = [m.as_class_mask(c) for m, _, c in instance_masks]
        cls_mask = merge_masks(cls_mask)
        return __class__(class_mask=cls_mask, instance_mask=instance_mask)

    def __init__(self, class_mask=None, instance_mask=None):
        self._class_mask = class_mask
        self._instance_mask = instance_mask

    @staticmethod
    def _get_image(image):
        # resolve a lazy loader, if any
        if callable(image):
            return image()
        return image

    @property
    def class_mask(self):
        return self._get_image(self._class_mask)

    @property
    def instance_mask(self):
        return self._get_image(self._instance_mask)

    @property
    def instance_count(self):
        return int(self.instance_mask.max())

    def get_instance_labels(self):
        """Return a { instance_id: class_id } mapping.

        Class and instance ids are packed into single integer keys so that
        unique (class, instance) pairs are found in one np.unique pass.
        """
        class_shift = 16
        m = (self.class_mask.astype(np.uint32) << class_shift) \
            + self.instance_mask.astype(np.uint32)
        keys = np.unique(m)
        # low bits hold the instance id, high bits the class id;
        # instance id 0 is background and is skipped
        instance_labels = {k & ((1 << class_shift) - 1): k >> class_shift
            for k in keys if k & ((1 << class_shift) - 1) != 0
        }
        return instance_labels

    def extract(self, instance_id):
        """Return a binary mask of the given instance."""
        return self.instance_mask == instance_id

    def lazy_extract(self, instance_id):
        return lambda: self.extract(instance_id)
@attrs
class _Shape(Annotation):
    """Base class of point-based shapes, stored as a flat [x0, y0, x1, y1, ...]
    coordinate list rounded to a fixed precision."""
    points = attrib(converter=lambda x:
        [round(p, _COORDINATE_ROUNDING_DIGITS) for p in x])
    label = attrib(converter=attr.converters.optional(int),
        default=None, kw_only=True)
    z_order = attrib(default=0, validator=default_if_none(int), kw_only=True)

    def get_area(self):
        raise NotImplementedError()

    def get_bbox(self):
        """Return the shape's axis-aligned bbox as [x, y, w, h], or None
        for an empty point list."""
        coords = self.points
        if not coords:
            return None

        xs = coords[0::2]
        ys = coords[1::2]
        left = min(xs)
        top = min(ys)
        return [left, top, max(xs) - left, max(ys) - top]
@attrs
class PolyLine(_Shape):
    """An open polyline shape."""
    _type = AnnotationType.polyline

    def as_polygon(self):
        return self.points[:]

    def get_area(self):
        # a line has no area
        return 0
@attrs
class Polygon(_Shape):
    """A closed polygon shape."""
    _type = AnnotationType.polygon

    def __attrs_post_init__(self):
        super().__attrs_post_init__()
        # a valid polygon needs an even coordinate count and >= 3 vertices
        # keep the message on a single line to produce informative output
        assert len(self.points) % 2 == 0 and 3 <= len(self.points) // 2, "Wrong polygon points: %s" % self.points

    def get_area(self):
        import pycocotools.mask as mask_utils

        x, y, w, h = self.get_bbox()
        # NOTE(review): (y + h, x + w) is passed as the raster size, i.e.
        # the minimal canvas that fits the polygon - presumably sufficient
        # for area computation; confirm.
        rle = mask_utils.frPyObjects([self.points], y + h, x + w)
        area = mask_utils.area(rle)[0]
        return area
@attrs
class Bbox(_Shape):
    """An axis-aligned bounding box; stored internally as the two-corner
    point list [x0, y0, x1, y1], constructed from (x, y, w, h)."""
    _type = AnnotationType.bbox

    # will be overridden by attrs, then will be overridden again by us
    # attrs' method will be renamed to __attrs_init__
    def __init__(self, x, y, w, h, *args, **kwargs):
        kwargs.pop('points', None) # comes from wrap()
        # convert (x, y, w, h) into the two-corner point representation
        self.__attrs_init__([x, y, x + w, y + h], *args, **kwargs)
    __actual_init__ = __init__ # save pointer

    @property
    def x(self):
        return self.points[0]

    @property
    def y(self):
        return self.points[1]

    @property
    def w(self):
        return self.points[2] - self.points[0]

    @property
    def h(self):
        return self.points[3] - self.points[1]

    def get_area(self):
        return self.w * self.h

    def get_bbox(self):
        return [self.x, self.y, self.w, self.h]

    def as_polygon(self):
        # the four corners, in order
        x, y, w, h = self.get_bbox()
        return [
            x, y,
            x + w, y,
            x + w, y + h,
            x, y + h
        ]

    def iou(self, other):
        from datumaro.util.annotation_util import bbox_iou
        return bbox_iou(self.get_bbox(), other.get_bbox())

    def wrap(item, **kwargs):
        # copy with (x, y, w, h) preserved unless overridden
        d = {'x': item.x, 'y': item.y, 'w': item.w, 'h': item.h}
        d.update(kwargs)
        return attr.evolve(item, **d)

# Swap the attrs-generated __init__ with the hand-written one above:
# the generated initializer becomes __attrs_init__, called internally.
assert not hasattr(Bbox, '__attrs_init__') # hopefully, it will be supported
setattr(Bbox, '__attrs_init__', Bbox.__init__)
setattr(Bbox, '__init__', Bbox.__actual_init__)
@attrs
class PointsCategories(Categories):
    """Metadata for Points annotations: per-label point names and skeleton
    connections (joints)."""
    @attrs(repr_ns="PointsCategories")
    class Category:
        labels = attrib(factory=list, validator=default_if_none(list))
        joints = attrib(factory=set, validator=default_if_none(set))

    # label_id -> Category
    items = attrib(factory=dict, validator=default_if_none(dict))

    @classmethod
    def from_iterable(cls, iterable):
        """Generation of PointsCategories from iterable object

        Args:
            iterable ([type]): This iterable object can be:
                1) list of positional arguments - will generate Categories
                    with these arguments

        Returns:
            PointsCategories: PointsCategories object
        """
        temp_categories = cls()

        for category in iterable:
            temp_categories.add(*category)
        return temp_categories

    def add(self, label_id, labels=None, joints=None):
        if joints is None:
            joints = []
        # joints are stored as a set of point-index tuples
        joints = set(map(tuple, joints))
        self.items[label_id] = self.Category(labels, joints)
@attrs
class Points(_Shape):
    """A set of keypoints with a per-point visibility state."""
    Visibility = Enum('Visibility', [
        ('absent', 0),
        ('hidden', 1),
        ('visible', 2),
    ])

    _type = AnnotationType.points

    visibility = attrib(type=list, default=None)
    @visibility.validator
    def _visibility_validator(self, attribute, visibility):
        # normalizes the field: fills a default (all visible) and converts
        # raw values to Visibility members; note it mutates self
        if visibility is None:
            visibility = [self.Visibility.visible] * (len(self.points) // 2)
        else:
            for i, v in enumerate(visibility):
                if not isinstance(v, self.Visibility):
                    visibility[i] = self.Visibility(v)
        assert len(visibility) == len(self.points) // 2
        self.visibility = visibility

    def __attrs_post_init__(self):
        super().__attrs_post_init__()
        assert len(self.points) % 2 == 0, self.points

    def get_area(self):
        return 0

    def get_bbox(self):
        # bbox of the present (non-absent) points only
        xs = [p for p, v in zip(self.points[0::2], self.visibility)
            if v != __class__.Visibility.absent]
        ys = [p for p, v in zip(self.points[1::2], self.visibility)
            if v != __class__.Visibility.absent]
        x0 = min(xs, default=0)
        x1 = max(xs, default=0)
        y0 = min(ys, default=0)
        y1 = max(ys, default=0)
        return [x0, y0, x1 - x0, y1 - y0]
@attrs
class Caption(Annotation):
    """A free-form text annotation."""
    _type = AnnotationType.caption
    caption = attrib(converter=str)
@attrs
class DatasetItem:
    """A single dataset sample: an id, an optional image and annotations."""
    # backslash path separators in ids are normalized to '/'
    id = attrib(converter=lambda x: str(x).replace('\\', '/'),
        type=str, validator=not_empty)
    annotations = attrib(factory=list, validator=default_if_none(list))
    subset = attrib(default='', validator=default_if_none(str))
    path = attrib(factory=list, validator=default_if_none(list))

    image = attrib(type=Image, default=None)
    @image.validator
    def _image_validator(self, attribute, image):
        # coerces raw input (loader callable / ndarray / path string) into
        # an Image; note it mutates self
        if callable(image) or isinstance(image, np.ndarray):
            image = Image(data=image)
        elif isinstance(image, str):
            image = Image(path=image)
        assert image is None or isinstance(image, Image)
        self.image = image

    attributes = attrib(factory=dict, validator=default_if_none(dict))

    @property
    def has_image(self):
        return self.image is not None

    def wrap(item, **kwargs):
        """Return a copy of this item with the given fields replaced."""
        return attr.evolve(item, **kwargs)
class IExtractor:
    """Interface of dataset extractors: an iterable of DatasetItems with
    known length, subsets and categories."""
    def __iter__(self):
        raise NotImplementedError()

    def __len__(self):
        raise NotImplementedError()

    def subsets(self):
        raise NotImplementedError()

    def get_subset(self, name):
        raise NotImplementedError()

    def categories(self):
        raise NotImplementedError()

    def select(self, pred):
        raise NotImplementedError()
class _DatasetFilter:
def __init__(self, iterable, predicate):
self.iterable = iterable
self.predicate = predicate
def __iter__(self):
return filter(self.predicate, self.iterable)
class _ExtractorBase(IExtractor):
    """Shared length/subset caching logic for extractors."""
    def __init__(self, length=None, subsets=None):
        self._length = length
        self._subsets = subsets

    def _init_cache(self):
        # one full pass over the items fills both caches
        subsets = set()
        length = -1
        for length, item in enumerate(self):
            subsets.add(item.subset)
        length += 1  # enumerate yields the last index; -1 + 1 = 0 when empty

        if self._length is None:
            self._length = length
        if self._subsets is None:
            self._subsets = subsets

    def __len__(self):
        if self._length is None:
            self._init_cache()
        return self._length

    def subsets(self):
        if self._subsets is None:
            self._init_cache()
        return list(self._subsets)

    def get_subset(self, name):
        if name in self.subsets():
            return self.select(lambda item: item.subset == name)
        else:
            raise Exception("Unknown subset '%s' requested" % name)

    def transform(self, method, *args, **kwargs):
        """Apply a Transform-like callable to this extractor."""
        return method(self, *args, **kwargs)
class DatasetIteratorWrapper(_ExtractorBase):
    """Presents a plain iterable of items as an extractor.

    NOTE(review): the wrapped iterable is iterated directly, so unless it
    is re-iterable (e.g. a list or _DatasetFilter) the dataset can only be
    consumed once - confirm callers' expectations.
    """
    def __init__(self, iterable, categories, subsets=None):
        super().__init__(length=None, subsets=subsets)
        self._iterable = iterable
        self._categories = categories

    def __iter__(self):
        return iter(self._iterable)

    def categories(self):
        return self._categories

    def select(self, pred):
        return DatasetIteratorWrapper(
            _DatasetFilter(self, pred), self.categories(), self.subsets())
class Extractor(_ExtractorBase):
    """Base class for user-defined dataset extractors."""
    def __init__(self, length=None):
        # Fix: the caller-provided length was discarded by passing
        # length=None, disabling the known-length cache hint.
        super().__init__(length=length)

    def categories(self):
        return {}

    def select(self, pred):
        """Return a lazy view keeping only items matching *pred*."""
        return DatasetIteratorWrapper(
            _DatasetFilter(self, pred), self.categories(), self.subsets())
# Name used for the implicit, unnamed subset.
DEFAULT_SUBSET_NAME = 'default'

class SourceExtractor(Extractor):
    """An extractor with a single (possibly unnamed) subset."""
    def __init__(self, length=None, subset=None):
        super().__init__(length=length)
        # the default subset is represented internally as None
        if subset == DEFAULT_SUBSET_NAME:
            subset = None
        self._subset = subset

    def subsets(self):
        return [self._subset]

    def get_subset(self, name):
        # NOTE(review): requesting 'default' fails when _subset is None,
        # even though __init__ maps 'default' to None - confirm intended.
        if name != self._subset:
            raise Exception("Unknown subset '%s' requested" % name)
        return self
class Importer:
    """Interface of dataset importers: detect a format on disk and
    generate project sources for it."""
    @classmethod
    def detect(cls, path):
        raise NotImplementedError()

    def __call__(self, path, **extra_params):
        raise NotImplementedError()
class Transform(Extractor):
    """Base class for lazy, item-wise dataset transformations."""
    @staticmethod
    def wrap_item(item, **kwargs):
        # copy an item with some fields replaced
        return item.wrap(**kwargs)

    def __init__(self, extractor):
        super().__init__()

        self._extractor = extractor

    def __iter__(self):
        for item in self._extractor:
            yield self.transform_item(item)

    def categories(self):
        return self._extractor.categories()

    def transform_item(self, item: DatasetItem) -> DatasetItem:
        """Map one item to its transformed version."""
        raise NotImplementedError()

@ -1,67 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import numpy as np
from datumaro.components.extractor import (Transform, LabelCategories,
AnnotationType)
from datumaro.util import take_by
# pylint: disable=no-self-use
class Launcher:
    """Interface of inference launchers (model executors)."""
    def __init__(self, model_dir=None):
        pass

    def launch(self, inputs):
        """Run inference on a batch of inputs; returns per-item annotations."""
        raise NotImplementedError()

    def categories(self):
        # optional label set override; None means "use the dataset's"
        return None
# pylint: enable=no-self-use
class ModelTransform(Transform):
    """Applies an inference launcher to extractor items, replacing their
    annotations with the model's predictions."""
    def __init__(self, extractor, launcher, batch_size=1):
        super().__init__(extractor)
        self._launcher = launcher
        self._batch_size = batch_size

    def __iter__(self):
        for batch in take_by(self._extractor, self._batch_size):
            inputs = np.array([item.image.data for item in batch])
            inference = self._launcher.launch(inputs)

            for item, annotations in zip(batch, inference):
                self._check_annotations(annotations)
                yield self.wrap_item(item, annotations=annotations)

    def get_subset(self, name):
        subset = self._extractor.get_subset(name)
        return __class__(subset, self._launcher, self._batch_size)

    def categories(self):
        # the launcher's label set, when it defines one, wins over the source's
        launcher_override = self._launcher.categories()
        if launcher_override is not None:
            return launcher_override
        return self._extractor.categories()

    def transform_item(self, item):
        # Fix: pass the pixel data, as __iter__ does - previously the Image
        # wrapper object itself was fed to the model.
        inputs = np.expand_dims(item.image.data, axis=0)
        annotations = self._launcher.launch(inputs)[0]
        return self.wrap_item(item, annotations=annotations)

    def _check_annotations(self, annotations):
        """Validate that predicted label ids fit the known label set."""
        labels_count = len(self.categories().get(
            AnnotationType.label, LabelCategories()).items)

        for ann in annotations:
            # Fix: annotations without a 'label' attribute (e.g. captions)
            # previously raised AttributeError here; treat them as unlabeled.
            label = getattr(ann, 'label', None)
            if label is None:
                continue

            if label not in range(labels_count):
                raise Exception("Annotation has unexpected label id %s, "
                    "while there is only %s defined labels." % \
                    (label, labels_count))

File diff suppressed because it is too large Load Diff

@ -1,850 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict, defaultdict
from functools import reduce
from glob import glob
from typing import Iterable, Union, Dict, List
import git
import importlib
import inspect
import logging as log
import os
import os.path as osp
import shutil
import sys
from datumaro.components.config import Config, DEFAULT_FORMAT
from datumaro.components.config_model import (Model, Source,
PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
from datumaro.components.extractor import Extractor, LabelCategories,\
AnnotationType, DatasetItem
from datumaro.components.launcher import ModelTransform
from datumaro.components.dataset_filter import \
XPathDatasetFilter, XPathAnnotationsFilter
def import_foreign_module(name, path, package=None):
    """Import a module from an arbitrary directory without polluting
    sys.path or the global module cache.

    :param name: module name to import
    :param path: directory to search first
    :param package: anchor package for relative imports
    :return: the imported module object
    :raises ImportError: if the module cannot be found/loaded
    """
    default_path = sys.path.copy()
    try:
        # search 'path' first so a local module shadows installed ones
        sys.path = [ osp.abspath(path), ] + default_path
        sys.modules.pop(name, None) # remove from cache
        module = importlib.import_module(name, package=package)
        sys.modules.pop(name) # remove from cache
    finally:
        # the redundant 'except Exception: raise' was removed; the
        # finally block alone guarantees sys.path is restored
        sys.path = default_path
    return module
class Registry:
    """Generic name -> item mapping with optional value coercion."""

    def __init__(self, config=None, item_type=None):
        self.item_type = item_type
        self.items = {}
        if config is not None:
            self.load(config)

    def load(self, config):
        """Populate the registry from a config; no-op in the base class."""

    def register(self, name, value):
        entry = self.item_type(value) if self.item_type else value
        self.items[name] = entry
        return entry

    def unregister(self, name):
        # returns the removed item, or None if it was not registered
        return self.items.pop(name, None)

    def get(self, key):
        return self.items[key] # returns a class / ctor
class ModelRegistry(Registry):
    """Registry of Model entries read from a project config."""

    def __init__(self, config=None):
        super().__init__(config, item_type=Model)

    def load(self, config):
        # TODO: list default dir, insert values
        if 'models' not in config:
            return
        for name, model in config.models.items():
            self.register(name, model)
class SourceRegistry(Registry):
    """Registry of Source entries read from a project config."""

    def __init__(self, config=None):
        super().__init__(config, item_type=Source)

    def load(self, config):
        # TODO: list default dir, insert values
        if 'sources' not in config:
            return
        for name, source in config.sources.items():
            self.register(name, source)
class PluginRegistry(Registry):
    """Registry of plugin classes, keyed by their CLI plugin names."""

    def __init__(self, config=None, builtin=None, local=None):
        super().__init__(config)

        from datumaro.components.cli_plugin import CliPlugin

        # built-in plugins are registered first, then local ones
        # (a local plugin with the same name overrides the built-in)
        for group in (builtin, local):
            if group is None:
                continue
            for plugin in group:
                self.register(CliPlugin._get_name(plugin), plugin)
class GitWrapper:
    """Thin wrapper over a gitpython repository for a project directory."""

    def __init__(self, config=None):
        self.repo = None

        if config is not None and config.project_dir:
            self.init(config.project_dir)

    @staticmethod
    def _git_dir(base_path):
        return osp.join(base_path, '.git')

    @classmethod
    def spawn(cls, path):
        """Open the repo at 'path', creating and bootstrapping it if new."""
        needs_bootstrap = not osp.isdir(cls._git_dir(path))
        repo = git.Repo.init(path=path)
        if needs_bootstrap:
            # seed a placeholder identity so the initial commit succeeds
            repo.config_writer() \
                .set_value("user", "name", "User") \
                .set_value("user", "email", "user@nowhere.com") \
                .release()
            # gitpython does not support init, use git directly
            repo.git.init()
            repo.git.commit('-m', 'Initial commit', '--allow-empty')
        return repo

    def init(self, path):
        self.repo = self.spawn(path)
        return self.repo

    def is_initialized(self):
        return self.repo is not None

    def create_submodule(self, name, dst_dir, **kwargs):
        self.repo.create_submodule(name, dst_dir, **kwargs)

    def has_submodule(self, name):
        return any(sm.name == name for sm in self.repo.submodules)

    def remove_submodule(self, name, **kwargs):
        return self.repo.submodule(name).remove(**kwargs)
def load_project_as_dataset(url):
    """Load the Datumaro project at 'url' as a dataset.

    Forward declaration only; the real implementation is provided
    further down, after the Project class is defined.
    """
    raise NotImplementedError()
class Environment:
    """Registry hub for a project: model and source configs, plus every
    plugin kind (extractors, importers, launchers, converters, transforms),
    combining built-in plugins with ones found in the project's plugin dir.
    """

    # process-wide cache of the plugins shipped with datumaro; filled lazily
    _builtin_plugins = None

    PROJECT_EXTRACTOR_NAME = 'datumaro_project'

    def __init__(self, config=None):
        config = Config(config,
            fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA)
        self.models = ModelRegistry(config)
        self.sources = SourceRegistry(config)
        self.git = GitWrapper(config)

        env_dir = osp.join(config.project_dir, config.env_dir)
        builtin = self._load_builtin_plugins()
        custom = self._load_plugins2(osp.join(env_dir, config.plugins_dir))
        # keep only plugins deriving from the given base type
        select = lambda seq, t: [e for e in seq if issubclass(e, t)]

        # local imports to avoid circular imports at module load time
        from datumaro.components.extractor import Transform
        from datumaro.components.extractor import SourceExtractor
        from datumaro.components.extractor import Importer
        from datumaro.components.converter import Converter
        from datumaro.components.launcher import Launcher
        self.extractors = PluginRegistry(
            builtin=select(builtin, SourceExtractor),
            local=select(custom, SourceExtractor)
        )
        # special extractor that loads a nested project as a dataset
        self.extractors.register(self.PROJECT_EXTRACTOR_NAME,
            load_project_as_dataset)
        self.importers = PluginRegistry(
            builtin=select(builtin, Importer),
            local=select(custom, Importer)
        )
        self.launchers = PluginRegistry(
            builtin=select(builtin, Launcher),
            local=select(custom, Launcher)
        )
        self.converters = PluginRegistry(
            builtin=select(builtin, Converter),
            local=select(custom, Converter)
        )
        self.transforms = PluginRegistry(
            builtin=select(builtin, Transform),
            local=select(custom, Transform)
        )

    @staticmethod
    def _find_plugins(plugins_dir):
        """List candidate plugin modules as (dir, module_name, package)
        tuples: plain .py files and .py files inside packages."""
        plugins = []
        if not osp.exists(plugins_dir):
            return plugins

        for plugin_name in os.listdir(plugins_dir):
            p = osp.join(plugins_dir, plugin_name)
            if osp.isfile(p) and p.endswith('.py'):
                plugins.append((plugins_dir, plugin_name, None))
            elif osp.isdir(p):
                # a package: one entry per contained .py file; note the
                # comprehension variable 'p' (a file) shadows the outer
                # directory 'p' inside the expression
                plugins += [(plugins_dir,
                        osp.splitext(plugin_name)[0] + '.' + osp.basename(p),
                        osp.splitext(plugin_name)[0]
                    )
                    for p in glob(osp.join(p, '*.py'))]
        return plugins

    @classmethod
    def _import_module(cls, module_dir, module_name, types, package=None):
        """Import one plugin module and return its exported plugin classes."""
        module = import_foreign_module(osp.splitext(module_name)[0], module_dir,
            package=package)

        exports = []
        if hasattr(module, 'exports'):
            # the module declares its public plugins explicitly
            exports = module.exports
        else:
            # otherwise take every public symbol
            for symbol in dir(module):
                if symbol.startswith('_'):
                    continue
                exports.append(getattr(module, symbol))

        # keep only proper subclasses of the requested plugin base types
        exports = [s for s in exports
            if inspect.isclass(s) and issubclass(s, types) and not s in types]
        return exports

    @classmethod
    def _load_plugins(cls, plugins_dir, types):
        """Import all plugin modules in a directory, skipping broken ones
        with a log message instead of failing."""
        types = tuple(types)

        plugins = cls._find_plugins(plugins_dir)

        all_exports = []
        for module_dir, module_name, package in plugins:
            try:
                exports = cls._import_module(module_dir, module_name, types,
                    package)
            except Exception as e:
                module_search_error = ImportError
                try:
                    module_search_error = ModuleNotFoundError # python 3.6+
                except NameError:
                    pass

                message = ["Failed to import module '%s': %s", module_name, e]
                if isinstance(e, module_search_error):
                    # a missing optional dependency is not worth a warning
                    log.debug(*message)
                else:
                    log.warning(*message)
                continue

            log.debug("Imported the following symbols from %s: %s" % \
                (
                    module_name,
                    ', '.join(s.__name__ for s in exports)
                )
            )
            all_exports.extend(exports)

        return all_exports

    @classmethod
    def _load_builtin_plugins(cls):
        """Load (and cache) the plugins bundled with datumaro itself."""
        if not cls._builtin_plugins:
            # derive the installed 'datumaro/plugins' dir from this file's path
            plugins_dir = osp.join(
                __file__[: __file__.rfind(osp.join('datumaro', 'components'))],
                osp.join('datumaro', 'plugins')
            )
            assert osp.isdir(plugins_dir), plugins_dir
            cls._builtin_plugins = cls._load_plugins2(plugins_dir)
        return cls._builtin_plugins

    @classmethod
    def _load_plugins2(cls, plugins_dir):
        """Load plugins of all known kinds from 'plugins_dir'."""
        from datumaro.components.extractor import Transform
        from datumaro.components.extractor import SourceExtractor
        from datumaro.components.extractor import Importer
        from datumaro.components.converter import Converter
        from datumaro.components.launcher import Launcher
        types = [SourceExtractor, Converter, Importer, Launcher, Transform]

        return cls._load_plugins(plugins_dir, types)

    def make_extractor(self, name, *args, **kwargs):
        return self.extractors.get(name)(*args, **kwargs)

    def make_importer(self, name, *args, **kwargs):
        return self.importers.get(name)(*args, **kwargs)

    def make_launcher(self, name, *args, **kwargs):
        return self.launchers.get(name)(*args, **kwargs)

    def make_converter(self, name, *args, **kwargs):
        return self.converters.get(name)(*args, **kwargs)

    def register_model(self, name, model):
        self.models.register(name, model)

    def unregister_model(self, name):
        self.models.unregister(name)
class Dataset(Extractor):
    """In-memory dataset: items grouped into named subsets plus
    category (label) metadata."""

    class Subset(Extractor):
        # a single named subset; items are kept in insertion order by id
        def __init__(self, parent):
            self.parent = parent
            self.items = OrderedDict()

        def __iter__(self):
            yield from self.items.values()

        def __len__(self):
            return len(self.items)

        def categories(self):
            return self.parent.categories()

    @classmethod
    def from_iterable(cls, iterable: Iterable[DatasetItem],
            categories: Union[Dict, List[str]] = None):
        """Build a dataset from an iterable of items.

        'categories' is either a ready categories mapping or a plain
        list of label names, which is converted to LabelCategories.
        """
        if isinstance(categories, list):
            categories = { AnnotationType.label:
                LabelCategories.from_iterable(categories)
            }

        if not categories:
            categories = {}

        class _extractor(Extractor):
            def __iter__(self):
                return iter(iterable)

            def categories(self):
                return categories

        return cls.from_extractors(_extractor())

    @classmethod
    def from_extractors(cls, *sources):
        """Merge several extractors into a single dataset; colliding items
        (same subset and id) are merged field-wise."""
        categories = cls._merge_categories(s.categories() for s in sources)
        dataset = Dataset(categories=categories)

        # merge items
        subsets = defaultdict(lambda: cls.Subset(dataset))
        for source in sources:
            for item in source:
                existing_item = subsets[item.subset].items.get(item.id)
                if existing_item is not None:
                    path = existing_item.path
                    if item.path != path:
                        # conflicting provenance - drop the path
                        path = None
                    item = cls._merge_items(existing_item, item, path=path)

                subsets[item.subset].items[item.id] = item
        dataset._subsets = dict(subsets)

        return dataset

    def __init__(self, categories=None):
        super().__init__()

        self._subsets = {}

        if not categories:
            categories = {}
        self._categories = categories

    def __iter__(self):
        for subset in self._subsets.values():
            for item in subset:
                yield item

    def __len__(self):
        # cached; invalidated (set to None) by put()
        # NOTE(review): assumes the Extractor base initializes _length -
        # confirm, since __init__ here does not set it
        if self._length is None:
            self._length = reduce(lambda s, x: s + len(x),
                self._subsets.values(), 0)
        return self._length

    def get_subset(self, name):
        return self._subsets[name]

    def subsets(self):
        # subset names only
        return list(self._subsets)

    def categories(self):
        return self._categories

    def get(self, item_id, subset=None, path=None):
        """Find an item by id and subset; 'path' addressing is not
        supported by a plain Dataset."""
        if path:
            raise KeyError("Requested dataset item path is not found")
        item_id = str(item_id)
        subset = subset or ''
        subset = self._subsets[subset]
        return subset.items[item_id]

    def put(self, item, item_id=None, subset=None, path=None):
        """Add or replace an item; 'path' addressing is not supported."""
        if path:
            raise KeyError("Requested dataset item path is not found")
        if item_id is None:
            item_id = item.id
        if subset is None:
            subset = item.subset

        item = item.wrap(id=item_id, subset=subset, path=None)
        if subset not in self._subsets:
            self._subsets[subset] = self.Subset(self)
        self._subsets[subset].items[item_id] = item
        self._length = None

        return item

    def filter(self, expr, filter_annotations=False, remove_empty=False):
        """Filter items (or their annotations) with an XPath expression."""
        if filter_annotations:
            return self.transform(XPathAnnotationsFilter, expr, remove_empty)
        else:
            return self.transform(XPathDatasetFilter, expr)

    def update(self, items):
        """Put every item from 'items' into this dataset; returns self."""
        for item in items:
            self.put(item)
        return self

    def define_categories(self, categories):
        # only allowed while no categories have been set yet
        assert not self._categories
        self._categories = categories

    @staticmethod
    def _lazy_image(item):
        # NOTE: avoid https://docs.python.org/3/faq/programming.html#why-do-lambdas-defined-in-a-loop-with-different-values-all-return-the-same-result
        return lambda: item.image

    @classmethod
    def _merge_items(cls, existing_item, current_item, path=None):
        # combine two versions of the same item: merged image + annotations
        return existing_item.wrap(path=path,
            image=cls._merge_images(existing_item, current_item),
            annotations=cls._merge_anno(
                existing_item.annotations, current_item.annotations))

    @staticmethod
    def _merge_images(existing_item, current_item):
        """Pick the richer of the two images, backfilling path and size."""
        image = None
        if existing_item.has_image and current_item.has_image:
            # prefer the image that already has loaded data
            if existing_item.image.has_data:
                image = existing_item.image
            else:
                image = current_item.image

            if existing_item.image.path != current_item.image.path:
                if not existing_item.image.path:
                    image._path = current_item.image.path

            if all([existing_item.image._size, current_item.image._size]):
                assert existing_item.image._size == current_item.image._size, "Image info differs for item '%s'" % existing_item.id
            elif existing_item.image._size:
                image._size = existing_item.image._size
            else:
                image._size = current_item.image._size
        elif existing_item.has_image:
            image = existing_item.image
        else:
            image = current_item.image

        return image

    @staticmethod
    def _merge_anno(a, b):
        # TODO: implement properly with merging and annotations remapping
        from .operations import merge_annotations_equal
        return merge_annotations_equal(a, b)

    @staticmethod
    def _merge_categories(sources):
        # TODO: implement properly with merging and annotations remapping
        from .operations import merge_categories
        return merge_categories(sources)
class ProjectDataset(Dataset):
    """Dataset of a Project: a merged view over the project's declared
    sources plus the project's own dataset directory, whose items
    override the sources'."""

    def __init__(self, project):
        super().__init__()

        self._project = project
        config = self.config
        env = self.env

        # instantiate an extractor for every declared source
        sources = {}
        for s_name, source in config.sources.items():
            s_format = source.format or env.PROJECT_EXTRACTOR_NAME
            options = {}
            options.update(source.options)

            url = source.url
            if not source.url:
                # default source location inside the project directory
                url = osp.join(config.project_dir, config.sources_dir, s_name)
            sources[s_name] = env.make_extractor(s_format, url, **options)
        self._sources = sources

        # load the project's own dataset directory, if it exists
        own_source = None
        own_source_dir = osp.join(config.project_dir, config.dataset_dir)
        if config.project_dir and osp.isdir(own_source_dir):
            log.disable(log.INFO)
            own_source = env.make_importer(DEFAULT_FORMAT)(own_source_dir) \
                .make_dataset()
            log.disable(log.NOTSET)

        # merge categories
        # TODO: implement properly with merging and annotations remapping
        categories = self._merge_categories(s.categories()
            for s in self._sources.values())
        # overwrite with own categories
        if own_source is not None and (not categories or len(own_source) != 0):
            categories.update(own_source.categories())
        self._categories = categories

        # merge items
        subsets = defaultdict(lambda: self.Subset(self))
        for source_name, source in self._sources.items():
            log.debug("Loading '%s' source contents..." % source_name)
            for item in source:
                existing_item = subsets[item.subset].items.get(item.id)
                if existing_item is not None:
                    path = existing_item.path
                    if item.path != path:
                        path = None # NOTE: move to our own dataset
                    item = self._merge_items(existing_item, item, path=path)
                else:
                    s_config = config.sources[source_name]
                    if s_config and \
                            s_config.format != env.PROJECT_EXTRACTOR_NAME:
                        # NOTE: consider imported sources as our own dataset
                        path = None
                    else:
                        path = [source_name] + (item.path or [])
                    item = item.wrap(path=path)

                subsets[item.subset].items[item.id] = item

        # override with our items, fallback to existing images
        if own_source is not None:
            log.debug("Loading own dataset...")
            for item in own_source:
                existing_item = subsets[item.subset].items.get(item.id)
                if existing_item is not None:
                    item = item.wrap(path=None,
                        image=self._merge_images(existing_item, item))

                subsets[item.subset].items[item.id] = item

        # TODO: implement subset remapping when needed
        subsets_filter = config.subsets
        if len(subsets_filter) != 0:
            subsets = { k: v for k, v in subsets.items() if k in subsets_filter}
        self._subsets = dict(subsets)

        self._length = None

    def iterate_own(self):
        """Iterate only the items that belong to the project itself
        (items without a source path)."""
        return self.select(lambda item: not item.path)

    def get(self, item_id, subset=None, path=None):
        """Get an item; a non-empty 'path' addresses an item inside the
        named source (path[0]) recursively."""
        if path:
            source = path[0]
            rest_path = path[1:]
            return self._sources[source].get(
                item_id=item_id, subset=subset, path=rest_path)
        return super().get(item_id, subset)

    def put(self, item, item_id=None, subset=None, path=None):
        """Add or replace an item, forwarding the update to the owning
        source when 'path' (or item.path) points into one."""
        if path is None:
            path = item.path

        if path:
            source = path[0]
            rest_path = path[1:]
            # TODO: reverse remapping
            self._sources[source].put(item,
                item_id=item_id, subset=subset, path=rest_path)

        if item_id is None:
            item_id = item.id
        if subset is None:
            subset = item.subset

        item = item.wrap(path=path)
        if subset not in self._subsets:
            self._subsets[subset] = self.Subset(self)
        self._subsets[subset].items[item_id] = item
        self._length = None

        return item

    def save(self, save_dir=None, merge=False, recursive=True,
            save_images=False):
        """Save the dataset (and the project config) to 'save_dir'.

        When 'merge' is True, or when saving into a foreign directory,
        all sources are flattened into a single dataset; otherwise only
        own items are written and sources are saved recursively.
        """
        if save_dir is None:
            assert self.config.project_dir
            save_dir = self.config.project_dir
            project = self._project
        else:
            merge = True

        if merge:
            project = Project(Config(self.config))
            project.config.remove('sources')

        save_dir = osp.abspath(save_dir)
        dataset_save_dir = osp.join(save_dir, project.config.dataset_dir)

        converter_kwargs = {
            'save_images': save_images,
        }

        save_dir_existed = osp.exists(save_dir)
        try:
            os.makedirs(save_dir, exist_ok=True)
            os.makedirs(dataset_save_dir, exist_ok=True)

            if merge:
                # merge and save the resulting dataset
                self.env.converters.get(DEFAULT_FORMAT).convert(
                    self, dataset_save_dir, **converter_kwargs)
            else:
                if recursive:
                    # children items should already be updated
                    # so we just save them recursively
                    for source in self._sources.values():
                        if isinstance(source, ProjectDataset):
                            source.save(**converter_kwargs)

                self.env.converters.get(DEFAULT_FORMAT).convert(
                    self.iterate_own(), dataset_save_dir, **converter_kwargs)

            project.save(save_dir)
        except BaseException:
            # remove only a directory we created ourselves
            if not save_dir_existed and osp.isdir(save_dir):
                shutil.rmtree(save_dir, ignore_errors=True)
            raise

    @property
    def env(self):
        return self._project.env

    @property
    def config(self):
        return self._project.config

    @property
    def sources(self):
        return self._sources

    def _save_branch_project(self, extractor, save_dir=None):
        """Save 'extractor' as a new standalone ('branch') project."""
        extractor = Dataset.from_extractors(extractor) # apply lazy transforms

        # NOTE: probably this function should be in the ViewModel layer
        if save_dir:
            # FIX: abspath() used to be applied before this check, which
            # raised TypeError whenever save_dir was None
            save_dir = osp.abspath(save_dir)
            dst_project = Project()
        else:
            if not self.config.project_dir:
                raise Exception("Either a save directory or a project "
                    "directory should be specified")
            save_dir = self.config.project_dir

            dst_project = Project(Config(self.config))
            dst_project.config.remove('project_dir')
            dst_project.config.remove('sources')
        dst_project.config.project_name = osp.basename(save_dir)

        dst_dataset = dst_project.make_dataset()
        dst_dataset.define_categories(extractor.categories())
        dst_dataset.update(extractor)

        dst_dataset.save(save_dir=save_dir, merge=True)

    def transform_project(self, method, save_dir=None, **method_kwargs):
        """Apply a transform and save the result as a branch project."""
        # NOTE: probably this function should be in the ViewModel layer
        if isinstance(method, str):
            # NOTE(review): Environment does not appear to define
            # make_transform - confirm this code path is exercised
            method = self.env.make_transform(method)

        transformed = self.transform(method, **method_kwargs)
        self._save_branch_project(transformed, save_dir=save_dir)

    def apply_model(self, model, save_dir=None, batch_size=1):
        """Run a model over the dataset and save the predictions as a
        branch project; 'model' is a registered model name or a launcher."""
        # NOTE: probably this function should be in the ViewModel layer
        if isinstance(model, str):
            launcher = self._project.make_executable_model(model)
        else:
            # FIX: a non-string 'model' used to leave 'launcher' undefined,
            # causing a NameError on the call below
            launcher = model

        self.transform_project(ModelTransform, launcher=launcher,
            save_dir=save_dir, batch_size=batch_size)

    def export_project(self, save_dir, converter,
            filter_expr=None, filter_annotations=False, remove_empty=False):
        """Export the dataset with 'converter', optionally filtered first."""
        # NOTE: probably this function should be in the ViewModel layer
        dataset = self
        if filter_expr:
            dataset = dataset.filter(filter_expr,
                filter_annotations=filter_annotations,
                remove_empty=remove_empty)

        save_dir = osp.abspath(save_dir)
        save_dir_existed = osp.exists(save_dir)
        try:
            os.makedirs(save_dir, exist_ok=True)
            converter(dataset, save_dir)
        except BaseException:
            # consistent with save(): remove only a directory we created,
            # and never let cleanup mask the original error
            if not save_dir_existed and osp.isdir(save_dir):
                shutil.rmtree(save_dir, ignore_errors=True)
            raise

    def filter_project(self, filter_expr, filter_annotations=False,
            save_dir=None, remove_empty=False):
        """Filter the dataset and save the result as a branch project."""
        # NOTE: probably this function should be in the ViewModel layer
        dataset = self
        if filter_expr:
            dataset = dataset.filter(filter_expr,
                filter_annotations=filter_annotations,
                remove_empty=remove_empty)
        self._save_branch_project(dataset, save_dir=save_dir)
class Project:
    """A Datumaro project: a config plus the Environment built from it."""

    @classmethod
    def load(cls, path):
        """Load a project from a directory containing its saved config."""
        path = osp.abspath(path)
        config_path = osp.join(path, PROJECT_DEFAULT_CONFIG.env_dir,
            PROJECT_DEFAULT_CONFIG.project_filename)
        config = Config.parse(config_path)
        config.project_dir = path
        config.project_filename = osp.basename(config_path)
        return Project(config)

    def save(self, save_dir=None):
        """Write the project config into its env dir (or under 'save_dir')."""
        config = self.config
        if save_dir is None:
            assert config.project_dir
            project_dir = config.project_dir
        else:
            project_dir = save_dir
        env_dir = osp.join(project_dir, config.env_dir)
        save_dir = osp.abspath(env_dir)

        project_dir_existed = osp.exists(project_dir)
        env_dir_existed = osp.exists(env_dir)
        try:
            os.makedirs(save_dir, exist_ok=True)

            config_path = osp.join(save_dir, config.project_filename)
            config.dump(config_path)
        except BaseException:
            # remove only the directories this call created
            if not env_dir_existed:
                shutil.rmtree(save_dir, ignore_errors=True)
            if not project_dir_existed:
                shutil.rmtree(project_dir, ignore_errors=True)
            raise

    @staticmethod
    def generate(save_dir, config=None):
        """Create and immediately save a new project in 'save_dir'."""
        config = Config(config)
        config.project_dir = save_dir
        project = Project(config)
        project.save(save_dir)
        return project

    @staticmethod
    def import_from(path, dataset_format, env=None, **kwargs):
        """Create a project wrapping the dataset at 'path' via the
        importer registered for 'dataset_format'."""
        if env is None:
            env = Environment()
        importer = env.make_importer(dataset_format)
        return importer(path, **kwargs)

    def __init__(self, config=None):
        self.config = Config(config,
            fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA)
        self.env = Environment(self.config)

    def make_dataset(self):
        """Build the merged dataset view of this project."""
        return ProjectDataset(self)

    def add_source(self, name, value=None):
        # accept a ready Source or a plain dict/Config describing one
        if value is None or isinstance(value, (dict, Config)):
            value = Source(value)
        self.config.sources[name] = value
        self.env.sources.register(name, value)

    def remove_source(self, name):
        self.config.sources.remove(name)
        self.env.sources.unregister(name)

    def get_source(self, name):
        try:
            return self.config.sources[name]
        except KeyError:
            raise KeyError("Source '%s' is not found" % name)

    def get_subsets(self):
        return self.config.subsets

    def set_subsets(self, value):
        # an empty value clears the subset filter entirely
        if not value:
            self.config.remove('subsets')
        else:
            self.config.subsets = value

    def add_model(self, name, value=None):
        # accept a ready Model or a plain dict/Config describing one
        if value is None or isinstance(value, (dict, Config)):
            value = Model(value)
        self.env.register_model(name, value)
        self.config.models[name] = value

    def get_model(self, name):
        try:
            return self.env.models.get(name)
        except KeyError:
            raise KeyError("Model '%s' is not found" % name)

    def remove_model(self, name):
        self.config.models.remove(name)
        self.env.unregister_model(name)

    def make_executable_model(self, name):
        """Instantiate the launcher configured for a registered model."""
        model = self.get_model(name)
        return self.env.make_launcher(model.launcher,
            **model.options, model_dir=self.local_model_dir(name))

    def make_source_project(self, name):
        """Create a project view containing only the named source."""
        source = self.get_source(name)

        config = Config(self.config)
        config.remove('sources')
        config.remove('subsets')
        project = Project(config)
        project.add_source(name, source)
        return project

    def local_model_dir(self, model_name):
        return osp.join(
            self.config.env_dir, self.config.models_dir, model_name)

    def local_source_dir(self, source_name):
        return osp.join(self.config.sources_dir, source_name)
# pylint: disable=function-redefined
def load_project_as_dataset(url):
    """Load the Datumaro project at 'url' as a dataset."""
    # implement the function declared above
    return Project.load(url).make_dataset()
# pylint: enable=function-redefined

@ -1,4 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -1,116 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from datumaro.util.tf_util import import_tf
import_tf() # prevent TF loading and potential interpeter crash
from itertools import groupby
from accuracy_checker.adapters import create_adapter
from accuracy_checker.data_readers import DataRepresentation
from accuracy_checker.launcher import InputFeeder, create_launcher
from accuracy_checker.postprocessor import PostprocessingExecutor
from accuracy_checker.preprocessor import PreprocessingExecutor
from accuracy_checker.utils import extract_image_representations
from datumaro.components.extractor import AnnotationType, LabelCategories
from .representation import import_predictions
class _FakeDataset:
def __init__(self, metadata=None):
self.metadata = metadata or {}
class GenericAcLauncher:
    """Runs inference through an Accuracy Checker pipeline:
    (optional) preprocessing -> launcher -> (optional) adapter ->
    (optional) postprocessing."""

    @staticmethod
    def from_config(config):
        """Build a launcher chain from an Accuracy Checker-style config dict."""
        launcher_config = config['launcher']
        launcher = create_launcher(launcher_config)

        dataset = _FakeDataset()
        # the adapter config may live next to the launcher config or inside it
        adapter_config = config.get('adapter') or launcher_config.get('adapter')
        label_config = adapter_config.get('labels') \
            if isinstance(adapter_config, dict) else None
        if label_config:
            assert isinstance(label_config, (list, dict))
            if isinstance(label_config, list):
                # a plain list means labels are implicitly indexed from 0
                label_config = dict(enumerate(label_config))
            dataset.metadata = {'label_map': {
                int(key): label for key, label in label_config.items()
            }}
        adapter = create_adapter(adapter_config, launcher, dataset)

        preproc_config = config.get('preprocessing')
        preproc = None
        if preproc_config:
            preproc = PreprocessingExecutor(preproc_config,
                dataset_meta=dataset.metadata,
                input_shapes=launcher.inputs_info_for_meta()
            )

        postproc_config = config.get('postprocessing')
        postproc = None
        if postproc_config:
            postproc = PostprocessingExecutor(postproc_config,
                dataset_meta=dataset.metadata,
            )

        return __class__(launcher,
            adapter=adapter, preproc=preproc, postproc=postproc)

    def __init__(self, launcher, adapter=None,
            preproc=None, postproc=None, input_feeder=None):
        self._launcher = launcher
        self._input_feeder = input_feeder or InputFeeder(
            launcher.config.get('inputs', []), launcher.inputs,
            launcher.fit_to_input, launcher.default_layout
        )
        self._adapter = adapter
        self._preproc = preproc
        self._postproc = postproc

        self._categories = self._init_categories()

    def launch_raw(self, inputs):
        """Run the full pipeline and return raw Accuracy Checker outputs."""
        # each input is identified by its position in the batch
        ids = range(len(inputs))
        inputs = [DataRepresentation(inp, identifier=id)
            for id, inp in zip(ids, inputs)]
        _, batch_meta = extract_image_representations(inputs)

        if self._preproc:
            inputs = self._preproc.process(inputs)

        inputs = self._input_feeder.fill_inputs(inputs)
        outputs = self._launcher.predict(inputs, batch_meta)

        if self._adapter:
            outputs = self._adapter.process(outputs, ids, batch_meta)

        if self._postproc:
            outputs = self._postproc.process(outputs)

        return outputs

    def launch(self, inputs):
        """Run inference and convert predictions to Datumaro annotations,
        grouped per input item."""
        outputs = self.launch_raw(inputs)
        # group predictions by the input they belong to
        return [import_predictions(g) for _, g in
            groupby(outputs, key=lambda o: o.identifier)]

    def categories(self):
        return self._categories

    def _init_categories(self):
        # build label categories only when the adapter declares a label map
        if self._adapter is None or self._adapter.label_map is None:
            return None

        label_map = sorted(self._adapter.label_map.items(), key=lambda e: e[0])

        label_cat = LabelCategories()
        for _, label in label_map:
            label_cat.add(label)
        return { AnnotationType.label: label_cat }

@ -1,62 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from datumaro.util.tf_util import import_tf
import_tf() # prevent TF loading and potential interpeter crash
import accuracy_checker.representation as ac
import datumaro.components.extractor as dm
from datumaro.util.annotation_util import softmax
def import_predictions(predictions):
    """Convert a sequence of Accuracy Checker predictions to a flat
    list of Datumaro annotations."""
    return [ann
        for pred in predictions
        for ann in import_prediction(pred)]
def import_prediction(pred):
    """Convert one Accuracy Checker prediction into an iterable of
    Datumaro annotations.

    :raises NotImplementedError: for prediction types with no mapping
    """
    if isinstance(pred, ac.ClassificationPrediction):
        scores = softmax(pred.scores)
        return (dm.Label(label_id, attributes={'score': float(score)})
            for label_id, score in enumerate(scores))
    elif isinstance(pred, ac.ArgMaxClassificationPrediction):
        return (dm.Label(int(pred.label)), )
    elif isinstance(pred, ac.CharacterRecognitionPrediction):
        return (dm.Label(int(pred.label)), )
    elif isinstance(pred, (ac.DetectionPrediction, ac.ActionDetectionPrediction)):
        # FIX: the body referenced 'label_id', but the loop variable is
        # 'label' - every detection prediction raised NameError
        return (dm.Bbox(x0, y0, x1 - x0, y1 - y0, int(label),
            attributes={'score': float(score)})
            for label, score, x0, y0, x1, y1 in zip(pred.labels, pred.scores,
                pred.x_mins, pred.y_mins, pred.x_maxs, pred.y_maxs)
        )
    elif isinstance(pred, ac.DepthEstimationPrediction):
        return (dm.Mask(pred.depth_map), ) # 2d floating point mask
    # elif isinstance(pred, ac.HitRatioPrediction):
    #     -
    elif isinstance(pred, ac.ImageInpaintingPrediction):
        return (dm.Mask(pred.value), ) # an image
    # elif isinstance(pred, ac.MultiLabelRecognitionPrediction):
    #     -
    # elif isinstance(pred, ac.MachineTranslationPrediction):
    #     -
    # elif isinstance(pred, ac.QuestionAnsweringPrediction):
    #     -
    # elif isinstance(pred, ac.PoseEstimation3dPrediction):
    #     -
    # elif isinstance(pred, ac.PoseEstimationPrediction):
    #     -
    # elif isinstance(pred, ac.RegressionPrediction):
    #     -
    else:
        raise NotImplementedError("Can't convert %s" % type(pred))

@ -1,37 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
import yaml
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.launcher import Launcher
from .details.ac import GenericAcLauncher as _GenericAcLauncher
class AcLauncher(Launcher, CliPlugin):
    """
    Generic model launcher with Accuracy Checker backend.
    """

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-c', '--config', type=osp.abspath, required=True,
            help="Path to the launcher configuration file (.yml)")
        return parser

    def __init__(self, config, model_dir=None):
        # 'config' is a YAML file path, resolved relative to model_dir
        # (or the current directory when model_dir is not set)
        model_dir = model_dir or ''
        with open(osp.join(model_dir, config), 'r') as f:
            config = yaml.safe_load(f)
        self._launcher = _GenericAcLauncher.from_config(config)

    def launch(self, inputs):
        # delegate inference to the wrapped Accuracy Checker launcher
        return self._launcher.launch(inputs)

    def categories(self):
        return self._launcher.categories()

@ -1,596 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import json
import logging as log
import os
import os.path as osp
from enum import Enum
from itertools import groupby
import pycocotools.mask as mask_utils
import datumaro.util.annotation_util as anno_tools
import datumaro.util.mask_tools as mask_tools
from datumaro.components.converter import Converter
from datumaro.components.extractor import (_COORDINATE_ROUNDING_DIGITS,
DEFAULT_SUBSET_NAME, AnnotationType, Points)
from datumaro.util import cast, find, str_to_bool
from .format import CocoPath, CocoTask
# How instance segmentations are serialized:
#   polygons - always as polygon point lists
#   mask     - always as RLE masks
#   guess    - chosen per instance by the converter (based on 'is_crowd')
SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask'])
class _TaskConverter:
    """Accumulates one COCO task file (licenses/info/categories/images/
    annotations) and serializes it to JSON."""

    def __init__(self, context):
        # the lowest id that write() may assign to unnumbered annotations;
        # raised by _get_ann_id() to stay above all explicit ids
        self._min_ann_id = 1
        self._context = context

        data = {
            'licenses': [],
            'info': {},
            'categories': [],
            'images': [],
            'annotations': []
        }

        data['licenses'].append({
            'name': '',
            'id': 0,
            'url': ''
        })

        data['info'] = {
            'contributor': '',
            'date_created': '',
            'description': '',
            'url': '',
            'version': '',
            'year': ''
        }
        self._data = data

    def is_empty(self):
        """True when no annotations have been collected for this task."""
        return len(self._data['annotations']) == 0

    def _get_image_id(self, item):
        return self._context._get_image_id(item)

    def save_image_info(self, item, filename):
        """Append a COCO 'images' record for the item."""
        if item.has_image:
            h, w = item.image.size
        else:
            h = 0
            w = 0

        self._data['images'].append({
            'id': self._get_image_id(item),
            'width': int(w),
            'height': int(h),
            'file_name': cast(filename, str, ''),
            'license': 0,
            'flickr_url': '',
            'coco_url': '',
            'date_captured': 0,
        })

    def save_categories(self, dataset):
        """Fill the 'categories' section; implemented by subclasses."""
        raise NotImplementedError()

    def save_annotations(self, item):
        """Append annotation records for an item; implemented by subclasses."""
        raise NotImplementedError()

    def write(self, path):
        """Assign ids to unnumbered annotations and dump JSON to 'path'."""
        next_id = self._min_ann_id
        for ann in self.annotations:
            if ann['id'] is None:
                ann['id'] = next_id
                next_id += 1

        with open(path, 'w') as outfile:
            json.dump(self._data, outfile)

    @property
    def annotations(self):
        return self._data['annotations']

    @property
    def categories(self):
        return self._data['categories']

    def _get_ann_id(self, annotation):
        # track the max explicit id so generated ids never collide with it
        ann_id = annotation.id
        if ann_id:
            self._min_ann_id = max(ann_id, self._min_ann_id)
        return ann_id

    @staticmethod
    def _convert_attributes(ann):
        # 'is_crowd' and 'score' are serialized separately, not as attributes
        return { k: v for k, v in ann.attributes.items()
            if k not in {'is_crowd', 'score'}
        }
class _ImageInfoConverter(_TaskConverter):
    """Writes only image records; categories and annotations are unused."""

    def is_empty(self):
        # emptiness is defined by the image list, not the annotations
        return not self._data['images']

    def save_categories(self, dataset):
        pass

    def save_annotations(self, item):
        pass
class _CaptionsConverter(_TaskConverter):
    """Collects caption annotations into COCO captions records."""

    def save_categories(self, dataset):
        pass

    def save_annotations(self, item):
        """Append one record per caption annotation of the item."""
        for ann_idx, ann in enumerate(item.annotations):
            if ann.type != AnnotationType.caption:
                continue

            elem = {
                'id': self._get_ann_id(ann),
                'image_id': self._get_image_id(item),
                'category_id': 0, # NOTE: workaround for a bug in cocoapi
                'caption': ann.caption,
            }
            if 'score' in ann.attributes:
                try:
                    elem['score'] = float(ann.attributes['score'])
                except Exception as e:
                    # FIX: the format string used '%e' (a float conversion),
                    # which raised TypeError when formatting the exception
                    log.warning("Item '%s', ann #%s: failed to convert "
                        "attribute 'score': %s" % (item.id, ann_idx, e))
            if self._context._allow_attributes:
                elem['attributes'] = self._convert_attributes(ann)

            self.annotations.append(elem)
class _InstancesConverter(_TaskConverter):
    """Writes the COCO 'instances' task: bboxes, polygons and masks."""

    def save_categories(self, dataset):
        # COCO category ids are 1-based label indices.
        label_categories = dataset.categories().get(AnnotationType.label)
        if label_categories is None:
            return

        for idx, cat in enumerate(label_categories.items):
            self.categories.append({
                'id': 1 + idx,
                'name': cast(cat.name, str, ''),
                'supercategory': cast(cat.parent, str, ''),
            })

    @classmethod
    def crop_segments(cls, instances, img_width, img_height):
        """Crop instance segments so that instances above (higher z-order)
        do not cover the ones below; returns the updated instance records."""
        instances = sorted(instances, key=lambda x: x[0].z_order)

        segment_map = []
        segments = []
        for inst_idx, (_, polygons, mask, _) in enumerate(instances):
            if polygons:
                segment_map.extend(inst_idx for p in polygons)
                segments.extend(polygons)
            elif mask is not None:
                segment_map.append(inst_idx)
                segments.append(mask)

        segments = mask_tools.crop_covered_segments(
            segments, img_width, img_height)

        for inst_idx, inst in enumerate(instances):
            new_segments = [s for si_id, s in zip(segment_map, segments)
                if si_id == inst_idx]

            if not new_segments:
                # fully covered - clear both polygons and mask
                inst[1] = []
                inst[2] = None
                continue

            if inst[1]:
                # polygon-based instance: keep all cropped parts
                inst[1] = sum(new_segments, [])
            else:
                mask = mask_tools.merge_masks(new_segments)
                inst[2] = mask_tools.mask_to_rle(mask)

        return instances

    def find_instance_parts(self, group, img_width, img_height):
        """Build a [leader, polygons, mask, bbox] record for one instance
        group, converting between polygons and masks as dictated by the
        converter's segmentation mode."""
        boxes = [a for a in group if a.type == AnnotationType.bbox]
        polygons = [a for a in group if a.type == AnnotationType.polygon]
        masks = [a for a in group if a.type == AnnotationType.mask]

        anns = boxes + polygons + masks
        leader = anno_tools.find_group_leader(anns)
        bbox = anno_tools.max_bbox(anns)
        mask = None
        polygons = [p.points for p in polygons]

        if self._context._segmentation_mode == SegmentationMode.guess:
            # prefer masks if 'is_crowd' says so or, lacking the attribute,
            # if the group carries a mask with the leader's label
            use_masks = True == leader.attributes.get('is_crowd',
                find(masks, lambda x: x.label == leader.label) is not None)
        elif self._context._segmentation_mode == SegmentationMode.polygons:
            use_masks = False
        elif self._context._segmentation_mode == SegmentationMode.mask:
            use_masks = True
        else:
            raise NotImplementedError("Unexpected segmentation mode '%s'" % \
                self._context._segmentation_mode)

        if use_masks:
            if polygons:
                mask = mask_tools.rles_to_mask(polygons, img_width, img_height)

            if masks:
                # NOTE(review): when both polygons and masks are present,
                # 'mask' is a raw array appended to annotation objects and
                # 'm.image' below would fail on it - verify upstream.
                if mask is not None:
                    masks += [mask]
                mask = mask_tools.merge_masks([m.image for m in masks])

            if mask is not None:
                mask = mask_tools.mask_to_rle(mask)
            polygons = []
        else:
            if masks:
                mask = mask_tools.merge_masks([m.image for m in masks])
                polygons += mask_tools.mask_to_polygons(mask)
            mask = None

        return [leader, polygons, mask, bbox]

    @staticmethod
    def find_instance_anns(annotations):
        # Annotations that can form an instance.
        return [a for a in annotations
            if a.type in { AnnotationType.bbox,
                AnnotationType.polygon, AnnotationType.mask }
        ]

    @classmethod
    def find_instances(cls, annotations):
        # Group instance-forming annotations into instances.
        return anno_tools.find_instances(cls.find_instance_anns(annotations))

    def save_annotations(self, item):
        instances = self.find_instances(item.annotations)
        if not instances:
            return

        if not item.has_image:
            # FIX: log.warn() is a deprecated alias of log.warning()
            log.warning("Item '%s': skipping writing instances "
                "since no image info available" % item.id)
            return
        h, w = item.image.size
        instances = [self.find_instance_parts(i, w, h) for i in instances]

        if self._context._crop_covered:
            instances = self.crop_segments(instances, w, h)

        for instance in instances:
            elem = self.convert_instance(instance, item)
            if elem:
                self.annotations.append(elem)

    def convert_instance(self, instance, item):
        """Convert one [ann, polygons, mask, bbox] record into a COCO
        annotation dict; mask-based instances are stored as crowd RLE."""
        ann, polygons, mask, bbox = instance

        is_crowd = mask is not None
        if is_crowd:
            segmentation = {
                'counts': list(int(c) for c in mask['counts']),
                'size': list(int(c) for c in mask['size'])
            }
        else:
            segmentation = [list(map(float, p)) for p in polygons]

        area = 0
        if segmentation:
            if item.has_image:
                h, w = item.image.size
            else:
                # NOTE: here we can guess the image size as
                # it is only needed for the area computation
                w = bbox[0] + bbox[2]
                h = bbox[1] + bbox[3]

            rles = mask_utils.frPyObjects(segmentation, h, w)
            if is_crowd:
                rles = [rles]
            else:
                rles = mask_utils.merge(rles)
            area = mask_utils.area(rles)
        else:
            _, _, w, h = bbox
            segmentation = []
            area = w * h

        elem = {
            'id': self._get_ann_id(ann),
            'image_id': self._get_image_id(item),
            'category_id': cast(ann.label, int, -1) + 1,
            'segmentation': segmentation,
            'area': float(area),
            'bbox': [round(float(n), _COORDINATE_ROUNDING_DIGITS) for n in bbox],
            'iscrowd': int(is_crowd),
        }
        if 'score' in ann.attributes:
            try:
                elem['score'] = float(ann.attributes['score'])
            except Exception as e:
                # BUG FIX: '%e' is a float format and raised TypeError when
                # given the exception; log lazily with '%s' instead.
                log.warning("Item '%s': failed to convert attribute "
                    "'score': %s", item.id, e)
        if self._context._allow_attributes:
            elem['attributes'] = self._convert_attributes(ann)

        return elem
class _KeypointsConverter(_InstancesConverter):
    """Writes the COCO 'person_keypoints' task."""

    def save_categories(self, dataset):
        # Keypoint categories extend label categories with keypoint names
        # and a skeleton, when a matching points category exists.
        label_categories = dataset.categories().get(AnnotationType.label)
        if label_categories is None:
            return
        point_categories = dataset.categories().get(AnnotationType.points)

        for idx, label_cat in enumerate(label_categories.items):
            cat = {
                'id': 1 + idx,
                'name': cast(label_cat.name, str, ''),
                'supercategory': cast(label_cat.parent, str, ''),
                'keypoints': [],
                'skeleton': [],
            }

            if point_categories is not None:
                kp_cat = point_categories.items.get(idx)
                if kp_cat is not None:
                    cat.update({
                        'keypoints': [str(l) for l in kp_cat.labels],
                        'skeleton': [list(map(int, j)) for j in kp_cat.joints],
                    })
            self.categories.append(cat)

    def save_annotations(self, item):
        point_annotations = [a for a in item.annotations
            if a.type == AnnotationType.points]
        if not point_annotations:
            return

        # Create annotations for solitary keypoints annotations
        for points in self.find_solitary_points(item.annotations):
            instance = [points, [], None, points.get_bbox()]
            elem = super().convert_instance(instance, item)
            elem.update(self.convert_points_object(points))

            self.annotations.append(elem)

        # Create annotations for complete instance + keypoints annotations
        super().save_annotations(item)

    @classmethod
    def find_solitary_points(cls, annotations):
        """Return Points annotations that do not belong to an instance:
        either ungrouped, or in a group with no bbox/polygon/mask."""
        annotations = sorted(annotations, key=lambda a: a.group)
        solitary_points = []

        for g_id, group in groupby(annotations, lambda a: a.group):
            # BUG FIX: the groupby() grouper is a one-shot iterator; the old
            # code consumed it inside find_instance_anns() and then iterated
            # it again, silently dropping grouped solitary points.
            group = list(group)
            if not g_id or not cls.find_instance_anns(group):
                solitary_points.extend(
                    a for a in group if a.type == AnnotationType.points)

        return solitary_points

    @staticmethod
    def convert_points_object(ann):
        # COCO stores keypoints as flat [x, y, visibility] triplets.
        keypoints = []
        points = ann.points
        visibility = ann.visibility
        for index in range(0, len(points), 2):
            kp = points[index : index + 2]
            state = visibility[index // 2].value
            keypoints.extend([*kp, state])

        num_annotated = len([v for v in visibility \
            if v != Points.Visibility.absent])

        return {
            'keypoints': keypoints,
            'num_keypoints': num_annotated,
        }

    def convert_instance(self, instance, item):
        # Only instances that carry a grouped keypoints annotation are kept.
        points_ann = find(item.annotations, lambda x: \
            x.type == AnnotationType.points and \
            instance[0].group and x.group == instance[0].group)
        if not points_ann:
            return None

        elem = super().convert_instance(instance, item)
        elem.update(self.convert_points_object(points_ann))

        return elem
class _LabelsConverter(_TaskConverter):
    """Writes the COCO 'labels' task (a Datumaro extension)."""

    def save_categories(self, dataset):
        # COCO category ids are 1-based label indices.
        label_categories = dataset.categories().get(AnnotationType.label)
        if label_categories is None:
            return

        for idx, cat in enumerate(label_categories.items):
            self.categories.append({
                'id': 1 + idx,
                'name': cast(cat.name, str, ''),
                'supercategory': cast(cat.parent, str, ''),
            })

    def save_annotations(self, item):
        for ann in item.annotations:
            if ann.type != AnnotationType.label:
                continue

            elem = {
                'id': self._get_ann_id(ann),
                'image_id': self._get_image_id(item),
                'category_id': int(ann.label) + 1,
            }
            if 'score' in ann.attributes:
                try:
                    elem['score'] = float(ann.attributes['score'])
                except Exception as e:
                    # BUG FIX: '%e' is a float format and raised TypeError
                    # when given the exception; log lazily with '%s'.
                    log.warning("Item '%s': failed to convert attribute "
                        "'score': %s", item.id, e)
            if self._context._allow_attributes:
                elem['attributes'] = self._convert_attributes(ann)

            self.annotations.append(elem)
class CocoConverter(Converter):
    """Writes a dataset in MS COCO format: one JSON file per task and subset,
    plus an images directory."""

    @staticmethod
    def _split_tasks_string(s):
        # "instances, captions" -> [CocoTask.instances, CocoTask.captions]
        return [CocoTask[i.strip()] for i in s.split(',')]

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--segmentation-mode',
            choices=[m.name for m in SegmentationMode],
            default=SegmentationMode.guess.name,
            help="""
            Save mode for instance segmentation:|n
            - '{sm.guess.name}': guess the mode for each instance,|n
            |s|suse 'is_crowd' attribute as hint|n
            - '{sm.polygons.name}': save polygons,|n
            |s|smerge and convert masks, prefer polygons|n
            - '{sm.mask.name}': save masks,|n
            |s|smerge and convert polygons, prefer masks|n
            Default: %(default)s.
            """.format(sm=SegmentationMode))
        parser.add_argument('--crop-covered', action='store_true',
            help="Crop covered segments so that background objects' "
                "segmentation was more accurate (default: %(default)s)")
        parser.add_argument('--allow-attributes',
            type=str_to_bool, default=True,
            help="Allow export of attributes (default: %(default)s)")
        parser.add_argument('--tasks', type=cls._split_tasks_string,
            help="COCO task filter, comma-separated list of {%s} "
                "(default: all)" % ', '.join(t.name for t in CocoTask))
        return parser

    DEFAULT_IMAGE_EXT = CocoPath.IMAGE_EXT

    _TASK_CONVERTER = {
        CocoTask.image_info: _ImageInfoConverter,
        CocoTask.instances: _InstancesConverter,
        CocoTask.person_keypoints: _KeypointsConverter,
        CocoTask.captions: _CaptionsConverter,
        CocoTask.labels: _LabelsConverter,
    }

    def __init__(self, extractor, save_dir,
            tasks=None, segmentation_mode=None, crop_covered=False,
            allow_attributes=True, **kwargs):
        """
        tasks: a CocoTask, task name, or a list of them; None selects all.
        segmentation_mode: SegmentationMode or its name; None means 'guess'.
        crop_covered: crop background segments covered by other instances.
        allow_attributes: export annotation attributes.
        """
        super().__init__(extractor, save_dir, **kwargs)

        assert tasks is None or isinstance(tasks, (CocoTask, list, str))
        if isinstance(tasks, CocoTask):
            tasks = [tasks]
        elif isinstance(tasks, str):
            tasks = [CocoTask[tasks]]
        elif tasks:
            # BUG FIX: normalize into a new list instead of mutating the
            # caller's list in place
            normalized = []
            for t in tasks:
                if isinstance(t, str):
                    t = CocoTask[t]
                else:
                    assert t in CocoTask, t
                normalized.append(t)
            tasks = normalized
        self._tasks = tasks

        assert segmentation_mode is None or \
            isinstance(segmentation_mode, str) or \
            segmentation_mode in SegmentationMode
        if segmentation_mode is None:
            segmentation_mode = SegmentationMode.guess
        if isinstance(segmentation_mode, str):
            segmentation_mode = SegmentationMode[segmentation_mode]
        self._segmentation_mode = segmentation_mode

        self._crop_covered = crop_covered
        self._allow_attributes = allow_attributes

        # item.id -> COCO image id, shared by all task writers
        self._image_ids = {}

    def _make_dirs(self):
        self._images_dir = osp.join(self._save_dir, CocoPath.IMAGES_DIR)
        os.makedirs(self._images_dir, exist_ok=True)

        self._ann_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR)
        os.makedirs(self._ann_dir, exist_ok=True)

    def _make_task_converter(self, task):
        if task not in self._TASK_CONVERTER:
            raise NotImplementedError()
        return self._TASK_CONVERTER[task](self)

    def _make_task_converters(self):
        return { task: self._make_task_converter(task)
            for task in (self._tasks or self._TASK_CONVERTER) }

    def _get_image_id(self, item):
        # Prefer the source 'id' attribute; fall back to sequential numbering.
        image_id = self._image_ids.get(item.id)
        if image_id is None:
            image_id = cast(item.attributes.get('id'), int,
                len(self._image_ids) + 1)
            self._image_ids[item.id] = image_id
        return image_id

    def _save_image(self, item, path=None):
        # NOTE(review): 'path' is accepted for interface compatibility but
        # ignored; images always go into the images dir - confirm intended.
        super()._save_image(item,
            osp.join(self._images_dir, self._make_image_filename(item)))

    def apply(self):
        self._make_dirs()

        for subset_name in self._extractor.subsets() or [None]:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            task_converters = self._make_task_converters()
            for task_conv in task_converters.values():
                task_conv.save_categories(subset)
            for item in subset:
                if self._save_images:
                    if item.has_image:
                        self._save_image(item)
                    else:
                        log.debug("Item '%s' has no image info", item.id)
                for task_conv in task_converters.values():
                    task_conv.save_image_info(item,
                        self._make_image_filename(item))
                    task_conv.save_annotations(item)

            for task, task_conv in task_converters.items():
                # empty task files are skipped unless explicitly requested
                if task_conv.is_empty() and not self._tasks:
                    continue
                task_conv.write(osp.join(self._ann_dir,
                    '%s_%s.json' % (task.name, subset_name)))
class CocoInstancesConverter(CocoConverter):
    """CocoConverter pinned to the 'instances' task."""
    def __init__(self, *args, **kwargs):
        kwargs.update(tasks=CocoTask.instances)
        super().__init__(*args, **kwargs)
class CocoImageInfoConverter(CocoConverter):
    """CocoConverter pinned to the 'image_info' task."""
    def __init__(self, *args, **kwargs):
        kwargs.update(tasks=CocoTask.image_info)
        super().__init__(*args, **kwargs)
class CocoPersonKeypointsConverter(CocoConverter):
    """CocoConverter pinned to the 'person_keypoints' task."""
    def __init__(self, *args, **kwargs):
        kwargs.update(tasks=CocoTask.person_keypoints)
        super().__init__(*args, **kwargs)
class CocoCaptionsConverter(CocoConverter):
    """CocoConverter pinned to the 'captions' task."""
    def __init__(self, *args, **kwargs):
        kwargs.update(tasks=CocoTask.captions)
        super().__init__(*args, **kwargs)
class CocoLabelsConverter(CocoConverter):
    """CocoConverter pinned to the 'labels' task."""
    def __init__(self, *args, **kwargs):
        kwargs.update(tasks=CocoTask.labels)
        super().__init__(*args, **kwargs)

@ -1,261 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import logging as log
import os.path as osp
from pycocotools.coco import COCO
import pycocotools.mask as mask_utils
from datumaro.components.extractor import (SourceExtractor,
DEFAULT_SUBSET_NAME, DatasetItem,
AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories
)
from datumaro.util.image import Image
from .format import CocoTask, CocoPath
class _CocoExtractor(SourceExtractor):
    """Reads one COCO task JSON file into Datumaro items.

    The subset name is taken from the file name suffix ('<task>_<subset>.json');
    images are looked up in a sibling images directory when present.
    """

    def __init__(self, path, task, merge_instance_polygons=False):
        assert osp.isfile(path), path

        # 'instances_train.json' -> subset 'train'; no suffix -> default subset
        subset = osp.splitext(osp.basename(path))[0].rsplit('_', maxsplit=1)
        subset = subset[1] if len(subset) == 2 else None
        super().__init__(subset=subset)

        # Locate the dataset root and the images dir relative to the file,
        # preferring a per-subset image subdirectory when it exists.
        rootpath = ''
        if path.endswith(osp.join(CocoPath.ANNOTATIONS_DIR, osp.basename(path))):
            rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0]
        images_dir = ''
        if rootpath and osp.isdir(osp.join(rootpath, CocoPath.IMAGES_DIR)):
            images_dir = osp.join(rootpath, CocoPath.IMAGES_DIR)
            if osp.isdir(osp.join(images_dir, subset or DEFAULT_SUBSET_NAME)):
                images_dir = osp.join(images_dir, subset or DEFAULT_SUBSET_NAME)
        self._images_dir = images_dir
        self._task = task

        # When set, polygon parts of an instance are merged into one RLE mask.
        self._merge_instance_polygons = merge_instance_polygons

        loader = self._make_subset_loader(path)
        self._load_categories(loader)
        self._items = self._load_items(loader)

    def categories(self):
        """Return the categories parsed from the file."""
        return self._categories

    def __iter__(self):
        # Items are yielded in image-id order (insertion order of the dict).
        for item in self._items.values():
            yield item

    def __len__(self):
        return len(self._items)

    @staticmethod
    def _make_subset_loader(path):
        # COCO API has an 'unclosed file' warning
        coco_api = COCO()
        with open(path, 'r') as f:
            import json
            dataset = json.load(f)

        coco_api.dataset = dataset
        coco_api.createIndex()
        return coco_api

    def _load_categories(self, loader):
        """Populate self._categories (and the id mapping) for the task."""
        self._categories = {}

        if self._task in [CocoTask.instances, CocoTask.labels,
                CocoTask.person_keypoints,
                # TODO: Task.stuff, CocoTask.panoptic
                ]:
            label_categories, label_map = self._load_label_categories(loader)
            self._categories[AnnotationType.label] = label_categories
            # COCO category id -> 0-based Datumaro label index
            self._label_map = label_map

        if self._task == CocoTask.person_keypoints:
            person_kp_categories = self._load_person_kp_categories(loader)
            self._categories[AnnotationType.points] = person_kp_categories

    # pylint: disable=no-self-use
    def _load_label_categories(self, loader):
        # Build Datumaro labels from COCO categories, remembering the mapping
        # from (arbitrary) COCO category ids to dense 0-based indices.
        catIds = loader.getCatIds()
        cats = loader.loadCats(catIds)

        categories = LabelCategories()
        label_map = {}
        for idx, cat in enumerate(cats):
            label_map[cat['id']] = idx
            categories.add(name=cat['name'], parent=cat['supercategory'])

        return categories, label_map
    # pylint: enable=no-self-use

    def _load_person_kp_categories(self, loader):
        # Keypoint names and skeleton edges per label.
        catIds = loader.getCatIds()
        cats = loader.loadCats(catIds)

        categories = PointsCategories()
        for cat in cats:
            label_id = self._label_map[cat['id']]
            categories.add(label_id=label_id,
                labels=cat['keypoints'], joints=cat['skeleton']
            )

        return categories

    def _load_items(self, loader):
        """Build DatasetItems keyed by COCO image id."""
        items = OrderedDict()

        for img_id in loader.getImgIds():
            image_info = loader.loadImgs(img_id)[0]
            image_path = osp.join(self._images_dir, image_info['file_name'])

            image_size = (image_info.get('height'), image_info.get('width'))
            if all(image_size):
                image_size = (int(image_size[0]), int(image_size[1]))
            else:
                image_size = None
            image = Image(path=image_path, size=image_size)

            anns = loader.getAnnIds(imgIds=img_id)
            anns = loader.loadAnns(anns)
            anns = sum((self._load_annotations(a, image_info) for a in anns), [])

            items[img_id] = DatasetItem(
                id=osp.splitext(image_info['file_name'])[0],
                subset=self._subset, image=image, annotations=anns,
                attributes={'id': img_id})

        return items

    def _get_label_id(self, ann):
        # COCO category id 0 is reserved/unset; map to no label.
        cat_id = ann.get('category_id')
        if cat_id in [0, None]:
            return None
        return self._label_map[cat_id]

    def _load_annotations(self, ann, image_info=None):
        """Convert one COCO annotation dict into a list of Datumaro
        annotations, depending on the current task."""
        parsed_annotations = []

        ann_id = ann.get('id')

        attributes = {}
        if 'attributes' in ann:
            try:
                attributes.update(ann['attributes'])
            except Exception as e:
                log.debug("item #%s: failed to read annotation attributes: %s",
                    image_info['id'], e)
        if 'score' in ann:
            attributes['score'] = ann['score']

        group = ann_id # make sure all tasks' annotations are merged

        if self._task in [CocoTask.instances, CocoTask.person_keypoints]:
            x, y, w, h = ann['bbox']
            label_id = self._get_label_id(ann)

            is_crowd = bool(ann['iscrowd'])
            attributes['is_crowd'] = is_crowd

            if self._task is CocoTask.person_keypoints:
                # keypoints are [x, y, visibility] triplets
                keypoints = ann['keypoints']
                points = [p for i, p in enumerate(keypoints) if i % 3 != 2]
                visibility = keypoints[2::3]
                parsed_annotations.append(
                    Points(points, visibility, label=label_id,
                        id=ann_id, attributes=attributes, group=group)
                )

            segmentation = ann.get('segmentation')
            if segmentation and segmentation != [[]]:
                rle = None

                if isinstance(segmentation, list):
                    if not self._merge_instance_polygons:
                        # polygon - a single object can consist of multiple parts
                        for polygon_points in segmentation:
                            parsed_annotations.append(Polygon(
                                points=polygon_points, label=label_id,
                                id=ann_id, attributes=attributes, group=group
                            ))
                    else:
                        # merge all parts into a single mask RLE
                        img_h = image_info['height']
                        img_w = image_info['width']
                        rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
                        rle = mask_utils.merge(rles)
                elif isinstance(segmentation['counts'], list):
                    # uncompressed RLE
                    img_h = image_info['height']
                    img_w = image_info['width']
                    mask_h, mask_w = segmentation['size']
                    if img_h == mask_h and img_w == mask_w:
                        rle = mask_utils.frPyObjects(
                            [segmentation], mask_h, mask_w)[0]
                    else:
                        log.warning("item #%s: mask #%s "
                            "does not match image size: %s vs. %s. "
                            "Skipping this annotation.",
                            image_info['id'], ann_id,
                            (mask_h, mask_w), (img_h, img_w)
                        )
                else:
                    # compressed RLE
                    rle = segmentation

                if rle is not None:
                    parsed_annotations.append(RleMask(rle=rle, label=label_id,
                        id=ann_id, attributes=attributes, group=group
                    ))
            else:
                # no segmentation - fall back to the bounding box
                parsed_annotations.append(
                    Bbox(x, y, w, h, label=label_id,
                        id=ann_id, attributes=attributes, group=group)
                )
        elif self._task is CocoTask.labels:
            label_id = self._get_label_id(ann)
            parsed_annotations.append(
                Label(label=label_id,
                    id=ann_id, attributes=attributes, group=group)
            )
        elif self._task is CocoTask.captions:
            caption = ann['caption']
            parsed_annotations.append(
                Caption(caption,
                    id=ann_id, attributes=attributes, group=group)
            )
        else:
            raise NotImplementedError()

        return parsed_annotations
class CocoImageInfoExtractor(_CocoExtractor):
    """_CocoExtractor pinned to the 'image_info' task."""
    def __init__(self, path, **kwargs):
        kwargs.update(task=CocoTask.image_info)
        super().__init__(path, **kwargs)
class CocoCaptionsExtractor(_CocoExtractor):
    """_CocoExtractor pinned to the 'captions' task."""
    def __init__(self, path, **kwargs):
        kwargs.update(task=CocoTask.captions)
        super().__init__(path, **kwargs)
class CocoInstancesExtractor(_CocoExtractor):
    """_CocoExtractor pinned to the 'instances' task."""
    def __init__(self, path, **kwargs):
        kwargs.update(task=CocoTask.instances)
        super().__init__(path, **kwargs)
class CocoPersonKeypointsExtractor(_CocoExtractor):
    """_CocoExtractor pinned to the 'person_keypoints' task."""
    def __init__(self, path, **kwargs):
        kwargs.update(task=CocoTask.person_keypoints)
        super().__init__(path, **kwargs)
class CocoLabelsExtractor(_CocoExtractor):
    """_CocoExtractor pinned to the 'labels' task."""
    def __init__(self, path, **kwargs):
        kwargs.update(task=CocoTask.labels)
        super().__init__(path, **kwargs)

@ -1,23 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from enum import Enum
class CocoTask(Enum):
    """COCO annotation tasks supported by the plugin."""
    instances = 1
    person_keypoints = 2
    captions = 3
    labels = 4  # extension, does not exist in the original COCO format
    image_info = 5
    # panoptic and stuff tasks are not supported yet
class CocoPath:
    """Layout constants of a COCO dataset directory."""
    IMAGES_DIR = 'images'          # subdirectory with images
    ANNOTATIONS_DIR = 'annotations'  # subdirectory with task JSON files

    IMAGE_EXT = '.jpg'             # default image extension on export

@ -1,95 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import defaultdict
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.extractor import Importer
from datumaro.util.log_utils import logging_disabled
from .format import CocoTask
class CocoImporter(Importer):
    """Finds COCO task JSON files under a path and registers them as
    project sources."""

    _COCO_EXTRACTORS = {
        CocoTask.instances: 'coco_instances',
        CocoTask.person_keypoints: 'coco_person_keypoints',
        CocoTask.captions: 'coco_captions',
        CocoTask.labels: 'coco_labels',
        CocoTask.image_info: 'coco_image_info',
    }

    @classmethod
    def detect(cls, path):
        # Probe quietly - find_subsets() logs warnings for unknown files.
        with logging_disabled(log.WARN):
            return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find 'coco' dataset at '%s'" % path)

        # TODO: should be removed when proper label merging is implemented
        conflicting_types = {CocoTask.instances,
            CocoTask.person_keypoints, CocoTask.labels}
        ann_types = set(t for s in subsets.values() for t in s) \
            & conflicting_types
        if ann_types:
            # only one label-carrying task can be used at a time
            selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0]
            if 1 < len(ann_types):
                log.warning("Not implemented: "
                    "Found potentially conflicting source types with labels: %s. "
                    "Only one type will be used: %s" \
                    % (", ".join(t.name for t in ann_types), selected_ann_type.name))

        for ann_files in subsets.values():
            for ann_type, ann_file in ann_files.items():
                if ann_type in conflicting_types:
                    if ann_type is not selected_ann_type:
                        log.warning("Not implemented: "
                            "conflicting source '%s' is skipped." % ann_file)
                        continue
                log.info("Found a dataset at '%s'" % ann_file)

                source_name = osp.splitext(osp.basename(ann_file))[0]
                project.add_source(source_name, {
                    'url': ann_file,
                    'format': self._COCO_EXTRACTORS[ann_type],
                    'options': dict(extra_params),
                })

        return project

    @staticmethod
    def find_subsets(path):
        """Return {subset_name: {CocoTask: json_path}} found under 'path'."""
        if path.endswith('.json') and osp.isfile(path):
            subset_paths = [path]
        else:
            subset_paths = glob(osp.join(path, '**', '*_*.json'),
                recursive=True)

        subsets = defaultdict(dict)
        for subset_path in subset_paths:
            name_parts = osp.splitext(osp.basename(subset_path))[0] \
                .rsplit('_', maxsplit=1)

            ann_type = name_parts[0]
            try:
                ann_type = CocoTask[ann_type]
            except KeyError:
                # FIX: log.warn() is a deprecated alias of log.warning()
                log.warning("Skipping '%s': unknown subset "
                    "type '%s', the only known are: %s" % \
                    (subset_path, ann_type,
                        ', '.join([e.name for e in CocoTask])))
                continue
            subset_name = name_parts[1]
            subsets[subset_name][ann_type] = subset_path
        return dict(subsets)

@ -1,331 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from collections import OrderedDict
from xml.sax.saxutils import XMLGenerator
from datumaro.components.converter import Converter
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
from datumaro.util import cast, pairs
from .format import CvatPath
class XmlAnnotationWriter:
    """Streams CVAT XML 1.1 markup to a file object via SAX events,
    maintaining the current nesting level for indentation."""

    VERSION = '1.1'

    def __init__(self, f):
        self.xmlgen = XMLGenerator(f, 'utf-8')
        self._level = 0

    def _indent(self, newline=True):
        # Emit an optional newline plus indentation for the current level.
        if newline:
            self.xmlgen.ignorableWhitespace('\n')
        self.xmlgen.ignorableWhitespace(' ' * self._level)

    def _open_element(self, tag, attrs):
        # Start a nested element and descend one level.
        self._indent()
        self.xmlgen.startElement(tag, attrs)
        self._level += 1

    def _close_element(self, tag):
        # Ascend one level and close the element on its own line.
        self._level -= 1
        self._indent()
        self.xmlgen.endElement(tag)

    def _add_version(self):
        self._indent()
        self.xmlgen.startElement('version', {})
        self.xmlgen.characters(self.VERSION)
        self.xmlgen.endElement('version')

    def open_root(self):
        self.xmlgen.startDocument()
        self.xmlgen.startElement('annotations', {})
        self._level += 1
        self._add_version()

    def _add_meta(self, meta):
        # Recursively serialize nested OrderedDicts and (key, value) lists.
        self._level += 1
        for key, value in meta.items():
            if isinstance(value, OrderedDict):
                self._indent()
                self.xmlgen.startElement(key, {})
                self._add_meta(value)
                self._indent()
                self.xmlgen.endElement(key)
            elif isinstance(value, list):
                self._indent()
                self.xmlgen.startElement(key, {})
                for pair in value:
                    self._add_meta(OrderedDict([pair]))
                self._indent()
                self.xmlgen.endElement(key)
            else:
                self._indent()
                self.xmlgen.startElement(key, {})
                self.xmlgen.characters(value)
                self.xmlgen.endElement(key)
        self._level -= 1

    def write_meta(self, meta):
        self._indent()
        self.xmlgen.startElement('meta', {})
        self._add_meta(meta)
        self._indent()
        self.xmlgen.endElement('meta')

    def open_track(self, track):
        self._open_element('track', track)

    def open_image(self, image):
        self._open_element('image', image)

    def open_box(self, box):
        self._open_element('box', box)

    def open_polygon(self, polygon):
        self._open_element('polygon', polygon)

    def open_polyline(self, polyline):
        self._open_element('polyline', polyline)

    def open_points(self, points):
        self._open_element('points', points)

    def open_tag(self, tag):
        self._open_element('tag', tag)

    def add_attribute(self, attribute):
        # <attribute name="...">value</attribute>, written on a single line
        self._indent()
        self.xmlgen.startElement('attribute', {'name': attribute['name']})
        self.xmlgen.characters(attribute['value'])
        self.xmlgen.endElement('attribute')

    def close_box(self):
        self._close_element('box')

    def close_polygon(self):
        self._close_element('polygon')

    def close_polyline(self):
        self._close_element('polyline')

    def close_points(self):
        self._close_element('points')

    def close_tag(self):
        self._close_element('tag')

    def close_image(self):
        self._close_element('image')

    def close_track(self):
        self._close_element('track')

    def close_root(self):
        self._close_element('annotations')
        self.xmlgen.endDocument()
class _SubsetWriter:
    """Serializes one subset of a dataset into a CVAT XML document."""

    def __init__(self, file, name, extractor, context):
        # context is the converter; it provides image saving options/paths.
        self._writer = XmlAnnotationWriter(file)
        self._name = name
        self._extractor = extractor
        self._context = context

    def write(self):
        """Write the whole document: meta section, then one <image> per item."""
        self._writer.open_root()
        self._write_meta()

        for index, item in enumerate(self._extractor):
            self._write_item(item, index)

        self._writer.close_root()

    def _write_item(self, item, index):
        # Prefer the item's 'frame' attribute over the sequential index.
        image_info = OrderedDict([
            ("id", str(cast(item.attributes.get('frame'), int, index))),
        ])
        filename = item.id + CvatPath.IMAGE_EXT
        image_info["name"] = filename
        if item.has_image:
            size = item.image.size
            if size:
                h, w = size
                image_info["width"] = str(w)
                image_info["height"] = str(h)

            if self._context._save_images:
                self._context._save_image(item,
                    osp.join(self._context._images_dir, filename))
        else:
            log.debug("Item '%s' has no image info", item.id)
        self._writer.open_image(image_info)

        for ann in item.annotations:
            if ann.type in {AnnotationType.points, AnnotationType.polyline,
                    AnnotationType.polygon, AnnotationType.bbox}:
                self._write_shape(ann)
            elif ann.type == AnnotationType.label:
                self._write_tag(ann)
            else:
                continue

        self._writer.close_image()

    def _write_meta(self):
        # Reproduce the CVAT task description; unknown fields are left blank.
        label_cat = self._extractor.categories()[AnnotationType.label]
        meta = OrderedDict([
            ("task", OrderedDict([
                ("id", ""),
                ("name", self._name),
                ("size", str(len(self._extractor))),
                ("mode", "annotation"),
                ("overlap", ""),
                ("start_frame", "0"),
                ("stop_frame", str(len(self._extractor))),
                ("frame_filter", ""),
                ("z_order", "True"),

                ("labels", [
                    ("label", OrderedDict([
                        ("name", label.name),
                        ("attributes", [
                            ("attribute", OrderedDict([
                                ("name", attr),
                                ("mutable", "True"),
                                ("input_type", "text"),
                                ("default_value", ""),
                                ("values", ""),
                            ])) for attr in label.attributes
                        ])
                    ])) for label in label_cat.items
                ]),
            ])),
        ])
        self._writer.write_meta(meta)

    def _get_label(self, label_id):
        # Resolve a label index into the label category descriptor.
        label_cat = self._extractor.categories()[AnnotationType.label]
        return label_cat.items[label_id]

    def _write_shape(self, shape):
        # Unlabeled shapes cannot be represented in CVAT XML.
        if shape.label is None:
            return

        shape_data = OrderedDict([
            ("label", self._get_label(shape.label).name),
            ("occluded", str(int(shape.attributes.get('occluded', False)))),
        ])

        if shape.type == AnnotationType.bbox:
            # boxes are written as corner coordinates
            shape_data.update(OrderedDict([
                ("xtl", "{:.2f}".format(shape.points[0])),
                ("ytl", "{:.2f}".format(shape.points[1])),
                ("xbr", "{:.2f}".format(shape.points[2])),
                ("ybr", "{:.2f}".format(shape.points[3]))
            ]))
        else:
            # other shapes as "x1,y1;x2,y2;..." point lists
            shape_data.update(OrderedDict([
                ("points", ';'.join((
                    ','.join((
                        "{:.2f}".format(x),
                        "{:.2f}".format(y)
                    )) for x, y in pairs(shape.points))
                )),
            ]))

        shape_data['z_order'] = str(int(shape.z_order))
        if shape.group:
            shape_data['group_id'] = str(shape.group)

        if shape.type == AnnotationType.bbox:
            self._writer.open_box(shape_data)
        elif shape.type == AnnotationType.polygon:
            self._writer.open_polygon(shape_data)
        elif shape.type == AnnotationType.polyline:
            self._writer.open_polyline(shape_data)
        elif shape.type == AnnotationType.points:
            self._writer.open_points(shape_data)
        else:
            raise NotImplementedError("unknown shape type")

        for attr_name, attr_value in shape.attributes.items():
            if isinstance(attr_value, bool):
                attr_value = 'true' if attr_value else 'false'
            # only attributes declared for the label are written
            if attr_name in self._get_label(shape.label).attributes:
                self._writer.add_attribute(OrderedDict([
                    ("name", str(attr_name)),
                    ("value", str(attr_value)),
                ]))

        if shape.type == AnnotationType.bbox:
            self._writer.close_box()
        elif shape.type == AnnotationType.polygon:
            self._writer.close_polygon()
        elif shape.type == AnnotationType.polyline:
            self._writer.close_polyline()
        elif shape.type == AnnotationType.points:
            self._writer.close_points()
        else:
            raise NotImplementedError("unknown shape type")

    def _write_tag(self, label):
        # Labels become <tag> elements; unlabeled entries are skipped.
        if label.label is None:
            return

        tag_data = OrderedDict([
            ('label', self._get_label(label.label).name),
        ])
        if label.group:
            tag_data['group_id'] = str(label.group)
        self._writer.open_tag(tag_data)

        for attr_name, attr_value in label.attributes.items():
            if isinstance(attr_value, bool):
                attr_value = 'true' if attr_value else 'false'
            if attr_name in self._get_label(label.label).attributes:
                self._writer.add_attribute(OrderedDict([
                    ("name", str(attr_name)),
                    ("value", str(attr_value)),
                ]))

        self._writer.close_tag()
class CvatConverter(Converter):
    """Writes a dataset as CVAT XML, one file per subset."""

    DEFAULT_IMAGE_EXT = CvatPath.IMAGE_EXT

    def apply(self):
        images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR)
        os.makedirs(images_dir, exist_ok=True)
        self._images_dir = images_dir

        # An empty subset list means a single, default subset.
        for subset_name in self._extractor.subsets() or [None]:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f:
                _SubsetWriter(f, subset_name, subset, self).write()

@ -1,316 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os.path as osp
from defusedxml import ElementTree
from datumaro.components.extractor import (SourceExtractor, DatasetItem,
AnnotationType, Points, Polygon, PolyLine, Bbox, Label,
LabelCategories
)
from datumaro.util.image import Image
from .format import CvatPath
class CvatExtractor(SourceExtractor):
_SUPPORTED_SHAPES = ('box', 'polygon', 'polyline', 'points')
    def __init__(self, path):
        """Parse a CVAT XML file; images are looked up next to it."""
        assert osp.isfile(path), path
        rootpath = osp.dirname(path)
        images_dir = ''
        if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)):
            images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR)
        self._images_dir = images_dir
        self._path = path

        # The subset name is the file name without extension.
        super().__init__(subset=osp.splitext(osp.basename(path))[0])

        items, categories = self._parse(path)
        self._items = self._load_items(items)
        self._categories = categories
    def categories(self):
        """Return the categories parsed from the file's meta section."""
        return self._categories
def __iter__(self):
for item in self._items.values():
yield item
    def __len__(self):
        # Number of parsed dataset items.
        return len(self._items)
@classmethod
def _parse(cls, path):
context = ElementTree.iterparse(path, events=("start", "end"))
context = iter(context)
categories, frame_size = cls._parse_meta(context)
items = OrderedDict()
track = None
shape = None
tag = None
attributes = None
image = None
for ev, el in context:
if ev == 'start':
if el.tag == 'track':
track = {
'id': el.attrib['id'],
'label': el.attrib.get('label'),
'group': int(el.attrib.get('group_id', 0)),
'height': frame_size[0],
'width': frame_size[1],
}
elif el.tag == 'image':
image = {
'name': el.attrib.get('name'),
'frame': el.attrib['id'],
'width': el.attrib.get('width'),
'height': el.attrib.get('height'),
}
elif el.tag in cls._SUPPORTED_SHAPES and (track or image):
attributes = {}
shape = {
'type': None,
'attributes': attributes,
}
if track:
shape.update(track)
shape['track_id'] = int(track['id'])
if image:
shape.update(image)
elif el.tag == 'tag' and image:
attributes = {}
tag = {
'frame': image['frame'],
'attributes': attributes,
'group': int(el.attrib.get('group_id', 0)),
'label': el.attrib['label'],
}
elif ev == 'end':
if el.tag == 'attribute' and attributes is not None:
attr_value = el.text
if el.text in ['true', 'false']:
attr_value = attr_value == 'true'
else:
try:
attr_value = float(attr_value)
except ValueError:
pass
attributes[el.attrib['name']] = attr_value
elif el.tag in cls._SUPPORTED_SHAPES:
if track is not None:
shape['frame'] = el.attrib['frame']
shape['outside'] = (el.attrib.get('outside') == '1')
shape['keyframe'] = (el.attrib.get('keyframe') == '1')
if image is not None:
shape['label'] = el.attrib.get('label')
shape['group'] = int(el.attrib.get('group_id', 0))
shape['type'] = el.tag
shape['occluded'] = (el.attrib.get('occluded') == '1')
shape['z_order'] = int(el.attrib.get('z_order', 0))
if el.tag == 'box':
shape['points'] = list(map(float, [
el.attrib['xtl'], el.attrib['ytl'],
el.attrib['xbr'], el.attrib['ybr'],
]))
else:
shape['points'] = []
for pair in el.attrib['points'].split(';'):
shape['points'].extend(map(float, pair.split(',')))
frame_desc = items.get(shape['frame'], {'annotations': []})
frame_desc['annotations'].append(
cls._parse_shape_ann(shape, categories))
items[shape['frame']] = frame_desc
shape = None
elif el.tag == 'tag':
frame_desc = items.get(tag['frame'], {'annotations': []})
frame_desc['annotations'].append(
cls._parse_tag_ann(tag, categories))
items[tag['frame']] = frame_desc
tag = None
elif el.tag == 'track':
track = None
elif el.tag == 'image':
frame_desc = items.get(image['frame'], {'annotations': []})
frame_desc.update({
'name': image.get('name'),
'height': image.get('height'),
'width': image.get('width'),
})
items[image['frame']] = frame_desc
image = None
el.clear()
return items, categories
@staticmethod
def _parse_meta(context):
ev, el = next(context)
if not (ev == 'start' and el.tag == 'annotations'):
raise Exception("Unexpected token ")
categories = {}
frame_size = None
mode = None
labels = OrderedDict()
label = None
# Recursive descent parser
el = None
states = ['annotations']
def accepted(expected_state, tag, next_state=None):
state = states[-1]
if state == expected_state and el is not None and el.tag == tag:
if not next_state:
next_state = tag
states.append(next_state)
return True
return False
def consumed(expected_state, tag):
state = states[-1]
if state == expected_state and el is not None and el.tag == tag:
states.pop()
return True
return False
for ev, el in context:
if ev == 'start':
if accepted('annotations', 'meta'): pass
elif accepted('meta', 'task'): pass
elif accepted('task', 'mode'): pass
elif accepted('task', 'original_size'):
frame_size = [None, None]
elif accepted('original_size', 'height', next_state='frame_height'): pass
elif accepted('original_size', 'width', next_state='frame_width'): pass
elif accepted('task', 'labels'): pass
elif accepted('labels', 'label'):
label = { 'name': None, 'attributes': set() }
elif accepted('label', 'name', next_state='label_name'): pass
elif accepted('label', 'attributes'): pass
elif accepted('attributes', 'attribute'): pass
elif accepted('attribute', 'name', next_state='attr_name'): pass
elif accepted('annotations', 'image') or \
accepted('annotations', 'track') or \
accepted('annotations', 'tag'):
break
else:
pass
elif ev == 'end':
if consumed('meta', 'meta'):
break
elif consumed('task', 'task'): pass
elif consumed('mode', 'mode'):
mode = el.text
elif consumed('original_size', 'original_size'): pass
elif consumed('frame_height', 'height'):
frame_size[0] = int(el.text)
elif consumed('frame_width', 'width'):
frame_size[1] = int(el.text)
elif consumed('label_name', 'name'):
label['name'] = el.text
elif consumed('attr_name', 'name'):
label['attributes'].add(el.text)
elif consumed('attribute', 'attribute'): pass
elif consumed('attributes', 'attributes'): pass
elif consumed('label', 'label'):
labels[label['name']] = label['attributes']
label = None
elif consumed('labels', 'labels'): pass
else:
pass
assert len(states) == 1 and states[0] == 'annotations', \
"Expected 'meta' section in the annotation file, path: %s" % states
common_attrs = ['occluded']
if mode == 'interpolation':
common_attrs.append('keyframe')
common_attrs.append('outside')
common_attrs.append('track_id')
label_cat = LabelCategories(attributes=common_attrs)
for label, attrs in labels.items():
label_cat.add(label, attributes=attrs)
categories[AnnotationType.label] = label_cat
return categories, frame_size
@classmethod
def _parse_shape_ann(cls, ann, categories):
ann_id = ann.get('id', 0)
ann_type = ann['type']
attributes = ann.get('attributes') or {}
if 'occluded' in categories[AnnotationType.label].attributes:
attributes['occluded'] = ann.get('occluded', False)
if 'outside' in ann:
attributes['outside'] = ann['outside']
if 'keyframe' in ann:
attributes['keyframe'] = ann['keyframe']
if 'track_id' in ann:
attributes['track_id'] = ann['track_id']
group = ann.get('group')
label = ann.get('label')
label_id = categories[AnnotationType.label].find(label)[0]
z_order = ann.get('z_order', 0)
points = ann.get('points', [])
if ann_type == 'polyline':
return PolyLine(points, label=label_id, z_order=z_order,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'polygon':
return Polygon(points, label=label_id, z_order=z_order,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'points':
return Points(points, label=label_id, z_order=z_order,
id=ann_id, attributes=attributes, group=group)
elif ann_type == 'box':
x, y = points[0], points[1]
w, h = points[2] - x, points[3] - y
return Bbox(x, y, w, h, label=label_id, z_order=z_order,
id=ann_id, attributes=attributes, group=group)
else:
raise NotImplementedError("Unknown annotation type '%s'" % ann_type)
@classmethod
def _parse_tag_ann(cls, ann, categories):
label = ann.get('label')
label_id = categories[AnnotationType.label].find(label)[0]
group = ann.get('group')
attributes = ann.get('attributes')
return Label(label_id, attributes=attributes, group=group)
def _load_items(self, parsed):
for frame_id, item_desc in parsed.items():
name = item_desc.get('name', 'frame_%06d.png' % int(frame_id))
image = osp.join(self._images_dir, name)
image_size = (item_desc.get('height'), item_desc.get('width'))
if all(image_size):
image = Image(path=image, size=tuple(map(int, image_size)))
parsed[frame_id] = DatasetItem(id=osp.splitext(name)[0],
subset=self._subset, image=image,
annotations=item_desc.get('annotations'),
attributes={'frame': int(frame_id)})
return parsed

@ -1,9 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
class CvatPath:
    """Filesystem layout constants of the CVAT dataset format."""

    # Directory holding the dataset images, next to the XML files.
    IMAGES_DIR = 'images'

    # Default extension used when exporting images.
    IMAGE_EXT = '.jpg'

@ -1,51 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.extractor import Importer
class CvatImporter(Importer):
    """Builds a project from CVAT XML annotation files found at a path."""

    EXTRACTOR_NAME = 'cvat'

    @classmethod
    def detect(cls, path):
        """Return True if the path contains at least one CVAT subset file."""
        return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import

        found = self.find_subsets(path)
        if not found:
            raise Exception("Failed to find 'cvat' dataset at '%s'" % path)

        project = Project()
        for ann_file in found:
            if not osp.isfile(ann_file):
                continue

            log.info("Found a dataset at '%s'" % ann_file)

            # Each XML file becomes a separately named project source.
            source_name = osp.splitext(osp.basename(ann_file))[0]
            project.add_source(source_name, {
                'url': ann_file,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project

    @staticmethod
    def find_subsets(path):
        """List annotation candidates: the file itself, or '*.xml' under a dir."""
        if path.endswith('.xml') and osp.isfile(path):
            return [path]
        return glob(osp.join(path, '**', '*.xml'), recursive=True)

@ -1,261 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=no-self-use
import json
import numpy as np
import os
import os.path as osp
from datumaro.components.converter import Converter
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME, Annotation, _Shape,
Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption,
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.util import cast
import pycocotools.mask as mask_utils
from .format import DatumaroPath
class _SubsetWriter:
    """Accumulates one subset's items and categories, then dumps them as JSON."""

    def __init__(self, name, context):
        # 'context' is the owning converter; it provides image saving helpers.
        self._name = name
        self._context = context

        self._data = {
            'info': {},
            'categories': {},
            'items': [],
        }

    @property
    def categories(self):
        return self._data['categories']

    @property
    def items(self):
        return self._data['items']

    def write_item(self, item):
        """Serialize one DatasetItem, saving its image if the context requests it."""
        annotations = []
        item_desc = {
            'id': item.id,
            'annotations': annotations,
        }

        if item.attributes:
            item_desc['attr'] = item.attributes

        if item.path:
            item_desc['path'] = item.path

        if item.has_image:
            path = item.image.path
            if self._context._save_images:
                # Recorded path points at the copy written by the converter.
                path = self._context._make_image_filename(item)
                self._context._save_image(item, path)

            item_desc['image'] = {
                'size': item.image.size,
                'path': path,
            }

        self.items.append(item_desc)

        # NOTE: the isinstance dispatch order matters if annotation classes
        # subclass each other (e.g. RleMask is handled via the Mask branch).
        for ann in item.annotations:
            if isinstance(ann, Label):
                converted_ann = self._convert_label_object(ann)
            elif isinstance(ann, Mask):
                converted_ann = self._convert_mask_object(ann)
            elif isinstance(ann, Points):
                converted_ann = self._convert_points_object(ann)
            elif isinstance(ann, PolyLine):
                converted_ann = self._convert_polyline_object(ann)
            elif isinstance(ann, Polygon):
                converted_ann = self._convert_polygon_object(ann)
            elif isinstance(ann, Bbox):
                converted_ann = self._convert_bbox_object(ann)
            elif isinstance(ann, Caption):
                converted_ann = self._convert_caption_object(ann)
            else:
                raise NotImplementedError()
            annotations.append(converted_ann)

    def write_categories(self, categories):
        """Serialize the categories mapping (one entry per annotation type)."""
        for ann_type, desc in categories.items():
            if isinstance(desc, LabelCategories):
                converted_desc = self._convert_label_categories(desc)
            elif isinstance(desc, MaskCategories):
                converted_desc = self._convert_mask_categories(desc)
            elif isinstance(desc, PointsCategories):
                converted_desc = self._convert_points_categories(desc)
            else:
                raise NotImplementedError()
            self.categories[ann_type.name] = converted_desc

    def write(self, save_dir):
        """Dump the accumulated data to '<save_dir>/<subset name>.json'."""
        with open(osp.join(save_dir, '%s.json' % (self._name)), 'w') as f:
            json.dump(self._data, f)

    def _convert_annotation(self, obj):
        # Common fields shared by every annotation kind.
        assert isinstance(obj, Annotation)

        ann_json = {
            'id': cast(obj.id, int),
            'type': cast(obj.type.name, str),
            'attributes': obj.attributes,
            'group': cast(obj.group, int, 0),
        }
        return ann_json

    def _convert_label_object(self, obj):
        converted = self._convert_annotation(obj)

        converted.update({
            'label_id': cast(obj.label, int),
        })
        return converted

    def _convert_mask_object(self, obj):
        converted = self._convert_annotation(obj)

        if isinstance(obj, RleMask):
            rle = obj.rle
        else:
            # Encode a dense mask as COCO RLE; encode() expects a
            # Fortran-ordered uint8 array.
            rle = mask_utils.encode(
                np.require(obj.image, dtype=np.uint8, requirements='F'))

        converted.update({
            'label_id': cast(obj.label, int),
            'rle': {
                # serialize as compressed COCO mask
                'counts': rle['counts'].decode('ascii'),
                'size': list(int(c) for c in rle['size']),
            },
            'z_order': obj.z_order,
        })
        return converted

    def _convert_shape_object(self, obj):
        assert isinstance(obj, _Shape)
        converted = self._convert_annotation(obj)

        converted.update({
            'label_id': cast(obj.label, int),
            'points': [float(p) for p in obj.points],
            'z_order': obj.z_order,
        })
        return converted

    def _convert_polyline_object(self, obj):
        return self._convert_shape_object(obj)

    def _convert_polygon_object(self, obj):
        return self._convert_shape_object(obj)

    def _convert_bbox_object(self, obj):
        converted = self._convert_shape_object(obj)
        # Boxes are stored as [x, y, w, h] instead of the raw point list.
        converted.pop('points', None)
        converted['bbox'] = [float(p) for p in obj.get_bbox()]
        return converted

    def _convert_points_object(self, obj):
        converted = self._convert_shape_object(obj)

        converted.update({
            'visibility': [int(v.value) for v in obj.visibility],
        })
        return converted

    def _convert_caption_object(self, obj):
        converted = self._convert_annotation(obj)

        converted.update({
            'caption': cast(obj.caption, str),
        })
        return converted

    def _convert_label_categories(self, obj):
        converted = {
            'labels': [],
        }
        for label in obj.items:
            converted['labels'].append({
                'name': cast(label.name, str),
                'parent': cast(label.parent, str),
            })
        return converted

    def _convert_mask_categories(self, obj):
        converted = {
            'colormap': [],
        }
        for label_id, color in obj.colormap.items():
            converted['colormap'].append({
                'label_id': int(label_id),
                'r': int(color[0]),
                'g': int(color[1]),
                'b': int(color[2]),
            })
        return converted

    def _convert_points_categories(self, obj):
        converted = {
            'items': [],
        }
        for label_id, item in obj.items.items():
            converted['items'].append({
                'label_id': int(label_id),
                'labels': [cast(label, str) for label in item.labels],
                'joints': [list(map(int, j)) for j in item.joints],
            })
        return converted
class DatumaroConverter(Converter):
    """Exports a dataset in the native Datumaro JSON format."""

    DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT

    def apply(self):
        """Write one JSON annotation file per subset, plus saved images."""
        os.makedirs(self._save_dir, exist_ok=True)

        self._images_dir = osp.join(self._save_dir, DatumaroPath.IMAGES_DIR)
        os.makedirs(self._images_dir, exist_ok=True)

        self._annotations_dir = osp.join(
            self._save_dir, DatumaroPath.ANNOTATIONS_DIR)
        os.makedirs(self._annotations_dir, exist_ok=True)

        subset_names = [n or DEFAULT_SUBSET_NAME
            for n in (self._extractor.subsets() or [None])]
        writers = {n: _SubsetWriter(n, self) for n in subset_names}

        # Categories are shared, so every subset file gets the same copy.
        categories = self._extractor.categories()
        for writer in writers.values():
            writer.write_categories(categories)

        for item in self._extractor:
            writers[item.subset or DEFAULT_SUBSET_NAME].write_item(item)

        for writer in writers.values():
            writer.write(self._annotations_dir)

    def _save_image(self, item, path=None):
        """Save the item's image under the converter's images directory."""
        super()._save_image(item,
            osp.join(self._images_dir, self._make_image_filename(item)))
class DatumaroProjectConverter(Converter):
    """Exports a dataset as a complete Datumaro project directory."""

    @classmethod
    def convert(cls, extractor, save_dir, **kwargs):
        """Generate a project at 'save_dir' and export the dataset into it."""
        from datumaro.components.project import Project

        os.makedirs(save_dir, exist_ok=True)

        project = Project.generate(save_dir,
            config=kwargs.pop('project_config', None))

        # The dataset itself lives in the project's dataset subdirectory.
        dataset_dir = osp.join(
            project.config.project_dir, project.config.dataset_dir)
        DatumaroConverter.convert(extractor, save_dir=dataset_dir, **kwargs)

@ -1,157 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import json
import os.path as osp
from datumaro.components.extractor import (SourceExtractor, DatasetItem,
AnnotationType, Label, RleMask, Points, Polygon, PolyLine, Bbox, Caption,
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.util.image import Image
from .format import DatumaroPath
class DatumaroExtractor(SourceExtractor):
    """Reads a dataset subset from a Datumaro-format JSON annotation file."""

    def __init__(self, path):
        assert osp.isfile(path), path
        rootpath = ''
        # If the file lives in '<root>/annotations/', images are expected
        # under '<root>/images/'.
        if path.endswith(osp.join(DatumaroPath.ANNOTATIONS_DIR, osp.basename(path))):
            rootpath = path.rsplit(DatumaroPath.ANNOTATIONS_DIR, maxsplit=1)[0]
        images_dir = ''
        if rootpath and osp.isdir(osp.join(rootpath, DatumaroPath.IMAGES_DIR)):
            images_dir = osp.join(rootpath, DatumaroPath.IMAGES_DIR)
        self._images_dir = images_dir

        # The subset name is derived from the annotation file name.
        super().__init__(subset=osp.splitext(osp.basename(path))[0])

        with open(path, 'r') as f:
            parsed_anns = json.load(f)
        self._categories = self._load_categories(parsed_anns)
        self._items = self._load_items(parsed_anns)

    def categories(self):
        """Return the parsed categories mapping."""
        return self._categories

    def __iter__(self):
        for item in self._items:
            yield item

    def __len__(self):
        return len(self._items)

    @staticmethod
    def _load_categories(parsed):
        """Rebuild the categories mapping from its JSON representation."""
        categories = {}

        parsed_label_cat = parsed['categories'].get(AnnotationType.label.name)
        if parsed_label_cat:
            label_categories = LabelCategories()
            for item in parsed_label_cat['labels']:
                label_categories.add(item['name'], parent=item['parent'])

            categories[AnnotationType.label] = label_categories

        parsed_mask_cat = parsed['categories'].get(AnnotationType.mask.name)
        if parsed_mask_cat:
            colormap = {}
            for item in parsed_mask_cat['colormap']:
                colormap[int(item['label_id'])] = \
                    (item['r'], item['g'], item['b'])

            mask_categories = MaskCategories(colormap=colormap)
            categories[AnnotationType.mask] = mask_categories

        parsed_points_cat = parsed['categories'].get(AnnotationType.points.name)
        if parsed_points_cat:
            point_categories = PointsCategories()
            for item in parsed_points_cat['items']:
                point_categories.add(int(item['label_id']),
                    item['labels'], joints=item['joints'])

            categories[AnnotationType.points] = point_categories

        return categories

    def _load_items(self, parsed):
        """Rebuild DatasetItem objects from their JSON descriptions."""
        items = []
        for item_desc in parsed['items']:
            item_id = item_desc['id']

            image = None
            image_info = item_desc.get('image')
            if image_info:
                # Fall back to '<item id>.jpg' when no explicit path is stored.
                image_path = image_info.get('path') or \
                    item_id + DatumaroPath.IMAGE_EXT
                image_path = osp.join(self._images_dir, image_path)
                image = Image(path=image_path, size=image_info.get('size'))

            annotations = self._load_annotations(item_desc)

            item = DatasetItem(id=item_id, subset=self._subset,
                annotations=annotations, image=image,
                attributes=item_desc.get('attr'))

            items.append(item)

        return items

    @staticmethod
    def _load_annotations(item):
        """Rebuild the annotation objects of a single item description."""
        parsed = item['annotations']
        loaded = []

        for ann in parsed:
            ann_id = ann.get('id')
            ann_type = AnnotationType[ann['type']]
            attributes = ann.get('attributes')
            group = ann.get('group')

            label_id = ann.get('label_id')
            z_order = ann.get('z_order')
            points = ann.get('points')

            if ann_type == AnnotationType.label:
                loaded.append(Label(label=label_id,
                    id=ann_id, attributes=attributes, group=group))

            elif ann_type == AnnotationType.mask:
                rle = ann['rle']
                # COCO RLE 'counts' are stored as an ASCII string in JSON;
                # the mask utils expect bytes.
                rle['counts'] = rle['counts'].encode('ascii')
                loaded.append(RleMask(rle=rle, label=label_id,
                    id=ann_id, attributes=attributes, group=group,
                    z_order=z_order))

            elif ann_type == AnnotationType.polyline:
                loaded.append(PolyLine(points, label=label_id,
                    id=ann_id, attributes=attributes, group=group,
                    z_order=z_order))

            elif ann_type == AnnotationType.polygon:
                loaded.append(Polygon(points, label=label_id,
                    id=ann_id, attributes=attributes, group=group,
                    z_order=z_order))

            elif ann_type == AnnotationType.bbox:
                x, y, w, h = ann['bbox']
                loaded.append(Bbox(x, y, w, h, label=label_id,
                    id=ann_id, attributes=attributes, group=group,
                    z_order=z_order))

            elif ann_type == AnnotationType.points:
                loaded.append(Points(points, label=label_id,
                    id=ann_id, attributes=attributes, group=group,
                    z_order=z_order))

            elif ann_type == AnnotationType.caption:
                caption = ann.get('caption')
                loaded.append(Caption(caption,
                    id=ann_id, attributes=attributes, group=group))

            else:
                raise NotImplementedError()

        return loaded

@ -1,12 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
class DatumaroPath:
    """Filesystem layout constants of the Datumaro dataset format."""

    IMAGES_DIR = 'images'            # saved item images
    ANNOTATIONS_DIR = 'annotations'  # per-subset JSON files
    MASKS_DIR = 'masks'              # externally stored mask images

    IMAGE_EXT = '.jpg'               # default image extension
    MASK_EXT = '.png'                # default mask extension

@ -1,56 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.extractor import Importer
from .format import DatumaroPath
class DatumaroImporter(Importer):
    """Builds a project from Datumaro-format JSON annotation files."""

    EXTRACTOR_NAME = 'datumaro'

    @classmethod
    def detect(cls, path):
        """Return True if the path contains at least one subset file."""
        return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import

        found = self.find_subsets(path)
        if not found:
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        project = Project()
        for ann_file in found:
            if not osp.isfile(ann_file):
                continue

            log.info("Found a dataset at '%s'" % ann_file)

            # Each JSON file becomes a separately named project source.
            source_name = osp.splitext(osp.basename(ann_file))[0]
            project.add_source(source_name, {
                'url': ann_file,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project

    @staticmethod
    def find_subsets(path):
        """List JSON subset files: the file itself, or '*.json' in the
        directory and in its 'annotations' subdirectory."""
        if path.endswith('.json') and osp.isfile(path):
            return [path]

        found = glob(osp.join(path, '*.json'))
        if osp.basename(osp.normpath(path)) != DatumaroPath.ANNOTATIONS_DIR:
            ann_dir = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
            found += glob(osp.join(ann_dir, '*.json'))
        return found

@ -1,76 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from datumaro.components.extractor import DatasetItem, SourceExtractor, Importer
from datumaro.components.converter import Converter
from datumaro.util.image import Image
class ImageDirImporter(Importer):
    """Builds a project from a plain directory of images."""

    EXTRACTOR_NAME = 'image_dir'

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        project = Project()
        # The directory name doubles as both the source name and its URL.
        source_name = osp.basename(osp.normpath(path))
        project.add_source(source_name, {
            'url': source_name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
class ImageDirExtractor(SourceExtractor):
    """Treats every loadable image under a directory as a dataset item."""

    def __init__(self, url):
        super().__init__()

        assert osp.isdir(url), url

        collected = []
        for dirpath, _, filenames in os.walk(url):
            for filename in filenames:
                full_path = osp.join(dirpath, filename)
                try:
                    image = Image(full_path)
                    # force loading
                    image.data # pylint: disable=pointless-statement
                except Exception:
                    # Not a readable image - skip the file.
                    continue

                # The item id is the path relative to the root, sans extension.
                item_id = osp.relpath(osp.splitext(full_path)[0], url)
                collected.append(DatasetItem(id=item_id, image=image))

        self._items = collected

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)
class ImageDirConverter(Converter):
    """Dumps the dataset's images into a flat directory."""

    DEFAULT_IMAGE_EXT = '.jpg'

    def apply(self):
        """Save every item's image; items without image info are skipped."""
        os.makedirs(self._save_dir, exist_ok=True)

        for item in self._extractor:
            if not item.has_image:
                log.debug("Item '%s' has no image info", item.id)
                continue
            self._save_image(item,
                osp.join(self._save_dir, self._make_image_filename(item)))

@ -1,437 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import defaultdict
from defusedxml import ElementTree
import logging as log
import numpy as np
import os
import os.path as osp
from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME,
DatasetItem, AnnotationType, Mask, Bbox, Polygon, LabelCategories
)
from datumaro.components.extractor import Importer
from datumaro.components.converter import Converter
from datumaro.util.image import Image, save_image
from datumaro.util.mask_tools import load_mask, find_mask_bbox
class LabelMePath:
    """Filesystem layout constants of the LabelMe dataset format."""

    # Directory that stores externally referenced mask images.
    MASKS_DIR = 'Masks'

    # Default extension used when exporting images.
    IMAGE_EXT = '.jpg'
class LabelMeExtractor(SourceExtractor):
    """Reads a LabelMe dataset: one XML annotation file per image."""

    def __init__(self, path, subset_name=None):
        assert osp.isdir(path), path
        super().__init__(subset=subset_name)

        items, categories = self._parse(path)
        self._categories = categories
        self._items = items

    def categories(self):
        """Return the parsed label categories mapping."""
        return self._categories

    def __iter__(self):
        for item in self._items:
            yield item

    def __len__(self):
        return len(self._items)

    def _parse(self, path):
        """Parse all '*.xml' files in 'path'; labels are registered on the fly."""
        categories = {
            AnnotationType.label: LabelCategories(attributes={
                'occluded', 'username'
            })
        }

        items = []
        for p in sorted(p for p in os.listdir(path) if p.endswith('.xml')):
            root = ElementTree.parse(osp.join(path, p))

            image_path = osp.join(path, root.find('filename').text)
            image_size = None
            imagesize_elem = root.find('imagesize')
            if imagesize_elem is not None:
                width_elem = imagesize_elem.find('ncols')
                height_elem = imagesize_elem.find('nrows')
                image_size = (int(height_elem.text), int(width_elem.text))
            image = Image(path=image_path, size=image_size)

            annotations = self._parse_annotations(root, path, categories)

            items.append(DatasetItem(id=osp.splitext(p)[0],
                subset=self._subset, image=image, annotations=annotations))
        return items, categories

    @classmethod
    def _parse_annotations(cls, xml_root, dataset_root, categories):
        """Parse the <object> elements of one annotation file.

        Handles polygons, bounding boxes and external masks, and rebuilds
        annotation groups from the parent/child ('parts') relations.
        """
        def parse_attributes(attr_str):
            # Attributes come as a comma-separated list of 'name=value'
            # pairs or bare flags; values decode as bool/number/raw string.
            parsed = []
            if not attr_str:
                return parsed

            for attr in [a.strip() for a in attr_str.split(',') if a.strip()]:
                if '=' in attr:
                    name, value = attr.split('=', maxsplit=1)
                    if value.lower() in {'true', 'false'}:
                        value = value.lower() == 'true'
                    else:
                        try:
                            value = float(value)
                        except ValueError:
                            pass
                    parsed.append((name, value))
                else:
                    # A bare flag is stored as a boolean True.
                    parsed.append((attr, True))

            return parsed

        label_cat = categories[AnnotationType.label]
        def get_label_id(label):
            # Registers labels not seen before.
            if not label:
                return None
            idx, _ = label_cat.find(label)
            if idx is None:
                idx = label_cat.add(label)
            return idx

        image_annotations = []

        parsed_annotations = dict()   # object id -> list of annotations
        group_assignments = dict()    # object id -> [group id, child ids]
        root_annotations = set()      # ids with children but no parents
        for obj_elem in xml_root.iter('object'):
            obj_id = int(obj_elem.find('id').text)

            ann_items = []

            label = get_label_id(obj_elem.find('name').text)

            attributes = []
            attributes_elem = obj_elem.find('attributes')
            if attributes_elem is not None and attributes_elem.text:
                attributes = parse_attributes(attributes_elem.text)

            occluded = False
            occluded_elem = obj_elem.find('occluded')
            if occluded_elem is not None and occluded_elem.text:
                occluded = (occluded_elem.text == 'yes')
            attributes.append(('occluded', occluded))

            deleted = False
            deleted_elem = obj_elem.find('deleted')
            if deleted_elem is not None and deleted_elem.text:
                deleted = bool(int(deleted_elem.text))

            user = ''

            poly_elem = obj_elem.find('polygon')
            segm_elem = obj_elem.find('segm')
            type_elem = obj_elem.find('type') # the only value is 'bounding_box'

            if poly_elem is not None:
                user_elem = poly_elem.find('username')
                if user_elem is not None and user_elem.text:
                    user = user_elem.text
                attributes.append(('username', user))

                points = []
                for point_elem in poly_elem.iter('pt'):
                    x = float(point_elem.find('x').text)
                    y = float(point_elem.find('y').text)
                    points.append(x)
                    points.append(y)

                if type_elem is not None and type_elem.text == 'bounding_box':
                    # Boxes are stored as corner points; take the extents.
                    xmin = min(points[::2])
                    xmax = max(points[::2])
                    ymin = min(points[1::2])
                    ymax = max(points[1::2])
                    ann_items.append(Bbox(xmin, ymin, xmax - xmin, ymax - ymin,
                        label=label, attributes=attributes, id=obj_id,
                    ))
                else:
                    ann_items.append(Polygon(points,
                        label=label, attributes=attributes, id=obj_id,
                    ))
            elif segm_elem is not None:
                user_elem = segm_elem.find('username')
                if user_elem is not None and user_elem.text:
                    user = user_elem.text
                attributes.append(('username', user))

                mask_path = osp.join(dataset_root, LabelMePath.MASKS_DIR,
                    segm_elem.find('mask').text)
                if not osp.isfile(mask_path):
                    raise Exception("Can't find mask at '%s'" % mask_path)
                mask = load_mask(mask_path)
                # Any non-zero channel value marks a masked pixel.
                mask = np.any(mask, axis=2)
                ann_items.append(Mask(image=mask, label=label, id=obj_id,
                    attributes=attributes))

            if not deleted:
                parsed_annotations[obj_id] = ann_items

            # Find parents and children
            parts_elem = obj_elem.find('parts')
            if parts_elem is not None:
                children_ids = []
                hasparts_elem = parts_elem.find('hasparts')
                if hasparts_elem is not None and hasparts_elem.text:
                    children_ids = [int(c) for c in hasparts_elem.text.split(',')]

                parent_ids = []
                ispartof_elem = parts_elem.find('ispartof')
                if ispartof_elem is not None and ispartof_elem.text:
                    parent_ids = [int(c) for c in ispartof_elem.text.split(',')]

                if children_ids and not parent_ids and hasparts_elem.text:
                    root_annotations.add(obj_id)
                group_assignments[obj_id] = [None, children_ids]

        # assign single group to all grouped annotations
        current_group_id = 0
        annotations_to_visit = list(root_annotations)
        while annotations_to_visit:
            ann_id = annotations_to_visit.pop()
            ann_assignment = group_assignments[ann_id]
            group_id, children_ids = ann_assignment
            if group_id:
                # Already assigned via another traversal path.
                continue

            if ann_id in root_annotations:
                current_group_id += 1 # start a new group

            group_id = current_group_id
            ann_assignment[0] = group_id

            # continue with children
            annotations_to_visit.extend(children_ids)

        assert current_group_id == len(root_annotations)

        for ann_id, ann_items in parsed_annotations.items():
            group_id = 0
            if ann_id in group_assignments:
                ann_assignment = group_assignments[ann_id]
                group_id = ann_assignment[0]

            for ann_item in ann_items:
                if group_id:
                    ann_item.group = group_id
                image_annotations.append(ann_item)

        return image_annotations
class LabelMeImporter(Importer):
    """Builds a project from LabelMe annotation directories."""

    _EXTRACTOR_NAME = 'label_me'

    @classmethod
    def detect(cls, path):
        """Return True if 'path' is a directory with LabelMe annotations."""
        if not osp.isdir(path):
            return False
        return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import

        found = self.find_subsets(path)
        if not found:
            raise Exception("Failed to find 'label_me' dataset at '%s'" % path)

        project = Project()
        for subset_dir, subset in found:
            # A named subset is forwarded to the extractor as an option;
            # explicit extra_params take precedence.
            options = {}
            if subset:
                options['subset_name'] = subset
            options.update(extra_params)

            source_name = osp.splitext(osp.basename(subset_dir))[0]
            project.add_source(source_name, {
                'url': subset_dir,
                'format': self._EXTRACTOR_NAME,
                'options': options,
            })

        return project

    @staticmethod
    def find_subsets(path):
        """Return (directory, subset name) pairs that contain '*.xml' files."""
        if not osp.isdir(path):
            raise Exception("Expected directory path, got '%s'" % path)
        path = osp.normpath(path)

        def has_annotations(d):
            # A LabelMe directory is recognized by its XML annotation files.
            return any(p.endswith('.xml') for p in os.listdir(d))

        if has_annotations(path):
            return [(path, None)]

        found = []
        for entry in os.listdir(path):
            subset_dir = osp.join(path, entry)
            if osp.isdir(subset_dir) and has_annotations(subset_dir):
                found.append((subset_dir, entry))
        return found
class LabelMeConverter(Converter):
    # Writes a dataset in the LabelMe format: one XML file per image,
    # plus PNG instance masks in a dedicated subdirectory.
    DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT

    def apply(self):
        """Exports every subset of the extractor into its own directory."""
        for subset_name in self._extractor.subsets() or [None]:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            subset_dir = osp.join(self._save_dir, subset_name)
            os.makedirs(subset_dir, exist_ok=True)
            os.makedirs(osp.join(subset_dir, LabelMePath.MASKS_DIR),
                exist_ok=True)

            for item in subset:
                self._save_item(item, subset_dir)

    def _get_label(self, label_id):
        # Maps a label id to its name; unlabeled annotations get ''
        if label_id is None:
            return ''
        return self._extractor.categories()[AnnotationType.label] \
            .items[label_id].name

    def _save_item(self, item, subset_dir):
        """Writes one dataset item as '<item.id>.xml' (plus image/masks).

        Raises:
            Exception: if the item id contains '/' (LabelMe layout is flat).
            NotImplementedError: for unsupported annotation shape types.
        """
        from lxml import etree as ET

        log.debug("Converting item '%s'", item.id)

        if '/' in item.id:
            raise Exception("Can't export item '%s': "
                "LabelMe format only supports flat image layout" % item.id)

        image_filename = self._make_image_filename(item)
        if self._save_images:
            if item.has_image and item.image.has_data:
                self._save_image(item, osp.join(subset_dir, image_filename))
            else:
                log.debug("Item '%s' has no image", item.id)

        root_elem = ET.Element('annotation')
        ET.SubElement(root_elem, 'filename').text = image_filename
        ET.SubElement(root_elem, 'folder').text = ''

        source_elem = ET.SubElement(root_elem, 'source')
        ET.SubElement(source_elem, 'sourceImage').text = ''
        ET.SubElement(source_elem, 'sourceAnnotation').text = 'Datumaro'

        if item.has_image:
            image_elem = ET.SubElement(root_elem, 'imagesize')
            image_size = item.image.size
            # LabelMe stores rows (height) first, then columns (width)
            ET.SubElement(image_elem, 'nrows').text = str(image_size[0])
            ET.SubElement(image_elem, 'ncols').text = str(image_size[1])

        # group id -> [(object id, <parts> element)], linked together below
        groups = defaultdict(list)

        obj_id = 0
        for ann in item.annotations:
            if not ann.type in { AnnotationType.polygon,
                    AnnotationType.bbox, AnnotationType.mask }:
                continue

            obj_elem = ET.SubElement(root_elem, 'object')
            ET.SubElement(obj_elem, 'name').text = self._get_label(ann.label)
            ET.SubElement(obj_elem, 'deleted').text = '0'
            ET.SubElement(obj_elem, 'verified').text = '0'
            # NOTE: pop() mutates ann.attributes so that the remaining
            # entries can be dumped verbatim into <attributes> below
            ET.SubElement(obj_elem, 'occluded').text = \
                'yes' if ann.attributes.pop('occluded', '') == True else 'no'
            ET.SubElement(obj_elem, 'date').text = ''
            ET.SubElement(obj_elem, 'id').text = str(obj_id)

            parts_elem = ET.SubElement(obj_elem, 'parts')
            if ann.group:
                groups[ann.group].append((obj_id, parts_elem))
            else:
                ET.SubElement(parts_elem, 'hasparts').text = ''
                ET.SubElement(parts_elem, 'ispartof').text = ''

            if ann.type == AnnotationType.bbox:
                ET.SubElement(obj_elem, 'type').text = 'bounding_box'

                poly_elem = ET.SubElement(obj_elem, 'polygon')
                # A box is serialized as its 4-corner polygon
                x0, y0, x1, y1 = ann.points
                points = [ (x0, y0), (x1, y0), (x1, y1), (x0, y1) ]
                for x, y in points:
                    point_elem = ET.SubElement(poly_elem, 'pt')
                    ET.SubElement(point_elem, 'x').text = '%.2f' % x
                    ET.SubElement(point_elem, 'y').text = '%.2f' % y

                ET.SubElement(poly_elem, 'username').text = \
                    str(ann.attributes.pop('username', ''))
            elif ann.type == AnnotationType.polygon:
                poly_elem = ET.SubElement(obj_elem, 'polygon')
                # Points come as a flat [x0, y0, x1, y1, ...] list
                for x, y in zip(ann.points[::2], ann.points[1::2]):
                    point_elem = ET.SubElement(poly_elem, 'pt')
                    ET.SubElement(point_elem, 'x').text = '%.2f' % x
                    ET.SubElement(point_elem, 'y').text = '%.2f' % y

                ET.SubElement(poly_elem, 'username').text = \
                    str(ann.attributes.pop('username', ''))
            elif ann.type == AnnotationType.mask:
                mask_filename = '%s_mask_%s.png' % (item.id, obj_id)
                save_image(osp.join(subset_dir, LabelMePath.MASKS_DIR,
                        mask_filename),
                    self._paint_mask(ann.image))

                segm_elem = ET.SubElement(obj_elem, 'segm')
                ET.SubElement(segm_elem, 'mask').text = mask_filename

                # Bounding box of the mask, as (x, y, w, h)
                bbox = find_mask_bbox(ann.image)
                box_elem = ET.SubElement(segm_elem, 'box')
                ET.SubElement(box_elem, 'xmin').text = '%.2f' % bbox[0]
                ET.SubElement(box_elem, 'ymin').text = '%.2f' % bbox[1]
                ET.SubElement(box_elem, 'xmax').text = \
                    '%.2f' % (bbox[0] + bbox[2])
                ET.SubElement(box_elem, 'ymax').text = \
                    '%.2f' % (bbox[1] + bbox[3])

                ET.SubElement(segm_elem, 'username').text = \
                    str(ann.attributes.pop('username', ''))
            else:
                raise NotImplementedError("Unknown shape type '%s'" % ann.type)

            # Remaining attributes are emitted as a 'k=v, ...' string
            attrs = []
            for k, v in ann.attributes.items():
                attrs.append('%s=%s' % (k, v))
            ET.SubElement(obj_elem, 'attributes').text = ', '.join(attrs)

            obj_id += 1

        # Link grouped objects: the first member acts as the group leader,
        # the others are recorded as its parts
        for _, group in groups.items():
            leader_id, leader_parts_elem = group[0]
            leader_parts = [str(o_id) for o_id, _ in group[1:]]
            ET.SubElement(leader_parts_elem, 'hasparts').text = \
                ','.join(leader_parts)
            ET.SubElement(leader_parts_elem, 'ispartof').text = ''

            for obj_id, parts_elem in group[1:]:
                ET.SubElement(parts_elem, 'hasparts').text = ''
                ET.SubElement(parts_elem, 'ispartof').text = str(leader_id)

        xml_path = osp.join(subset_dir, '%s.xml' % item.id)
        with open(xml_path, 'w', encoding='utf-8') as f:
            xml_data = ET.tostring(root_elem, encoding='unicode',
                pretty_print=True)
            f.write(xml_data)

    @staticmethod
    def _paint_mask(mask):
        # TODO: check if mask colors are random
        # Binary mask -> RGBA: background fully transparent, foreground a
        # fixed semi-transparent color. Indexing a 2-row LUT by 0/1 values.
        return np.array([[0, 0, 0, 0], [255, 203, 0, 153]],
            dtype=np.uint8)[mask.astype(np.uint8)]

@ -1,314 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# The Multiple Object Tracking Benchmark challenge format support
# Format description: https://arxiv.org/pdf/1906.04567.pdf
# Another description: https://motchallenge.net/instructions
from collections import OrderedDict
import csv
from enum import Enum
import logging as log
import os
import os.path as osp
from datumaro.components.extractor import (SourceExtractor,
DatasetItem, AnnotationType, Bbox, LabelCategories
)
from datumaro.components.extractor import Importer
from datumaro.components.converter import Converter
from datumaro.util import cast
from datumaro.util.image import Image
# Label ids of the MOT challenge ground truth (ids are 1-based)
MotLabel = Enum('MotLabel', {
    'pedestrian': 1,
    'person on vehicle': 2,
    'car': 3,
    'bicycle': 4,
    'motorbike': 5,
    'non motorized vehicle': 6,
    'static person': 7,
    'distractor': 8,
    'occluder': 9,
    'occluder on the ground': 10,
    'occluder full': 11,
    'reflection': 12,
})
class MotPath:
    # File layout and CSV schema constants for the MOT sequence format
    IMAGE_DIR = 'img1'              # frames directory inside a sequence
    SEQINFO_FILE = 'seqinfo.ini'    # sequence metadata (size, length, ...)
    LABELS_FILE = 'labels.txt'      # one label name per line
    GT_FILENAME = 'gt.txt'          # ground-truth annotations
    DET_FILENAME = 'det.txt'        # detector outputs
    IMAGE_EXT = '.jpg'

    # Column order of the annotation CSV files
    FIELDS = [
        'frame_id',
        'track_id',
        'x',
        'y',
        'w',
        'h',
        'confidence', # or 'not ignored' flag for GT anns
        'class_id',
        'visibility'
    ]
class MotSeqExtractor(SourceExtractor):
    # Reads a single MOT annotation file (gt.txt or det.txt) together with
    # the optional seqinfo.ini, labels.txt and the frame image directory.

    def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None):
        """
        path -- path to the annotation .txt file
        labels -- label names: a labels-file path, a list of names, or
            None to autodetect (falls back to the MotLabel defaults)
        occlusion_threshold -- GT boxes with visibility at or below this
            value are marked occluded
        is_gt -- force GT / detections mode; None means guess from the
            file name (det.txt -> detections, anything else -> GT)
        """
        super().__init__()

        assert osp.isfile(path)
        seq_root = osp.dirname(osp.dirname(path))

        self._image_dir = ''
        if osp.isdir(osp.join(seq_root, MotPath.IMAGE_DIR)):
            self._image_dir = osp.join(seq_root, MotPath.IMAGE_DIR)

        seq_info = osp.join(seq_root, MotPath.SEQINFO_FILE)
        if osp.isfile(seq_info):
            seq_info = self._parse_seq_info(seq_info)
            # seqinfo.ini overrides the default image directory
            self._image_dir = osp.join(seq_root, seq_info['imdir'])
        else:
            seq_info = None
        self._seq_info = seq_info
        self._occlusion_threshold = float(occlusion_threshold)

        assert is_gt in {None, True, False}
        if is_gt is None:
            if osp.basename(path) == MotPath.DET_FILENAME:
                is_gt = False
            else:
                is_gt = True
        self._is_gt = is_gt

        if labels is None:
            labels = osp.join(osp.dirname(path), MotPath.LABELS_FILE)
            if not osp.isfile(labels):
                # No labels.txt next to the annotations -- use the
                # standard MOT label set
                labels = [lbl.name for lbl in MotLabel]
        if isinstance(labels, str):
            labels = self._parse_labels(labels)
        elif isinstance(labels, list):
            assert all(isinstance(lbl, str) for lbl in labels), labels
        else:
            raise TypeError("Unexpected type of 'labels' argument: %s" % labels)

        self._categories = self._load_categories(labels)
        self._items = self._load_items(path)

    def categories(self):
        return self._categories

    def __iter__(self):
        for item in self._items.values():
            yield item

    def __len__(self):
        return len(self._items)

    @staticmethod
    def _parse_labels(path):
        # One label name per line
        with open(path, encoding='utf-8') as labels_file:
            return [s.strip() for s in labels_file]

    def _load_categories(self, labels):
        # GT and detection annotations carry different attribute sets
        attributes = ['track_id']
        if self._is_gt:
            attributes += ['occluded', 'visibility', 'ignored']
        else:
            attributes += ['score']

        label_cat = LabelCategories(attributes=attributes)
        for label in labels:
            label_cat.add(label)

        return { AnnotationType.label: label_cat }

    def _load_items(self, path):
        """Reads the annotation CSV into an ordered frame_id -> item map."""
        labels_count = len(self._categories[AnnotationType.label].items)
        items = OrderedDict()

        if self._seq_info:
            # seqinfo.ini fully describes the frame list and image sizes
            for frame_id in range(self._seq_info['seqlength']):
                items[frame_id] = DatasetItem(
                    id=frame_id,
                    subset=self._subset,
                    image=Image(
                        path=osp.join(self._image_dir,
                            '%06d%s' % (frame_id, self._seq_info['imext'])),
                        size=(self._seq_info['imheight'], self._seq_info['imwidth'])
                    )
                )
        elif osp.isdir(self._image_dir):
            # Otherwise take the frames actually present on disk
            for p in os.listdir(self._image_dir):
                if p.endswith(MotPath.IMAGE_EXT):
                    frame_id = int(osp.splitext(p)[0])
                    items[frame_id] = DatasetItem(
                        id=frame_id,
                        subset=self._subset,
                        image=osp.join(self._image_dir, p),
                    )

        with open(path, newline='', encoding='utf-8') as csv_file:
            # NOTE: Different MOT files have different count of fields
            # (7, 9 or 10). This is handled by reader:
            # - all extra fields go to a separate field
            # - all unmet fields have None values
            for row in csv.DictReader(csv_file, fieldnames=MotPath.FIELDS):
                frame_id = int(row['frame_id'])
                item = items.get(frame_id)
                if item is None:
                    item = DatasetItem(id=frame_id, subset=self._subset)
                annotations = item.annotations

                x, y = float(row['x']), float(row['y'])
                w, h = float(row['w']), float(row['h'])
                label_id = row.get('class_id')
                if label_id and label_id != '-1':
                    # MOT class ids are 1-based
                    label_id = int(label_id) - 1
                    assert label_id < labels_count, label_id
                else:
                    label_id = None

                attributes = {}

                # Annotations for detection task are not related to any track
                track_id = int(row['track_id'])
                if 0 < track_id:
                    attributes['track_id'] = track_id

                confidence = cast(row.get('confidence'), float, 1)
                visibility = cast(row.get('visibility'), float, 1)
                if self._is_gt:
                    attributes['visibility'] = visibility
                    attributes['occluded'] = \
                        visibility <= self._occlusion_threshold
                    # In GT files the confidence column is a
                    # 'not ignored' flag
                    attributes['ignored'] = confidence == 0
                else:
                    attributes['score'] = float(confidence)

                annotations.append(Bbox(x, y, w, h, label=label_id,
                    attributes=attributes))

                items[frame_id] = item
        return items

    @classmethod
    def _parse_seq_info(cls, path):
        # Parses seqinfo.ini 'key=value' lines; keys are lowercased
        fields = {}
        with open(path, encoding='utf-8') as f:
            for line in f:
                entry = line.lower().strip().split('=', maxsplit=1)
                if len(entry) == 2:
                    fields[entry[0]] = entry[1]
        cls._check_seq_info(fields)
        for k in { 'framerate', 'seqlength', 'imwidth', 'imheight' }:
            fields[k] = int(fields[k])
        return fields

    @staticmethod
    def _check_seq_info(seq_info):
        # The file must contain exactly this key set, no more, no less
        assert set(seq_info) == {'name', 'imdir', 'framerate', 'seqlength', 'imwidth', 'imheight', 'imext'}, seq_info
class MotSeqImporter(Importer):
    """Finds MOT sequence annotation files and adds them as project sources."""

    _EXTRACTOR_NAME = 'mot_seq'

    @classmethod
    def detect(cls, path):
        return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        found = self.find_subsets(path)
        if not found:
            raise Exception("Failed to find 'mot' dataset at '%s'" % path)

        for ann_file in found:
            log.info("Found a dataset at '%s'" % ann_file)

            name = osp.splitext(osp.basename(ann_file))[0]
            project.add_source(name, {
                'url': ann_file,
                'format': self._EXTRACTOR_NAME,
                'options': extra_params,
            })

        return project

    @staticmethod
    def find_subsets(path):
        # Accepts either a single .txt annotation file or a sequence
        # directory containing gt/gt.txt
        if path.endswith('.txt') and osp.isfile(path):
            return [path]
        if osp.isdir(path):
            gt_path = osp.join(path, 'gt', MotPath.GT_FILENAME)
            if osp.isfile(gt_path):
                return [gt_path]
        return []
class MotSeqGtConverter(Converter):
    # Writes a dataset as a MOT sequence: img1/ frames, gt/gt.txt
    # annotations and gt/labels.txt with the label names.
    DEFAULT_IMAGE_EXT = MotPath.IMAGE_EXT

    def apply(self):
        extractor = self._extractor

        images_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR)
        os.makedirs(images_dir, exist_ok=True)
        self._images_dir = images_dir

        anno_dir = osp.join(self._save_dir, 'gt')
        os.makedirs(anno_dir, exist_ok=True)
        anno_file = osp.join(anno_dir, MotPath.GT_FILENAME)
        with open(anno_file, 'w', encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=MotPath.FIELDS)

            # Remaps arbitrary source track ids onto a dense sequence;
            # -1 ("no track") maps to itself
            track_id_mapping = {-1: -1}

            for idx, item in enumerate(extractor):
                log.debug("Converting item '%s'", item.id)

                # Items with non-numeric ids get sequential frame numbers
                frame_id = cast(item.id, int, 1 + idx)

                for anno in item.annotations:
                    if anno.type != AnnotationType.bbox:
                        continue

                    track_id = int(anno.attributes.get('track_id', -1))
                    if track_id not in track_id_mapping:
                        track_id_mapping[track_id] = len(track_id_mapping)
                    track_id = track_id_mapping[track_id]

                    writer.writerow({
                        'frame_id': frame_id,
                        'track_id': track_id,
                        'x': anno.x,
                        'y': anno.y,
                        'w': anno.w,
                        'h': anno.h,
                        # In GT files 'confidence' is a 'not ignored' flag
                        'confidence': int(anno.attributes.get('ignored') != True),
                        # MOT class ids are 1-based; missing label -> -1
                        'class_id': 1 + cast(anno.label, int, -2),
                        # Derive visibility from 'occluded' when absent
                        'visibility': float(
                            anno.attributes.get('visibility',
                                1 - float(
                                    anno.attributes.get('occluded', False)
                                )
                            )
                        )
                    })

                if self._save_images:
                    if item.has_image and item.image.has_data:
                        self._save_image(item, osp.join(self._images_dir,
                            '%06d%s' % (frame_id, self._find_image_ext(item))))
                    else:
                        log.debug("Item '%s' has no image", item.id)

        labels_file = osp.join(anno_dir, MotPath.LABELS_FILE)
        with open(labels_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(l.name
                for l in extractor.categories()[AnnotationType.label].items)
            )

@ -1,188 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=exec-used
import cv2
import logging as log
import numpy as np
import os.path as osp
import shutil
from openvino.inference_engine import IECore
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.launcher import Launcher
class OpenVinoImporter(CliPlugin):
    """CLI plugin describing the files that make up an OpenVINO model source."""

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-d', '--description', required=True,
            help="Path to the model description file (.xml)")
        parser.add_argument('-w', '--weights', required=True,
            help="Path to the model weights file (.bin)")
        parser.add_argument('-i', '--interpreter', required=True,
            # fixed typo: was "interprter"
            help="Path to the network output interpreter script (.py)")
        parser.add_argument('--device', default='CPU',
            help="Target device (default: %(default)s)")
        return parser

    @staticmethod
    def copy_model(model_dir, model):
        """Copies the model files into model_dir and rewrites the config
        entries to bare file names (i.e. paths relative to model_dir)."""
        for key in ('description', 'weights', 'interpreter'):
            shutil.copy(model[key],
                osp.join(model_dir, osp.basename(model[key])))
            model[key] = osp.basename(model[key])
class InterpreterScript:
    """Loads a user-provided model interpreter script.

    The script must define ``process_outputs(inputs, outputs)`` and may
    optionally define ``get_categories()``; missing ``get_categories`` falls
    back to the static default below.

    Raises:
        Exception: if the script does not define a callable
            'process_outputs'.
    """

    def __init__(self, path):
        # Read as UTF-8 explicitly: the previous locale-dependent default
        # could fail on non-ASCII interpreter scripts.
        with open(path, 'r', encoding='utf-8') as f:
            script = f.read()

        context = {}
        # NOTE(security): the interpreter script runs as trusted code;
        # only load scripts from trusted sources.
        exec(script, context, context)

        process_outputs = context.get('process_outputs')
        if not callable(process_outputs):
            raise Exception("Can't find 'process_outputs' function in "
                "the interpreter script")
        # Stored in __dict__ so the instance attribute shadows the
        # static default of the same name.
        self.__dict__['process_outputs'] = process_outputs

        get_categories = context.get('get_categories')
        assert get_categories is None or callable(get_categories)
        if get_categories:
            self.__dict__['get_categories'] = get_categories

    @staticmethod
    def get_categories():
        # Default used when the script does not provide get_categories()
        return None

    @staticmethod
    def process_outputs(inputs, outputs):
        raise NotImplementedError(
            "Function should be implemented in the interpreter script")
class OpenVinoLauncher(Launcher):
    # Runs inference with an OpenVINO model and post-processes the raw
    # network outputs with a user-supplied interpreter script.
    cli_plugin = OpenVinoImporter

    def __init__(self, description, weights, interpreter,
            plugins_path=None, device=None, model_dir=None):
        """
        description -- path to the model .xml file
        weights -- path to the model .bin file
        interpreter -- path to the output interpreter script (.py)
        plugins_path -- unused here; kept for interface compatibility
        device -- inference device name (default: 'CPU')
        model_dir -- base directory tried for relative model file paths
        """
        model_dir = model_dir or ''
        # Each path is tried as given first, then relative to model_dir
        if not osp.isfile(description):
            description = osp.join(model_dir, description)
        if not osp.isfile(description):
            raise Exception('Failed to open model description file "%s"' % \
                (description))

        if not osp.isfile(weights):
            weights = osp.join(model_dir, weights)
        if not osp.isfile(weights):
            raise Exception('Failed to open model weights file "%s"' % \
                (weights))

        if not osp.isfile(interpreter):
            interpreter = osp.join(model_dir, interpreter)
        if not osp.isfile(interpreter):
            raise Exception('Failed to open model interpreter script file "%s"' % \
                (interpreter))

        self._interpreter = InterpreterScript(interpreter)

        self._device = device or 'CPU'

        self._ie = IECore()
        if hasattr(self._ie, 'read_network'):
            self._network = self._ie.read_network(description, weights)
        else: # backward compatibility
            from openvino.inference_engine import IENetwork
            self._network = IENetwork.from_ir(description, weights)
        self._check_model_support(self._network, self._device)
        self._load_executable_net()

    def _check_model_support(self, net, device):
        # Fails early if any network layer is unsupported by the device plugin
        supported_layers = set(self._ie.query_network(net, device))
        not_supported_layers = set(net.layers) - supported_layers
        if len(not_supported_layers) != 0:
            log.error("The following layers are not supported " \
                "by the plugin for device '%s': %s." % \
                (device, ', '.join(not_supported_layers)))
            raise NotImplementedError(
                "Some layers are not supported on the device")

    def _load_executable_net(self, batch_size=1):
        """Reshapes the network to the given batch size and loads it."""
        network = self._network

        iter_inputs = iter(network.inputs)
        self._input_blob_name = next(iter_inputs)
        self._output_blob_name = next(iter(network.outputs))

        # NOTE: handling for the inclusion of `image_info` in OpenVino2019
        self._require_image_info = 'image_info' in network.inputs
        if self._input_blob_name == 'image_info':
            # Skip the auxiliary input; take the next one as the image blob
            self._input_blob_name = next(iter_inputs)

        input_type = network.inputs[self._input_blob_name]
        self._input_layout = input_type if isinstance(input_type, list) else input_type.shape
        self._input_layout[0] = batch_size
        network.reshape({self._input_blob_name: self._input_layout})
        self._batch_size = batch_size

        self._net = self._ie.load_network(network=network, num_requests=1,
            device_name=self._device)

    def infer(self, inputs):
        """Runs the network on an (N, H, W, C) BGR image batch.

        Returns the single output blob directly, or the dict of all
        outputs when the network has several.
        """
        assert len(inputs.shape) == 4, \
            "Expected an input image in (N, H, W, C) format, got %s" % \
                (inputs.shape)
        assert inputs.shape[3] == 3, "Expected BGR input, got %s" % inputs.shape

        n, c, h, w = self._input_layout
        if inputs.shape[1:3] != (h, w):
            # Resize each image to the network's expected spatial size
            resized_inputs = np.empty((n, h, w, c), dtype=inputs.dtype)
            for inp, resized_input in zip(inputs, resized_inputs):
                cv2.resize(inp, (w, h), resized_input)
            inputs = resized_inputs
        inputs = inputs.transpose((0, 3, 1, 2)) # NHWC to NCHW
        inputs = {self._input_blob_name: inputs}
        if self._require_image_info:
            info = np.zeros([1, 3])
            info[0, 0] = h
            info[0, 1] = w
            info[0, 2] = 1.0 # scale
            inputs['image_info'] = info

        results = self._net.infer(inputs)
        if len(results) == 1:
            return results[self._output_blob_name]
        else:
            return results

    def launch(self, inputs):
        """Infers a batch and returns interpreter-processed results."""
        batch_size = len(inputs)
        if self._batch_size < batch_size:
            # Grow (and reload) the executable network for larger batches
            self._load_executable_net(batch_size)

        outputs = self.infer(inputs)
        results = self.process_outputs(inputs, outputs)
        return results

    def categories(self):
        # Delegates to the interpreter script (may be None)
        return self._interpreter.get_categories()

    def process_outputs(self, inputs, outputs):
        # Delegates raw-output decoding to the interpreter script
        return self._interpreter.process_outputs(inputs, outputs)

@ -1,217 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import codecs
from collections import OrderedDict
import hashlib
import logging as log
import os
import os.path as osp
import string
from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME,
LabelCategories
)
from datumaro.components.converter import Converter
from datumaro.util.image import encode_image
from datumaro.util.annotation_util import (max_bbox,
find_group_leader, find_instances)
from datumaro.util.mask_tools import merge_masks
from datumaro.util.tf_util import import_tf as _import_tf
from .format import DetectionApiPath
tf = _import_tf()
# filter out non-ASCII characters, otherwise training will crash
_printable = set(string.printable)
def _make_printable(s):
return ''.join(filter(lambda x: x in _printable, s))
def int64_feature(value):
    # Wraps a single int into a TF Example int64 feature
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
    # Wraps an iterable of ints into a TF Example int64 feature
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
    # Wraps a single bytes value into a TF Example bytes feature
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
    # Wraps an iterable of bytes into a TF Example bytes feature
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
    # Wraps an iterable of floats into a TF Example float feature
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
class TfDetectionApiConverter(Converter):
    # Writes a dataset as TF Detection API .tfrecord files plus a
    # label_map.pbtxt file with 1-based label ids.
    DEFAULT_IMAGE_EXT = DetectionApiPath.DEFAULT_IMAGE_EXT

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--save-masks', action='store_true',
            help="Include instance masks (default: %(default)s)")
        return parser

    def __init__(self, extractor, save_dir, save_masks=False, **kwargs):
        super().__init__(extractor, save_dir, **kwargs)

        # Whether to serialize per-instance PNG masks into the records
        self._save_masks = save_masks

    def apply(self):
        os.makedirs(self._save_dir, exist_ok=True)

        label_categories = self._extractor.categories().get(AnnotationType.label,
            LabelCategories())
        get_label = lambda label_id: label_categories.items[label_id].name \
            if label_id is not None else ''
        # TF Detection API label ids are 1-based; 0 means "no label"
        label_ids = OrderedDict((label.name, 1 + idx)
            for idx, label in enumerate(label_categories.items))
        map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
        self._get_label = get_label
        self._get_label_id = map_label_id

        subsets = self._extractor.subsets()
        if len(subsets) == 0:
            subsets = [ None ]

        for subset_name in subsets:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            # NOTE: the label map is rewritten on every iteration with the
            # same contents (one shared file for all subsets)
            labelmap_path = osp.join(self._save_dir, DetectionApiPath.LABELMAP_FILE)
            with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
                for label, idx in label_ids.items():
                    f.write(
                        'item {\n' +
                        ('\tid: %s\n' % (idx)) +
                        ("\tname: '%s'\n" % (label)) +
                        '}\n\n'
                    )

            anno_path = osp.join(self._save_dir, '%s.tfrecord' % (subset_name))
            with tf.io.TFRecordWriter(anno_path) as writer:
                for item in subset:
                    tf_example = self._make_tf_example(item)
                    writer.write(tf_example.SerializeToString())

    @staticmethod
    def _find_instances(annotations):
        # Groups boxes and masks into object instances
        return find_instances(a for a in annotations
            if a.type in { AnnotationType.bbox, AnnotationType.mask })

    def _find_instance_parts(self, group, img_width, img_height):
        # Reduces an instance group to (leader ann, merged mask, overall bbox)
        boxes = [a for a in group if a.type == AnnotationType.bbox]
        masks = [a for a in group if a.type == AnnotationType.mask]
        anns = boxes + masks
        leader = find_group_leader(anns)
        bbox = max_bbox(anns)

        mask = None
        if self._save_masks:
            mask = merge_masks([m.image for m in masks])

        return [leader, mask, bbox]

    def _export_instances(self, instances, width, height):
        """Serializes instances into TF Example feature lists;
        box coordinates are normalized to [0, 1] by the image size."""
        xmins = [] # List of normalized left x coordinates of bounding boxes (1 per box)
        xmaxs = [] # List of normalized right x coordinates of bounding boxes (1 per box)
        ymins = [] # List of normalized top y coordinates of bounding boxes (1 per box)
        ymaxs = [] # List of normalized bottom y coordinates of bounding boxes (1 per box)
        classes_text = [] # List of class names of bounding boxes (1 per box)
        classes = [] # List of class ids of bounding boxes (1 per box)
        masks = [] # List of PNG-encoded instance masks (1 per box)

        for leader, mask, box in instances:
            label = _make_printable(self._get_label(leader.label))
            classes_text.append(label.encode('utf-8'))
            classes.append(self._get_label_id(leader.label))

            xmins.append(box[0] / width)
            xmaxs.append((box[0] + box[2]) / width)
            ymins.append(box[1] / height)
            ymaxs.append((box[1] + box[3]) / height)

            if self._save_masks:
                if mask is not None:
                    mask = encode_image(mask, '.png')
                else:
                    # Keep list positions aligned with the boxes
                    mask = b''
                masks.append(mask)

        result = {}
        if classes:
            result = {
                'image/object/bbox/xmin': float_list_feature(xmins),
                'image/object/bbox/xmax': float_list_feature(xmaxs),
                'image/object/bbox/ymin': float_list_feature(ymins),
                'image/object/bbox/ymax': float_list_feature(ymaxs),
                'image/object/class/text': bytes_list_feature(classes_text),
                'image/object/class/label': int64_list_feature(classes),
            }
        if masks:
            result['image/object/mask'] = bytes_list_feature(masks)
        return result

    def _make_tf_example(self, item):
        """Builds a tf.train.Example for one dataset item.

        Raises:
            Exception: if the item carries no image info (size is required).
        """
        features = {
            'image/source_id': bytes_feature(
                str(item.attributes.get('source_id') or '').encode('utf-8')
            ),
        }

        filename = self._make_image_filename(item)
        features['image/filename'] = bytes_feature(filename.encode('utf-8'))

        if not item.has_image:
            raise Exception("Failed to export dataset item '%s': "
                "item has no image info" % item.id)
        height, width = item.image.size

        features.update({
            'image/height': int64_feature(height),
            'image/width': int64_feature(width),
        })

        # Placeholders; filled below when image saving is enabled
        features.update({
            'image/encoded': bytes_feature(b''),
            'image/format': bytes_feature(b''),
            'image/key/sha256': bytes_feature(b''),
        })
        if self._save_images:
            if item.has_image and item.image.has_data:
                buffer, fmt = self._save_image(item, filename)
                key = hashlib.sha256(buffer).hexdigest()

                features.update({
                    'image/encoded': bytes_feature(buffer),
                    'image/format': bytes_feature(fmt.encode('utf-8')),
                    'image/key/sha256': bytes_feature(key.encode('utf8')),
                })
            else:
                log.warning("Item '%s' has no image" % item.id)

        instances = self._find_instances(item.annotations)
        instances = [self._find_instance_parts(i, width, height) for i in instances]
        features.update(self._export_instances(instances, width, height))

        tf_example = tf.train.Example(
            features=tf.train.Features(feature=features))

        return tf_example

    def _save_image(self, item, path=None):
        # NOTE: overrides the base converter's method with a different
        # contract -- returns (encoded bytes, format string) instead of
        # writing to disk
        dst_ext = osp.splitext(osp.basename(path))[1]
        fmt = DetectionApiPath.IMAGE_EXT_FORMAT.get(dst_ext)
        if not fmt:
            log.warning("Item '%s': can't find format string for the '%s' "
                "image extension, the corresponding field will be empty." % \
                (item.id, dst_ext))
        buffer = encode_image(item.image.data, dst_ext)
        return buffer, fmt

@ -1,195 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import numpy as np
import os.path as osp
import re
from datumaro.components.extractor import (SourceExtractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories
)
from datumaro.util.image import Image, decode_image, lazy_image
from datumaro.util.tf_util import import_tf as _import_tf
from .format import DetectionApiPath
tf = _import_tf()
def clamp(value, _min, _max):
    """Returns value limited to the [_min, _max] interval."""
    return max(_min, min(value, _max))
class TfDetectionApiExtractor(SourceExtractor):
    # Reads a TF Detection API .tfrecord file; label names come from the
    # adjacent label_map.pbtxt and/or from the records themselves.

    def __init__(self, path):
        assert osp.isfile(path), path
        images_dir = ''
        root_dir = osp.dirname(osp.abspath(path))
        if osp.basename(root_dir) == DetectionApiPath.ANNOTATIONS_DIR:
            # Standard layout: <root>/annotations/*.tfrecord + <root>/images/
            root_dir = osp.dirname(root_dir)
            images_dir = osp.join(root_dir, DetectionApiPath.IMAGES_DIR)
            if not osp.isdir(images_dir):
                images_dir = ''
        super().__init__(subset=osp.splitext(osp.basename(path))[0])

        items, labels = self._parse_tfrecord_file(path, self._subset, images_dir)
        self._items = items
        self._categories = self._load_categories(labels)

    def categories(self):
        return self._categories

    def __iter__(self):
        for item in self._items:
            yield item

    def __len__(self):
        return len(self._items)

    @staticmethod
    def _load_categories(labels):
        # Register labels ordered by their numeric ids
        label_categories = LabelCategories()
        labels = sorted(labels.items(), key=lambda item: item[1])
        for label, _ in labels:
            label_categories.add(label)
        return {
            AnnotationType.label: label_categories
        }

    @classmethod
    def _parse_labelmap(cls, text):
        """Parses label_map.pbtxt text into a {label name: id} dict."""
        id_pattern = r'(?:id\s*:\s*(?P<id>\d+))'
        name_pattern = r'(?:name\s*:\s*[\'\"](?P<name>.*?)[\'\"])'
        entry_pattern = r'(\{(?:[\s\n]*(?:%(id)s|%(name)s)[\s\n]*){2}\})+' % \
            {'id': id_pattern, 'name': name_pattern}
        matches = re.finditer(entry_pattern, text)

        labelmap = {}
        for match in matches:
            label_id = match.group('id')
            label_name = match.group('name')
            if label_id is not None and label_name is not None:
                labelmap[label_name] = int(label_id)

        return labelmap

    @classmethod
    def _parse_tfrecord_file(cls, filepath, subset, images_dir):
        """Reads all records into (items list, {label: 0-based id})."""
        dataset = tf.data.TFRecordDataset(filepath)
        features = {
            'image/filename': tf.io.FixedLenFeature([], tf.string),
            'image/source_id': tf.io.FixedLenFeature([], tf.string),
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            'image/encoded': tf.io.FixedLenFeature([], tf.string),
            'image/format': tf.io.FixedLenFeature([], tf.string),
            # use varlen to avoid errors when this field is missing
            'image/key/sha256': tf.io.VarLenFeature(tf.string),
            # Object boxes and classes.
            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
            'image/object/class/label': tf.io.VarLenFeature(tf.int64),
            'image/object/class/text': tf.io.VarLenFeature(tf.string),
            'image/object/mask': tf.io.VarLenFeature(tf.string),
        }

        # label name -> 0-based dataset label id
        dataset_labels = OrderedDict()
        labelmap_path = osp.join(osp.dirname(filepath),
            DetectionApiPath.LABELMAP_FILE)
        if osp.exists(labelmap_path):
            with open(labelmap_path, 'r', encoding='utf-8') as f:
                labelmap_text = f.read()
            # pbtxt ids are 1-based; convert to 0-based
            dataset_labels.update({ label: id - 1
                for label, id in cls._parse_labelmap(labelmap_text).items()
            })

        dataset_items = []

        for record in dataset:
            parsed_record = tf.io.parse_single_example(record, features)
            frame_id = parsed_record['image/source_id'].numpy().decode('utf-8')
            frame_filename = \
                parsed_record['image/filename'].numpy().decode('utf-8')
            frame_height = tf.cast(
                parsed_record['image/height'], tf.int64).numpy().item()
            frame_width = tf.cast(
                parsed_record['image/width'], tf.int64).numpy().item()
            frame_image = parsed_record['image/encoded'].numpy()
            xmins = tf.sparse.to_dense(
                parsed_record['image/object/bbox/xmin']).numpy()
            ymins = tf.sparse.to_dense(
                parsed_record['image/object/bbox/ymin']).numpy()
            xmaxs = tf.sparse.to_dense(
                parsed_record['image/object/bbox/xmax']).numpy()
            ymaxs = tf.sparse.to_dense(
                parsed_record['image/object/bbox/ymax']).numpy()
            label_ids = tf.sparse.to_dense(
                parsed_record['image/object/class/label']).numpy()
            labels = tf.sparse.to_dense(
                parsed_record['image/object/class/text'],
                default_value=b'').numpy()
            masks = tf.sparse.to_dense(
                parsed_record['image/object/mask'],
                default_value=b'').numpy()

            # Collect labels seen in the records that the label map missed
            for label, label_id in zip(labels, label_ids):
                label = label.decode('utf-8')
                if not label:
                    continue
                if label_id <= 0:
                    continue
                if label in dataset_labels:
                    continue
                dataset_labels[label] = label_id - 1

            item_id = osp.splitext(frame_filename)[0]

            annotations = []
            for shape_id, shape in enumerate(
                    np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]):
                label = shape[0].decode('utf-8')

                mask = None
                if len(masks) != 0:
                    mask = masks[shape_id]

                if mask is not None:
                    # A non-empty mask overrides the box for this shape
                    if isinstance(mask, bytes):
                        mask = lazy_image(mask, decode_image)
                    annotations.append(Mask(image=mask,
                        label=dataset_labels.get(label)
                    ))
                else:
                    # Box coordinates are stored normalized; convert to
                    # pixels and clamp to the image bounds
                    x = clamp(shape[1] * frame_width, 0, frame_width)
                    y = clamp(shape[2] * frame_height, 0, frame_height)
                    w = clamp(shape[3] * frame_width, 0, frame_width) - x
                    h = clamp(shape[4] * frame_height, 0, frame_height) - y
                    annotations.append(Bbox(x, y, w, h,
                        label=dataset_labels.get(label)
                    ))

            image_size = None
            if frame_height and frame_width:
                image_size = (frame_height, frame_width)

            image_params = {}
            if frame_image:
                # Decode the embedded image lazily, on first access
                image_params['data'] = lazy_image(frame_image, decode_image)
            if frame_filename:
                image_params['path'] = osp.join(images_dir, frame_filename)

            image = None
            if image_params:
                image = Image(**image_params, size=image_size)

            dataset_items.append(DatasetItem(id=item_id, subset=subset,
                image=image, annotations=annotations,
                attributes={'source_id': frame_id}))

        return dataset_items, dataset_labels

@ -1,13 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
class DetectionApiPath:
    # Directory/file layout constants for the TF Detection API format
    IMAGES_DIR = 'images'
    ANNOTATIONS_DIR = 'annotations'

    DEFAULT_IMAGE_EXT = '.jpg'
    # Extension -> 'image/format' field value in the records
    IMAGE_EXT_FORMAT = {'.jpg': 'jpeg', '.png': 'png'}

    LABELMAP_FILE = 'label_map.pbtxt'

@ -1,52 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.extractor import Importer
class TfDetectionApiImporter(Importer):
    """Finds .tfrecord files and adds them as project sources."""

    EXTRACTOR_NAME = 'tf_detection_api'

    @classmethod
    def detect(cls, path):
        return len(cls.find_subsets(path)) != 0

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        found = self.find_subsets(path)
        if not found:
            raise Exception(
                "Failed to find 'tf_detection_api' dataset at '%s'" % path)

        for record_path in found:
            # glob may still return non-files in corner cases; skip them
            if not osp.isfile(record_path):
                continue
            log.info("Found a dataset at '%s'" % record_path)

            name = osp.splitext(osp.basename(record_path))[0]
            project.add_source(name, {
                'url': record_path,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project

    @staticmethod
    def find_subsets(path):
        # Accepts a single .tfrecord file, or a directory tree searched
        # recursively for .tfrecord files
        if path.endswith('.tfrecord') and osp.isfile(path):
            return [path]
        return glob(osp.join(path, '**', '*.tfrecord'), recursive=True)

@ -1,524 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from enum import Enum
import logging as log
import os.path as osp
import random
import re
import pycocotools.mask as mask_utils
from datumaro.components.extractor import (Transform, AnnotationType,
RleMask, Polygon, Bbox,
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.components.cli_plugin import CliPlugin
import datumaro.util.mask_tools as mask_tools
from datumaro.util.annotation_util import find_group_leader, find_instances
class CropCoveredSegments(Transform, CliPlugin):
    """Cuts overlapping parts out of polygon/mask segments.

    Segments are processed in z_order; the actual cropping is delegated to
    mask_tools.crop_covered_segments. Non-segment annotations pass through.
    """

    def transform_item(self, item):
        annotations = []
        segments = []
        for ann in item.annotations:
            if ann.type in {AnnotationType.polygon, AnnotationType.mask}:
                segments.append(ann)
            else:
                annotations.append(ann)
        if not segments:
            return item

        # Image size is needed to rasterize/crop the segments.
        if not item.has_image:
            raise Exception("Image info is required for this transform")
        h, w = item.image.size
        segments = self.crop_segments(segments, w, h)

        annotations += segments
        return self.wrap_item(item, annotations=annotations)

    @classmethod
    def crop_segments(cls, segment_anns, img_width, img_height):
        """Returns new annotations with overlapping areas cropped away.

        Polygons may be split into several parts; masks are re-encoded
        as RLE. New polygon parts inherit the source fields and get a
        common group id when the source had none.
        """
        segment_anns = sorted(segment_anns, key=lambda x: x.z_order)

        # Convert every segment to the representation the cropper expects:
        # point lists for polygons, RLE for masks.
        segments = []
        for s in segment_anns:
            if s.type == AnnotationType.polygon:
                segments.append(s.points)
            elif s.type == AnnotationType.mask:
                if isinstance(s, RleMask):
                    rle = s.rle
                else:
                    rle = mask_tools.mask_to_rle(s.image)
                segments.append(rle)

        segments = mask_tools.crop_covered_segments(
            segments, img_width, img_height)

        new_anns = []
        for ann, new_segment in zip(segment_anns, segments):
            fields = {'z_order': ann.z_order, 'label': ann.label,
                'id': ann.id, 'group': ann.group, 'attributes': ann.attributes
            }
            if ann.type == AnnotationType.polygon:
                if fields['group'] is None:
                    # Parts of a split polygon must share a group id.
                    fields['group'] = cls._make_group_id(
                        segment_anns + new_anns, fields['id'])
                for polygon in new_segment:
                    new_anns.append(Polygon(points=polygon, **fields))
            else:
                rle = mask_tools.mask_to_rle(new_segment)
                rle = mask_utils.frPyObjects(rle, *rle['size'])
                new_anns.append(RleMask(rle=rle, **fields))
        return new_anns

    @staticmethod
    def _make_group_id(anns, ann_id):
        """Returns ann_id when it is set (truthy), otherwise a fresh group
        id one greater than the largest group id used in 'anns'.

        BUG FIX: the previous code did
        `max(anns, default=0, key=lambda x: x.group)`, which returns the
        annotation *object* (not its group id), so `max_gid + 1` raised a
        TypeError whenever 'anns' was non-empty.
        """
        if ann_id:
            return ann_id
        max_gid = max((a.group for a in anns if a.group), default=0)
        return max_gid + 1
class MergeInstanceSegments(Transform, CliPlugin):
    """
    Replaces instance masks and, optionally, polygons with a single mask.
    """

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--include-polygons', action='store_true',
            help="Include polygons")
        return parser

    def __init__(self, extractor, include_polygons=False):
        super().__init__(extractor)

        # When True, polygons are rasterized and merged into the mask;
        # otherwise they are kept as separate annotations.
        self._include_polygons = include_polygons

    def transform_item(self, item):
        # Split annotations into segments (polygons/masks) and the rest.
        annotations = []
        segments = []
        for ann in item.annotations:
            if ann.type in {AnnotationType.polygon, AnnotationType.mask}:
                segments.append(ann)
            else:
                annotations.append(ann)
        if not segments:
            return item

        # Merging needs the image dimensions for rasterization.
        if not item.has_image:
            raise Exception("Image info is required for this transform")
        h, w = item.image.size
        # Group segments into instances, then merge each instance.
        instances = self.find_instances(segments)
        segments = [self.merge_segments(i, w, h, self._include_polygons)
            for i in instances]
        segments = sum(segments, [])  # flatten list of lists

        annotations += segments
        return self.wrap_item(item, annotations=annotations)

    @classmethod
    def merge_segments(cls, instance, img_width, img_height,
            include_polygons=False):
        """Merges one instance's segments into a single RLE mask.

        Returns the remaining annotations for the instance: the merged
        mask (if any mask material exists) plus any polygons that were
        deliberately kept unmerged.
        """
        polygons = [a for a in instance if a.type == AnnotationType.polygon]
        masks = [a for a in instance if a.type == AnnotationType.mask]
        if not polygons and not masks:
            return []

        # The merged mask inherits its fields from the group leader.
        leader = find_group_leader(polygons + masks)
        instance = []

        # Build the resulting mask
        mask = None

        if include_polygons and polygons:
            polygons = [p.points for p in polygons]
            mask = mask_tools.rles_to_mask(polygons, img_width, img_height)
        else:
            instance += polygons # keep unused polygons

        if masks:
            masks = [m.image for m in masks]
            if mask is not None:
                masks += [mask]
            mask = mask_tools.merge_masks(masks)

        if mask is None:
            return instance

        # Re-encode the merged bitmap as RLE.
        mask = mask_tools.mask_to_rle(mask)
        mask = mask_utils.frPyObjects(mask, *mask['size'])
        instance.append(
            RleMask(rle=mask, label=leader.label, z_order=leader.z_order,
                id=leader.id, attributes=leader.attributes, group=leader.group
            )
        )
        return instance

    @staticmethod
    def find_instances(annotations):
        # Groups segment annotations into instances (by group membership).
        return find_instances(a for a in annotations
            if a.type in {AnnotationType.polygon, AnnotationType.mask})
class PolygonsToMasks(Transform, CliPlugin):
    """Replaces polygon annotations with equivalent RLE masks; all other
    annotations pass through unchanged."""

    def transform_item(self, item):
        converted = []
        for source_ann in item.annotations:
            if source_ann.type != AnnotationType.polygon:
                converted.append(source_ann)
                continue
            # Rasterization needs the image size, so image info is
            # required (checked lazily, only when a polygon occurs).
            if not item.has_image:
                raise Exception("Image info is required for this transform")
            height, width = item.image.size
            converted.append(self.convert_polygon(source_ann, height, width))
        return self.wrap_item(item, annotations=converted)

    @staticmethod
    def convert_polygon(polygon, img_h, img_w):
        """Returns an RleMask covering the polygon's area, keeping all
        identifying fields of the source annotation."""
        encoded = mask_utils.frPyObjects([polygon.points], img_h, img_w)[0]
        return RleMask(rle=encoded,
            label=polygon.label, z_order=polygon.z_order, id=polygon.id,
            attributes=polygon.attributes, group=polygon.group)
class BoxesToMasks(Transform, CliPlugin):
    """Replaces bounding boxes with equivalent rectangular RLE masks;
    all other annotations pass through unchanged."""

    def transform_item(self, item):
        converted = []
        for source_ann in item.annotations:
            if source_ann.type != AnnotationType.bbox:
                converted.append(source_ann)
                continue
            # Rasterization needs the image size, so image info is
            # required (checked lazily, only when a bbox occurs).
            if not item.has_image:
                raise Exception("Image info is required for this transform")
            height, width = item.image.size
            converted.append(self.convert_bbox(source_ann, height, width))
        return self.wrap_item(item, annotations=converted)

    @staticmethod
    def convert_bbox(bbox, img_h, img_w):
        """Returns an RleMask covering the box's area, keeping all
        identifying fields of the source annotation."""
        encoded = mask_utils.frPyObjects([bbox.as_polygon()], img_h, img_w)[0]
        return RleMask(rle=encoded,
            label=bbox.label, z_order=bbox.z_order, id=bbox.id,
            attributes=bbox.attributes, group=bbox.group)
class MasksToPolygons(Transform, CliPlugin):
    """Replaces each mask with the polygons outlining its regions; all
    other annotations pass through unchanged."""

    def transform_item(self, item):
        converted = []
        for source_ann in item.annotations:
            if source_ann.type != AnnotationType.mask:
                converted.append(source_ann)
                continue
            polygons = self.convert_mask(source_ann)
            if not polygons:
                # The vectorizer can drop degenerate regions entirely.
                log.debug("[%s]: item %s: "
                    "Mask conversion to polygons resulted in too "
                    "small polygons, which were discarded" % \
                    (self._get_name(__class__), item.id))
            converted.extend(polygons)
        return self.wrap_item(item, annotations=converted)

    @staticmethod
    def convert_mask(mask):
        """Returns one Polygon per region contour of the mask, each
        inheriting the mask's identifying fields."""
        return [
            Polygon(points=contour, label=mask.label, z_order=mask.z_order,
                id=mask.id, attributes=mask.attributes, group=mask.group)
            for contour in mask_tools.mask_to_polygons(mask.image)
        ]
class ShapesToBoxes(Transform, CliPlugin):
    """Replaces masks, polygons, polylines and point sets with their
    bounding boxes; other annotations pass through unchanged."""

    def transform_item(self, item):
        # All shape-like annotation types that have a meaningful bbox.
        shape_types = {AnnotationType.mask, AnnotationType.polygon,
            AnnotationType.polyline, AnnotationType.points}
        converted = [
            self.convert_shape(a) if a.type in shape_types else a
            for a in item.annotations
        ]
        return self.wrap_item(item, annotations=converted)

    @staticmethod
    def convert_shape(shape):
        """Returns a Bbox with the shape's bounding box and its
        identifying fields."""
        return Bbox(*shape.get_bbox(),
            label=shape.label, z_order=shape.z_order,
            id=shape.id, attributes=shape.attributes, group=shape.group)
class Reindex(Transform, CliPlugin):
    """Renumbers dataset items sequentially, starting from a given value."""

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-s', '--start', type=int, default=1,
            help="Start value for item ids")
        return parser

    def __init__(self, extractor, start=1):
        super().__init__(extractor)
        self._start = start

    def __iter__(self):
        # enumerate() with an offset yields the new sequential ids directly.
        for new_id, item in enumerate(self._extractor, start=self._start):
            yield self.wrap_item(item, id=new_id)
class MapSubsets(Transform, CliPlugin):
    """Renames dataset subsets according to a src->dst mapping; subsets
    not mentioned in the mapping are kept as they are."""

    @staticmethod
    def _mapping_arg(s):
        # Parses one 'src:dst' CLI argument into a [src, dst] pair.
        pieces = s.split(':')
        if len(pieces) != 2:
            import argparse
            raise argparse.ArgumentTypeError()
        return pieces

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-s', '--subset', action='append',
            type=cls._mapping_arg, dest='mapping',
            help="Subset mapping of the form: 'src:dst' (repeatable)")
        return parser

    def __init__(self, extractor, mapping=None):
        super().__init__(extractor)
        # Accept a dict directly, or an iterable of (src, dst) pairs.
        if mapping is None:
            mapping = {}
        elif not isinstance(mapping, dict):
            mapping = dict(map(tuple, mapping))
        self._mapping = mapping

    def transform_item(self, item):
        new_subset = self._mapping.get(item.subset, item.subset)
        return self.wrap_item(item, subset=new_subset)
class RandomSplit(Transform, CliPlugin):
    """
    Joins all subsets into one and splits the result into few parts.
    It is expected that item ids are unique and subset ratios sum up to 1.|n
    |n
    Example:|n
    |s|s%(prog)s --subset train:.67 --subset test:.33
    """

    @staticmethod
    def _split_arg(s):
        # Parses one '<subset>:<ratio>' CLI argument.
        parts = s.split(':')
        if len(parts) != 2:
            import argparse
            raise argparse.ArgumentTypeError()
        return (parts[0], float(parts[1]))

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-s', '--subset', action='append',
            type=cls._split_arg, dest='splits',
            default=[('train', 0.67), ('test', 0.33)],
            help="Subsets in the form of: '<subset>:<ratio>' (repeatable)")
        parser.add_argument('--seed', type=int, help="Random seed")
        return parser

    def __init__(self, extractor, splits, seed=None):
        """splits: list of (subset_name, ratio); ratios must be in [0; 1]
        and sum up to 1 (within 1e-7). seed: optional RNG seed."""
        super().__init__(extractor)

        assert 0 < len(splits), "Expected at least one split"
        assert all(0.0 <= r and r <= 1.0 for _, r in splits), \
            "Ratios are expected to be in the range [0; 1], but got %s" % splits

        total_ratio = sum(s[1] for s in splits)
        if not abs(total_ratio - 1.0) <= 1e-7:
            raise Exception(
                "Sum of ratios is expected to be 1, got %s, which is %s" %
                (splits, total_ratio))

        dataset_size = len(extractor)
        indices = list(range(dataset_size))

        random.seed(seed)
        random.shuffle(indices)
        # BUG FIX: the shuffled indices were previously computed but never
        # used, so the "random" split degenerated into a deterministic
        # first-N/last-M split by enumeration order. Keep them and route
        # each item through its shuffled position in __iter__.
        self._shuffled_indices = indices

        # Cumulative ratio boundaries: item positions below parts[k][0]
        # belong to parts[k][1].
        parts = []
        s = 0
        for subset, ratio in splits:
            s += ratio
            boundary = int(s * dataset_size)
            parts.append((boundary, subset))
        self._parts = parts

    def _find_split(self, index):
        for boundary, subset in self._parts:
            if index < boundary:
                return subset
        return subset # all the possible remainder goes to the last split

    def __iter__(self):
        for i, item in enumerate(self._extractor):
            yield self.wrap_item(item,
                subset=self._find_split(self._shuffled_indices[i]))
class IdFromImageName(Transform, CliPlugin):
    """Sets each item's id to its image file name without the extension;
    items without image path information are left unchanged."""

    def transform_item(self, item):
        if not (item.has_image and item.image.path):
            log.debug("Can't change item id for item '%s': "
                "item has no image info" % item.id)
            return item
        file_name = osp.basename(item.image.path)
        return self.wrap_item(item, id=osp.splitext(file_name)[0])
class Rename(Transform, CliPlugin):
    """
    Renames items in the dataset. Supports regular expressions.
    The first character in the expression is a delimiter for
    the pattern and replacement parts. Replacement part can also
    contain string.format tokens with 'item' object available.|n
    |n
    Examples:|n
    - Replace 'pattern' with 'replacement':|n
    |s|srename -e '|pattern|replacement|'|n
    - Remove 'frame_' from item ids:|n
    |s|srename -e '|frame_(\d+)|\\1|'
    """

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-e', '--regex',
            help="Regex for renaming.")
        return parser

    def __init__(self, extractor, regex):
        super().__init__(extractor)

        assert regex and isinstance(regex, str)
        # The first character of 'regex' is the delimiter, so an input like
        # '|pattern|replacement|' splits into ['', pattern, replacement, ...];
        # maxsplit=3 keeps any further delimiter characters inside the
        # trailing part instead of truncating the replacement.
        parts = regex.split(regex[0], maxsplit=3)
        regex, sub = parts[1:3]
        self._re = re.compile(regex)
        self._sub = sub

    def transform_item(self, item):
        # Apply the substitution, then expand str.format tokens in the
        # result with the item object available as 'item'.
        return self.wrap_item(item, id=self._re.sub(self._sub, item.id) \
            .format(item=item))
class RemapLabels(Transform, CliPlugin):
    """
    Changes labels in the dataset.|n
    Examples:|n
    - Rename 'person' to 'car' and 'cat' to 'dog', keep 'bus', remove others:|n
    |s|sremap_labels -l person:car -l bus:bus -l cat:dog --default delete
    """

    # What to do with labels not mentioned in the mapping.
    DefaultAction = Enum('DefaultAction', ['keep', 'delete'])

    @staticmethod
    def _split_arg(s):
        # Parses one '<src>:<dst>' CLI argument.
        parts = s.split(':')
        if len(parts) != 2:
            import argparse
            raise argparse.ArgumentTypeError()
        return (parts[0], parts[1])

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-l', '--label', action='append',
            type=cls._split_arg, dest='mapping',
            help="Label in the form of: '<src>:<dst>' (repeatable)")
        parser.add_argument('--default',
            choices=[a.name for a in cls.DefaultAction],
            default=cls.DefaultAction.keep.name,
            help="Action for unspecified labels (default: %(default)s)")
        return parser

    def __init__(self, extractor, mapping, default=None):
        """mapping: dict or list of (src, dst) label-name pairs.
        default: DefaultAction (or its name) for unmapped labels.
        NOTE(review): despite the 'default=None' signature, the assert
        below requires a str or DefaultAction — confirm intended."""
        super().__init__(extractor)

        assert isinstance(default, (str, self.DefaultAction))
        if isinstance(default, str):
            default = self.DefaultAction[default]

        assert isinstance(mapping, (dict, list))
        if isinstance(mapping, list):
            mapping = dict(mapping)

        self._categories = {}

        src_label_cat = self._extractor.categories().get(AnnotationType.label)
        if src_label_cat is not None:
            self._make_label_id_map(src_label_cat, mapping, default)

        src_mask_cat = self._extractor.categories().get(AnnotationType.mask)
        if src_mask_cat is not None:
            assert src_label_cat is not None
            dst_mask_cat = MaskCategories(attributes=src_mask_cat.attributes)
            dst_mask_cat.colormap = {
                id: src_mask_cat.colormap[id]
                for id, _ in enumerate(src_label_cat.items)
                # BUG FIX: compare with None — a label remapped to dst
                # index 0 is a valid mapping, but 0 is falsy and such
                # entries used to be dropped here.
                if self._map_id(id) is not None or id == 0
            }
            self._categories[AnnotationType.mask] = dst_mask_cat

        src_points_cat = self._extractor.categories().get(AnnotationType.points)
        if src_points_cat is not None:
            assert src_label_cat is not None
            dst_points_cat = PointsCategories(attributes=src_points_cat.attributes)
            dst_points_cat.items = {
                id: src_points_cat.items[id]
                for id, item in enumerate(src_label_cat.items)
                # BUG FIX: same falsy-zero check as for the mask colormap.
                if self._map_id(id) is not None or id == 0
            }
            self._categories[AnnotationType.points] = dst_points_cat

    def _make_label_id_map(self, src_label_cat, label_mapping, default_action):
        """Builds the src->dst label index mapping and the destination
        label categories; unmapped labels are kept or deleted per
        default_action."""
        dst_label_cat = LabelCategories(attributes=src_label_cat.attributes)
        id_mapping = {}
        for src_index, src_label in enumerate(src_label_cat.items):
            dst_label = label_mapping.get(src_label.name)
            if not dst_label and default_action == self.DefaultAction.keep:
                dst_label = src_label.name # keep unspecified as is
            if not dst_label:
                continue

            dst_index = dst_label_cat.find(dst_label)[0]
            if dst_index is None:
                dst_index = dst_label_cat.add(dst_label,
                    src_label.parent, src_label.attributes)
            id_mapping[src_index] = dst_index

        if log.getLogger().isEnabledFor(log.DEBUG):
            log.debug("Label mapping:")
            for src_id, src_label in enumerate(src_label_cat.items):
                # BUG FIX: use an explicit None check so a label mapped to
                # dst index 0 is not logged as deleted.
                if id_mapping.get(src_id) is not None:
                    log.debug("#%s '%s' -> #%s '%s'",
                        src_id, src_label.name, id_mapping[src_id],
                        dst_label_cat.items[id_mapping[src_id]].name
                    )
                else:
                    log.debug("#%s '%s' -> <deleted>", src_id, src_label.name)

        self._map_id = lambda src_id: id_mapping.get(src_id, None)
        self._categories[AnnotationType.label] = dst_label_cat

    def categories(self):
        return self._categories

    def transform_item(self, item):
        # Remap label ids of labeled annotations; annotations whose label
        # was deleted from the mapping are dropped.
        annotations = []
        for ann in item.annotations:
            if ann.type in { AnnotationType.label, AnnotationType.mask,
                AnnotationType.points, AnnotationType.polygon,
                AnnotationType.polyline, AnnotationType.bbox
            } and ann.label is not None:
                conv_label = self._map_id(ann.label)
                if conv_label is not None:
                    annotations.append(ann.wrap(label=conv_label))
            else:
                annotations.append(ann.wrap())
        return item.wrap(annotations=annotations)

@ -1,590 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from collections import OrderedDict, defaultdict
from enum import Enum
from itertools import chain
from lxml import etree as ET
from datumaro.components.converter import Converter
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
CompiledMask, LabelCategories)
from datumaro.util import find, str_to_bool
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, remap_mask
from .format import (VocTask, VocPath, VocInstColormap,
parse_label_map, make_voc_label_map, make_voc_categories, write_label_map
)
def _convert_attr(name, attributes, type_conv, default=None, warn=True):
d = object()
value = attributes.get(name, d)
if value is d:
return default
try:
return type_conv(value)
except Exception as e:
log.warning("Failed to convert attribute '%s'='%s': %s" % \
(name, value, e))
return default
def _write_xml_bbox(bbox, parent_elem):
x, y, w, h = bbox
bbox_elem = ET.SubElement(parent_elem, 'bndbox')
ET.SubElement(bbox_elem, 'xmin').text = str(x)
ET.SubElement(bbox_elem, 'ymin').text = str(y)
ET.SubElement(bbox_elem, 'xmax').text = str(x + w)
ET.SubElement(bbox_elem, 'ymax').text = str(y + h)
return bbox_elem
# Where the output label map comes from: the standard VOC map ('voc'),
# or one derived from the source dataset's own labels ('source').
LabelmapType = Enum('LabelmapType', ['voc', 'source'])
class VocConverter(Converter):
    """Writes a dataset out in Pascal VOC layout: per-item annotation XMLs,
    class/instance segmentation masks, ImageSets subset lists and a label
    map file. The set of written artifacts is controlled by 'tasks'."""

    DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT

    @staticmethod
    def _split_tasks_string(s):
        # CLI helper: 'detection, segmentation' -> [VocTask.detection, ...]
        return [VocTask[i.strip()] for i in s.split(',')]

    @staticmethod
    def _get_labelmap(s):
        # CLI helper: accepts a labelmap file path or a LabelmapType name.
        if osp.isfile(s):
            return s
        try:
            return LabelmapType[s].name
        except KeyError:
            import argparse
            raise argparse.ArgumentTypeError()

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--apply-colormap', type=str_to_bool, default=True,
            help="Use colormap for class and instance masks "
                "(default: %(default)s)")
        parser.add_argument('--label-map', type=cls._get_labelmap, default=None,
            help="Labelmap file path or one of %s" % \
                ', '.join(t.name for t in LabelmapType))
        parser.add_argument('--allow-attributes',
            type=str_to_bool, default=True,
            help="Allow export of attributes (default: %(default)s)")
        parser.add_argument('--tasks', type=cls._split_tasks_string,
            help="VOC task filter, comma-separated list of {%s} "
                "(default: all)" % ', '.join(t.name for t in VocTask))
        return parser

    def __init__(self, extractor, save_dir,
            tasks=None, apply_colormap=True, label_map=None,
            allow_attributes=True, **kwargs):
        """tasks: VocTask, iterable of VocTask/names, or None for all tasks.
        label_map: labelmap file path, LabelmapType name, or a
        {label: [color, parts, actions]} dict; None selects the source map."""
        super().__init__(extractor, save_dir, **kwargs)

        assert tasks is None or isinstance(tasks, (VocTask, list, set))
        if tasks is None:
            tasks = set(VocTask)
        elif isinstance(tasks, VocTask):
            tasks = {tasks}
        else:
            tasks = set(t if t in VocTask else VocTask[t] for t in tasks)
        self._tasks = tasks

        self._apply_colormap = apply_colormap
        self._allow_attributes = allow_attributes

        if label_map is None:
            # BUG FIX: pass the member's *name* — _load_categories compares
            # against LabelmapType.*.name strings, so passing the enum
            # member itself fell through to the "Wrong labelmap" error.
            label_map = LabelmapType.source.name
        self._load_categories(label_map)

    def apply(self):
        self.make_dirs()
        self.save_subsets()
        self.save_label_map()

    def make_dirs(self):
        """Creates the output directory tree and remembers the paths."""
        save_dir = self._save_dir
        subsets_dir = osp.join(save_dir, VocPath.SUBSETS_DIR)
        cls_subsets_dir = osp.join(subsets_dir,
            VocPath.TASK_DIR[VocTask.classification])
        action_subsets_dir = osp.join(subsets_dir,
            VocPath.TASK_DIR[VocTask.action_classification])
        layout_subsets_dir = osp.join(subsets_dir,
            VocPath.TASK_DIR[VocTask.person_layout])
        segm_subsets_dir = osp.join(subsets_dir,
            VocPath.TASK_DIR[VocTask.segmentation])
        ann_dir = osp.join(save_dir, VocPath.ANNOTATIONS_DIR)
        img_dir = osp.join(save_dir, VocPath.IMAGES_DIR)
        segm_dir = osp.join(save_dir, VocPath.SEGMENTATION_DIR)
        inst_dir = osp.join(save_dir, VocPath.INSTANCES_DIR)
        # NOTE: img_dir and images_dir point to the same directory; both
        # attributes are kept for compatibility with existing users.
        images_dir = osp.join(save_dir, VocPath.IMAGES_DIR)

        os.makedirs(subsets_dir, exist_ok=True)
        os.makedirs(ann_dir, exist_ok=True)
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(segm_dir, exist_ok=True)
        os.makedirs(inst_dir, exist_ok=True)
        os.makedirs(images_dir, exist_ok=True)

        self._subsets_dir = subsets_dir
        self._cls_subsets_dir = cls_subsets_dir
        self._action_subsets_dir = action_subsets_dir
        self._layout_subsets_dir = layout_subsets_dir
        self._segm_subsets_dir = segm_subsets_dir
        self._ann_dir = ann_dir
        self._img_dir = img_dir
        self._segm_dir = segm_dir
        self._inst_dir = inst_dir
        self._images_dir = images_dir

    def get_label(self, label_id):
        """Returns the source label name for a label index."""
        return self._extractor. \
            categories()[AnnotationType.label].items[label_id].name

    def save_subsets(self):
        """Converts every subset: writes images, annotation XMLs and
        segmentation masks, and accumulates the per-task ImageSets lists."""
        for subset_name in self._extractor.subsets() or [None]:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            class_lists = OrderedDict()
            clsdet_list = OrderedDict()
            action_list = OrderedDict()
            layout_list = OrderedDict()
            segm_list = OrderedDict()

            for item in subset:
                log.debug("Converting item '%s'", item.id)

                image_filename = self._make_image_filename(item)
                if self._save_images:
                    if item.has_image and item.image.has_data:
                        self._save_image(item,
                            osp.join(self._images_dir, image_filename))
                    else:
                        log.debug("Item '%s' has no image", item.id)

                # Partition annotations by the VOC artifact they feed.
                labels = []
                bboxes = []
                masks = []
                for a in item.annotations:
                    if a.type == AnnotationType.label:
                        labels.append(a)
                    elif a.type == AnnotationType.bbox:
                        bboxes.append(a)
                    elif a.type == AnnotationType.mask:
                        masks.append(a)

                # NOTE: after __init__, self._tasks is never None, so the
                # first operand is effectively dead; kept for safety.
                if self._tasks is None and bboxes or \
                        self._tasks & {VocTask.detection, VocTask.person_layout,
                            VocTask.action_classification}:
                    root_elem = ET.Element('annotation')
                    # VOC-style ids look like '<folder>_<name>'.
                    if '_' in item.id:
                        folder = item.id[ : item.id.find('_')]
                    else:
                        folder = ''
                    ET.SubElement(root_elem, 'folder').text = folder
                    ET.SubElement(root_elem, 'filename').text = image_filename

                    source_elem = ET.SubElement(root_elem, 'source')
                    ET.SubElement(source_elem, 'database').text = 'Unknown'
                    ET.SubElement(source_elem, 'annotation').text = 'Unknown'
                    ET.SubElement(source_elem, 'image').text = 'Unknown'

                    if item.has_image:
                        h, w = item.image.size
                        if item.image.has_data:
                            image_shape = item.image.data.shape
                            # depth: 1 for grayscale, otherwise the channel count
                            c = 1 if len(image_shape) == 2 else image_shape[2]
                        else:
                            c = 3
                        size_elem = ET.SubElement(root_elem, 'size')
                        ET.SubElement(size_elem, 'width').text = str(w)
                        ET.SubElement(size_elem, 'height').text = str(h)
                        ET.SubElement(size_elem, 'depth').text = str(c)

                    item_segmented = 0 < len(masks)
                    ET.SubElement(root_elem, 'segmented').text = \
                        str(int(item_segmented))

                    objects_with_parts = []
                    objects_with_actions = defaultdict(dict)

                    # Split boxes into main objects and body parts.
                    main_bboxes = []
                    layout_bboxes = []
                    for bbox in bboxes:
                        label = self.get_label(bbox.label)
                        if self._is_part(label):
                            layout_bboxes.append(bbox)
                        elif self._is_label(label):
                            main_bboxes.append(bbox)

                    for new_obj_id, obj in enumerate(main_bboxes):
                        attr = obj.attributes

                        obj_elem = ET.SubElement(root_elem, 'object')

                        obj_label = self.get_label(obj.label)
                        ET.SubElement(obj_elem, 'name').text = obj_label

                        if 'pose' in attr:
                            ET.SubElement(obj_elem, 'pose').text = \
                                str(attr['pose'])

                        if 'truncated' in attr:
                            truncated = _convert_attr('truncated', attr, int, 0)
                            ET.SubElement(obj_elem, 'truncated').text = \
                                '%d' % truncated

                        if 'difficult' in attr:
                            difficult = _convert_attr('difficult', attr, int, 0)
                            ET.SubElement(obj_elem, 'difficult').text = \
                                '%d' % difficult

                        if 'occluded' in attr:
                            occluded = _convert_attr('occluded', attr, int, 0)
                            ET.SubElement(obj_elem, 'occluded').text = \
                                '%d' % occluded

                        bbox = obj.get_bbox()
                        if bbox is not None:
                            _write_xml_bbox(bbox, obj_elem)

                        # Parts are matched to their object by group id.
                        for part_bbox in filter(
                                lambda x: obj.group and obj.group == x.group,
                                layout_bboxes):
                            part_elem = ET.SubElement(obj_elem, 'part')
                            ET.SubElement(part_elem, 'name').text = \
                                self.get_label(part_bbox.label)
                            _write_xml_bbox(part_bbox.get_bbox(), part_elem)

                            objects_with_parts.append(new_obj_id)

                        label_actions = self._get_actions(obj_label)
                        actions_elem = ET.Element('actions')
                        for action in label_actions:
                            present = 0
                            if action in attr:
                                present = _convert_attr(action, attr,
                                    lambda v: int(v == True), 0)
                            ET.SubElement(actions_elem, action).text = \
                                '%d' % present

                            objects_with_actions[new_obj_id][action] = present
                        if len(actions_elem) != 0:
                            obj_elem.append(actions_elem)

                        if self._allow_attributes:
                            # Attributes handled natively above are not
                            # duplicated in the generic attributes block.
                            native_attrs = {'difficult', 'pose',
                                'truncated', 'occluded' }
                            native_attrs.update(label_actions)

                            attrs_elem = ET.Element('attributes')
                            for k, v in attr.items():
                                if k in native_attrs:
                                    continue
                                attr_elem = ET.SubElement(attrs_elem, 'attribute')
                                ET.SubElement(attr_elem, 'name').text = str(k)
                                ET.SubElement(attr_elem, 'value').text = str(v)
                            if len(attrs_elem):
                                obj_elem.append(attrs_elem)

                    if self._tasks & {VocTask.detection, VocTask.person_layout,
                            VocTask.action_classification}:
                        ann_path = osp.join(self._ann_dir, item.id + '.xml')
                        os.makedirs(osp.dirname(ann_path), exist_ok=True)
                        with open(ann_path, 'w') as f:
                            f.write(ET.tostring(root_elem,
                                encoding='unicode', pretty_print=True))

                        clsdet_list[item.id] = True
                        layout_list[item.id] = objects_with_parts
                        action_list[item.id] = objects_with_actions

                for label_ann in labels:
                    label = self.get_label(label_ann.label)
                    if not self._is_label(label):
                        continue
                    class_list = class_lists.get(item.id, set())
                    class_list.add(label_ann.label)
                    class_lists[item.id] = class_list

                    clsdet_list[item.id] = True

                if masks:
                    compiled_mask = CompiledMask.from_instance_masks(masks,
                        instance_labels=[self._label_id_mapping(m.label)
                            for m in masks])

                    self.save_segm(
                        osp.join(self._segm_dir, item.id + VocPath.SEGM_EXT),
                        compiled_mask.class_mask)
                    self.save_segm(
                        osp.join(self._inst_dir, item.id + VocPath.SEGM_EXT),
                        compiled_mask.instance_mask,
                        colormap=VocInstColormap)

                    segm_list[item.id] = True

                if len(item.annotations) == 0:
                    # Record unannotated items with a None marker so they
                    # still appear in the subset lists.
                    clsdet_list[item.id] = None
                    layout_list[item.id] = None
                    action_list[item.id] = None
                    segm_list[item.id] = None

            if self._tasks & {VocTask.classification, VocTask.detection,
                    VocTask.action_classification, VocTask.person_layout}:
                self.save_clsdet_lists(subset_name, clsdet_list)
            if self._tasks & {VocTask.classification}:
                self.save_class_lists(subset_name, class_lists)
            if self._tasks & {VocTask.action_classification}:
                self.save_action_lists(subset_name, action_list)
            if self._tasks & {VocTask.person_layout}:
                self.save_layout_lists(subset_name, layout_list)
            if self._tasks & {VocTask.segmentation}:
                self.save_segm_lists(subset_name, segm_list)

    def save_action_lists(self, subset_name, action_list):
        """Writes the action subset list and one '<action>_<subset>.txt'
        presence file per known action."""
        if not action_list:
            return

        os.makedirs(self._action_subsets_dir, exist_ok=True)

        ann_file = osp.join(self._action_subsets_dir, subset_name + '.txt')
        with open(ann_file, 'w') as f:
            for item in action_list:
                f.write('%s\n' % item)

        if len(action_list) == 0:
            return

        all_actions = set(chain(*(self._get_actions(l)
            for l in self._label_map)))
        for action in all_actions:
            ann_file = osp.join(self._action_subsets_dir,
                '%s_%s.txt' % (action, subset_name))
            with open(ann_file, 'w') as f:
                for item, objs in action_list.items():
                    if not objs:
                        continue
                    for obj_id, obj_actions in objs.items():
                        presented = obj_actions[action]
                        # VOC format: '<item> <1-based obj id> <+1/-1>'
                        f.write('%s %s % d\n' % \
                            (item, 1 + obj_id, 1 if presented else -1))

    def save_class_lists(self, subset_name, class_lists):
        """Writes per-label '<label>_<subset>.txt' presence files."""
        if not class_lists:
            return

        os.makedirs(self._cls_subsets_dir, exist_ok=True)

        for label in self._label_map:
            ann_file = osp.join(self._cls_subsets_dir,
                '%s_%s.txt' % (label, subset_name))
            with open(ann_file, 'w') as f:
                for item, item_labels in class_lists.items():
                    if not item_labels:
                        continue
                    item_labels = [self.get_label(l) for l in item_labels]
                    presented = label in item_labels
                    f.write('%s % d\n' % (item, 1 if presented else -1))

    def save_clsdet_lists(self, subset_name, clsdet_list):
        """Writes the classification/detection subset item list."""
        if not clsdet_list:
            return

        os.makedirs(self._cls_subsets_dir, exist_ok=True)

        ann_file = osp.join(self._cls_subsets_dir, subset_name + '.txt')
        with open(ann_file, 'w') as f:
            for item in clsdet_list:
                f.write('%s\n' % item)

    def save_segm_lists(self, subset_name, segm_list):
        """Writes the segmentation subset item list."""
        if not segm_list:
            return

        os.makedirs(self._segm_subsets_dir, exist_ok=True)

        ann_file = osp.join(self._segm_subsets_dir, subset_name + '.txt')
        with open(ann_file, 'w') as f:
            for item in segm_list:
                f.write('%s\n' % item)

    def save_layout_lists(self, subset_name, layout_list):
        """Writes the person-layout subset list with 1-based object ids."""
        if not layout_list:
            return

        os.makedirs(self._layout_subsets_dir, exist_ok=True)

        ann_file = osp.join(self._layout_subsets_dir, subset_name + '.txt')
        with open(ann_file, 'w') as f:
            for item, item_layouts in layout_list.items():
                if item_layouts:
                    for obj_id in item_layouts:
                        f.write('%s % d\n' % (item, 1 + obj_id))
                else:
                    f.write('%s\n' % (item))

    def save_segm(self, path, mask, colormap=None):
        """Saves a segmentation mask image, optionally colorized."""
        if self._apply_colormap:
            if colormap is None:
                colormap = self._categories[AnnotationType.mask].colormap
            mask = paint_mask(mask, colormap)
        save_image(path, mask, create_dir=True)

    def save_label_map(self):
        path = osp.join(self._save_dir, VocPath.LABELMAP_FILE)
        write_label_map(path, self._label_map)

    def _load_categories(self, label_map_source):
        """Builds self._categories, self._label_map and the label id
        mapping from a LabelmapType name, a labelmap dict, or a file."""
        # Robustness: also accept a LabelmapType member directly.
        if isinstance(label_map_source, LabelmapType):
            label_map_source = label_map_source.name

        if label_map_source == LabelmapType.voc.name:
            # use the default VOC colormap
            label_map = make_voc_label_map()

        elif label_map_source == LabelmapType.source.name and \
                AnnotationType.mask not in self._extractor.categories():
            # generate colormap for input labels
            labels = self._extractor.categories() \
                .get(AnnotationType.label, LabelCategories())
            label_map = OrderedDict((item.name, [None, [], []])
                for item in labels.items)

        elif label_map_source == LabelmapType.source.name and \
                AnnotationType.mask in self._extractor.categories():
            # use source colormap
            labels = self._extractor.categories()[AnnotationType.label]
            colors = self._extractor.categories()[AnnotationType.mask]
            label_map = OrderedDict()
            for idx, item in enumerate(labels.items):
                color = colors.colormap.get(idx)
                if color is not None:
                    label_map[item.name] = [color, [], []]

        elif isinstance(label_map_source, dict):
            label_map = OrderedDict(
                sorted(label_map_source.items(), key=lambda e: e[0]))

        elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
            label_map = parse_label_map(label_map_source)

        else:
            raise Exception("Wrong labelmap specified, "
                "expected one of %s or a file path" % \
                ', '.join(t.name for t in LabelmapType))

        # There must always be a label with color (0, 0, 0) at index 0
        bg_label = find(label_map.items(), lambda x: x[1][0] == (0, 0, 0))
        if bg_label is not None:
            bg_label = bg_label[0]
        else:
            bg_label = 'background'
            if bg_label not in label_map:
                has_colors = any(v[0] is not None for v in label_map.values())
                color = (0, 0, 0) if has_colors else None
                label_map[bg_label] = [color, [], []]
        label_map.move_to_end(bg_label, last=False)

        self._categories = make_voc_categories(label_map)

        # Update colors with assigned values
        colormap = self._categories[AnnotationType.mask].colormap
        for label_id, color in colormap.items():
            label_desc = label_map[
                self._categories[AnnotationType.label].items[label_id].name]
            label_desc[0] = color

        self._label_map = label_map
        self._label_id_mapping = self._make_label_id_map()

    def _is_label(self, s):
        return self._label_map.get(s) is not None

    def _is_part(self, s):
        # A "part" is any name listed in some label's parts list.
        for label_desc in self._label_map.values():
            if s in label_desc[1]:
                return True
        return False

    def _is_action(self, label, s):
        return s in self._get_actions(label)

    def _get_actions(self, label):
        label_desc = self._label_map.get(label)
        if not label_desc:
            return []
        return label_desc[2]

    def _make_label_id_map(self):
        """Returns a src->dst label index mapping function; source labels
        missing from the target label map are mapped to background (0)."""
        source_labels = {
            id: label.name for id, label in
            enumerate(self._extractor.categories().get(
                AnnotationType.label, LabelCategories()).items)
        }
        target_labels = {
            label.name: id for id, label in
            enumerate(self._categories[AnnotationType.label].items)
        }
        id_mapping = {
            src_id: target_labels.get(src_label, 0)
            for src_id, src_label in source_labels.items()
        }

        void_labels = [src_label for src_id, src_label in source_labels.items()
            if src_label not in target_labels]
        if void_labels:
            log.warning("The following labels are remapped to background: %s" %
                ', '.join(void_labels))
        log.debug("Saving segmentations with the following label mapping: \n%s" %
            '\n'.join(["#%s '%s' -> #%s '%s'" %
                (
                    src_id, src_label, id_mapping[src_id],
                    self._categories[AnnotationType.label] \
                        .items[id_mapping[src_id]].name
                )
                for src_id, src_label in source_labels.items()
            ])
        )

        def map_id(src_id):
            return id_mapping.get(src_id, 0)
        return map_id

    def _remap_mask(self, mask):
        return remap_mask(mask, self._label_id_mapping)
class VocClassificationConverter(VocConverter):
    """VocConverter preset fixed to the 'classification' task."""
    def __init__(self, *args, **kwargs):
        kwargs['tasks'] = VocTask.classification
        super().__init__(*args, **kwargs)

class VocDetectionConverter(VocConverter):
    """VocConverter preset fixed to the 'detection' task."""
    def __init__(self, *args, **kwargs):
        kwargs['tasks'] = VocTask.detection
        super().__init__(*args, **kwargs)

class VocLayoutConverter(VocConverter):
    """VocConverter preset fixed to the 'person_layout' task."""
    def __init__(self, *args, **kwargs):
        kwargs['tasks'] = VocTask.person_layout
        super().__init__(*args, **kwargs)

class VocActionConverter(VocConverter):
    """VocConverter preset fixed to the 'action_classification' task."""
    def __init__(self, *args, **kwargs):
        kwargs['tasks'] = VocTask.action_classification
        super().__init__(*args, **kwargs)

class VocSegmentationConverter(VocConverter):
    """VocConverter preset fixed to the 'segmentation' task."""
    def __init__(self, *args, **kwargs):
        kwargs['tasks'] = VocTask.segmentation
        super().__init__(*args, **kwargs)

@ -1,302 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import defaultdict
import logging as log
import numpy as np
import os.path as osp
from defusedxml import ElementTree
from datumaro.components.extractor import (SourceExtractor, DatasetItem,
AnnotationType, Label, Mask, Bbox, CompiledMask
)
from datumaro.util import dir_items
from datumaro.util.image import Image
from datumaro.util.mask_tools import lazy_mask, invert_colormap
from .format import (
VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories
)
_inverse_inst_colormap = invert_colormap(VocInstColormap)
class _VocExtractor(SourceExtractor):
    """Common base for VOC subset extractors.

    *path* is a subset list file, <root>/ImageSets/<task dir>/<subset>.txt;
    the dataset root is recovered by walking three directories up from it.
    """
    def __init__(self, path):
        assert osp.isfile(path), path
        self._path = path
        # <root>/ImageSets/<task>/<subset>.txt -> <root>
        self._dataset_dir = osp.dirname(osp.dirname(osp.dirname(path)))
        # subset name is the list file name without extension
        super().__init__(subset=osp.splitext(osp.basename(path))[0])
        self._categories = self._load_categories(self._dataset_dir)
        label_color = lambda label_idx: \
            self._categories[AnnotationType.mask].colormap.get(label_idx, None)
        log.debug("Loaded labels: %s" % ', '.join(
            "'%s' %s" % (l.name, ('(%s, %s, %s)' % c) if c else '')
            for i, l, c in ((i, l, label_color(i)) for i, l in enumerate(
                self._categories[AnnotationType.label].items
            ))
        ))
        self._items = self._load_subset_list(path)
    def categories(self):
        # label categories and (when available) the mask colormap
        return self._categories
    def __len__(self):
        return len(self._items)
    def _get_label_id(self, label):
        # index of *label* in the label categories; asserts the label exists
        label_id, _ = self._categories[AnnotationType.label].find(label)
        assert label_id is not None, label
        return label_id
    @staticmethod
    def _load_categories(dataset_path):
        # use labelmap.txt when the dataset provides one, else the VOC defaults
        label_map = None
        label_map_path = osp.join(dataset_path, VocPath.LABELMAP_FILE)
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        return make_voc_categories(label_map)
    @staticmethod
    def _load_subset_list(subset_path):
        # the first token of each line is the item id
        # (some task lists carry extra flags after it)
        with open(subset_path) as f:
            return [line.split()[0] for line in f]
class VocClassificationExtractor(_VocExtractor):
    """Extractor for the VOC classification task.

    Labels come from per-label subset files named
    '<label>_<subset>.txt', where a '1' flag marks label presence.
    """
    def __iter__(self):
        raw_anns = self._load_annotations()
        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
                item_id + VocPath.IMAGE_EXT)
            anns = self._parse_annotations(raw_anns, item_id)
            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)
    def _load_annotations(self):
        # collect {item id -> [label ids]} from the '<label>_<subset>.txt' files
        annotations = defaultdict(list)
        task_dir = osp.dirname(self._path)
        anno_files = [s for s in dir_items(task_dir, '.txt')
            if s.endswith('_' + osp.basename(self._path))]
        for ann_filename in anno_files:
            with open(osp.join(task_dir, ann_filename)) as f:
                # the label name is everything before the final '_'
                label = ann_filename[:ann_filename.rfind('_')]
                label_id = self._get_label_id(label)
                for line in f:
                    item, present = line.split()
                    if present == '1':
                        annotations[item].append(label_id)
        return dict(annotations)
    @staticmethod
    def _parse_annotations(raw_anns, item_id):
        # items missing from the annotation files simply have no labels
        return [Label(label_id) for label_id in raw_anns.get(item_id, [])]
class _VocXmlExtractor(_VocExtractor):
    """Extractor for VOC tasks annotated with per-item XML files
    (detection, person layout, action classification).

    Fix: _parse_bbox now returns None when an object has no 'bndbox'
    element instead of raising AttributeError - the caller already
    skips objects whose bbox is None.
    """
    def __init__(self, path, task):
        super().__init__(path)
        self._task = task

    def __iter__(self):
        anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR)

        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = item_id + VocPath.IMAGE_EXT
            height, width = 0, 0

            anns = []
            ann_file = osp.join(anno_dir, item_id + '.xml')
            if osp.isfile(ann_file):
                root_elem = ElementTree.parse(ann_file)
                # image size and file name, when recorded in the XML
                height = root_elem.find('size/height')
                if height is not None:
                    height = int(height.text)
                width = root_elem.find('size/width')
                if width is not None:
                    width = int(width.text)
                filename_elem = root_elem.find('filename')
                if filename_elem is not None:
                    image = filename_elem.text
                anns = self._parse_annotations(root_elem)

            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image)
            if height and width:
                image = Image(path=image, size=(height, width))

            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)

    def _parse_annotations(self, root_elem):
        """Read all 'object' elements into Bbox annotations (with parts,
        actions and standard VOC attributes)."""
        item_annotations = []

        for obj_id, object_elem in enumerate(root_elem.findall('object')):
            obj_id += 1  # 0 is reserved for "ungrouped"
            attributes = {}
            group = obj_id

            obj_label_id = None
            label_elem = object_elem.find('name')
            if label_elem is not None:
                obj_label_id = self._get_label_id(label_elem.text)

            obj_bbox = self._parse_bbox(object_elem)

            # objects without a label or a bbox cannot be represented
            if obj_label_id is None or obj_bbox is None:
                continue

            difficult_elem = object_elem.find('difficult')
            attributes['difficult'] = difficult_elem is not None and \
                difficult_elem.text == '1'

            truncated_elem = object_elem.find('truncated')
            attributes['truncated'] = truncated_elem is not None and \
                truncated_elem.text == '1'

            occluded_elem = object_elem.find('occluded')
            attributes['occluded'] = occluded_elem is not None and \
                occluded_elem.text == '1'

            pose_elem = object_elem.find('pose')
            if pose_elem is not None:
                attributes['pose'] = pose_elem.text

            point_elem = object_elem.find('point')
            if point_elem is not None:
                point_x = point_elem.find('x')
                point_y = point_elem.find('y')
                point = [float(point_x.text), float(point_y.text)]
                attributes['point'] = point

            # actions default to False; the XML marks the present ones
            actions_elem = object_elem.find('actions')
            actions = {a: False
                for a in self._categories[AnnotationType.label] \
                    .items[obj_label_id].attributes}
            if actions_elem is not None:
                for action_elem in actions_elem:
                    actions[action_elem.tag] = (action_elem.text == '1')
            for action, present in actions.items():
                attributes[action] = present

            has_parts = False
            for part_elem in object_elem.findall('part'):
                part = part_elem.find('name').text
                part_label_id = self._get_label_id(part)
                part_bbox = self._parse_bbox(part_elem)

                # body parts only matter for the person layout task
                if self._task is not VocTask.person_layout:
                    break
                if part_bbox is None:
                    continue
                has_parts = True
                item_annotations.append(Bbox(*part_bbox, label=part_label_id,
                    group=group))

            attributes_elem = object_elem.find('attributes')
            if attributes_elem is not None:
                for attr_elem in attributes_elem.iter('attribute'):
                    attributes[attr_elem.find('name').text] = \
                        attr_elem.find('value').text

            if self._task is VocTask.person_layout and not has_parts:
                continue
            if self._task is VocTask.action_classification and not actions:
                continue

            item_annotations.append(Bbox(*obj_bbox, label=obj_label_id,
                attributes=attributes, id=obj_id, group=group))

        return item_annotations

    @staticmethod
    def _parse_bbox(object_elem):
        """Read the object's 'bndbox' as [x, y, w, h]; None when absent."""
        bbox_elem = object_elem.find('bndbox')
        if bbox_elem is None:
            return None
        xmin = float(bbox_elem.find('xmin').text)
        xmax = float(bbox_elem.find('xmax').text)
        ymin = float(bbox_elem.find('ymin').text)
        ymax = float(bbox_elem.find('ymax').text)
        return [xmin, ymin, xmax - xmin, ymax - ymin]
class VocDetectionExtractor(_VocXmlExtractor):
    """XML extractor bound to the detection task."""
    def __init__(self, path):
        task = VocTask.detection
        super().__init__(path, task=task)
class VocLayoutExtractor(_VocXmlExtractor):
    """XML extractor bound to the person layout task."""
    def __init__(self, path):
        task = VocTask.person_layout
        super().__init__(path, task=task)
class VocActionExtractor(_VocXmlExtractor):
    """XML extractor bound to the action classification task."""
    def __init__(self, path):
        task = VocTask.action_classification
        super().__init__(path, task=task)
class VocSegmentationExtractor(_VocExtractor):
    """Extractor for the VOC segmentation task.

    Reads class masks from SegmentationClass and (when present) instance
    masks from SegmentationObject.

    Fix: replaced deprecated log.warn with log.warning.
    """
    def __iter__(self):
        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
                item_id + VocPath.IMAGE_EXT)
            anns = self._load_annotations(item_id)
            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)

    @staticmethod
    def _lazy_extract_mask(mask, c):
        # binds c in the closure so extraction happens on demand
        return lambda: mask == c

    def _load_annotations(self, item_id):
        """Build Mask annotations for *item_id* from the class and
        instance segmentation images, whichever are available."""
        item_annotations = []

        class_mask = None
        segm_path = osp.join(self._dataset_dir, VocPath.SEGMENTATION_DIR,
            item_id + VocPath.SEGM_EXT)
        if osp.isfile(segm_path):
            inverse_cls_colormap = \
                self._categories[AnnotationType.mask].inverse_colormap
            class_mask = lazy_mask(segm_path, inverse_cls_colormap)

        instances_mask = None
        inst_path = osp.join(self._dataset_dir, VocPath.INSTANCES_DIR,
            item_id + VocPath.SEGM_EXT)
        if osp.isfile(inst_path):
            instances_mask = lazy_mask(inst_path, _inverse_inst_colormap)

        if instances_mask is not None:
            compiled_mask = CompiledMask(class_mask, instances_mask)

            if class_mask is not None:
                label_cat = self._categories[AnnotationType.label]
                instance_labels = compiled_mask.get_instance_labels()
            else:
                # no class mask - instances exist but are unlabeled
                instance_labels = {i: None
                    for i in range(compiled_mask.instance_count)}

            for instance_id, label_id in instance_labels.items():
                image = compiled_mask.lazy_extract(instance_id)

                attributes = {}
                if label_id is not None:
                    # expose the label's actions, all unset by default
                    actions = {a: False
                        for a in label_cat.items[label_id].attributes
                    }
                    attributes.update(actions)

                item_annotations.append(Mask(
                    image=image, label=label_id,
                    attributes=attributes, group=instance_id
                ))
        elif class_mask is not None:
            # log.warn is a deprecated alias of log.warning
            log.warning("item '%s': has only class segmentation, "
                "instance masks will not be available" % item_id)
            class_mask = class_mask()
            classes = np.unique(class_mask)
            for label_id in classes:
                image = self._lazy_extract_mask(class_mask, label_id)
                item_annotations.append(Mask(image=image, label=label_id))

        return item_annotations

@ -1,206 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
from enum import Enum
from itertools import chain
import numpy as np
from datumaro.components.extractor import (AnnotationType,
LabelCategories, MaskCategories
)
# Annotation tasks defined by the VOC challenge
VocTask = Enum('VocTask', [
    'classification',
    'detection',
    'segmentation',
    'action_classification',
    'person_layout',
])
# Default VOC labels with their fixed numeric ids
# (255 is the "ignored" / void border class)
VocLabel = Enum('VocLabel', [
    ('background', 0),
    ('aeroplane', 1),
    ('bicycle', 2),
    ('bird', 3),
    ('boat', 4),
    ('bottle', 5),
    ('bus', 6),
    ('car', 7),
    ('cat', 8),
    ('chair', 9),
    ('cow', 10),
    ('diningtable', 11),
    ('dog', 12),
    ('horse', 13),
    ('motorbike', 14),
    ('person', 15),
    ('pottedplant', 16),
    ('sheep', 17),
    ('sofa', 18),
    ('train', 19),
    ('tvmonitor', 20),
    ('ignored', 255),
])
# Object pose values used in detection annotations
VocPose = Enum('VocPose', [
    'Unspecified',
    'Left',
    'Right',
    'Frontal',
    'Rear',
])
# Body parts used by the person layout task
VocBodyPart = Enum('VocBodyPart', [
    'head',
    'hand',
    'foot',
])
# Actions used by the action classification task
VocAction = Enum('VocAction', [
    'other',
    'jumping',
    'phoning',
    'playinginstrument',
    'reading',
    'ridingbike',
    'ridinghorse',
    'running',
    'takingphoto',
    'usingcomputer',
    'walking',
])
def generate_colormap(length=256):
    """Generate the standard VOC color palette.

    Returns an OrderedDict {index: (r, g, b)} where each channel bit is
    taken from the index bits, spread from the most significant position
    down (so index 1 -> (128, 0, 0), 2 -> (0, 128, 0), ...).
    """
    def bit(values, pos):
        return (values >> pos) & 1

    ids = np.arange(length, dtype=int)
    channels = np.zeros((length, 3), dtype=int)
    for shift in range(7, -1, -1):
        for channel in range(3):
            channels[:, channel] |= bit(ids, channel) << shift
        ids = ids >> 3
    return OrderedDict(enumerate(map(tuple, channels)))
# Class colormap restricted to the default VOC label ids
VocColormap = {id: color for id, color in generate_colormap(256).items()
    if id in [l.value for l in VocLabel]}
# Instance colormap: a distinct color per instance index
VocInstColormap = generate_colormap(256)
class VocPath:
    """Constants of the VOC dataset directory layout."""
    IMAGES_DIR = 'JPEGImages'
    ANNOTATIONS_DIR = 'Annotations'
    SEGMENTATION_DIR = 'SegmentationClass'
    INSTANCES_DIR = 'SegmentationObject'
    SUBSETS_DIR = 'ImageSets'
    IMAGE_EXT = '.jpg'
    SEGM_EXT = '.png'
    # optional user-provided label definitions at the dataset root
    LABELMAP_FILE = 'labelmap.txt'

    # subdirectory of ImageSets holding each task's subset lists
    TASK_DIR = {
        VocTask.classification: 'Main',
        VocTask.detection: 'Main',
        VocTask.segmentation: 'Segmentation',
        VocTask.action_classification: 'Action',
        VocTask.person_layout: 'Layout',
    }
def make_voc_label_map():
    """Default VOC label map: {name: [color, parts, actions]},
    ordered by the labels' numeric ids."""
    label_map = OrderedDict(
        (label.name, [VocColormap[label.value], [], []])
        for label in sorted(VocLabel, key=lambda l: l.value))
    # only 'person' carries body parts and actions in VOC
    person = label_map[VocLabel.person.name]
    person[1] = [part.name for part in VocBodyPart]
    person[2] = [action.name for action in VocAction]
    return label_map
def parse_label_map(path):
    """Parse a labelmap file into {name: [color, parts, actions]}.

    Each non-comment line has the form 'name:r,g,b:part,...:action,...',
    where every field after the name is optional. Returns None when
    *path* is empty/None. Raises ValueError on duplicate label names.
    """
    if not path:
        return None

    label_map = OrderedDict()
    with open(path, 'r') as f:
        for line in f:
            # skip empty and commented lines
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            fields = line.split(':')
            name = fields[0]
            if name in label_map:
                raise ValueError("Label '%s' is already defined" % name)

            color = None
            if 1 < len(fields) and len(fields[1]) != 0:
                rgb = fields[1].split(',')
                assert len(rgb) == 3, \
                    "Label '%s' has wrong color, expected 'r,g,b', got '%s'" % \
                    (name, rgb)
                color = tuple(int(c) for c in rgb)

            parts = fields[2].split(',') \
                if 2 < len(fields) and len(fields[2]) != 0 else []
            actions = fields[3].split(',') \
                if 3 < len(fields) and len(fields[3]) != 0 else []

            label_map[name] = [color, parts, actions]
    return label_map
def write_label_map(path, label_map):
    """Write {name: [color, parts, actions]} to a labelmap file in the
    'name:r,g,b:part,...:action,...' format understood by parse_label_map."""
    lines = ['# label:color_rgb:parts:actions\n']
    for label_name, label_desc in label_map.items():
        color_rgb = ','.join(str(c) for c in label_desc[0]) \
            if label_desc[0] else ''
        parts = ','.join(str(p) for p in label_desc[1])
        actions = ','.join(str(a) for a in label_desc[2])
        lines.append('%s\n' % ':'.join([label_name, color_rgb, parts, actions]))
    with open(path, 'w') as f:
        f.writelines(lines)
def make_voc_categories(label_map=None):
    """Build dataset categories (labels + mask colormap) from a label map.

    Falls back to the default VOC label map when *label_map* is None.
    When no label defines a color, a fresh colormap is generated.
    """
    if label_map is None:
        label_map = make_voc_label_map()

    label_categories = LabelCategories()
    label_categories.attributes.update(['difficult', 'truncated', 'occluded'])
    for name, desc in label_map.items():
        label_categories.add(name, attributes=desc[2])
    # register body parts as extra labels, keeping first-seen order
    unique_parts = OrderedDict(
        (part, None) for desc in label_map.values() for part in desc[1])
    for part in unique_parts:
        label_categories.add(part)

    if any(desc[0] is not None for desc in label_map.values()):
        # only copy the explicitly defined colors
        colormap = {
            label_categories.find(name)[0]: desc[0]
            for name, desc in label_map.items() if desc[0] is not None
        }
    else:
        colormap = generate_colormap(len(label_map))
    mask_categories = MaskCategories(colormap)
    mask_categories.inverse_colormap # pylint: disable=pointless-statement

    return {
        AnnotationType.label: label_categories,
        AnnotationType.mask: mask_categories,
    }

@ -1,56 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import os.path as osp
from datumaro.components.extractor import Importer
from .format import VocTask, VocPath
class VocImporter(Importer):
    """Finds VOC datasets and registers one project source per
    discovered subset list file."""

    # (task, extractor name, ImageSets subdirectory)
    _TASKS = [
        (VocTask.classification, 'voc_classification', 'Main'),
        (VocTask.detection, 'voc_detection', 'Main'),
        (VocTask.segmentation, 'voc_segmentation', 'Segmentation'),
        (VocTask.person_layout, 'voc_layout', 'Layout'),
        (VocTask.action_classification, 'voc_action', 'Action'),
    ]

    @classmethod
    def detect(cls, path):
        return 0 < len(cls.find_subsets(path))

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)
        if not subset_paths:
            raise Exception("Failed to find 'voc' dataset at '%s'" % path)

        for task, extractor_type, subset_path in subset_paths:
            subset_name = osp.splitext(osp.basename(subset_path))[0]
            project.add_source('%s-%s' % (task.name, subset_name), {
                'url': subset_path,
                'format': extractor_type,
                'options': dict(extra_params),
            })

        return project

    @staticmethod
    def find_subsets(path):
        """List (task, extractor name, subset file) triples found under
        *path*; per-label files (with '_' in the name) are skipped."""
        found = []
        for task, extractor_type, task_dir in VocImporter._TASKS:
            subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, task_dir)
            if not osp.isdir(subsets_dir):
                continue
            lists = [p for p in glob(osp.join(subsets_dir, '*.txt'))
                if '_' not in osp.basename(p)]
            found += [(task, extractor_type, p) for p in lists]
        return found

@ -1,108 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from collections import OrderedDict
from datumaro.components.converter import Converter
from datumaro.components.extractor import AnnotationType
from .format import YoloPath
def _make_yolo_bbox(img_size, box):
# https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
# <x> <y> <width> <height> - values relative to width and height of image
# <x> <y> - are center of rectangle
x = (box[0] + box[2]) / 2 / img_size[0]
y = (box[1] + box[3]) / 2 / img_size[1]
w = (box[2] - box[0]) / img_size[0]
h = (box[3] - box[1]) / img_size[1]
return x, y, w, h
class YoloConverter(Converter):
    """Writes a dataset in YOLO (darknet) format: obj.names, obj.data,
    per-subset image list files and one '<item id>.txt' per image.

    Fix: replaced deprecated log.warn with log.warning.
    """
    # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects
    DEFAULT_IMAGE_EXT = '.jpg'

    def apply(self):
        extractor = self._extractor
        save_dir = self._save_dir

        os.makedirs(save_dir, exist_ok=True)

        # label names, one per line, ordered by label id
        label_categories = extractor.categories()[AnnotationType.label]
        label_ids = {label.name: idx
            for idx, label in enumerate(label_categories.items)}
        with open(osp.join(save_dir, 'obj.names'), 'w') as f:
            f.writelines('%s\n' % l[0]
                for l in sorted(label_ids.items(), key=lambda x: x[1]))

        subset_lists = OrderedDict()

        for subset_name in extractor.subsets() or [None]:
            if subset_name and subset_name in YoloPath.SUBSET_NAMES:
                subset = extractor.get_subset(subset_name)
            elif not subset_name:
                subset_name = YoloPath.DEFAULT_SUBSET_NAME
                subset = extractor
            else:
                # darknet only understands a fixed set of subset names
                log.warning("Skipping subset export '%s'. "
                    "If specified, the only valid names are %s" % \
                    (subset_name, ', '.join(
                        "'%s'" % s for s in YoloPath.SUBSET_NAMES)))
                continue

            subset_dir = osp.join(save_dir, 'obj_%s_data' % subset_name)
            os.makedirs(subset_dir, exist_ok=True)

            image_paths = OrderedDict()

            for item in subset:
                # image size is required to normalize the boxes
                if not item.has_image:
                    raise Exception("Failed to export item '%s': "
                        "item has no image info" % item.id)
                height, width = item.image.size

                image_name = self._make_image_filename(item)
                if self._save_images:
                    if item.has_image and item.image.has_data:
                        self._save_image(item, osp.join(subset_dir, image_name))
                    else:
                        log.warning("Item '%s' has no image" % item.id)
                # list files reference images relative to the darknet 'data' dir
                image_paths[item.id] = osp.join('data',
                    osp.basename(subset_dir), image_name)

                # one '<label id> <xc> <yc> <w> <h>' line per bbox
                yolo_annotation = ''
                for bbox in item.annotations:
                    if bbox.type is not AnnotationType.bbox:
                        continue
                    if bbox.label is None:
                        continue

                    yolo_bb = _make_yolo_bbox((width, height), bbox.points)
                    yolo_bb = ' '.join('%.6f' % p for p in yolo_bb)
                    yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb)

                annotation_path = osp.join(subset_dir, '%s.txt' % item.id)
                os.makedirs(osp.dirname(annotation_path), exist_ok=True)
                with open(annotation_path, 'w') as f:
                    f.write(yolo_annotation)

            subset_list_name = '%s.txt' % subset_name
            subset_lists[subset_name] = subset_list_name
            with open(osp.join(save_dir, subset_list_name), 'w') as f:
                f.writelines('%s\n' % s for s in image_paths.values())

        # top-level dataset descriptor
        with open(osp.join(save_dir, 'obj.data'), 'w') as f:
            f.write('classes = %s\n' % len(label_ids))

            for subset_name, subset_list_name in subset_lists.items():
                f.write('%s = %s\n' % (subset_name,
                    osp.join('data', subset_list_name)))

            f.write('names = %s\n' % osp.join('data', 'obj.names'))
            f.write('backup = backup/\n')

@ -1,201 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os.path as osp
import re
from datumaro.components.extractor import (SourceExtractor, Extractor,
DatasetItem, AnnotationType, Bbox, LabelCategories
)
from datumaro.util import split_path
from datumaro.util.image import Image
from .format import YoloPath
class YoloExtractor(SourceExtractor):
    """Reads a YOLO (darknet) dataset from an obj.data-style config file.

    Items are loaded lazily: subset list files only record paths, and
    images/annotations are parsed on first access in _get().
    """
    class Subset(Extractor):
        # Thin view over one subset; items maps item id -> path or DatasetItem
        def __init__(self, name, parent):
            super().__init__()
            self._name = name
            self._parent = parent
            self.items = OrderedDict()
        def __iter__(self):
            for item_id in self.items:
                yield self._parent._get(item_id, self._name)
        def __len__(self):
            return len(self.items)
        def categories(self):
            return self._parent.categories()
    def __init__(self, config_path, image_info=None):
        """
        config_path: the dataset descriptor ('.data') file
        image_info: optional {image name: (height, width)} dict or a path
            to an 'images.meta'-style file with 'name h w' lines
        """
        super().__init__()
        if not osp.isfile(config_path):
            raise Exception("Can't read dataset descriptor file '%s'" %
                config_path)
        rootpath = osp.dirname(config_path)
        self._path = rootpath
        assert image_info is None or isinstance(image_info, (str, dict))
        if image_info is None:
            # fall back to the default meta file next to the config, if any
            image_info = osp.join(rootpath, YoloPath.IMAGE_META_FILE)
            if not osp.isfile(image_info):
                image_info = {}
        if isinstance(image_info, str):
            if not osp.isfile(image_info):
                raise Exception("Can't read image meta file '%s'" % image_info)
            with open(image_info) as f:
                image_info = {}
                for line in f:
                    image_name, h, w = line.strip().split()
                    image_info[image_name] = (int(h), int(w))
        self._image_info = image_info
        with open(config_path, 'r') as f:
            config_lines = f.readlines()
        subsets = OrderedDict()
        names_path = None
        # the config is a sequence of 'key = value' lines
        for line in config_lines:
            match = re.match(r'(\w+)\s*=\s*(.+)$', line)
            if not match:
                continue
            key = match.group(1)
            value = match.group(2)
            if key == 'names':
                names_path = value
            elif key in YoloPath.SUBSET_NAMES:
                subsets[key] = value
            else:
                continue
        if not names_path:
            raise Exception("Failed to parse labels path from '%s'" % \
                config_path)
        # replace each subset's list-file path with a populated Subset
        for subset_name, list_path in subsets.items():
            list_path = osp.join(self._path, self.localize_path(list_path))
            if not osp.isfile(list_path):
                raise Exception("Not found '%s' subset list file" % subset_name)
            subset = YoloExtractor.Subset(subset_name, self)
            with open(list_path, 'r') as f:
                subset.items = OrderedDict(
                    (self.name_from_path(p), self.localize_path(p))
                    for p in f
                )
            subsets[subset_name] = subset
        self._subsets = subsets
        self._categories = {
            AnnotationType.label:
                self._load_categories(
                    osp.join(self._path, self.localize_path(names_path)))
        }
    @staticmethod
    def localize_path(path):
        # strip the conventional darknet 'data/' prefix, if present
        path = path.strip()
        default_base = osp.join('data', '')
        if path.startswith(default_base): # default path
            path = path[len(default_base) : ]
        return path
    @classmethod
    def name_from_path(cls, path):
        """Derive the item id from a list-file entry."""
        path = cls.localize_path(path)
        parts = split_path(path)
        if 1 < len(parts) and not osp.isabs(path):
            # NOTE: when path is like [data/]<subset_obj>/<image_name>
            # drop everything but <image name>
            # <image name> can be <a/b/c/filename.ext>, so no just basename()
            path = osp.join(*parts[1:])
        return osp.splitext(path)[0]
    def _get(self, item_id, subset_name):
        """Materialize the item on first access and cache it in the subset."""
        subset = self._subsets[subset_name]
        item = subset.items[item_id]
        if isinstance(item, str):
            # still a path - parse image info and annotations now
            image_size = self._image_info.get(item_id)
            image = Image(path=osp.join(self._path, item), size=image_size)
            anno_path = osp.splitext(image.path)[0] + '.txt'
            annotations = self._parse_annotations(anno_path, image)
            item = DatasetItem(id=item_id, subset=subset_name,
                image=image, annotations=annotations)
            subset.items[item_id] = item
        return item
    @staticmethod
    def _parse_annotations(anno_path, image):
        """Read '<label> <xc> <yc> <w> <h>' lines into Bbox annotations,
        denormalized by the image size."""
        lines = []
        with open(anno_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line:
                    lines.append(line)
        annotations = []
        if lines:
            size = image.size # use image info as late as possible
            if size is None:
                raise Exception("Can't find image info for '%s'" % image.path)
            image_height, image_width = size
            for line in lines:
                label_id, xc, yc, w, h = line.split()
                label_id = int(label_id)
                w = float(w)
                h = float(h)
                # convert center-based coords back to top-left corner
                x = float(xc) - w * 0.5
                y = float(yc) - h * 0.5
                annotations.append(Bbox(
                    round(x * image_width, 1), round(y * image_height, 1),
                    round(w * image_width, 1), round(h * image_height, 1),
                    label=label_id
                ))
        return annotations
    @staticmethod
    def _load_categories(names_path):
        # the names file lists one label per line, order defines ids
        label_categories = LabelCategories()
        with open(names_path, 'r') as f:
            for label in f:
                label_categories.add(label.strip())
        return label_categories
    def categories(self):
        return self._categories
    def __iter__(self):
        for subset in self._subsets.values():
            for item in subset:
                yield item
    def __len__(self):
        length = 0
        for subset in self._subsets.values():
            length += len(subset)
        return length
    def subsets(self):
        return list(self._subsets)
    def get_subset(self, name):
        return self._subsets[name]

@ -1,11 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
class YoloPath:
    """Constants of the YOLO (darknet) dataset layout."""
    DEFAULT_SUBSET_NAME = 'train'
    SUBSET_NAMES = ['train', 'valid']
    # optional file with 'name height width' lines at the dataset root
    IMAGE_META_FILE = 'images.meta'

@ -1,46 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from glob import glob
import logging as log
import os.path as osp
from datumaro.components.extractor import Importer
class YoloImporter(Importer):
    """Finds YOLO datasets ('.data' config files) under a path and
    registers one project source per config."""

    @classmethod
    def detect(cls, path):
        return 0 < len(cls.find_configs(path))

    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        config_paths = self.find_configs(path)
        if not config_paths:
            raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

        for config_path in config_paths:
            log.info("Found a dataset at '%s'" % config_path)

            # '<parent dir>_<config name>' keeps sources distinguishable
            source_name = '%s_%s' % (
                osp.basename(osp.dirname(config_path)),
                osp.splitext(osp.basename(config_path))[0])
            project.add_source(source_name, {
                'url': config_path,
                'format': 'yolo',
                'options': dict(extra_params),
            })

        return project

    @staticmethod
    def find_configs(path):
        """A '.data' file path matches itself; otherwise search recursively."""
        if path.endswith('.data') and osp.isfile(path):
            return [path]
        return glob(osp.join(path, '**', '*.data'), recursive=True)

@ -1,93 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os
import os.path as osp
from itertools import islice
def find(iterable, pred=lambda x: True, default=None):
    """Return the first element of *iterable* satisfying *pred*,
    or *default* when there is none."""
    for item in iterable:
        if pred(item):
            return item
    return default
def dir_items(path, ext, truncate_ext=False):
    """List directory entries whose names contain *ext*, optionally
    truncated at its last occurrence.

    NOTE(review): matches *ext* anywhere in the name (rfind), not only
    as a suffix - preserved as-is.
    """
    items = []
    for name in os.listdir(path):
        pos = name.rfind(ext)
        if pos == -1:
            continue
        items.append(name[:pos] if truncate_ext else name)
    return items
def split_path(path):
    """Split a (normalized) path into its components, root first."""
    parts = []
    path = osp.normpath(path)
    while True:
        head, tail = osp.split(path)
        if tail:
            parts.append(tail)
        else:
            if head:
                parts.append(head)
            break
        path = head
    parts.reverse()
    return parts
def cast(value, type_conv, default=None):
    """Best-effort conversion of *value* with *type_conv*; returns
    *default* for None input or on any conversion failure."""
    if value is not None:
        try:
            return type_conv(value)
        except Exception:
            pass
    return default
def to_snake_case(s):
    """Convert a CamelCase identifier to snake_case."""
    if not s:
        return ''

    chunks = [s[0].lower()]
    for pos in range(1, len(s)):
        ch = s[pos]
        if ch.isalpha() and ch.isupper():
            prev = s[pos - 1]
            # keep upper-case runs together: "HTML" -> "html", not "h_t_m_l"
            if not (prev.isalpha() and prev.isupper()):
                chunks.append('_')
            chunks.append(ch.lower())
        else:
            chunks.append(ch)
    return ''.join(chunks)
def pairs(iterable):
    """Iterate non-overlapping consecutive pairs: 'abcd' -> (a, b), (c, d).
    A trailing odd element is dropped."""
    it = iter(iterable)
    return zip(it, it)
def take_by(iterable, count):
    """
    Returns elements from the input iterable by batches of N items.
    ('abcdefg', 3) -> ['a', 'b', 'c'], ['d', 'e', 'f'], ['g']
    """
    it = iter(iterable)
    batch = list(islice(it, count))
    while batch:
        yield batch
        batch = list(islice(it, count))
def str_to_bool(s):
    """Parse a human-friendly boolean string (case-insensitive);
    raises ValueError for anything unrecognized."""
    truthy = {'true', '1', 'ok', 'yes', 'y'}
    falsy = {'false', '0', 'no', 'n'}

    t = s.lower()
    if t in truthy:
        return True
    if t in falsy:
        return False
    raise ValueError("Can't convert value '%s' to bool" % s)
def filter_dict(d, exclude_keys):
    """Return a shallow copy of *d* without the keys in *exclude_keys*."""
    result = {}
    for key, value in d.items():
        if key not in exclude_keys:
            result[key] = value
    return result

@ -1,212 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from itertools import groupby
import numpy as np
from datumaro.components.extractor import _Shape, Mask, AnnotationType, RleMask
from datumaro.util.mask_tools import mask_to_rle
def find_instances(instance_anns):
    """Group annotations by their 'group' id.

    Annotations with a falsy group id are treated as ungrouped and
    become single-element groups.
    """
    by_group = sorted(instance_anns, key=lambda a: a.group)

    groups = []
    for group_id, members in groupby(by_group, key=lambda a: a.group):
        if group_id:
            groups.append(list(members))
        else:
            groups.extend([m] for m in members)
    return groups
def find_group_leader(group):
    """Return the member of *group* with the largest area."""
    return max(group, key=lambda ann: ann.get_area())
def _get_bbox(ann):
    """Bbox of a shape/mask annotation; anything else is assumed to be
    an [x, y, w, h] box already and passed through."""
    if isinstance(ann, (_Shape, Mask)):
        return ann.get_bbox()
    return ann
def max_bbox(annotations):
    """Smallest [x, y, w, h] box covering all input boxes/annotations
    (zeros for empty input)."""
    boxes = [_get_bbox(ann) for ann in annotations]
    left = min((b[0] for b in boxes), default=0)
    top = min((b[1] for b in boxes), default=0)
    right = max((b[0] + b[2] for b in boxes), default=0)
    bottom = max((b[1] + b[3] for b in boxes), default=0)
    return [left, top, right - left, bottom - top]
def mean_bbox(annotations):
    """Element-wise average [x, y, w, h] of the input boxes/annotations
    (raises ZeroDivisionError for empty input, as before)."""
    n = len(annotations)
    boxes = [_get_bbox(ann) for ann in annotations]
    left = sum(b[0] for b in boxes) / n
    top = sum(b[1] for b in boxes) / n
    right = sum(b[0] + b[2] for b in boxes) / n
    bottom = sum(b[1] + b[3] for b in boxes) / n
    return [left, top, right - left, bottom - top]
def softmax(x):
    """Numerically stable softmax over *x*.

    Subtracting the maximum before exponentiation avoids overflow for
    large inputs; the result is mathematically unchanged.
    """
    e = np.exp(x - np.max(x))
    return e / sum(e)
def nms(segments, iou_thresh=0.5):
    """
    Non-maxima suppression algorithm.
    """
    # NOTE(review): 'iou' is not defined in this module; presumably this
    # should call bbox_iou or segment_iou - confirm before use.
    # segments are processed from highest to lowest 'score' attribute
    indices = np.argsort([b.attributes['score'] for b in segments])
    ious = np.array([[iou(a, b) for b in segments] for a in segments])

    predictions = []
    while len(indices) != 0:
        i = len(indices) - 1
        pred_idx = indices[i]
        to_remove = [i]
        predictions.append(segments[pred_idx])
        # drop all remaining candidates overlapping the kept one too much
        for i, box_idx in enumerate(indices[:i]):
            if iou_thresh < ious[pred_idx, box_idx]:
                to_remove.append(i)
        indices = np.delete(indices, to_remove)

    return predictions
def bbox_iou(a, b):
    """IoU for axis-aligned bounding boxes.

    Returns -1 when the boxes do not intersect, a value in [0; 1]
    otherwise.
    """
    ax, ay, aw, ah = _get_bbox(a)
    bx, by, bw, bh = _get_bbox(b)

    inter_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    intersection = inter_w * inter_h
    if not intersection:
        return -1

    union = aw * ah + bw * bh - intersection
    return intersection / union
def segment_iou(a, b):
    """
    Generic IoU computation with masks, polygons, and boxes.
    Returns -1 if no intersection, [0; 1] otherwise
    """
    from pycocotools import mask as mask_utils

    a_bbox = a.get_bbox()
    b_bbox = b.get_bbox()

    # if either side is a plain bbox, compare boxes only
    is_bbox = AnnotationType.bbox in [a.type, b.type]
    if is_bbox:
        a = [a_bbox]
        b = [b_bbox]
    else:
        # common raster extent covering both shapes
        w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2])
        h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3])

        def _to_rle(ann):
            # normalize any supported annotation to a COCO RLE list
            if ann.type == AnnotationType.polygon:
                return mask_utils.frPyObjects([ann.points], h, w)
            elif isinstance(ann, RleMask):
                return [ann.rle]
            elif ann.type == AnnotationType.mask:
                return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w)
            else:
                raise TypeError("Unexpected arguments: %s, %s" % (a, b))
        a = _to_rle(a)
        b = _to_rle(b)
    # NOTE(review): the last argument is pycocotools' 'iscrowd' flag,
    # which alters how the union is computed for non-bbox inputs -
    # confirm against the pycocotools mask API.
    return float(mask_utils.iou(a, b, [not is_bbox]))
def PDJ(a, b, eps=None, ratio=0.05, bbox=None):
    """
    Percentage of Detected Joints metric.
    Counts the number of matching points.

    A pair of points matches when their distance is below *eps*; when
    *eps* is not given, it is *ratio* of the (mean) bbox diagonal.
    """
    assert eps is not None or ratio is not None

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not eps:
        if bbox is None:
            bbox = mean_bbox([a, b])
        diag = (bbox[2] ** 2 + bbox[3] ** 2) ** 0.5
        eps = ratio * diag

    dists = np.linalg.norm(p1 - p2, axis=1)
    matched = np.count_nonzero(dists < eps)
    return matched / len(p1)
def OKS(a, b, sigma=None, bbox=None, scale=None):
    """
    Object Keypoint Similarity metric.
    https://cocodataset.org/#keypoints-eval

    *sigma* defaults to 0.1; *scale* defaults to the area of the mean
    bbox of the two inputs.
    """
    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not sigma:
        sigma = 0.1
    else:
        assert len(sigma) == len(p1)

    if not scale:
        if bbox is None:
            bbox = mean_bbox([a, b])
        scale = bbox[2] * bbox[3]

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(np.exp(-(dists ** 2) / (2 * scale * (2 * sigma) ** 2)))
def smooth_line(points, segments):
    """Resample a polyline given as a flat [x0, y0, x1, y1, ...] list
    into *segments* equal-length steps.

    Returns (new_points, step) where new_points is a (segments + 1, 2)
    array of vertices.

    NOTE(review): when the input already has exactly *segments* points
    the original flat list is returned alone (without step) - callers
    must handle both return shapes; confirm this asymmetry is intended.
    """
    assert 2 <= len(points) // 2 and len(points) % 2 == 0

    if len(points) // 2 == segments:
        return points

    points = list(points)
    if len(points) == 2:
        # a single point cannot be interpolated - duplicate it
        points.extend(points)
    points = np.array(points).reshape((-1, 2))

    # cumulative arc length along the source polyline
    lengths = np.linalg.norm(points[1:] - points[:-1], axis=1)
    dists = [0]
    for l in lengths:
        dists.append(dists[-1] + l)
    step = dists[-1] / segments

    new_points = np.zeros((segments + 1, 2))
    new_points[0] = points[0]

    old_segment = 0
    for new_segment in range(1, segments + 1):
        pos = new_segment * step
        # advance to the source segment containing pos
        while dists[old_segment + 1] < pos and old_segment + 2 < len(dists):
            old_segment += 1
        segment_start = dists[old_segment]
        segment_len = lengths[old_segment]
        prev_p = points[old_segment]
        next_p = points[old_segment + 1]
        # linear interpolation within the source segment
        r = (pos - segment_start) / segment_len
        new_points[new_segment] = prev_p * (1 - r) + next_p * r

    return new_points, step

@ -1,33 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import attr
_NOTSET = object()
def not_empty(inst, attribute, x):
    """attrs-style validator: rejects empty collections/strings."""
    assert 0 < len(x), x
def default_if_none(conv):
    """attrs validator factory: substitute the attribute's default for
    None values, and coerce other values to the attribute's type
    (falling back to *conv*) when they do not match it."""
    def validator(inst, attribute, value):
        if value is None:
            default = attribute.default
            if callable(default):
                value = default()
            elif isinstance(default, attr.Factory):
                value = default.factory()
            else:
                value = default
        else:
            expected = attribute.type or conv
            if not isinstance(value, expected):
                value = conv(value)
        setattr(inst, attribute.name, value)
    return validator
def ensure_cls(c):
    """Return a converter that passes instances of *c* through and
    builds one from a keyword mapping otherwise."""
    def converter(arg):
        return arg if isinstance(arg, c) else c(**arg)
    return converter

@ -1,113 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from enum import Enum
from datumaro.components.project import Project
from datumaro.util.image import load_image
# Kinds of entities a CLI argument can refer to
TargetKinds = Enum('TargetKinds',
    ['project', 'source', 'external_dataset', 'inference', 'image'])
def is_project_name(value, project):
    """True when *value* equals the given project's configured name."""
    return project.config.project_name == value
def is_project_path(value):
    """True when *value* points to a loadable project; any load failure
    (deliberately broad) means False."""
    if not value:
        return False
    try:
        Project.load(value)
        return True
    except Exception:
        return False
def is_project(value, project=None):
    """True when *value* is a project path, or (given *project*) its name."""
    if is_project_path(value):
        return True
    if project is not None:
        return is_project_name(value, project)
    return False
def is_source(value, project=None):
    """True when *project* is given and has a source named *value*."""
    if project is None:
        return False
    try:
        project.get_source(value)
    except KeyError:
        return False
    return True
def is_external_source(value):
    # Placeholder: external dataset detection is not implemented yet.
    return False

def is_inference_path(value):
    # Placeholder: inference result detection is not implemented yet.
    return False
def is_image_path(value):
    """Return True if *value* points at a readable image file.

    Probes by actually loading the image; any error means "not an image".
    """
    try:
        return load_image(value) is not None
    except Exception:
        return False
class Target:
    """A CLI target kind paired with a predicate that recognizes its values."""

    def __init__(self, kind, test, is_default=False, name=None):
        self.kind = kind              # a TargetKinds member (or similar tag)
        self.test = test              # callable(value) -> bool
        self.is_default = is_default  # matches empty input when True
        self.name = name              # human-readable label for messages

    def _get_fields(self):
        # Field order matters: target_selector() unpacks instances as
        # (kind, test, is_default, name).
        return [self.kind, self.test, self.is_default, self.name]

    def __str__(self):
        if self.name:
            return self.name
        return str(self.kind)

    def __len__(self):
        return len(self._get_fields())

    def __iter__(self):
        yield from self._get_fields()
def ProjectTarget(kind=TargetKinds.project, test=None,
        is_default=False, name='project name or path',
        project=None):
    """Target matching a project name (within *project*) or a project path."""
    if test is None:
        test = lambda v: is_project(v, project=project)
    return Target(kind, test, is_default, name)

def SourceTarget(kind=TargetKinds.source, test=None,
        is_default=False, name='source name',
        project=None):
    """Target matching a data source registered in *project*."""
    if test is None:
        test = lambda v: is_source(v, project=project)
    return Target(kind, test, is_default, name)

def ExternalDatasetTarget(kind=TargetKinds.external_dataset,
        test=is_external_source,
        is_default=False, name='external dataset path'):
    """Target for external dataset paths (detection not implemented yet)."""
    return Target(kind, test, is_default, name)

def InferenceTarget(kind=TargetKinds.inference, test=is_inference_path,
        is_default=False, name='inference path'):
    """Target for inference result paths (detection not implemented yet)."""
    return Target(kind, test, is_default, name)

def ImageTarget(kind=TargetKinds.image, test=is_image_path,
        is_default=False, name='image path'):
    """Target for paths of loadable image files."""
    return Target(kind, test, is_default, name)
def target_selector(*targets):
    """Build an argparse ``type=`` callable that classifies its argument.

    Each target is unpacked as (kind, test, is_default, name); the first
    one whose predicate accepts the value wins.  Default targets also
    accept an empty or missing value.
    """
    def selector(value):
        empty = value == '' or value is None
        for kind, test, is_default, _ in targets:
            if (is_default and empty) or test(value):
                return (kind, value)
        names = ', '.join(str(t) for t in targets)
        raise argparse.ArgumentTypeError('Value should be one of: %s' % names)
    return selector

@ -1,246 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from enum import Enum
from io import BytesIO
import numpy as np
import os
import os.path as osp
# Available image I/O backends; OpenCV is preferred, PIL is the fallback.
_IMAGE_BACKENDS = Enum('_IMAGE_BACKENDS', ['cv2', 'PIL'])
_IMAGE_BACKEND = None
try:
    import cv2
    _IMAGE_BACKEND = _IMAGE_BACKENDS.cv2
except ImportError:
    import PIL
    _IMAGE_BACKEND = _IMAGE_BACKENDS.PIL

from datumaro.util.image_cache import ImageCache as _ImageCache
def load_image(path):
    """
    Reads an image in the HWC Grayscale/BGR(A) float [0; 255] format.
    """
    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2
        # IMREAD_UNCHANGED keeps alpha and the original channel count;
        # cv2 already returns BGR(A).
        # NOTE(review): cv2.imread returns None for unreadable files, so
        # astype below would raise AttributeError instead of a clear error.
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        image = image.astype(np.float32)
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image
        image = Image.open(path)
        # PIL yields RGB(A); swap the first three channels to BGR.
        image = np.asarray(image, dtype=np.float32)
        if len(image.shape) == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
    else:
        raise NotImplementedError()

    # Sanity-check: grayscale HW or color HWC with 3/4 channels.
    assert len(image.shape) in {2, 3}
    if len(image.shape) == 3:
        assert image.shape[2] in {3, 4}
    return image
def save_image(path, image, create_dir=False, **kwargs):
    """Write *image* (HWC BGR(A) or grayscale array) to *path* as uint8.

    Args:
        path: destination file; the extension selects the format
        image: numpy array; values are truncated to uint8
        create_dir: create missing parent directories when True
        **kwargs: 'jpeg_quality' (int) is honored for JPEG output

    Raises:
        FileNotFoundError: parent directory missing and create_dir is False.
    """
    # NOTE: Check destination path for existence:
    # OpenCV silently fails if target directory does not exist
    dst_dir = osp.dirname(path)
    if dst_dir:
        if create_dir:
            os.makedirs(dst_dir, exist_ok=True)
        elif not osp.isdir(dst_dir):
            raise FileNotFoundError("Directory does not exist: '%s'" % dst_dir)

    if not kwargs:
        kwargs = {}

    # Fix: the old check used path[-4:] == '.JPG', which silently missed
    # '.jpeg' files; recognize both extensions case-insensitively.
    is_jpeg = osp.splitext(path)[1].upper() in ('.JPG', '.JPEG')

    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2

        params = []
        if is_jpeg:
            params = [
                int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
            ]

        image = image.astype(np.uint8)
        cv2.imwrite(path, image, params=params)
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        params = {}
        # Fix: only forward the quality when the caller supplied one;
        # previously quality=None was always passed, which breaks PIL's
        # JPEG encoder when 'jpeg_quality' was not given.
        if kwargs.get('jpeg_quality') is not None:
            params['quality'] = kwargs['jpeg_quality']
        if kwargs.get('jpeg_quality') == 100:
            params['subsampling'] = 0

        image = image.astype(np.uint8)
        if len(image.shape) == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
        image = Image.fromarray(image)
        image.save(path, **params)
    else:
        raise NotImplementedError()
def encode_image(image, ext, **kwargs):
    """Encode *image* into the format given by extension *ext*; return bytes.

    Args:
        image: numpy array (HWC BGR(A) or grayscale); truncated to uint8
        ext: target format extension, with or without a leading dot
        **kwargs: 'jpeg_quality' (int) is honored for JPEG output

    Raises:
        Exception: the cv2 backend failed to encode the image.
    """
    if not kwargs:
        kwargs = {}

    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2

        if not ext.startswith('.'):
            ext = '.' + ext
        # Fix: the old check matched only '.JPG', silently skipping the
        # quality parameters for '.jpeg'.
        params = []
        if ext.upper() in ('.JPG', '.JPEG'):
            params = [
                int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
            ]

        image = image.astype(np.uint8)
        success, result = cv2.imencode(ext, image, params=params)
        if not success:
            raise Exception("Failed to encode image to '%s' format" % (ext))
        return result.tobytes()
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        if ext.startswith('.'):
            ext = ext[1:]

        params = {}
        # Fix: only forward the quality when supplied; passing quality=None
        # (the old behavior without 'jpeg_quality') breaks PIL's JPEG encoder.
        if kwargs.get('jpeg_quality') is not None:
            params['quality'] = kwargs['jpeg_quality']
        if kwargs.get('jpeg_quality') == 100:
            params['subsampling'] = 0

        image = image.astype(np.uint8)
        if len(image.shape) == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
        image = Image.fromarray(image)
        with BytesIO() as buffer:
            # NOTE(review): PIL expects canonical format names like 'JPEG';
            # lowercase/short names rely on PIL's normalization -- confirm
            # for exotic extensions.
            image.save(buffer, format=ext, **params)
            return buffer.getvalue()
    else:
        raise NotImplementedError()
def decode_image(image_bytes):
    """Decode encoded image bytes into HWC float32 [0; 255], BGR(A) channels."""
    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2
        image = np.frombuffer(image_bytes, dtype=np.uint8)
        image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED)
        image = image.astype(np.float32)
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image
        image = Image.open(BytesIO(image_bytes))
        # NOTE(review): np.asarray over a PIL image can return a read-only
        # array in some Pillow versions; the in-place channel swap below
        # would then fail -- confirm against the supported Pillow range.
        image = np.asarray(image, dtype=np.float32)
        if len(image.shape) == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
    else:
        raise NotImplementedError()

    # Sanity-check: grayscale HW or color HWC with 3/4 channels.
    assert len(image.shape) in {2, 3}
    if len(image.shape) == 3:
        assert image.shape[2] in {3, 4}
    return image
class lazy_image:
    """Deferred image loader: calling the object loads (and caches) the image.

    Hashable, so results can be keyed in an ImageCache.
    """
    def __init__(self, path, loader=None, cache=None):
        if loader is None:
            loader = load_image
        self.path = path
        self.loader = loader

        # Cache:
        # - False: do not cache
        # - None: use the global cache
        # - object: an object to be used as cache
        assert cache in {None, False} or isinstance(cache, object)
        self.cache = cache

    def __call__(self):
        """Return the image, consulting and populating the configured cache."""
        image = None
        image_id = hash(self) # path is not necessary hashable or a file path
        cache = self._get_cache(self.cache)
        if cache is not None:
            image = cache.get(image_id)

        if image is None:
            image = self.loader(self.path)
            if cache is not None:
                cache.push(image_id, image)
        return image

    @staticmethod
    def _get_cache(cache):
        # Resolve the cache policy: None -> global singleton, False -> off.
        if cache is None:
            cache = _ImageCache.get_instance()
        elif cache == False:
            return None
        return cache

    def __hash__(self):
        # id(self) makes the hash unique per instance, so each lazy_image
        # gets its own cache slot even for the same path.
        return hash((id(self), self.path, self.loader))
class Image:
    """An image backed by in-memory data, a file path, or a custom loader.

    Loading is deferred: when only a path/loader is given, pixels are read
    on first access to ``data``.
    """
    def __init__(self, data=None, path=None, loader=None, cache=None,
            size=None):
        # size is (H, W); validated if provided, otherwise computed lazily
        # from the data on first access to ``size``.
        assert size is None or len(size) == 2
        if size is not None:
            assert len(size) == 2 and 0 < size[0] and 0 < size[1], size
            size = tuple(size)
        self._size = size # (H, W)

        assert path is None or isinstance(path, str)
        if path is None:
            path = ''
        self._path = path

        assert data is not None or path or loader, "Image can not be empty"
        if data is None and (path or loader):
            # A path to a missing file and no loader leaves data as None.
            if osp.isfile(path) or loader:
                data = lazy_image(path, loader=loader, cache=cache)
        self._data = data

    @property
    def path(self):
        # Source file path ('' when constructed from raw data only).
        return self._path

    @property
    def data(self):
        # Pixel data; invokes the deferred loader on first use.
        if callable(self._data):
            return self._data()
        return self._data

    @property
    def has_data(self):
        return self._data is not None

    @property
    def size(self):
        # (H, W); loads the data if the size was not given explicitly.
        if self._size is None:
            data = self.data
            if data is not None:
                self._size = data.shape[:2]
        return self._size

    def __eq__(self, other):
        # Comparing to a raw array compares pixel contents only.
        if isinstance(other, np.ndarray):
            return self.has_data and np.array_equal(self.data, other)

        if not isinstance(other, __class__):
            return False
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable; confirm no caller relies on hashing Image objects.
        return \
            (np.array_equal(self.size, other.size)) and \
            (self.has_data == other.has_data) and \
            (self.has_data and np.array_equal(self.data, other.data) or \
                not self.has_data)

@ -1,42 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
# Process-wide singleton storage and the default cache size.
_instance = None

DEFAULT_CAPACITY = 2

class ImageCache:
    """A tiny bounded cache of decoded images, with MRU entries at the head."""

    @staticmethod
    def get_instance():
        """Return (lazily creating) the shared module-level cache."""
        global _instance
        if _instance is None:
            _instance = ImageCache()
        return _instance

    def __init__(self, capacity=DEFAULT_CAPACITY):
        self.capacity = int(capacity)
        self.items = OrderedDict()

    def push(self, item_id, image):
        # Evict from the tail before inserting; accessed entries live at
        # the head (see get()), so the tail holds the stalest items.
        # NOTE(review): a fresh push also lands at the tail, making a
        # brand-new entry the next eviction candidate until it is get()-ed.
        if self.capacity <= len(self.items):
            self.items.popitem(last=True)
        self.items[item_id] = image

    def get(self, item_id):
        """Return the cached image or None; a hit promotes the entry."""
        sentinel = object()
        image = self.items.get(item_id, sentinel)
        if image is sentinel:
            return None
        self.items.move_to_end(item_id, last=False) # naive splay tree
        return image

    def size(self):
        return len(self.items)

    def clear(self):
        self.items.clear()

@ -1,16 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from contextlib import contextmanager
import logging
@contextmanager
def logging_disabled(max_level=logging.CRITICAL):
    """Temporarily raise the global logging disable threshold.

    Records at or below *max_level* are muted inside the ``with`` body;
    the previous threshold is restored afterwards, even on error.
    """
    saved_level = logging.root.manager.disable
    logging.disable(max_level)
    try:
        yield
    finally:
        logging.disable(saved_level)

@ -1,289 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import numpy as np
from datumaro.util.image import lazy_image, load_image
def generate_colormap(length=256):
    """
    Generates colors using PASCAL VOC algorithm.

    The bits of each index are distributed across the channels, three bits
    per round, from the most significant output bit downwards.
    Returns index -> (R, G, B) mapping.
    """
    bits = np.arange(length, dtype=int)
    channels = np.zeros((length, 3), dtype=int)
    for shift in range(7, -1, -1):
        for ch in range(3):
            channels[:, ch] |= ((bits >> ch) & 1) << shift
        bits >>= 3

    return dict(enumerate(map(tuple, channels)))
def invert_colormap(colormap):
    """Invert an index -> color mapping into a color tuple -> index mapping."""
    inverted = {}
    for index, color in colormap.items():
        inverted[tuple(color)] = index
    return inverted
def check_is_mask(mask):
    """Assert *mask* is HW or HW1-shaped (an index mask, not a color image)."""
    ndim = len(mask.shape)
    assert ndim in {2, 3}
    if ndim == 3:
        assert mask.shape[2] == 1
# Module-wide default VOC colormap and its inverse, built once at import.
_default_colormap = generate_colormap()
_default_unpaint_colormap = invert_colormap(_default_colormap)
def unpaint_mask(painted_mask, inverse_colormap=None):
    # Covert color mask to index mask
    # mask: HWC BGR [0; 255]
    # colormap: (R, G, B) -> index
    assert len(painted_mask.shape) == 3
    if inverse_colormap is None:
        inverse_colormap = _default_unpaint_colormap

    # Pixels are packed below as B | G<<8 | R<<16, so unpacking the key as
    # ((a>>16)&255, (a>>8)&255, a&255) yields (R, G, B) as documented.
    if callable(inverse_colormap):
        map_fn = lambda a: inverse_colormap(
            (a >> 16) & 255, (a >> 8) & 255, a & 255
        )
    else:
        map_fn = lambda a: inverse_colormap[(
            (a >> 16) & 255, (a >> 8) & 255, a & 255
        )]

    painted_mask = painted_mask.astype(int)
    painted_mask = painted_mask[:, :, 0] + \
        (painted_mask[:, :, 1] << 8) + \
        (painted_mask[:, :, 2] << 16)
    # Map only the distinct packed colors, then expand back over the image.
    uvals, unpainted_mask = np.unique(painted_mask, return_inverse=True)
    palette = np.array([map_fn(v) for v in uvals], dtype=np.float32)
    unpainted_mask = palette[unpainted_mask].reshape(painted_mask.shape[:2])

    return unpainted_mask
def paint_mask(mask, colormap=None):
    # Applies colormap to index mask
    # mask: HW(C) [0; max_index] mask
    # colormap: index -> (R, G, B)
    check_is_mask(mask)

    if colormap is None:
        colormap = _default_colormap
    if callable(colormap):
        map_fn = colormap
    else:
        # Unknown indices paint as (-1, -1, -1) to stand out in the output.
        map_fn = lambda c: colormap.get(c, (-1, -1, -1))
    # 256-entry LUT; [::-1] flips (R, G, B) to BGR output channel order.
    palette = np.array([map_fn(c)[::-1] for c in range(256)], dtype=np.float32)

    mask = mask.astype(np.uint8)
    painted_mask = palette[mask].reshape((*mask.shape[:2], 3))
    return painted_mask
def remap_mask(mask, map_fn):
    # Changes mask elements from one colormap to another
    # mask: HW(C) [0; max_index] mask
    check_is_mask(mask)

    # Precompute a 256-entry lookup table, then translate in one indexing op.
    lut = np.array([map_fn(c) for c in range(256)], dtype=np.uint8)
    return lut[mask]
def make_index_mask(binary_mask, index):
    """Turn a 0/1 mask into a uint8 mask holding *index* at foreground pixels."""
    lut = np.array([0, index], dtype=np.uint8)
    return np.choose(binary_mask, lut)
def make_binary_mask(mask):
    # NOTE(review): despite the name, np.nonzero() returns a tuple of
    # coordinate index arrays, not a boolean mask -- callers must expect
    # indices; confirm intent before renaming or "fixing".
    return np.nonzero(mask)
def load_mask(path, inverse_colormap=None):
    """Load a mask image as uint8; color masks are converted to index masks."""
    mask = load_image(path)
    mask = mask.astype(np.uint8)
    if inverse_colormap is not None:
        # Only unpaint real color masks; HW or HW1 inputs are already indexed.
        if len(mask.shape) == 3 and mask.shape[2] != 1:
            mask = unpaint_mask(mask, inverse_colormap)
    return mask
def lazy_mask(path, inverse_colormap=None):
    """Deferred load_mask(): a callable, cacheable lazy_image wrapper."""
    return lazy_image(path, lambda path: load_mask(path, inverse_colormap))
def mask_to_rle(binary_mask):
    """Encode a binary mask as uncompressed COCO RLE.

    Pixels are walked in column-major (Fortran) order, as the COCO mask
    format specifies, and the counts always begin with the zero-run length.
    """
    flat = binary_mask.ravel(order='F')
    # Surround the sequence with opposite sentinel values so every run
    # boundary, including the first and last, shows up in the difference.
    edges = np.diff(flat, prepend=[1 - flat[0]], append=[1 - flat[-1]])
    boundaries, = np.where(edges != 0)
    # COCO RLE starts with the number of zeros (possibly 0).
    if flat[0] == 0:
        counts = np.diff(boundaries)
    else:
        counts = np.diff(boundaries, prepend=[0])
    return {
        'counts': counts,
        'size': list(binary_mask.shape),
    }
def mask_to_polygons(mask, tolerance=1.0, area_threshold=1):
    """
    Convert an instance mask to polygons

    Args:
        mask: a 2d binary mask
        tolerance: maximum distance from original points of
            a polygon to the approximated ones
        area_threshold: minimal area of generated polygons

    Returns:
        A list of polygons like [[x1,y1, x2,y2 ...], [...]]
    """
    from pycocotools import mask as mask_utils
    from skimage import measure

    polygons = []

    # pad mask with 0 around borders so contours touching the edge are closed
    padded_mask = np.pad(mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_mask, 0.5)
    # Fix coordinates after padding
    contours = np.subtract(contours, 1)

    for contour in contours:
        if not np.array_equal(contour[0], contour[-1]):
            contour = np.vstack((contour, contour[0])) # make polygon closed
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) <= 2:
            continue # a polygon needs at least 3 points

        # find_contours yields (row, col); flip to (x, y) and clip negatives
        contour = np.flip(contour, axis=1).flatten().clip(0) # [x0, y0, ...]

        # Check if the polygon is big enough
        rle = mask_utils.frPyObjects([contour], mask.shape[0], mask.shape[1])
        area = sum(mask_utils.area(rle))
        if area_threshold <= area:
            polygons.append(contour)
    return polygons
def crop_covered_segments(segments, width, height,
        iou_threshold=0.0, ratio_tolerance=0.001, area_threshold=1,
        return_masks=False):
    """
    Find all segments occluded by others and crop them to the visible part only.
    Input segments are expected to be sorted from background to foreground.

    Args:
        segments: 1d list of segment RLEs (in COCO format)
        width: width of the image
        height: height of the image
        iou_threshold: IoU threshold for objects to be counted as intersected
            By default is set to 0 to process any intersected objects
        ratio_tolerance: an IoU "handicap" value for a situation
            when an object is (almost) fully covered by another one and we
            don't want make a "hole" in the background object
        area_threshold: minimal area of included segments
        return_masks: when True, cropped polygon inputs come back as masks too

    Returns:
        A list of input segments' parts (in the same order as input):
            [
                [[x1,y1, x2,y2 ...], ...], # input segment #0 parts
                mask1, # input segment #1 mask (if source segment is mask)
                [], # when source segment is too small
                ...
            ]
    """
    from pycocotools import mask as mask_utils

    segments = [[s] for s in segments]
    input_rles = [mask_utils.frPyObjects(s, height, width) for s in segments]

    for i, rle_bottom in enumerate(input_rles):
        area_bottom = sum(mask_utils.area(rle_bottom))
        if area_bottom < area_threshold:
            # Too small to keep: empty polygon list, or None for mask output.
            segments[i] = [] if not return_masks else None
            continue

        # Collect all later (foreground) segments that overlap this one.
        rles_top = []
        for j in range(i + 1, len(input_rles)):
            rle_top = input_rles[j]
            iou = sum(mask_utils.iou(rle_bottom, rle_top, [0, 0]))[0]

            if iou <= iou_threshold:
                continue

            area_top = sum(mask_utils.area(rle_top))
            area_ratio = area_top / area_bottom

            # If a segment is fully inside another one, skip this segment
            if abs(area_ratio - iou) < ratio_tolerance:
                continue

            # Check if the bottom segment is fully covered by the top one.
            # There is a mistake in the annotation, keep the background one
            if abs(1 / area_ratio - iou) < ratio_tolerance:
                rles_top = []
                break

            rles_top += rle_top

        # Nothing covers this polygon segment and masks were not requested:
        # keep it untouched.
        if not rles_top and not isinstance(segments[i][0], dict) \
                and not return_masks:
            continue

        rle_bottom = rle_bottom[0]
        bottom_mask = mask_utils.decode(rle_bottom).astype(np.uint8)

        if rles_top:
            rle_top = mask_utils.merge(rles_top)
            top_mask = mask_utils.decode(rle_top).astype(np.uint8)

            # Subtract the union of covering segments; clamp underflow to 0.
            bottom_mask -= top_mask
            bottom_mask[bottom_mask != 1] = 0

        if not return_masks and not isinstance(segments[i][0], dict):
            segments[i] = mask_to_polygons(bottom_mask,
                area_threshold=area_threshold)
        else:
            segments[i] = bottom_mask

    return segments
def rles_to_mask(rles, width, height):
    """Merge COCO RLEs/polygons into one binary mask (HW, uint8)."""
    from pycocotools import mask as mask_utils

    rles = mask_utils.frPyObjects(rles, height, width)
    rles = mask_utils.merge(rles)
    mask = mask_utils.decode(rles)
    return mask
def find_mask_bbox(mask):
    """Return the bounding box [x, y, w, h] of a mask's non-zero region.

    NOTE(review): w and h are computed as (last - first) index without +1,
    so the box spans up to, but not including, the last foreground
    row/column -- callers appear to rely on this convention; confirm
    before changing.
    """
    nonzero_cols = np.any(mask, axis=0)
    nonzero_rows = np.any(mask, axis=1)
    x0, x1 = np.where(nonzero_cols)[0][[0, -1]]
    y0, y1 = np.where(nonzero_rows)[0][[0, -1]]
    return [x0, y0, x1 - x0, y1 - y0]
def merge_masks(masks):
    """
    Merges masks into one, mask order is responsible for z order.

    Later masks overwrite earlier ones wherever they are non-zero;
    returns None for an empty input.
    """
    if not masks:
        return None

    merged = masks[0]
    for overlay in masks[1:]:
        merged = np.where(overlay != 0, overlay, merged)
    return merged

@ -1,17 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import subprocess
def check_instruction_set(instruction):
    """Return True if the CPU advertises *instruction* (Linux-only: lscpu).

    grep -o echoes the matched flag itself, so the comparison succeeds
    exactly when lscpu lists it; no match yields an empty string.
    """
    return instruction == str.strip(
        # Let's ignore a warning from bandit about using shell=True.
        # In this case it isn't a security issue and we use some
        # shell features like pipes.
        subprocess.check_output(
            'lscpu | grep -o "%s" | head -1' % instruction,
            shell=True).decode('utf-8') # nosec
    )

@ -1,121 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import inspect
import os
import os.path as osp
import shutil
import tempfile
from datumaro.components.extractor import AnnotationType
from datumaro.util import find
def current_function_name(depth=1):
    """Return the name of the function *depth* frames above this call."""
    return inspect.stack()[depth].function
class FileRemover:
    """Context manager that deletes *path* (a file or a directory) on exit."""

    def __init__(self, path, is_dir=False, ignore_errors=False):
        self.path = path
        self.is_dir = is_dir                  # delete as a directory tree
        self.ignore_errors = ignore_errors    # honored for directories only

    def __enter__(self):
        # Yield the path so `with FileRemover(p) as p:` reads naturally.
        return self.path

    # pylint: disable=redefined-builtin
    def __exit__(self, type=None, value=None, traceback=None):
        if not self.is_dir:
            os.remove(self.path)
        else:
            shutil.rmtree(self.path, ignore_errors=self.ignore_errors)
    # pylint: enable=redefined-builtin
class TestDir(FileRemover):
    """A FileRemover that first creates a scratch directory for a test.

    Without an explicit *path*, a unique temp directory named after the
    calling test function is created under the current working directory.
    """

    def __init__(self, path=None, ignore_errors=False):
        if path is not None:
            os.makedirs(path, exist_ok=ignore_errors)
        else:
            prefix = osp.abspath('temp_%s-' % current_function_name(2))
            path = tempfile.mkdtemp(dir=os.getcwd(), prefix=prefix)
        super().__init__(path, is_dir=True, ignore_errors=ignore_errors)
def compare_categories(test, expected, actual):
    """Assert two category mappings agree, overall and per annotation type."""
    test.assertEqual(
        sorted(expected, key=lambda t: t.value),
        sorted(actual, key=lambda t: t.value)
    )

    if AnnotationType.label in expected:
        test.assertEqual(
            expected[AnnotationType.label].items,
            actual[AnnotationType.label].items,
        )
    if AnnotationType.mask in expected:
        test.assertEqual(
            expected[AnnotationType.mask].colormap,
            actual[AnnotationType.mask].colormap,
        )
    if AnnotationType.points in expected:
        test.assertEqual(
            expected[AnnotationType.points].items,
            actual[AnnotationType.points].items,
        )
def _compare_annotations(expected, actual, ignored_attrs=None):
if not ignored_attrs:
return expected == actual
a_attr = expected.attributes
b_attr = actual.attributes
expected.attributes = {k:v for k,v in a_attr.items() if k not in ignored_attrs}
actual.attributes = {k:v for k,v in b_attr.items() if k not in ignored_attrs}
r = expected == actual
expected.attributes = a_attr
actual.attributes = b_attr
return r
def compare_datasets(test, expected, actual, ignored_attrs=None):
    """Assert two datasets match item-by-item and annotation-by-annotation.

    Annotation matching is order-insensitive and may ignore the attributes
    named in *ignored_attrs*.
    """
    compare_categories(test, expected.categories(), actual.categories())

    test.assertEqual(sorted(expected.subsets()), sorted(actual.subsets()))
    test.assertEqual(len(expected), len(actual))
    for item_a in expected:
        item_b = find(actual, lambda x: x.id == item_a.id and \
            x.subset == item_a.subset)
        test.assertFalse(item_b is None, item_a.id)
        test.assertEqual(item_a.attributes, item_b.attributes)
        test.assertEqual(len(item_a.annotations), len(item_b.annotations))
        for ann_a in item_a.annotations:
            # We might find few corresponding items, so check them all
            ann_b_matches = [x for x in item_b.annotations
                if x.type == ann_a.type]
            test.assertFalse(len(ann_b_matches) == 0, 'ann id: %s' % ann_a.id)

            ann_b = find(ann_b_matches, lambda x:
                _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs))
            if ann_b is None:
                test.fail('ann %s, candidates %s' % (ann_a, ann_b_matches))
            item_b.annotations.remove(ann_b) # avoid repeats
def compare_datasets_strict(test, expected, actual):
    # Compares datasets for strong equality: categories, subset ordering,
    # and exact item equality in iteration order, unlike compare_datasets().
    test.assertEqual(expected.categories(), actual.categories())

    test.assertListEqual(sorted(expected.subsets()), sorted(actual.subsets()))
    test.assertEqual(len(expected), len(actual))

    for subset_name in expected.subsets():
        e_subset = expected.get_subset(subset_name)
        a_subset = actual.get_subset(subset_name)
        test.assertEqual(len(e_subset), len(a_subset))
        for idx, (item_a, item_b) in enumerate(zip(e_subset, a_subset)):
            test.assertEqual(item_a, item_b,
                '%s:\n%s\nvs.\n%s\n' % \
                (idx, item_a, item_b))

@ -1,80 +0,0 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
def check_import():
    """Probe 'import tensorflow' in a child process and raise a readable error.

    Official TF builds require AVX; importing in-process can hard-crash the
    interpreter, so the import is attempted in a subprocess instead.
    """
    # Workaround for checking import availability:
    # Official TF builds include AVX instructions. Once we try to import,
    # the program crashes. We raise an exception instead.

    import subprocess
    import sys

    from .os_util import check_instruction_set

    result = subprocess.run([sys.executable, '-c', 'import tensorflow'],
        timeout=60,
        universal_newlines=True, # use text mode for output stream
        stdout=subprocess.PIPE, stderr=subprocess.PIPE) # capture output

    if result.returncode != 0:
        message = result.stderr
        if not message:
            # Empty stderr suggests the child crashed outright; build a
            # message and, when AVX is missing, add the likely cause.
            message = "Can't import tensorflow. " \
                "Test process exit code: %s." % result.returncode
            if not check_instruction_set('avx'):
                # The process has probably crashed for AVX unavalability
                message += " This is likely because your CPU does not " \
                    "support AVX instructions, " \
                    "which are required for tensorflow."
        raise ImportError(message)
def import_tf(check=True):
    """Import TensorFlow once, with crash checking and log-noise reduction.

    Returns the tensorflow module. On first import, optionally verifies
    importability in a subprocess (see check_import), quiets TF logging,
    and enables eager execution on versions where it is opt-in.
    """
    import sys

    # Three states: already imported (return it), previously failed
    # (None sentinel -> re-import to emit the default error), never tried.
    not_found = object()
    tf = sys.modules.get('tensorflow', not_found)
    if tf is None:
        import tensorflow as tf # emit default error
    elif tf is not not_found:
        return tf

    # Reduce output noise, https://stackoverflow.com/questions/38073432/how-to-suppress-verbose-tensorflow-logging
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    if check:
        try:
            check_import()
        except Exception:
            sys.modules['tensorflow'] = None # prevent further import
            raise

    import tensorflow as tf

    # Each quieting/eager call below is version-dependent; AttributeError
    # just means this TF release lacks that API.
    try:
        tf.get_logger().setLevel('WARNING')
    except AttributeError:
        pass
    try:
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.WARN)
    except AttributeError:
        pass

    # Enable eager execution in early versions to unlock dataset operations
    eager_enabled = False
    try:
        tf.compat.v1.enable_eager_execution()
        eager_enabled = True
    except AttributeError:
        pass
    try:
        if not eager_enabled:
            tf.enable_eager_execution()
    except AttributeError:
        pass

    return tf

@ -1 +0,0 @@
VERSION = '0.1.0'

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save