[Datumaro] CLI updates + better documentation (#1057)

zhiltsov-max 6 years ago committed by Nikita Manovich
parent 095d6d4611
commit 93b3c091f5

@ -25,5 +25,11 @@
}
],
"python.linting.pylintEnabled": true,
"python.envFile": "${workspaceFolder}/.vscode/python.env"
"python.envFile": "${workspaceFolder}/.vscode/python.env",
"python.testing.unittestEnabled": true,
"python.testing.unittestArgs": [
"-v",
"-s",
"./datumaro",
],
}

@ -16,6 +16,7 @@ CVAT is free, online, interactive video and image annotation tool for computer v
- [Installation guide](cvat/apps/documentation/installation.md)
- [User's guide](cvat/apps/documentation/user_guide.md)
- [Django REST API documentation](#rest-api)
- [Datumaro dataset framework](datumaro/README.md)
- [Command line interface](utils/cli/)
- [XML annotation format](cvat/apps/documentation/xml_format.md)
- [AWS Deployment Guide](cvat/apps/documentation/AWS-Deployment-Guide.md)
@ -34,6 +35,8 @@ CVAT is free, online, interactive video and image annotation tool for computer v
## Supported annotation formats
Format selection is possible after clicking on the Upload annotation / Dump annotation button.
[Datumaro](datumaro/README.md) dataset framework allows additional dataset transformations
via its command line tool.
| Annotation format | Dumper | Loader |
| ---------------------------------------------------------------------------------- | ------ | ------ |

@ -1,3 +1,8 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os
import os.path as osp
@ -6,7 +11,7 @@ from django.db import transaction
from cvat.apps.annotation.annotation import Annotation
from cvat.apps.engine.annotation import TaskAnnotation
from cvat.apps.engine.models import Task, ShapeType
from cvat.apps.engine.models import Task, ShapeType, AttributeType
import datumaro.components.extractor as datumaro
from datumaro.util.image import lazy_image
@ -128,18 +133,33 @@ class CvatTaskExtractor(datumaro.Extractor):
attrs = {}
db_attributes = db_label.attributespec_set.all()
for db_attr in db_attributes:
attrs[db_attr.name] = db_attr.default_value
attrs[db_attr.name] = db_attr
label_attrs[db_label.name] = attrs
map_label = lambda label_db_name: label_map[label_db_name]
def convert_attrs(label, cvat_attrs):
cvat_attrs = {a.name: a.value for a in cvat_attrs}
dm_attr = dict()
for attr_name, attr_spec in label_attrs[label].items():
attr_value = cvat_attrs.get(attr_name, attr_spec.default_value)
try:
if attr_spec.input_type == AttributeType.NUMBER:
attr_value = float(attr_value)
elif attr_spec.input_type == AttributeType.CHECKBOX:
attr_value = attr_value.lower() == 'true'
dm_attr[attr_name] = attr_value
except Exception as e:
slogger.task[self._db_task.id].error(
"Failed to convert attribute '%s'='%s': %s" % \
(attr_name, attr_value, e))
return dm_attr
for tag_obj in cvat_anno.tags:
anno_group = tag_obj.group
if isinstance(anno_group, int):
anno_group = anno_group
anno_label = map_label(tag_obj.label)
anno_attr = dict(label_attrs[tag_obj.label])
for attr in tag_obj.attributes:
anno_attr[attr.name] = attr.value
anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes)
anno = datumaro.LabelObject(label=anno_label,
attributes=anno_attr, group=anno_group)
@ -150,9 +170,7 @@ class CvatTaskExtractor(datumaro.Extractor):
if isinstance(anno_group, int):
anno_group = anno_group
anno_label = map_label(shape_obj.label)
anno_attr = dict(label_attrs[shape_obj.label])
for attr in shape_obj.attributes:
anno_attr[attr.name] = attr.value
anno_attr = convert_attrs(shape_obj.label, shape_obj.attributes)
anno_points = shape_obj.points
if shape_obj.type == ShapeType.POINTS:
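The conversion rules above can be illustrated with a hedged, standalone sketch (attribute names and values are hypothetical; the `number`/`checkbox` input types mirror `AttributeType` from `cvat.apps.engine.models`):

``` python
# Sketch of the rules convert_attrs() applies, not the actual function:
# NUMBER attributes become floats, CHECKBOX attributes become booleans,
# anything else passes through unchanged.
def convert_value(input_type, value):
    if input_type == 'number':        # AttributeType.NUMBER
        return float(value)
    if input_type == 'checkbox':      # AttributeType.CHECKBOX
        return value.lower() == 'true'
    return value

assert convert_value('number', '2') == 2.0
assert convert_value('checkbox', 'True') is True
assert convert_value('text', 'hello') == 'hello'
```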

@ -6,17 +6,15 @@ python -m virtualenv .venv
. .venv/bin/activate
# install dependencies
sed -r "s/^(.*)#.*$/\1/g" datumaro/requirements.txt | xargs -n 1 -L 1 pip install
pip install -e datumaro/
pip install -r cvat/utils/cli/requirements.txt
# set up environment
PYTHONPATH=':'
export PYTHONPATH
ln -s $PWD/datumaro/datum.py ./datum
chmod a+x datum
# use Datumaro
./datum --help
datum --help
```
Check Datumaro [QUICKSTART.md](datumaro/docs/quickstart.md) for further info.
Check Datumaro [docs](datumaro/README.md) for more info.

@ -1,3 +1,8 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import getpass
import json
@ -27,7 +32,7 @@ class cvat_rest_api_task_images(datumaro.Extractor):
def _image_local_path(self, item_id):
task_id = self._config.task_id
return osp.join(self._cache_dir,
'task_{}_frame_{:06d}.jpg'.format(task_id, item_id))
'task_{}_frame_{:06d}.jpg'.format(task_id, int(item_id)))
def _make_image_loader(self, item_id):
return lazy_image(item_id,

@ -1,3 +1,8 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from datetime import timedelta
import json
import os
@ -217,8 +222,9 @@ class TaskProject:
if dst_format == EXPORT_FORMAT_DATUMARO_PROJECT:
self._remote_export(save_dir=save_dir, server_url=server_url)
else:
self._dataset.export_project(output_format=dst_format,
save_dir=save_dir, save_images=save_images)
converter = self._dataset.env.make_converter(dst_format,
save_images=save_images)
self._dataset.export_project(converter=converter, save_dir=save_dir)
def _remote_image_converter(self, save_dir, server_url=None):
os.makedirs(save_dir, exist_ok=True)
@ -246,7 +252,7 @@ class TaskProject:
if db_video is not None:
for i in range(self._db_task.size):
frame_info = {
'id': str(i),
'id': i,
'width': db_video.width,
'height': db_video.height,
}

@ -1,3 +1,8 @@
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import inspect
import os, os.path as osp
import zipfile

@ -0,0 +1,119 @@
## Table of Contents
- [Installation](#installation)
- [Usage](#usage)
- [Testing](#testing)
- [Design](#design-and-code-structure)
## Installation
### Prerequisites
- Python (3.5+)
- OpenVINO (optional)
``` bash
git clone https://github.com/opencv/cvat
```
Optionally, set up a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```
Then install all dependencies:
``` bash
while read -r p; do pip install $p; done < requirements.txt
```
If you're working inside the CVAT environment:
``` bash
. .env/bin/activate
while read -r p; do pip install $p; done < datumaro/requirements.txt
```
## Usage
> The directory containing Datumaro should be in the `PYTHONPATH`
> environment variable or `cvat/datumaro/` should be the current directory.
``` bash
datum --help
python -m datumaro --help
python datumaro/ --help
python datum.py --help
```
``` python
import datumaro
```
## Testing
It is expected that all Datumaro functionality is covered and checked by
unit tests. Tests are placed in the `tests/` directory.
To run tests use:
``` bash
python -m unittest discover -s tests
```
If you're working inside the CVAT environment, you can also use:
``` bash
python manage.py test datumaro/
```
## Design and code structure
- [Design document](docs/design.md)
### Command-line
The command-line interface takes [Docker](https://www.docker.com/) as a
reference model. Basically, the interface is divided into contexts and
single commands. Contexts are semantically grouped commands
related to a single topic or target. Single commands are handy, shorter
alternatives for the most frequently used actions, as well as special commands
that are hard to fit into any specific context (see the sketch after the diagram below).
![cli-design-image](docs/images/cli_design.png)
- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page)
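As a hedged illustration, both invocation styles are dispatched through the same entry point (`datumaro.cli.__main__.main` from this patch) and reach the same command handler:

``` python
from datumaro.cli.__main__ import main

main(['project', 'create', '-o', 'myproject'])   # context form
main(['create', '-o', 'myproject2'])             # equivalent single-command shortcut
```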
The Model-View-ViewModel (MVVM) UI pattern is used.
![mvvm-image](docs/images/mvvm.png)
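As a rough, hypothetical sketch of how this split maps onto the CLI code: argparse parsers act as the View, command functions as the ViewModel, and the `Project` class as the Model.

``` python
import argparse
from datumaro.components.project import Project  # Model: project/dataset state

def create_command(args):
    # ViewModel: translates parsed arguments into Model operations
    Project.generate(args.dst_dir, {'project_name': 'example'})
    return 0

def build_create_parser(parser_ctor=argparse.ArgumentParser):
    # View: declares the user-facing interface only
    parser = parser_ctor()
    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir')
    parser.set_defaults(command=create_command)
    return parser
```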
### Datumaro project and environment structure
<!--lint disable fenced-code-flag-->
```
├── [datumaro module]
└── [project folder]
    ├── .datumaro/
    │   ├── config.yml
    │   ├── .git/
    │   ├── importers/
    │   │   ├── custom_format_importer1.py
    │   │   └── ...
    │   ├── statistics/
    │   │   ├── custom_statistic1.py
    │   │   └── ...
    │   ├── visualizers/
    │   │   ├── custom_visualizer1.py
    │   │   └── ...
    │   └── extractors/
    │       ├── custom_extractor1.py
    │       └── ...
    ├── dataset/
    └── sources/
        ├── source1
        └── ...
```
<!--lint enable fenced-code-flag-->
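A hedged sketch of how such a project folder is consumed programmatically, using the `Project` API from this repository:

``` python
from datumaro.components.project import Project

# reads .datumaro/config.yml and the plugins registered in the project
project = Project.load('path/to/project_folder')
# combines the project's data sources into a single dataset view
dataset = project.make_dataset()
```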

@ -1,36 +1,176 @@
# Dataset framework
# Dataset Framework (Datumaro)
A framework to prepare, manage, build, analyze datasets
A framework to build, transform, and analyze datasets.
<!--lint disable fenced-code-flag-->
```
CVAT annotations  --                             ---> Annotation tool
                    \                           /
COCO-like dataset -----> Datumaro ---> dataset ------> Model training
                    /                           \
VOC-like dataset  --                             ---> Publication etc.
```
<!--lint enable fenced-code-flag-->
## Contents
- [Documentation](#documentation)
- [Features](#features)
- [Installation](#installation)
- [Usage](#usage)
- [Examples](#examples)
- [Contributing](#contributing)
## Documentation
- [Quick start guide](docs/quickstart.md)
- [User manual](docs/user_manual.md)
- [Design document](docs/design.md)
- [Contributing](CONTRIBUTING.md)
## Installation
## Features
Python 3.5+ is required.
- Dataset format conversions:
  - COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
    - [Format specification](http://cocodataset.org/#format-data)
    - `labels` are our extension - like `instances` with only `category_id`
  - PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
    - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
  - YOLO (`bboxes`)
    - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
  - TF Detection API (`bboxes`, `masks`)
    - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
  - CVAT
    - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- Dataset building operations:
  - Merging multiple datasets into one
  - Dataset filtering with custom conditions (see the sketch below the feature list), for instance:
    - remove all annotations except polygons of a certain class
    - remove images without a specific class
    - remove occluded annotations from images
    - keep only vertically-oriented images
    - remove small-area bounding boxes from annotations
- Annotation conversions, for instance:
  - polygons to instance masks and vice versa
  - apply a custom colormap for mask annotations
  - remap dataset labels
- Dataset comparison
- Model integration:
  - Inference (OpenVINO and custom models)
  - Explainable AI ([RISE algorithm](https://arxiv.org/abs/1806.07421))
To install into a virtual environment do:
> Check the [design document](docs/design.md) for a full list of features
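For instance, the filtering operations above are available both through the CLI (`extract`, see the Examples section below) and programmatically; a minimal sketch, assuming the `extract` API used elsewhere in this patch:

``` python
dataset = project.make_dataset()
# keep only items that have at least one 'cat' annotation
subset = dataset.extract(filter_expr='/item[annotation/label="cat"]')
```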
## Installation
Optionally, create a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
pip install -r requirements.txt
```
## Execution
The tool can be executed both as a script and as a module.
Install Datumaro package:
``` bash
PYTHONPATH="..."
python -m datumaro <command>
python path/to/datum.py
pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
```
## Testing
## Usage
There are several options available:
- [A standalone command-line tool](#standalone-tool)
- [A python module](#python-module)
### Standalone tool
<!--lint disable fenced-code-flag-->
```
    User
      |
      v
+------------------+
|       CVAT       |
+--------v---------+       +------------------+       +--------------+
| Datumaro module  | ----> | Datumaro project | <---> | Datumaro CLI | <--- User
+------------------+       +------------------+       +--------------+
```
<!--lint enable fenced-code-flag-->
``` bash
python -m unittest discover -s tests
datum --help
python -m datumaro --help
```
### Python module
Datumaro can be used in custom scripts as a library in the following way:
``` python
from datumaro.components.project import Project # project-related things
import datumaro.components.extractor # annotations and high-level interfaces
# etc.
project = Project.load('directory')
```
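Continuing the sketch, a dataset can be iterated directly (item fields assumed from `datumaro.components.extractor`):

``` python
dataset = project.make_dataset()
for item in dataset:
    # each item carries an id, an image, and a list of annotations
    print(item.id, len(item.annotations))
```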
## Examples
<!--lint disable list-item-indent-->
<!--lint disable list-item-bullet-indent-->
- Convert [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#data) to COCO, keeping only images with the `cat` class present:
```bash
# Download VOC dataset:
# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
datum project import --format voc --input-path <path/to/voc>
datum project export --format coco --filter '/item[annotation/label="cat"]'
```
- Convert only non-occluded annotations from a CVAT-annotated project to TFRecord:
```bash
# export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir
datum project extract --filter '/item/annotation[occluded="False"]' \
--mode items+anno --output-dir not_occluded
datum project export --project not_occluded \
--format tf_detection_api -- --save-images
```
- Annotate COCO, extract an image subset, re-annotate it in CVAT, and update the old dataset:
```bash
# Download COCO dataset http://cocodataset.org/#download
# Put images to coco/images/ and annotations to coco/annotations/
datum project import --format coco --input-path <path/to/coco>
datum project export --filter '/image[images_I_dont_like]' --format cvat \
--output-dir reannotation
# import dataset and images to CVAT, re-annotate
# export Datumaro project, extract to 'reannotation-upd'
datum project merge reannotation-upd
datum project export --format coco
```
- Annotate instance polygons in CVAT, export as masks in COCO:
```bash
datum project import --format cvat --input-path <path/to/cvat.xml>
datum project export --format coco -- --segmentation-mode masks
```
- Apply an OpenVINO detection model to some COCO-like dataset,
then compare annotations with ground truth and visualize in TensorBoard:
```bash
datum project import --format coco --input-path <path/to/coco>
# create model results interpretation script
datum model add mymodel openvino \
--weights model.bin --description model.xml \
--interpretation-script parse_results.py
datum model run --model mymodel --output-dir mymodel_inference/
datum project diff mymodel_inference/ --format tensorboard --output-dir diff
```
<!--lint enable list-item-bullet-indent-->
<!--lint enable list-item-indent-->
## Contributing
Feel free to [open an Issue](https://github.com/opencv/cvat/issues/new) if you
think something needs to be changed. You are welcome to participate in
development; instructions are available in our [developer manual](CONTRIBUTING.md).

@ -1,7 +1,7 @@
#!/usr/bin/env python
import sys
from datumaro import main
from datumaro.cli.__main__ import main
if __name__ == '__main__':

@ -2,92 +2,3 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import sys
from .cli import (
project as project_module,
source as source_module,
item as item_module,
model as model_module,
# inference as inference_module,
create_command as create_command_module,
add_command as add_command_module,
remove_command as remove_command_module,
export_command as export_command_module,
# diff_command as diff_command_module,
# build_command as build_command_module,
stats_command as stats_command_module,
explain_command as explain_command_module,
)
from .version import VERSION
KNOWN_COMMANDS = {
# contexts
'project': project_module.main,
'source': source_module.main,
'item': item_module.main,
'model': model_module.main,
# 'inference': inference_module.main,
# shortcuts
'create': create_command_module.main,
'add': add_command_module.main,
'remove': remove_command_module.main,
'export': export_command_module.main,
# 'diff': diff_command_module.main,
# 'build': build_command_module.main,
'stats': stats_command_module.main,
'explain': explain_command_module.main,
}
def get_command(name, args=None):
return KNOWN_COMMANDS[name]
def loglevel(name):
numeric = getattr(log, name.upper(), None)
if not isinstance(numeric, int):
raise ValueError('Invalid log level: %s' % name)
return numeric
def parse_command(input_args):
parser = argparse.ArgumentParser()
parser.add_argument('command', choices=KNOWN_COMMANDS.keys(),
help='A command to execute')
parser.add_argument('args', nargs=argparse.REMAINDER)
parser.add_argument('--version', action='version', version=VERSION)
parser.add_argument('--loglevel', type=loglevel, default='info',
help="Logging level (default: %(default)s)")
general_args = parser.parse_args(input_args)
command_name = general_args.command
command_args = general_args.args
return general_args, command_name, command_args
def set_up_logger(general_args):
loglevel = general_args.loglevel
log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
level=loglevel)
def main(args=None):
if args is None:
args = sys.argv[1:]
general_args, command_name, command_args = parse_command(args)
set_up_logger(general_args)
command = get_command(command_name, general_args)
try:
return command(command_args)
except Exception as e:
log.error(e)
raise
if __name__ == '__main__':
sys.exit(main())

@ -4,9 +4,9 @@
# SPDX-License-Identifier: MIT
import sys
from . import main
from datumaro.cli.__main__ import main
if __name__ == '__main__':
sys.exit(main())

@ -2,4 +2,3 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

@ -0,0 +1,109 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import sys
from . import contexts, commands
from .util import CliException, add_subparser
from ..version import VERSION
_log_levels = {
'debug': log.DEBUG,
'info': log.INFO,
'warning': log.WARNING,
'error': log.ERROR,
'critical': log.CRITICAL
}
def loglevel(name):
return _log_levels[name]
def _make_subcommands_help(commands, help_line_start=0):
desc = ""
for command_name, _, command_help in commands:
desc += (" %-" + str(max(0, help_line_start - 2 - 1)) + "s%s\n") % \
(command_name, command_help)
return desc
def make_parser():
parser = argparse.ArgumentParser(prog="datumaro",
description="Dataset Framework",
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version', version=VERSION)
parser.add_argument('--loglevel', type=loglevel, default='info',
help="Logging level (options: %s; default: %s)" % \
(', '.join(_log_levels.keys()), "%(default)s"))
known_contexts = [
('project', contexts.project, "Actions on projects (datasets)"),
('source', contexts.source, "Actions on data sources"),
('model', contexts.model, "Actions on models"),
]
known_commands = [
('create', commands.create, "Create project"),
('add', commands.add, "Add source to project"),
('remove', commands.remove, "Remove source from project"),
('export', commands.export, "Export project"),
('explain', commands.explain, "Run Explainable AI algorithm for model"),
]
# Argparse doesn't support subparser groups:
# https://stackoverflow.com/questions/32017020/grouping-argparse-subparser-arguments
help_line_start = max((len(e[0]) for e in known_contexts + known_commands),
default=0)
help_line_start = max((2 + help_line_start) // 4 + 1, 6) * 4 # align to tabs
subcommands_desc = ""
if known_contexts:
subcommands_desc += "Contexts:\n"
subcommands_desc += _make_subcommands_help(known_contexts,
help_line_start)
if known_commands:
if subcommands_desc:
subcommands_desc += "\n"
subcommands_desc += "Commands:\n"
subcommands_desc += _make_subcommands_help(known_commands,
help_line_start)
if subcommands_desc:
subcommands_desc += \
"\nRun '%s COMMAND --help' for more information on a command." % \
parser.prog
subcommands = parser.add_subparsers(title=subcommands_desc,
description="", help=argparse.SUPPRESS)
for command_name, command, _ in known_contexts + known_commands:
add_subparser(subcommands, command_name, command.build_parser)
return parser
def set_up_logger(args):
log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
level=args.loglevel)
def main(args=None):
parser = make_parser()
args = parser.parse_args(args)
set_up_logger(args)
if 'command' not in args:
parser.print_help()
return 1
try:
return args.command(args)
except CliException as e:
log.error(e)
return 1
except Exception as e:
log.error(e)
raise
if __name__ == '__main__':
sys.exit(main())

@ -1,21 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from . import source as source_module
def build_parser(parser=argparse.ArgumentParser()):
source_module.build_add_parser(parser). \
set_defaults(command=source_module.add_command)
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
return args.command(args)

@ -0,0 +1,6 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
from . import add, create, explain, export, remove

@ -0,0 +1,8 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.source import build_add_parser as build_parser

@ -0,0 +1,8 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.project import build_create_parser as build_parser

@ -9,25 +9,35 @@ import os
import os.path as osp
from datumaro.components.project import Project
from datumaro.components.algorithms.rise import RISE
from datumaro.util.command_targets import (TargetKinds, target_selector,
ProjectTarget, SourceTarget, ImageTarget, is_project_path)
from datumaro.util.image import load_image, save_image
from .util.project import load_project
from ..util import MultilineFormatter
from ..util.project import load_project
def build_parser(parser=argparse.ArgumentParser()):
def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Run Explainable AI algorithm",
description="Runs an explainable AI algorithm for a model.")
parser.add_argument('-m', '--model', required=True,
help="Model to use for inference")
parser.add_argument('-t', '--target', default=None,
help="Inference target - image, source, project "
"(default: current dir)")
parser.add_argument('-d', '--save-dir', default=None,
parser.add_argument('-o', '--output-dir', dest='save_dir', default=None,
help="Directory to save output (default: display only)")
method_sp = parser.add_subparsers(dest='algorithm')
rise_parser = method_sp.add_parser('rise')
rise_parser = method_sp.add_parser('rise',
description="""
RISE: Randomized Input Sampling for
Explanation of Black-box Models algorithm|n
|n
See explanations at: https://arxiv.org/pdf/1806.07421.pdf
""",
formatter_class=MultilineFormatter)
rise_parser.add_argument('-s', '--max-samples', default=None, type=int,
help="Number of algorithm iterations (default: mask size ^ 2)")
rise_parser.add_argument('--mw', '--mask-width',
@ -46,7 +56,7 @@ def build_parser(parser=argparse.ArgumentParser()):
help="IoU match threshold in Non-maxima suppression (default: no NMS)")
rise_parser.add_argument('--conf', '--det-conf-thresh',
dest='det_conf_thresh', default=0.0, type=float,
help="Confidence threshold for detections (default: do not filter)")
help="Confidence threshold for detections (default: include all)")
rise_parser.add_argument('-b', '--batch-size', default=1, type=int,
help="Inference batch size (default: %(default)s)")
rise_parser.add_argument('--progressive', action='store_true',
@ -59,6 +69,21 @@ def build_parser(parser=argparse.ArgumentParser()):
return parser
def explain_command(args):
project_path = args.project_dir
if is_project_path(project_path):
project = Project.load(project_path)
else:
project = None
args.target = target_selector(
ProjectTarget(is_default=True, project=project),
SourceTarget(project=project),
ImageTarget()
)(args.target)
if args.target[0] == TargetKinds.project:
if is_project_path(args.target[1]):
args.project_dir = osp.dirname(osp.abspath(args.target[1]))
import cv2
from matplotlib import cm
@ -69,6 +94,7 @@ def explain_command(args):
if str(args.algorithm).lower() != 'rise':
raise NotImplementedError()
from datumaro.components.algorithms.rise import RISE
rise = RISE(model,
max_samples=args.max_samples,
mask_width=args.mask_width,
@ -162,31 +188,3 @@ def explain_command(args):
raise NotImplementedError()
return 0
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
project_path = args.project_dir
if is_project_path(project_path):
project = Project.load(project_path)
else:
project = None
try:
args.target = target_selector(
ProjectTarget(is_default=True, project=project),
SourceTarget(project=project),
ImageTarget()
)(args.target)
if args.target[0] == TargetKinds.project:
if is_project_path(args.target[1]):
args.project_dir = osp.dirname(osp.abspath(args.target[1]))
except argparse.ArgumentTypeError as e:
print(e)
parser.print_help()
return 1
return args.command(args)

@ -0,0 +1,8 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.project import build_export_parser as build_parser

@ -0,0 +1,8 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
# pylint: disable=unused-import
from ..contexts.source import build_remove_parser as build_parser

@ -0,0 +1,6 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
from . import project, source, model, item

@ -0,0 +1,36 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from ...util import add_subparser
def build_export_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
return parser
def build_stats_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
return parser
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
return parser
def build_edit_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
return parser
def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
subparsers = parser.add_subparsers()
add_subparser(subparsers, 'export', build_export_parser)
add_subparser(subparsers, 'stats', build_stats_parser)
add_subparser(subparsers, 'diff', build_diff_parser)
add_subparser(subparsers, 'edit', build_edit_parser)
return parser

@ -9,9 +9,49 @@ import os
import os.path as osp
import shutil
from ..util.project import load_project
from datumaro.components.config import DEFAULT_FORMAT
from ...util import add_subparser
from ...util.project import load_project
def build_openvino_add_parser(parser=argparse.ArgumentParser()):
parser.add_argument('-d', '--description', required=True,
help="Path to the model description file (.xml)")
parser.add_argument('-w', '--weights', required=True,
help="Path to the model weights file (.bin)")
parser.add_argument('-i', '--interpretation-script', required=True,
help="Path to the network output interpretation script (.py)")
parser.add_argument('--plugins-path', default=None,
help="Path to the custom Inference Engine plugins directory")
parser.add_argument('--copy', action='store_true',
help="Copy the model data to the project")
return parser
def openvino_args_extractor(args):
my_args = argparse.Namespace()
my_args.description = args.description
my_args.weights = args.weights
my_args.interpretation_script = args.interpretation_script
my_args.plugins_path = args.plugins_path
return my_args
def build_add_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
parser.add_argument('name',
help="Name of the model to be added")
launchers_sp = parser.add_subparsers(dest='launcher')
build_openvino_add_parser(launchers_sp.add_parser('openvino')) \
.set_defaults(launcher_args_extractor=openvino_args_extractor)
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=add_command)
return parser
def add_command(args):
project = load_project(args.project_dir)
@ -55,39 +95,16 @@ def add_command(args):
return 0
def build_openvino_add_parser(parser):
parser.add_argument('-d', '--description', required=True,
help="Path to the model description file (.xml)")
parser.add_argument('-w', '--weights', required=True,
help="Path to the model weights file (.bin)")
parser.add_argument('-i', '--interpretation-script', required=True,
help="Path to the network output interpretation script (.py)")
parser.add_argument('--plugins-path', default=None,
help="Path to the custom Inference Engine plugins directory")
parser.add_argument('--copy', action='store_true',
help="Copy the model data to the project")
return parser
def openvino_args_extractor(args):
my_args = argparse.Namespace()
my_args.description = args.description
my_args.weights = args.weights
my_args.interpretation_script = args.interpretation_script
my_args.plugins_path = args.plugins_path
return my_args
def build_remove_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
def build_add_parser(parser):
parser.add_argument('name',
help="Name of the model to be added")
launchers_sp = parser.add_subparsers(dest='launcher')
build_openvino_add_parser(launchers_sp.add_parser('openvino')) \
.set_defaults(launcher_args_extractor=openvino_args_extractor)
help="Name of the model to be removed")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
parser.set_defaults(command=remove_command)
return parser
def remove_command(args):
project = load_project(args.project_dir)
@ -97,31 +114,39 @@ def remove_command(args):
return 0
def build_remove_parser(parser):
parser.add_argument('name',
help="Name of the model to be removed")
def build_run_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
parser.add_argument('-o', '--output-dir', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-m', '--model', dest='model_name', required=True,
help="Model to apply to the project")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=run_command)
return parser
def run_command(args):
project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
project.make_dataset().apply_model(
save_dir=dst_dir,
model_name=args.model_name)
def build_parser(parser=argparse.ArgumentParser()):
command_parsers = parser.add_subparsers(dest='command_name')
log.info("Inference results have been saved to '%s'" % dst_dir)
build_add_parser(command_parsers.add_parser('add')) \
.set_defaults(command=add_command)
return 0
build_remove_parser(command_parsers.add_parser('remove')) \
.set_defaults(command=remove_command)
return parser
def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor()
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
subparsers = parser.add_subparsers()
add_subparser(subparsers, 'add', build_add_parser)
add_subparser(subparsers, 'remove', build_remove_parser)
add_subparser(subparsers, 'run', build_run_parser)
return args.command(args)
return parser

@ -0,0 +1,647 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from enum import Enum
import logging as log
import os
import os.path as osp
import shutil
from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from datumaro.components.extractor import AnnotationType
from .diff import DiffVisualizer
from ...util import add_subparser, CliException, MultilineFormatter
from ...util.project import make_project_path, load_project, \
generate_next_dir_name
def build_create_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Create empty project",
description="""
Create a new empty project.|n
|n
Examples:|n
- Create a project in the current directory:|n
|s|screate -n myproject|n
|n
- Create a project in other directory:|n
|s|screate -o path/I/like/
""",
formatter_class=MultilineFormatter)
parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
help="Save directory for the new project (default: current dir")
parser.add_argument('-n', '--name', default=None,
help="Name of the new project (default: same as project dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.set_defaults(command=create_command)
return parser
def create_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=True)
if not args.overwrite and osp.isfile(project_path):
raise CliException("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
project_name = args.name
if project_name is None:
project_name = osp.basename(project_dir)
log.info("Creating project at '%s'" % project_dir)
Project.generate(project_dir, {
'project_name': project_name,
})
log.info("Project has been created at '%s'" % project_dir)
return 0
def build_import_parser(parser_ctor=argparse.ArgumentParser):
import datumaro.components.importers as importers_module
builtin_importers = [name for name, cls in importers_module.items]
parser = parser_ctor(help="Create project from existing dataset",
description="""
Creates a project from an existing dataset. The source can be:|n
- a dataset in a supported format (check 'formats' section below)|n
- a Datumaro project|n
|n
Formats:|n
Datasets come in a wide variety of formats. Each dataset
format defines its own data structure and rules on how to
interpret the data. For example, the following data structure
is used in COCO format:|n
/dataset/|n
- /images/<id>.jpg|n
- /annotations/|n
|n
In Datumaro dataset formats are supported by
Extractor-s and Importer-s.
An Extractor produces a list of dataset items corresponding
to the dataset. An Importer creates a project from the
data source location.
It is possible to add a custom Extractor and Importer.
To do this, you need to put Extractor and
Importer implementation scripts into
<project_dir>/.datumaro/extractors
and <project_dir>/.datumaro/importers.|n
|n
List of supported dataset formats: %s|n
|n
Examples:|n
- Create a project from VOC dataset in the current directory:|n
|s|simport -f voc -i path/to/voc|n
|n
- Create a project from COCO dataset in other directory:|n
|s|simport -f coco -i path/to/coco -o path/I/like/
""" % ', '.join(builtin_importers),
formatter_class=MultilineFormatter)
parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
help="Directory to save the new project to (default: current dir)")
parser.add_argument('-n', '--name', default=None,
help="Name of the new project (default: same as project dir)")
parser.add_argument('--copy', action='store_true',
help="Copy the dataset instead of saving source links")
parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-i', '--input-path', required=True, dest='source',
help="Path to import project from")
parser.add_argument('-f', '--format', required=True,
help="Source project format")
# parser.add_argument('extra_args', nargs=argparse.REMAINDER,
# help="Additional arguments for importer (pass '-- -h' for help)")
parser.set_defaults(command=import_command)
return parser
def import_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=True)
if not args.overwrite and osp.isfile(project_path):
raise CliException("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
project_name = args.name
if project_name is None:
project_name = osp.basename(project_dir)
log.info("Importing project from '%s' as '%s'" % \
(args.source, args.format))
source = osp.abspath(args.source)
project = Project.import_from(source, args.format)
project.config.project_name = project_name
project.config.project_dir = project_dir
if not args.skip_check or args.copy:
log.info("Checking the dataset...")
dataset = project.make_dataset()
if args.copy:
log.info("Cloning data...")
dataset.save(merge=True, save_images=True)
else:
project.save()
log.info("Project has been created at '%s'" % project_dir)
return 0
class FilterModes(Enum):
# primary
items = 1
annotations = 2
items_annotations = 3
# shortcuts
i = 1
a = 2
i_a = 3
a_i = 3
annotations_items = 3
@staticmethod
def parse(s):
s = s.lower()
s = s.replace('+', '_')
return FilterModes[s]
@classmethod
def make_filter_args(cls, mode):
if mode == cls.items:
return {}
elif mode == cls.annotations:
return {
'filter_annotations': True
}
elif mode == cls.items_annotations:
return {
'filter_annotations': True,
'remove_empty': True,
}
else:
raise NotImplementedError()
@classmethod
def list_options(cls):
return [m.name.replace('_', '+') for m in cls]
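# Illustrative note (not part of the class): FilterModes.parse('items+annotations')
# yields FilterModes.items_annotations, for which make_filter_args() returns
# {'filter_annotations': True, 'remove_empty': True}.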
def build_export_parser(parser_ctor=argparse.ArgumentParser):
import datumaro.components.converters as converters_module
builtin_converters = [name for name, cls in converters_module.items]
parser = parser_ctor(help="Export project",
description="""
Exports the project dataset in some format. Optionally, a filter
can be passed, check 'extract' command description for more info.
Each dataset format has its own options, which
are passed after '--' separator (see examples), pass '-- -h'
for more info. If not stated otherwise, by default
only annotations are exported, to include images pass
'--save-images' parameter.|n
|n
Formats:|n
In Datumaro dataset formats are supported by Converter-s.
A Converter produces a dataset of a specific format
from dataset items. It is possible to add a custom Converter.
To do this, you need to put a Converter
definition script to <project_dir>/.datumaro/converters.|n
|n
List of supported dataset formats: %s|n
|n
Examples:|n
- Export project as a VOC-like dataset, include images:|n
|s|sexport -f voc -- --save-images|n
|n
- Export project as a COCO-like dataset in other directory:|n
|s|sexport -f coco -o path/I/like/
""" % ', '.join(builtin_converters),
formatter_class=MultilineFormatter)
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items")
parser.add_argument('--filter-mode', default=FilterModes.i.name,
type=FilterModes.parse,
help="Filter mode (options: %s; default: %s)" % \
(', '.join(FilterModes.list_options()), '%(default)s'))
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Directory to save output (default: a subdir in the current one)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.add_argument('-f', '--format', required=True,
help="Output format")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)")
parser.set_defaults(command=export_command)
return parser
def export_command(args):
project = load_project(args.project_dir)
dst_dir = args.dst_dir
if dst_dir:
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
else:
dst_dir = generate_next_dir_name('%s-export-%s' % \
(project.config.project_name, args.format))
dst_dir = osp.abspath(dst_dir)
try:
converter = project.env.make_converter(args.format,
cmdline_args=args.extra_args)
except KeyError:
raise CliException("Converter for format '%s' is not found" % \
args.format)
filter_args = FilterModes.make_filter_args(args.filter_mode)
log.info("Loading the project...")
dataset = project.make_dataset()
log.info("Exporting the project...")
dataset.export_project(
save_dir=dst_dir,
converter=converter,
filter_expr=args.filter,
**filter_args)
log.info("Project exported to '%s' as '%s'" % \
(dst_dir, args.format))
return 0
def build_extract_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Extract subproject",
description="""
Extracts a subproject that contains only items matching filter.
A filter is an XPath expression, which is applied to XML
representation of a dataset item. Check '--dry-run' parameter
to see XML representations of the dataset items.|n
|n
To filter annotations use the mode ('-m') parameter.|n
Supported modes:|n
- 'i', 'items'|n
- 'a', 'annotations'|n
- 'i+a', 'a+i', 'items+annotations', 'annotations+items'|n
When filtering annotations, use the 'items+annotations'
mode to indicate that annotation-less dataset items should be
removed. To select an annotation, write an XPath that
returns 'annotation' elements (see examples).|n
|n
Examples:|n
- Filter images with width < height:|n
|s|sextract -e '/item[image/width < image/height]'|n
|n
- Filter images with large-area bboxes:|n
|s|sextract -e '/item[annotation/type="bbox" and
annotation/area>2000]'|n
|n
- Filter out all irrelevant annotations from items:|n
|s|sextract -m a -e '/item/annotation[label = "person"]'|n
|n
- Filter out all annotations except large 'cat' annotations:|n
|s|sextract -m a -e '/item/annotation[label="cat" and
area > 99.5]'|n
|n
- Filter occluded annotations and items, if no annotations left:|n
|s|sextract -m i+a -e '/item/annotation[occluded="True"]'
""",
formatter_class=MultilineFormatter)
parser.add_argument('-e', '--filter', default=None,
help="XML XPath filter expression for dataset items")
parser.add_argument('-m', '--mode', default=FilterModes.i.name,
type=FilterModes.parse,
help="Filter mode (options: %s; default: %s)" % \
(', '.join(FilterModes.list_options()), '%(default)s'))
parser.add_argument('--dry-run', action='store_true',
help="Print XML representations to be filtered and exit")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Output directory (default: update current project)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=extract_command)
return parser
def extract_command(args):
project = load_project(args.project_dir)
if not args.dry_run:
dst_dir = args.dst_dir
if dst_dir:
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
else:
dst_dir = generate_next_dir_name('%s-filter' % \
project.config.project_name)
dst_dir = osp.abspath(dst_dir)
dataset = project.make_dataset()
filter_args = FilterModes.make_filter_args(args.mode)
if args.dry_run:
dataset = dataset.extract(filter_expr=args.filter, **filter_args)
for item in dataset:
encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
xml_item = DatasetItemEncoder.to_string(encoded_item)
print(xml_item)
return 0
if not args.filter:
raise CliException("Expected a filter expression ('-e' argument)")
os.makedirs(dst_dir, exist_ok=False)
dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
**filter_args)
log.info("Subproject has been extracted to '%s'" % dst_dir)
return 0
def build_merge_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Merge projects",
description="""
Updates items of the current project with items
from the other project.|n
|n
Examples:|n
- Update a project with items from other project:|n
|s|smerge -p path/to/first/project path/to/other/project
""",
formatter_class=MultilineFormatter)
parser.add_argument('other_project_dir',
help="Directory of the project to get data updates from")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Output directory (default: current project's dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=merge_command)
return parser
def merge_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)
dst_dir = args.dst_dir
if dst_dir:
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
first_dataset = first_project.make_dataset()
first_dataset.update(second_project.make_dataset())
first_dataset.save(save_dir=dst_dir)
if dst_dir is None:
dst_dir = first_project.config.project_dir
dst_dir = osp.abspath(dst_dir)
log.info("Merge results have been saved to '%s'" % dst_dir)
return 0
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Compare projects",
description="""
Compares two projects.|n
|n
Examples:|n
- Compare two projects, consider bboxes matching if their IoU > 0.7,|n
|s|s|s|sprint results to TensorBoard:
|s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7
""",
formatter_class=MultilineFormatter)
parser.add_argument('other_project_dir',
help="Directory of the second project to be compared")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Directory to save comparison results (default: do not save)")
parser.add_argument('-f', '--format',
default=DiffVisualizer.DEFAULT_FORMAT,
choices=[f.name for f in DiffVisualizer.Format],
help="Output format (default: %(default)s)")
parser.add_argument('--iou-thresh', default=0.5, type=float,
help="IoU match threshold for detections (default: %(default)s)")
parser.add_argument('--conf-thresh', default=0.5, type=float,
help="Confidence threshold for detections (default: %(default)s)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the first project to be compared (default: current dir)")
parser.set_defaults(command=diff_command)
return parser
def diff_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)
comparator = Comparator(
iou_threshold=args.iou_thresh,
conf_threshold=args.conf_thresh)
dst_dir = args.dst_dir
if dst_dir:
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
raise CliException("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
else:
dst_dir = generate_next_dir_name('%s-%s-diff' % (
first_project.config.project_name,
second_project.config.project_name)
)
dst_dir = osp.abspath(dst_dir)
if dst_dir:
log.info("Saving diff to '%s'" % dst_dir)
visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
output_format=args.format)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
return 0
def build_transform_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Transform project",
description="""
Applies some operation to dataset items in the project
and produces a new project.
[NOT IMPLEMENTED YET]
""",
formatter_class=MultilineFormatter)
parser.add_argument('-t', '--transform', required=True,
help="Transform to apply to the project")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Directory to save output (default: current dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=transform_command)
return parser
def transform_command(args):
raise NotImplementedError("Not implemented yet.")
# project = load_project(args.project_dir)
# dst_dir = args.dst_dir
# if dst_dir:
# if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
# raise CliException("Directory '%s' already exists "
# "(pass --overwrite to force creation)" % dst_dir)
# dst_dir = osp.abspath(args.dst_dir)
# project.make_dataset().transform_project(
# method=args.transform,
# save_dir=dst_dir
# )
# log.info("Transform results saved to '%s'" % dst_dir)
# return 0
def build_info_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Get project info",
description="""
Outputs project info.
""",
formatter_class=MultilineFormatter)
parser.add_argument('--all', action='store_true',
help="Print all information")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=info_command)
return parser
def info_command(args):
project = load_project(args.project_dir)
config = project.config
env = project.env
dataset = project.make_dataset()
print("Project:")
print(" name:", config.project_name)
print(" location:", config.project_dir)
print("Plugins:")
print(" importers:", ', '.join(env.importers.items))
print(" extractors:", ', '.join(env.extractors.items))
print(" converters:", ', '.join(env.converters.items))
print(" launchers:", ', '.join(env.launchers.items))
print("Sources:")
for source_name, source in config.sources.items():
print(" source '%s':" % source_name)
print(" format:", source.format)
print(" url:", source.url)
print(" location:", project.local_source_dir(source_name))
def print_extractor_info(extractor, indent=''):
print("%slength:" % indent, len(extractor))
categories = extractor.categories()
print("%scategories:" % indent, ', '.join(c.name for c in categories))
for cat_type, cat in categories.items():
print("%s %s:" % (indent, cat_type.name))
if cat_type == AnnotationType.label:
print("%s count:" % indent, len(cat.items))
count_threshold = 10
if args.all:
count_threshold = len(cat.items)
labels = ', '.join(c.name for c in cat.items[:count_threshold])
if count_threshold < len(cat.items):
labels += " (and %s more)" % (
len(cat.items) - count_threshold)
print("%s labels:" % indent, labels)
print("Dataset:")
print_extractor_info(dataset, indent=" ")
subsets = dataset.subsets()
print(" subsets:", ', '.join(subsets))
for subset_name in subsets:
subset = dataset.get_subset(subset_name)
print(" subset '%s':" % subset_name)
print_extractor_info(subset, indent=" ")
print("Models:")
for model_name, model in env.config.models.items():
print(" model '%s':" % model_name)
print(" type:", model.launcher)
return 0
def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(
description="""
Manipulate projects.|n
|n
By default, the project to be operated on is searched for
in the current directory. An additional '-p' argument can be
passed to specify project location.
""",
formatter_class=MultilineFormatter)
subparsers = parser.add_subparsers()
add_subparser(subparsers, 'create', build_create_parser)
add_subparser(subparsers, 'import', build_import_parser)
add_subparser(subparsers, 'export', build_export_parser)
add_subparser(subparsers, 'extract', build_extract_parser)
add_subparser(subparsers, 'merge', build_merge_parser)
add_subparser(subparsers, 'diff', build_diff_parser)
add_subparser(subparsers, 'transform', build_transform_parser)
add_subparser(subparsers, 'info', build_info_parser)
return parser

@ -0,0 +1,247 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
import shutil
from ...util import add_subparser, CliException, MultilineFormatter
from ...util.project import load_project
def build_add_parser(parser_ctor=argparse.ArgumentParser):
import datumaro.components.extractors as extractors_module
extractors_list = [name for name, cls in extractors_module.items]
base_parser = argparse.ArgumentParser(add_help=False)
base_parser.add_argument('-n', '--name', default=None,
help="Name of the new source")
base_parser.add_argument('-f', '--format', required=True,
help="Source dataset format")
base_parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
base_parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser = parser_ctor(help="Add data source to project",
description="""
Adds a data source to a project. The source can be:|n
- a dataset in a supported format (check 'formats' section below)|n
- a Datumaro project|n
|n
The source can be either a local directory or a remote
git repository. Each source type has its own parameters, which can
be checked by:|n
'%s'.|n
|n
Formats:|n
Datasets come in a wide variety of formats. Each dataset
format defines its own data structure and rules on how to
interpret the data. For example, the following data structure
is used in COCO format:|n
/dataset/|n
- /images/<id>.jpg|n
- /annotations/|n
|n
In Datumaro dataset formats are supported by Extractor-s.
An Extractor produces a list of dataset items corresponding
to the dataset. It is possible to add a custom Extractor.
To do this, you need to put an Extractor
definition script to <project_dir>/.datumaro/extractors.|n
|n
List of supported source formats: %s|n
|n
Examples:|n
- Add a local directory with VOC-like dataset:|n
|s|sadd path path/to/voc -f voc_detection|n
- Add a local file with CVAT annotations as 'mysource'|n
|s|s|s|sto a project in another directory:|n
|s|sadd path path/to/cvat.xml -f cvat -n mysource -p somewhere/else/
""" % ('%(prog)s SOURCE_TYPE --help', ', '.join(extractors_list)),
formatter_class=MultilineFormatter,
add_help=False)
parser.set_defaults(command=add_command)
sp = parser.add_subparsers(dest='source_type', metavar='SOURCE_TYPE',
help="The type of the data source "
"(call '%s SOURCE_TYPE --help' for more info)" % parser.prog)
dir_parser = sp.add_parser('path', help="Add local path as source",
parents=[base_parser])
dir_parser.add_argument('url',
help="Path to the source")
dir_parser.add_argument('--copy', action='store_true',
help="Copy the dataset instead of saving source links")
repo_parser = sp.add_parser('git', help="Add git repository as source",
parents=[base_parser])
repo_parser.add_argument('url',
help="URL of the source git repository")
repo_parser.add_argument('-b', '--branch', default='master',
help="Branch of the source repository (default: %(default)s)")
repo_parser.add_argument('--checkout', action='store_true',
help="Do branch checkout")
# NOTE: add common parameters to the parent help output
# the other way could be to use parse_known_args()
display_parser = argparse.ArgumentParser(
parents=[base_parser, parser],
prog=parser.prog, usage="%(prog)s [-h] SOURCE_TYPE ...",
description=parser.description, formatter_class=MultilineFormatter)
class HelpAction(argparse._HelpAction):
def __call__(self, parser, namespace, values, option_string=None):
display_parser.print_help()
parser.exit()
parser.add_argument('-h', '--help', action=HelpAction,
help='show this help message and exit')
# TODO: needed distinction on how to add an extractor or a remote source
return parser
def add_command(args):
project = load_project(args.project_dir)
if args.source_type == 'git':
name = args.name
if name is None:
name = osp.splitext(osp.basename(args.url))[0]
if project.env.git.has_submodule(name):
raise CliException("Git submodule '%s' already exists" % name)
try:
project.get_source(name)
raise CliException("Source '%s' already exists" % name)
except KeyError:
pass
rel_local_dir = project.local_source_dir(name)
local_dir = osp.join(project.config.project_dir, rel_local_dir)
url = args.url
project.env.git.create_submodule(name, local_dir,
url=url, branch=args.branch, no_checkout=not args.checkout)
elif args.source_type == 'path':
url = osp.abspath(args.url)
if not osp.exists(url):
raise CliException("Source path '%s' does not exist" % url)
name = args.name
if name is None:
name = osp.splitext(osp.basename(url))[0]
if project.env.git.has_submodule(name):
raise CliException("Git submodule '%s' already exists" % name)
try:
project.get_source(name)
raise CliException("Source '%s' already exists" % name)
except KeyError:
pass
rel_local_dir = project.local_source_dir(name)
local_dir = osp.join(project.config.project_dir, rel_local_dir)
if args.copy:
log.info("Copying from '%s' to '%s'" % (url, local_dir))
if osp.isdir(url):
# copytree requires destination dir not to exist
shutil.copytree(url, local_dir)
url = rel_local_dir
elif osp.isfile(url):
os.makedirs(local_dir)
shutil.copy2(url, local_dir)
url = osp.join(rel_local_dir, osp.basename(url))
else:
raise Exception("Expected file or directory")
else:
os.makedirs(local_dir)
project.add_source(name, { 'url': url, 'format': args.format })
if not args.skip_check:
log.info("Checking the source...")
try:
project.make_source_project(name).make_dataset()
except Exception:
shutil.rmtree(local_dir, ignore_errors=True)
raise
project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \
% (name, rel_local_dir))
return 0
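# --- Illustration (not part of the diff) ---
# add_command checks for an existing source with a try/except idiom:
# get_source() raises KeyError for unknown names, while the exception
# raised inside the 'try' is not a KeyError, so it escapes the handler.
# A self-contained sketch of the same pattern; 'FakeProject' is a
# hypothetical stand-in, for illustration only.
class FakeProject:
    def __init__(self):
        self._sources = {'existing': {}}

    def get_source(self, name):
        return self._sources[name]  # raises KeyError if absent

def ensure_source_is_new(project, name):
    try:
        project.get_source(name)
        # Reached only when the source exists; this is not a KeyError,
        # so it propagates past the handler below.
        raise RuntimeError("Source '%s' already exists" % name)
    except KeyError:
        pass  # the name is free

ensure_source_is_new(FakeProject(), 'new_source')  # passes silently
# ensure_source_is_new(FakeProject(), 'existing')  # would raise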
def build_remove_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Remove source from project",
description="Remove a source from a project.")
parser.add_argument('-n', '--name', required=True,
help="Name of the source to be removed")
parser.add_argument('--force', action='store_true',
help="Ignore possible errors during removal")
parser.add_argument('--keep-data', action='store_true',
help="Do not remove source data")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.set_defaults(command=remove_command)
return parser
def remove_command(args):
project = load_project(args.project_dir)
name = args.name
if not name:
raise CliException("Expected source name")
try:
project.get_source(name)
except KeyError:
if not args.force:
raise CliException("Source '%s' does not exist" % name)
if project.env.git.has_submodule(name):
if args.force:
log.warning("Forcefully removing the '%s' source..." % name)
project.env.git.remove_submodule(name, force=args.force)
source_dir = osp.join(project.config.project_dir,
project.local_source_dir(name))
project.remove_source(name)
project.save()
if not args.keep_data:
shutil.rmtree(source_dir, ignore_errors=True)
log.info("Source '%s' has been removed from the project" % name)
return 0
def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(description="""
Manipulate data sources inside of a project.|n
|n
A data source provides dataset items - images and/or
annotations - for a project. The project combines multiple
data sources into one dataset.|n
|n
By default, the project to be operated on is looked for
in the current directory. Pass the '-p' argument to specify
another project location.
""",
formatter_class=MultilineFormatter)
subparsers = parser.add_subparsers()
add_subparser(subparsers, 'add', build_add_parser)
add_subparser(subparsers, 'remove', build_remove_parser)
return parser

@ -1,21 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from . import project as project_module
def build_parser(parser=argparse.ArgumentParser()):
project_module.build_create_parser(parser) \
.set_defaults(command=project_module.create_command)
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
return args.command(args)

@ -1,69 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import os.path as osp
from datumaro.components.project import Project
from datumaro.util.command_targets import (TargetKinds, target_selector,
ProjectTarget, SourceTarget, ImageTarget, ExternalDatasetTarget,
is_project_path
)
from . import project as project_module
from . import source as source_module
from . import item as item_module
def export_external_dataset(target, params):
raise NotImplementedError()
def build_parser(parser=argparse.ArgumentParser()):
parser.add_argument('target', nargs='?', default=None)
parser.add_argument('params', nargs=argparse.REMAINDER)
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def process_command(target, params, args):
project_dir = args.project_dir
target_kind, target_value = target
if target_kind == TargetKinds.project:
return project_module.main(['export', '-p', target_value] + params)
elif target_kind == TargetKinds.source:
return source_module.main(['export', '-p', project_dir, '-n', target_value] + params)
elif target_kind == TargetKinds.item:
return item_module.main(['export', '-p', project_dir, target_value] + params)
elif target_kind == TargetKinds.external_dataset:
return export_external_dataset(target_value, params)
return 1
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
project_path = args.project_dir
if is_project_path(project_path):
project = Project.load(project_path)
else:
project = None
try:
args.target = target_selector(
ProjectTarget(is_default=True, project=project),
SourceTarget(project=project),
ExternalDatasetTarget(),
ImageTarget()
)(args.target)
if args.target[0] == TargetKinds.project:
if is_project_path(args.target[1]):
args.project_dir = osp.dirname(osp.abspath(args.target[1]))
except argparse.ArgumentTypeError as e:
print(e)
parser.print_help()
return 1
return process_command(args.target, args.params, args)

@ -1,33 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
def run_command(args):
return 0
def build_run_parser(parser):
return parser
def build_parser(parser=argparse.ArgumentParser()):
command_parsers = parser.add_subparsers(dest='command')
build_run_parser(command_parsers.add_parser('run')). \
set_defaults(command=run_command)
return parser
def process_command(command, args):
return 0
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
return args.command(args)

@ -1,38 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
def build_export_parser(parser):
return parser
def build_stats_parser(parser):
return parser
def build_diff_parser(parser):
return parser
def build_edit_parser(parser):
return parser
def build_parser(parser=argparse.ArgumentParser()):
command_parsers = parser.add_subparsers(dest='command_name')
build_export_parser(command_parsers.add_parser('export'))
build_stats_parser(command_parsers.add_parser('stats'))
build_diff_parser(command_parsers.add_parser('diff'))
build_edit_parser(command_parsers.add_parser('edit'))
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
return args.command(args)

@ -1,361 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
import shutil
from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from .diff import DiffVisualizer
from ..util.project import make_project_path, load_project
def build_create_parser(parser):
parser.add_argument('-d', '--dest', default='.', dest='dst_dir',
help="Save directory for the new project (default: current dir")
parser.add_argument('-n', '--name', default=None,
help="Name of the new project (default: same as project dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
return parser
def create_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=args.overwrite)
if not args.overwrite and osp.isfile(project_path):
log.error("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
return 1
project_name = args.name
if project_name is None:
project_name = osp.basename(project_dir)
log.info("Creating project at '%s'" % (project_dir))
Project.generate(project_dir, {
'project_name': project_name,
})
log.info("Project has been created at '%s'" % (project_dir))
return 0
def build_import_parser(parser):
import datumaro.components.importers as importers_module
importers_list = [name for name, cls in importers_module.items]
parser.add_argument('-s', '--source', required=True,
help="Path to import a project from")
parser.add_argument('-f', '--format', required=True,
help="Source project format (options: %s)" % (', '.join(importers_list)))
parser.add_argument('-d', '--dest', default='.', dest='dst_dir',
help="Directory to save the new project to (default: current dir)")
parser.add_argument('-n', '--name', default=None,
help="Name of the new project (default: same as project dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('--copy', action='store_true',
help="Copy the dataset instead of saving source links")
parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
# parser.add_argument('extra_args', nargs=argparse.REMAINDER,
# help="Additional arguments for importer (pass '-- -h' for help)")
return parser
def import_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)
if osp.isdir(project_dir) and os.listdir(project_dir):
if not args.overwrite:
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
else:
shutil.rmtree(project_dir)
os.makedirs(project_dir, exist_ok=args.overwrite)
if not args.overwrite and osp.isfile(project_path):
log.error("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
return 1
project_name = args.name
if project_name is None:
project_name = osp.basename(project_dir)
log.info("Importing project from '%s' as '%s'" % \
(args.source, args.format))
source = osp.abspath(args.source)
project = Project.import_from(source, args.format)
project.config.project_name = project_name
project.config.project_dir = project_dir
if not args.skip_check or args.copy:
log.info("Checking the dataset...")
dataset = project.make_dataset()
if args.copy:
log.info("Cloning data...")
dataset.save(merge=True, save_images=True)
else:
project.save()
log.info("Project has been created at '%s'" % (project_dir))
return 0
def build_build_parser(parser):
return parser
def build_export_parser(parser):
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)")
return parser
def export_command(args):
project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir)
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
log.info("Loading the project...")
dataset = project.make_dataset()
log.info("Exporting the project...")
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Project exported to '%s' as '%s'" % \
(dst_dir, args.output_format))
return 0
def build_stats_parser(parser):
parser.add_argument('name')
return parser
def build_docs_parser(parser):
return parser
def build_extract_parser(parser):
parser.add_argument('-e', '--filter', default=None,
help="XML XPath filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]' "
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('--remove-empty', action='store_true',
help="Remove an item if there are no annotations left after filtration")
parser.add_argument('--dry-run', action='store_true',
help="Print XML representations to be filtered and exit")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Output directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def extract_command(args):
project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir)
if not args.dry_run:
os.makedirs(dst_dir, exist_ok=False)
dataset = project.make_dataset()
kwargs = {}
if args.filter_annotations:
kwargs['remove_empty'] = args.remove_empty
if args.dry_run:
dataset = dataset.extract(filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)
for item in dataset:
encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
xml_item = DatasetItemEncoder.to_string(encoded_item)
print(xml_item)
return 0
dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)
log.info("Subproject extracted to '%s'" % (dst_dir))
return 0
def build_merge_parser(parser):
parser.add_argument('other_project_dir',
help="Directory of the project to get data updates from")
parser.add_argument('-d', '--dest', dest='dst_dir', default=None,
help="Output directory (default: current project's dir)")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def merge_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)
first_dataset = first_project.make_dataset()
first_dataset.update(second_project.make_dataset())
dst_dir = args.dst_dir
first_dataset.save(save_dir=dst_dir)
if dst_dir is None:
dst_dir = first_project.config.project_dir
dst_dir = osp.abspath(dst_dir)
log.info("Merge result saved to '%s'" % (dst_dir))
return 0
def build_diff_parser(parser):
parser.add_argument('other_project_dir',
help="Directory of the second project to be compared")
parser.add_argument('-d', '--dest', default=None, dest='dst_dir',
help="Directory to save comparison results (default: do not save)")
parser.add_argument('-f', '--output-format',
default=DiffVisualizer.DEFAULT_FORMAT,
choices=[f.name for f in DiffVisualizer.Format],
help="Output format (default: %(default)s)")
parser.add_argument('--iou-thresh', default=0.5, type=float,
help="IoU match threshold for detections (default: %(default)s)")
parser.add_argument('--conf-thresh', default=0.5, type=float,
help="Confidence threshold for detections (default: %(default)s)")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the first project to be compared (default: current dir)")
return parser
def diff_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)
comparator = Comparator(
iou_threshold=args.iou_thresh,
conf_threshold=args.conf_thresh)
save_dir = args.dst_dir
if save_dir is not None:
log.info("Saving diff to '%s'" % save_dir)
os.makedirs(osp.abspath(save_dir))
visualizer = DiffVisualizer(save_dir=save_dir, comparator=comparator,
output_format=args.output_format)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
return 0
def build_transform_parser(parser):
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-m', '--model', dest='model_name', required=True,
help="Model to apply to the project")
parser.add_argument('-f', '--output-format', required=True,
help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def transform_command(args):
project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
project.make_dataset().apply_model(
save_dir=dst_dir,
model_name=args.model_name)
log.info("Transform results saved to '%s'" % (dst_dir))
return 0
def build_parser(parser=argparse.ArgumentParser()):
command_parsers = parser.add_subparsers(dest='command_name')
build_create_parser(command_parsers.add_parser('create')) \
.set_defaults(command=create_command)
build_import_parser(command_parsers.add_parser('import')) \
.set_defaults(command=import_command)
build_export_parser(command_parsers.add_parser('export')) \
.set_defaults(command=export_command)
build_extract_parser(command_parsers.add_parser('extract')) \
.set_defaults(command=extract_command)
build_merge_parser(command_parsers.add_parser('merge')) \
.set_defaults(command=merge_command)
build_build_parser(command_parsers.add_parser('build'))
build_stats_parser(command_parsers.add_parser('stats'))
build_docs_parser(command_parsers.add_parser('docs'))
build_diff_parser(command_parsers.add_parser('diff')) \
.set_defaults(command=diff_command)
build_transform_parser(command_parsers.add_parser('transform')) \
.set_defaults(command=transform_command)
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
return args.command(args)

@ -1,21 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
from . import source as source_module
def build_parser(parser=argparse.ArgumentParser()):
source_module.build_add_parser(parser). \
set_defaults(command=source_module.remove_command)
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
return args.command(args)

@ -1,254 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
import shutil
from ..util.project import load_project
def build_create_parser(parser):
parser.add_argument('-n', '--name', required=True,
help="Name of the source to be created")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def create_command(args):
project = load_project(args.project_dir)
config = project.config
name = args.name
if project.env.git.has_submodule(name):
log.fatal("Submodule '%s' already exists" % (name))
return 1
try:
project.get_source(name)
log.fatal("Source '%s' already exists" % (name))
return 1
except KeyError:
pass
dst_dir = osp.join(config.project_dir, config.sources_dir, name)
project.env.git.init(dst_dir)
project.add_source(name, { 'url': name })
project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \
% (name, dst_dir))
return 0
def build_import_parser(parser):
sp = parser.add_subparsers(dest='source_type')
repo_parser = sp.add_parser('repo')
repo_parser.add_argument('url',
help="URL of the source git repository")
repo_parser.add_argument('-b', '--branch', default='master',
help="Branch of the source repository (default: %(default)s)")
repo_parser.add_argument('--checkout', action='store_true',
help="Do branch checkout")
dir_parser = sp.add_parser('dir')
dir_parser.add_argument('url',
help="Path to the source directory")
dir_parser.add_argument('--copy', action='store_true',
help="Copy the dataset instead of saving source links")
parser.add_argument('-n', '--name', default=None,
help="Name of the new source")
parser.add_argument('-f', '--format', default=None,
help="Name of the source dataset format (default: 'project')")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
return parser
def import_command(args):
project = load_project(args.project_dir)
if args.source_type == 'repo':
name = args.name
if name is None:
name = osp.splitext(osp.basename(args.url))[0]
if project.env.git.has_submodule(name):
log.fatal("Submodule '%s' already exists" % (name))
return 1
try:
project.get_source(name)
log.fatal("Source '%s' already exists" % (name))
return 1
except KeyError:
pass
dst_dir = project.local_source_dir(name)
project.env.git.create_submodule(name, dst_dir,
url=args.url, branch=args.branch, no_checkout=not args.checkout)
source = { 'url': args.url }
if args.format:
source['format'] = args.format
project.add_source(name, source)
if not args.skip_check:
log.info("Checking the source...")
project.make_source_project(name)
project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \
% (name, dst_dir))
elif args.source_type == 'dir':
url = osp.abspath(args.url)
if not osp.exists(url):
log.fatal("Source path '%s' does not exist" % url)
return 1
name = args.name
if name is None:
name = osp.splitext(osp.basename(url))[0]
try:
project.get_source(name)
log.fatal("Source '%s' already exists" % (name))
return 1
except KeyError:
pass
dst_dir = url
if args.copy:
dst_dir = project.local_source_dir(name)
log.info("Copying from '%s' to '%s'" % (url, dst_dir))
shutil.copytree(url, dst_dir)
url = name
source = { 'url': url }
if args.format:
source['format'] = args.format
project.add_source(name, source)
if not args.skip_check:
log.info("Checking the source...")
project.make_source_project(name)
project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \
% (name, dst_dir))
return 0
def build_remove_parser(parser):
parser.add_argument('-n', '--name', required=True,
help="Name of the source to be removed")
parser.add_argument('--force', action='store_true',
help="Ignore possible errors during removal")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def remove_command(args):
project = load_project(args.project_dir)
name = args.name
if name is None:
log.fatal("Expected source name")
return
if project.env.git.has_submodule(name):
if args.force:
log.warning("Forcefully removing the '%s' source..." % (name))
project.env.git.remove_submodule(name, force=args.force)
project.remove_source(name)
project.save()
log.info("Source '%s' has been removed from the project" % (name))
return 0
def build_export_parser(parser):
parser.add_argument('-n', '--name', required=True,
help="Source dataset to be extracted")
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)")
return parser
def export_command(args):
project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir)
if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
log.info("Loading the project...")
source_project = project.make_source_project(args.name)
dataset = source_project.make_dataset()
log.info("Exporting the project...")
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Source '%s' exported to '%s' as '%s'" % \
(args.name, dst_dir, args.output_format))
return 0
def build_parser(parser=argparse.ArgumentParser()):
command_parsers = parser.add_subparsers(dest='command_name')
build_create_parser(command_parsers.add_parser('create')) \
.set_defaults(command=create_command)
build_import_parser(command_parsers.add_parser('import')) \
.set_defaults(command=import_command)
build_remove_parser(command_parsers.add_parser('remove')) \
.set_defaults(command=remove_command)
build_export_parser(command_parsers.add_parser('export')) \
.set_defaults(command=export_command)
return parser
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
if 'command' not in args:
parser.print_help()
return 1
return args.command(args)

@ -1,69 +0,0 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import os.path as osp
from datumaro.components.project import Project
from datumaro.util.command_targets import (TargetKinds, target_selector,
ProjectTarget, SourceTarget, ExternalDatasetTarget, ImageTarget,
is_project_path
)
from . import project as project_module
from . import source as source_module
from . import item as item_module
def compute_external_dataset_stats(target, params):
raise NotImplementedError()
def build_parser(parser=argparse.ArgumentParser()):
parser.add_argument('target', nargs='?', default=None)
parser.add_argument('params', nargs=argparse.REMAINDER)
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
return parser
def process_command(target, params, args):
project_dir = args.project_dir
target_kind, target_value = target
if target_kind == TargetKinds.project:
return project_module.main(['stats', '-p', target_value] + params)
elif target_kind == TargetKinds.source:
return source_module.main(['stats', '-p', project_dir, target_value] + params)
elif target_kind == TargetKinds.item:
return item_module.main(['stats', '-p', project_dir, target_value] + params)
elif target_kind == TargetKinds.external_dataset:
return compute_external_dataset_stats(target_value, params)
return 1
def main(args=None):
parser = build_parser()
args = parser.parse_args(args)
project_path = args.project_dir
if is_project_path(project_path):
project = Project.load(project_path)
else:
project = None
try:
args.target = target_selector(
ProjectTarget(is_default=True, project=project),
SourceTarget(project=project),
ExternalDatasetTarget(),
ImageTarget()
)(args.target)
if args.target[0] == TargetKinds.project:
if is_project_path(args.target[1]):
args.project_dir = osp.dirname(osp.abspath(args.target[1]))
except argparse.ArgumentTypeError as e:
print(e)
parser.print_help()
return 1
return process_command(args.target, args.params, args)

@ -3,3 +3,36 @@
#
# SPDX-License-Identifier: MIT
import argparse
import textwrap
class CliException(Exception): pass
def add_subparser(subparsers, name, builder):
return builder(lambda **kwargs: subparsers.add_parser(name, **kwargs))
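# --- Illustration (not part of the diff) ---
# The builder/ctor indirection above lets the same build_*_parser
# function create either a standalone parser or a subparser. A minimal
# sketch, assuming add_subparser() is in scope; 'hello' is made up.
import argparse

def build_hello_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(description="Says hello")
    parser.add_argument('name')
    return parser

root = argparse.ArgumentParser()
subparsers = root.add_subparsers()
add_subparser(subparsers, 'hello', build_hello_parser)
args = root.parse_args(['hello', 'world'])
print(args.name)  # 'world'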
class MultilineFormatter(argparse.HelpFormatter):
"""
Keeps line breaks introduced with the '|n' separator
and literal spaces introduced with '|s'.
"""
def __init__(self, keep_natural=False, **kwargs):
super().__init__(**kwargs)
self._keep_natural = keep_natural
def _fill_text(self, text, width, indent):
text = self._whitespace_matcher.sub(' ', text).strip()
text = text.replace('|s', ' ')
paragraphs = text.split('|n ')
if self._keep_natural:
paragraphs = sum((p.split('\n ') for p in paragraphs), [])
multiline_text = ''
for paragraph in paragraphs:
formatted_paragraph = textwrap.fill(paragraph, width,
initial_indent=indent, subsequent_indent=indent) + '\n'
multiline_text += formatted_paragraph
return multiline_text
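# --- Illustration (not part of the diff) ---
# A toy parser using the formatter, assuming MultilineFormatter is in
# scope. Each '|n' is followed by whitespace in the source string,
# which _fill_text() normalizes to '|n ' before splitting paragraphs.
import argparse

parser = argparse.ArgumentParser(
    prog='demo',
    description="""
        First paragraph, re-wrapped to the available width.|n
        |n
        Second paragraph, kept separate from the first.
        """,
    formatter_class=MultilineFormatter)
parser.print_help()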

@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: MIT
import os
import os.path as osp
from datumaro.components.project import Project, \
@ -17,4 +18,26 @@ def make_project_path(project_dir, project_filename=None):
def load_project(project_dir, project_filename=None):
if project_filename:
project_dir = osp.join(project_dir, project_filename)
return Project.load(project_dir)
return Project.load(project_dir)
def generate_next_dir_name(dirname, basedir='.', sep='.'):
"""
If basedir does not contain dirname, returns dirname itself;
otherwise, returns dirname with the separator and a number
appended, where the number follows the highest one already
used in basedir for entries with the dirname prefix.
"""
def _to_int(s):
try:
return int(s)
except Exception:
return 0
sep_count = dirname.count(sep) + 2
files = [e for e in os.listdir(basedir) if e.startswith(dirname)]
if files:
files = [e.split(sep) for e in files]
files = [_to_int(e[-1]) for e in files if len(e) == sep_count]
dirname += '%s%s' % (sep, max(files, default=0) + 1)
return dirname
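# --- Illustration (not part of the diff) ---
# Repeated calls pick the next free numeric suffix; the directory
# layout below is created just for the demonstration.
import os
import tempfile

base = tempfile.mkdtemp()
print(generate_next_dir_name('export', basedir=base))  # 'export'
os.makedirs(os.path.join(base, 'export'))
print(generate_next_dir_name('export', basedir=base))  # 'export.1'
os.makedirs(os.path.join(base, 'export.1'))
print(generate_next_dir_name('export', basedir=base))  # 'export.2'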

@ -8,7 +8,7 @@
import numpy as np
from math import ceil
from datumaro.components.extractor import *
from datumaro.components.extractor import AnnotationType
def flatmatvec(mat):

@ -5,7 +5,7 @@
from datumaro.components.converters.datumaro import DatumaroConverter
from datumaro.components.converters.ms_coco import (
from datumaro.components.converters.coco import (
CocoConverter,
CocoImageInfoConverter,
CocoCaptionsConverter,

@ -14,9 +14,9 @@ import pycocotools.mask as mask_utils
from datumaro.components.converter import Converter
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME, AnnotationType, PointsObject, BboxObject, MaskObject
DEFAULT_SUBSET_NAME, AnnotationType, PointsObject, MaskObject
)
from datumaro.components.formats.ms_coco import CocoTask, CocoPath
from datumaro.components.formats.coco import CocoTask, CocoPath
from datumaro.util import find
from datumaro.util.image import save_image
import datumaro.util.mask_tools as mask_tools
@ -139,7 +139,10 @@ class _CaptionsConverter(_TaskConverter):
'caption': ann.caption,
}
if 'score' in ann.attributes:
elem['score'] = float(ann.attributes['score'])
try:
elem['score'] = float(ann.attributes['score'])
except Exception as e:
log.warning("Failed to convert attribute 'score': %e" % e)
self.annotations.append(elem)
@ -202,7 +205,7 @@ class _InstancesConverter(_TaskConverter):
polygons = [p.get_polygon() for p in polygons]
if self._context._segmentation_mode == SegmentationMode.guess:
use_masks = leader.attributes.get('is_crowd',
use_masks = True == leader.attributes.get('is_crowd',
find(masks, lambda x: x.label == leader.label) is not None)
elif self._context._segmentation_mode == SegmentationMode.polygons:
use_masks = False
@ -342,7 +345,10 @@ class _InstancesConverter(_TaskConverter):
'iscrowd': int(is_crowd),
}
if 'score' in ann.attributes:
elem['score'] = float(ann.attributes['score'])
try:
elem['score'] = float(ann.attributes['score'])
except Exception as e:
log.warning("Failed to convert attribute 'score': %e" % e)
return elem
@ -448,7 +454,10 @@ class _LabelsConverter(_TaskConverter):
'category_id': int(ann.label) + 1,
}
if 'score' in ann.attributes:
elem['score'] = float(ann.attributes['score'])
try:
elem['score'] = float(ann.attributes['score'])
except Exception as e:
log.warning("Failed to convert attribute 'score': %e" % e)
self.annotations.append(elem)
@ -570,7 +579,7 @@ class CocoConverter(Converter):
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='coco')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -14,6 +14,14 @@ from datumaro.components.formats.cvat import CvatPath
from datumaro.util.image import save_image
def _cast(value, type_conv, default=None):
if value is None:
return default
try:
return type_conv(value)
except Exception:
return default
def pairwise(iterable):
a = iter(iterable)
return zip(a, a)
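# --- Illustration (not part of the diff) ---
# _cast() swallows conversion errors, pairwise() walks a flat
# coordinate list two items at a time; the values here are made up.
print(_cast('2.5', float))       # 2.5
print(_cast('oops', float, -1))  # -1 (conversion failed)
print(_cast(None, float, -1))    # -1 (missing value)
print(list(pairwise([10, 20, 30, 40, 50, 60])))
# [(10, 20), (30, 40), (50, 60)]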
@ -261,6 +269,8 @@ class _SubsetWriter:
raise NotImplementedError("unknown shape type")
for attr_name, attr_value in shape.attributes.items():
if isinstance(attr_value, bool):
attr_value = 'true' if attr_value else 'false'
if attr_name in self._get_label(shape.label).attributes:
self._writer.add_attribute(OrderedDict([
("name", str(attr_name)),
@ -325,7 +335,7 @@ class CvatConverter(Converter):
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='cvat')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -287,7 +287,7 @@ class DatumaroConverter(Converter):
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='datumaro')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -10,6 +10,7 @@ import os.path as osp
import string
from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME
from datumaro.components.converter import Converter
from datumaro.components.formats.tfrecord import DetectionApiPath
from datumaro.util.image import encode_image
from datumaro.util.tf_util import import_tf as _import_tf
@ -97,7 +98,7 @@ def _make_tf_example(item, get_label_id, get_label, save_images=False):
return tf_example
class DetectionApiConverter:
class DetectionApiConverter(Converter):
def __init__(self, save_images=False, cmdline_args=None):
super().__init__()
@ -113,7 +114,7 @@ class DetectionApiConverter:
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='tf_detection_api')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -23,6 +23,19 @@ from datumaro.util.image import save_image
from datumaro.util.mask_tools import apply_colormap, remap_mask
def _convert_attr(name, attributes, type_conv, default=None, warn=True):
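# 'd' below is a unique sentinel object: it distinguishes an attribute
# that is absent from one that is present with a falsy value (0, '').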
d = object()
value = attributes.get(name, d)
if value is d:
return default
try:
return type_conv(value)
except Exception as e:
log.warning("Failed to convert attribute '%s'='%s': %s" % \
(name, value, e))
return default
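# --- Illustration (not part of the diff) ---
# How _convert_attr() behaves; the attribute values are made up.
attrs = {'truncated': '1', 'pose': 'Left'}
print(_convert_attr('truncated', attrs, int, 0))  # 1
print(_convert_attr('difficult', attrs, int, 0))  # 0 (absent -> default)
print(_convert_attr('pose', attrs, int, 0))       # 0, plus a warning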
def _write_xml_bbox(bbox, parent_elem):
x, y, w, h = bbox
bbox_elem = ET.SubElement(parent_elem, 'bndbox')
@ -185,26 +198,17 @@ class _Converter:
obj_label = self.get_label(obj.label)
ET.SubElement(obj_elem, 'name').text = obj_label
pose = attr.get('pose')
if pose is not None:
pose = VocPose[pose]
else:
pose = VocPose.Unspecified
pose = _convert_attr('pose', attr, lambda v: VocPose[v],
VocPose.Unspecified)
ET.SubElement(obj_elem, 'pose').text = pose.name
truncated = attr.get('truncated')
if truncated is not None:
truncated = int(truncated)
else:
truncated = 0
ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated
truncated = _convert_attr('truncated', attr, int, 0)
ET.SubElement(obj_elem, 'truncated').text = \
'%d' % truncated
difficult = attr.get('difficult')
if difficult is not None:
difficult = int(difficult)
else:
difficult = 0
ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult
difficult = _convert_attr('difficult', attr, int, 0)
ET.SubElement(obj_elem, 'difficult').text = \
'%d' % difficult
bbox = obj.get_bbox()
if bbox is not None:
@ -219,16 +223,16 @@ class _Converter:
objects_with_parts.append(new_obj_id)
actions = {k: v for k, v in obj.attributes.items()
if self._is_action(obj_label, k)}
label_actions = self._get_actions(obj_label)
actions_elem = ET.Element('actions')
for action in self._get_actions(obj_label):
presented = action in actions and actions[action]
for action in label_actions:
presented = _convert_attr(action, attr,
lambda v: int(v == True), 0)
ET.SubElement(actions_elem, action).text = \
'%d' % presented
objects_with_actions[new_obj_id][action] = presented
if len(actions) != 0:
if len(actions_elem) != 0:
obj_elem.append(actions_elem)
if set(self._tasks) & set([None,
@ -502,7 +506,7 @@ class VocConverter(Converter):
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='voc')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -41,7 +41,7 @@ class YoloConverter(Converter):
def build_cmdline_parser(cls, parser=None):
import argparse
if not parser:
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='yolo')
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")

@ -57,6 +57,8 @@ class DatasetItemEncoder:
@staticmethod
def _get_label(label_id, categories):
label = ''
if label_id is None:
return ''
if categories is not None:
label_cat = categories.get(AnnotationType.label)
if label_cat is not None:

@ -213,7 +213,7 @@ class MaskObject(Annotation):
(self.label == other.label) and \
(self.z_order == other.z_order) and \
(self.image is not None and other.image is not None and \
np.all(self.image == other.image))
np.array_equal(self.image, other.image))
class RleMask(MaskObject):
# pylint: disable=redefined-builtin
@ -546,7 +546,7 @@ class DatasetItem:
(self.annotations == other.annotations) and \
(self.path == other.path) and \
(self.has_image == other.has_image) and \
(self.has_image and np.all(self.image == other.image) or \
(self.has_image and np.array_equal(self.image, other.image) or \
not self.has_image)
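# --- Illustration (not part of the diff) ---
# Why np.array_equal replaces np.all here: broadcasting can make
# differently-shaped arrays compare element-wise as equal.
import numpy as np

a = np.ones((2, 3))
b = np.ones(3)                # different shape, broadcastable against a
print(np.all(a == b))         # True  -- broadcasting hides the mismatch
print(np.array_equal(a, b))   # False -- shapes are compared first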
class IExtractor:

@ -5,7 +5,7 @@
from datumaro.components.extractors.datumaro import DatumaroExtractor
from datumaro.components.extractors.ms_coco import (
from datumaro.components.extractors.coco import (
CocoImageInfoExtractor,
CocoCaptionsExtractor,
CocoInstancesExtractor,
@ -29,6 +29,7 @@ from datumaro.components.extractors.voc import (
from datumaro.components.extractors.yolo import YoloExtractor
from datumaro.components.extractors.tfrecord import DetectionApiExtractor
from datumaro.components.extractors.cvat import CvatExtractor
from datumaro.components.extractors.image_dir import ImageDirExtractor
items = [
('datumaro', DatumaroExtractor),
@ -56,4 +57,6 @@ items = [
('tf_detection_api', DetectionApiExtractor),
('cvat', CvatExtractor),
('image_dir', ImageDirExtractor),
]

@ -15,7 +15,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem,
BboxObject, CaptionObject,
LabelCategories, PointsCategories
)
from datumaro.components.formats.ms_coco import CocoTask, CocoPath
from datumaro.components.formats.coco import CocoTask, CocoPath
from datumaro.util.image import lazy_image

@ -91,7 +91,15 @@ class CvatExtractor(Extractor):
shape.update(image)
elif ev == 'end':
if el.tag == 'attribute' and shape is not None:
shape['attributes'][el.attrib['name']] = el.text
attr_value = el.text
if el.text in ['true', 'false']:
attr_value = attr_value == 'true'
else:
try:
attr_value = float(attr_value)
except Exception:
pass
shape['attributes'][el.attrib['name']] = attr_value
elif el.tag in cls._SUPPORTED_SHAPES:
if track is not None:
shape['frame'] = el.attrib['frame']
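# --- Illustration (not part of the diff) ---
# The attribute handling added above maps CVAT XML strings to Python
# types; a standalone sketch of the same conversion rules.
def parse_cvat_attr(text):
    if text in ['true', 'false']:    # booleans first
        return text == 'true'
    try:
        return float(text)           # then numbers
    except Exception:
        return text                  # otherwise keep the raw string

assert parse_cvat_attr('true') is True
assert parse_cvat_attr('0.5') == 0.5
assert parse_cvat_attr('person') == 'person'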

@ -0,0 +1,55 @@
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
import os
import os.path as osp
from datumaro.components.extractor import DatasetItem, Extractor
from datumaro.util.image import lazy_image
class ImageDirExtractor(Extractor):
_SUPPORTED_FORMATS = ['.png', '.jpg']
def __init__(self, url):
super().__init__()
assert osp.isdir(url)
items = []
for name in os.listdir(url):
path = osp.join(url, name)
if self._is_image(path):
item_id = osp.splitext(name)[0]
item = DatasetItem(id=item_id, image=lazy_image(path))
items.append((item.id, item))
items = sorted(items, key=lambda e: e[0])
items = OrderedDict(items)
self._items = items
self._subsets = None
def __iter__(self):
for item in self._items.values():
yield item
def __len__(self):
return len(self._items)
def subsets(self):
return self._subsets
def get(self, item_id, subset=None, path=None):
if path or subset:
raise KeyError()
return self._items[item_id]
def _is_image(self, path):
for ext in self._SUPPORTED_FORMATS:
if osp.isfile(path) and path.endswith(ext):
return True
return False
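# --- Illustration (not part of the diff) ---
# Usage sketch: a throwaway directory with (empty) image files;
# lazy_image defers decoding, so nothing is actually read here.
import os
import tempfile

tmp = tempfile.mkdtemp()
for name in ['b.png', 'a.jpg', 'notes.txt']:
    open(os.path.join(tmp, name), 'w').close()

extractor = ImageDirExtractor(tmp)
print(len(extractor))                   # 2 -- the .txt file is skipped
print([item.id for item in extractor])  # ['a', 'b'] -- sorted by id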

@ -230,6 +230,8 @@ class VocExtractor(Extractor):
if self._task is not VocTask.person_layout:
break
if bbox is None:
continue
item_annotations.append(BboxObject(
*bbox, label=part_label_id,
group=obj_id))
@ -247,16 +249,16 @@ class VocExtractor(Extractor):
@staticmethod
def _parse_bbox(object_elem):
try:
bbox_elem = object_elem.find('bndbox')
xmin = int(bbox_elem.find('xmin').text)
xmax = int(bbox_elem.find('xmax').text)
ymin = int(bbox_elem.find('ymin').text)
ymax = int(bbox_elem.find('ymax').text)
return [xmin, ymin, xmax - xmin, ymax - ymin]
except Exception:
bbox_elem = object_elem.find('bndbox')
if bbox_elem is None:
return None
xmin = float(bbox_elem.find('xmin').text)
xmax = float(bbox_elem.find('xmax').text)
ymin = float(bbox_elem.find('ymin').text)
ymax = float(bbox_elem.find('ymax').text)
return [xmin, ymin, xmax - xmin, ymax - ymin]
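# --- Illustration (not part of the diff) ---
# The reworked _parse_bbox() returns [x, y, w, h]; a malformed number
# now raises instead of being silently swallowed. Sample values only.
import xml.etree.ElementTree as ET

obj = ET.fromstring(
    '<object><bndbox><xmin>10</xmin><ymin>20</ymin>'
    '<xmax>30</xmax><ymax>60</ymax></bndbox></object>')
print(VocExtractor._parse_bbox(obj))  # [10.0, 20.0, 20.0, 40.0]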
class VocClassificationExtractor(VocExtractor):
def __init__(self, path):
super().__init__(path, task=VocTask.classification)

@ -4,22 +4,18 @@
# SPDX-License-Identifier: MIT
from datumaro.components.importers.datumaro import DatumaroImporter
from datumaro.components.importers.ms_coco import CocoImporter
from datumaro.components.importers.voc import (
VocImporter,
VocResultsImporter,
)
from datumaro.components.importers.coco import CocoImporter
from datumaro.components.importers.voc import VocImporter, VocResultsImporter
from datumaro.components.importers.tfrecord import DetectionApiImporter
from datumaro.components.importers.yolo import YoloImporter
from datumaro.components.importers.cvat import CvatImporter
from datumaro.components.importers.image_dir import ImageDirImporter
items = [
('datumaro', DatumaroImporter),
('ms_coco', CocoImporter),
('coco', CocoImporter),
('voc', VocImporter),
('voc_results', VocResultsImporter),
@ -29,4 +25,6 @@ items = [
('tf_detection_api', DetectionApiImporter),
('cvat', CvatImporter),
('image_dir', ImageDirImporter),
]

@ -8,7 +8,7 @@ from glob import glob
import logging as log
import os.path as osp
from datumaro.components.formats.ms_coco import CocoTask, CocoPath
from datumaro.components.formats.coco import CocoTask, CocoPath
class CocoImporter:

@ -0,0 +1,26 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
class ImageDirImporter:
EXTRACTOR_NAME = 'image_dir'
def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()
if not osp.isdir(path):
raise Exception("Can't find a directory at '%s'" % path)
source_name = osp.basename(osp.normpath(path))
project.add_source(source_name, {
'url': source_name,
'format': self.EXTRACTOR_NAME,
'options': dict(extra_params),
})
return project
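# --- Illustration (not part of the diff) ---
# The importer builds a Project referencing the directory as a single
# 'image_dir' source; the temp directory stands in for real images.
import tempfile

path = tempfile.mkdtemp()
project = ImageDirImporter()(path)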

@ -105,15 +105,17 @@ class GitWrapper:
def _git_dir(base_path):
return osp.join(base_path, '.git')
def init(self, path):
spawn = not osp.isdir(GitWrapper._git_dir(path))
self.repo = git.Repo.init(path=path)
@classmethod
def spawn(cls, path):
spawn = not osp.isdir(cls._git_dir(path))
repo = git.Repo.init(path=path)
if spawn:
author = git.Actor("Nobody", "nobody@example.com")
self.repo.index.commit('Initial commit', author=author)
return self.repo
repo.index.commit('Initial commit', author=author)
return repo
def get_repo(self):
def init(self, path):
self.repo = self.spawn(path)
return self.repo
def is_initialized(self):
@ -316,7 +318,9 @@ class Dataset(Extractor):
categories.update(source.categories())
for source in sources:
for cat_type, source_cat in source.categories().items():
assert categories[cat_type] == source_cat
if not categories[cat_type] == source_cat:
raise NotImplementedError(
"Merging different categories is not implemented yet")
dataset = Dataset(categories=categories)
# merge items
@ -395,11 +399,12 @@ class Dataset(Extractor):
return item
def extract(self, filter_expr, filter_annotations=False, **kwargs):
def extract(self, filter_expr, filter_annotations=False, remove_empty=False):
if filter_annotations:
return self.transform(XPathAnnotationsFilter, filter_expr, **kwargs)
return self.transform(XPathAnnotationsFilter, filter_expr,
remove_empty)
else:
return self.transform(XPathDatasetFilter, filter_expr, **kwargs)
return self.transform(XPathDatasetFilter, filter_expr)
def update(self, items):
for item in items:
@ -468,7 +473,9 @@ class ProjectDataset(Dataset):
categories.update(source.categories())
for source in self._sources.values():
for cat_type, source_cat in source.categories().items():
assert categories[cat_type] == source_cat
if not categories[cat_type] == source_cat:
raise NotImplementedError(
"Merging different categories is not implemented yet")
if own_source is not None and len(own_source) != 0:
categories.update(own_source.categories())
self._categories = categories
@ -651,17 +658,18 @@ class ProjectDataset(Dataset):
launcher = self._project.make_executable_model(model_name)
self.transform_project(InferenceWrapper, launcher, save_dir=save_dir)
def export_project(self, save_dir, output_format,
filter_expr=None, filter_annotations=False, **converter_kwargs):
def export_project(self, save_dir, converter,
filter_expr=None, filter_annotations=False, remove_empty=False):
# NOTE: probably this function should be in the ViewModel layer
save_dir = osp.abspath(save_dir)
os.makedirs(save_dir, exist_ok=True)
dataset = self
if filter_expr:
dataset = dataset.extract(filter_expr, filter_annotations)
dataset = dataset.extract(filter_expr,
filter_annotations=filter_annotations,
remove_empty=remove_empty)
converter = self.env.make_converter(output_format, **converter_kwargs)
converter(dataset, save_dir)
def extract_project(self, filter_expr, filter_annotations=False,

@ -7,6 +7,7 @@ import inspect
import os
import os.path as osp
import shutil
import tempfile
def current_function_name(depth=1):
@ -32,8 +33,22 @@ class FileRemover:
class TestDir(FileRemover):
def __init__(self, path=None, ignore_errors=False):
if path is None:
path = osp.abspath('temp_%s' % current_function_name(2))
os.makedirs(path, exist_ok=ignore_errors)
super().__init__(path, is_dir=True, ignore_errors=ignore_errors)
path = osp.abspath('temp_%s-' % current_function_name(2))
path = tempfile.mkdtemp(dir=os.getcwd(), prefix=path)
else:
os.makedirs(path, exist_ok=ignore_errors)
super().__init__(path, is_dir=True, ignore_errors=ignore_errors)
def ann_to_str(ann):
return vars(ann)
def item_to_str(item):
return '\n'.join(
[
'%s' % vars(item)
] + [
'ann[%s]: %s' % (i, ann_to_str(a))
for i, a in enumerate(item.annotations)
]
)

@ -2,146 +2,64 @@
<!-- To view this file, download free mind mapping software FreeMind from http://freemind.sourceforge.net -->
<node CREATED="1562588909441" ID="ID_362065379" MODIFIED="1562594436169" TEXT="datum">
<node COLOR="#669900" CREATED="1562588926230" ID="ID_392208345" MODIFIED="1562594653553" POSITION="right" STYLE="fork" TEXT="project">
<node CREATED="1562592021703" ID="ID_1131736910" MODIFIED="1567594093533" TEXT="create">
<icon BUILTIN="button_ok"/>
<node CREATED="1562592021703" ID="ID_1131736910" MODIFIED="1579775533832" TEXT="create">
<node CREATED="1574330157737" ID="ID_507280937" MODIFIED="1574330158757" TEXT="Creates a Datumaro project"/>
</node>
<node CREATED="1562592669910" ID="ID_1273417784" MODIFIED="1567594103605" TEXT="import">
<icon BUILTIN="button_ok"/>
<node CREATED="1562592669910" ID="ID_1273417784" MODIFIED="1579775533832" TEXT="import">
<node CREATED="1562592677270" ID="ID_1205701076" MODIFIED="1574330175510" TEXT="Generates a project from other project or dataset in a specific format"/>
</node>
<node CREATED="1562592764462" ID="ID_724395644" MODIFIED="1569927189023" TEXT="export">
<icon BUILTIN="button_ok"/>
<node CREATED="1562592764462" ID="ID_724395644" MODIFIED="1579775533832" TEXT="export">
<node CREATED="1562592918908" ID="ID_44929477" MODIFIED="1574330221398" TEXT="Saves dataset in a specfic format"/>
</node>
<node CREATED="1562593914751" ID="ID_378739335" MODIFIED="1574330501157" TEXT="extract">
<icon BUILTIN="button_ok"/>
<node CREATED="1562593914751" ID="ID_378739335" MODIFIED="1579775533832" TEXT="extract">
<node CREATED="1562593918968" ID="ID_424607257" MODIFIED="1569929409897" TEXT="Extracts subproject by filter"/>
</node>
<node CREATED="1569928239212" ID="ID_1246336762" MODIFIED="1574330501159" TEXT="merge">
<icon BUILTIN="button_ok"/>
<node CREATED="1569928239212" ID="ID_1246336762" MODIFIED="1579775533832" TEXT="merge">
<node CREATED="1569928465766" ID="ID_96716547" MODIFIED="1569928867634" TEXT="Adds new items to project"/>
</node>
<node CREATED="1562594882533" ID="ID_487465081" MODIFIED="1567594126105" TEXT="diff">
<icon BUILTIN="button_ok"/>
<node CREATED="1562594882533" ID="ID_487465081" MODIFIED="1579775533832" TEXT="diff">
<node CREATED="1562594886583" ID="ID_1671375265" MODIFIED="1569928079633" TEXT="Compares two projects"/>
</node>
<node COLOR="#ff0000" CREATED="1563435039037" ID="ID_97578583" MODIFIED="1567594117984" TEXT="transform">
<icon BUILTIN="button_ok"/>
<node CREATED="1563435039037" ID="ID_97578583" MODIFIED="1579775533832" TEXT="transform">
<node CREATED="1563435074116" ID="ID_695576446" MODIFIED="1574330414686" TEXT="Applies specific transformation to the dataset"/>
</node>
<node CREATED="1562592759129" ID="ID_1934152899" MODIFIED="1574330506478" TEXT="build">
<node CREATED="1562592866813" ID="ID_321145109" MODIFIED="1569929109413" TEXT="Compound operation which executes other required operations. &#xa;Probably, executes some pipeline based on a script provided"/>
</node>
<node CREATED="1569928254654" ID="ID_273542545" MODIFIED="1569928258098" TEXT="show">
<node CREATED="1569928411749" ID="ID_842692369" MODIFIED="1569928852922" TEXT="Visualizes project"/>
</node>
<node CREATED="1569928386605" ID="ID_493330514" MODIFIED="1569928388754" TEXT="info">
<node CREATED="1569928386605" ID="ID_493330514" MODIFIED="1579775533832" TEXT="info">
<node CREATED="1569928423173" ID="ID_1273620035" MODIFIED="1569928429050" TEXT="Outputs valuable info"/>
</node>
<node CREATED="1562593076507" ID="ID_779027516" MODIFIED="1574330511948" TEXT="stats">
<node CREATED="1562593079585" ID="ID_1498895180" MODIFIED="1562594653556" TEXT="Computes dataset statistics"/>
</node>
<node CREATED="1562593105322" ID="ID_117744850" MODIFIED="1574330511947" TEXT="docs">
<node CREATED="1562593108705" ID="ID_878198723" MODIFIED="1562594653557" TEXT="Generates dataset documentation"/>
</node>
</node>
<node COLOR="#669900" CREATED="1562592073422" ID="ID_1793909666" MODIFIED="1569928300945" POSITION="right" STYLE="fork" TEXT="source">
<node CREATED="1568023057930" ID="ID_633965389" MODIFIED="1568023077570" TEXT="create">
<icon BUILTIN="button_ok"/>
<node CREATED="1569928185077" ID="ID_1231594305" MODIFIED="1574330396647" TEXT="Creates source dataset in project"/>
</node>
<node CREATED="1562592085302" ID="ID_199597063" MODIFIED="1568023069817" TEXT="import">
<icon BUILTIN="button_ok"/>
<node CREATED="1562592138228" ID="ID_1202153971" MODIFIED="1574330391766" TEXT="Adds source dataset by its URL under a name (like git submodule add)"/>
<node CREATED="1562592085302" ID="ID_199597063" MODIFIED="1579775563506" TEXT="add">
<node CREATED="1562592138228" ID="ID_1202153971" MODIFIED="1579775556115" TEXT="Adds data source by its URL"/>
</node>
<node CREATED="1562592088238" ID="ID_744367784" MODIFIED="1567594264178" TEXT="remove">
<icon BUILTIN="button_ok"/>
<node CREATED="1562592088238" ID="ID_744367784" MODIFIED="1579775533831" TEXT="remove">
<node CREATED="1562592316435" ID="ID_810859340" MODIFIED="1574330377694" TEXT="Removes source dataset"/>
</node>
<node CREATED="1562592469569" ID="ID_329615614" MODIFIED="1569927769905" TEXT="export">
<icon BUILTIN="button_ok"/>
</node>
<node CREATED="1562593134746" ID="ID_195077187" MODIFIED="1562594652955" TEXT="stats"/>
<node CREATED="1569928327997" ID="ID_389265529" MODIFIED="1569928330154" TEXT="show"/>
<node CREATED="1569928398580" ID="ID_348421413" MODIFIED="1569928400642" TEXT="info"/>
</node>
<node COLOR="#669900" CREATED="1563434979149" ID="ID_782927311" MODIFIED="1563435233504" POSITION="right" TEXT="model">
<node CREATED="1563434987574" ID="ID_290716982" MODIFIED="1567594144970" TEXT="add">
<icon BUILTIN="button_ok"/>
<node CREATED="1563434987574" ID="ID_290716982" MODIFIED="1579775533831" TEXT="add">
<node CREATED="1563435018178" ID="ID_1059015375" MODIFIED="1574330372326" TEXT="Registers model for inference"/>
</node>
<node CREATED="1564500174410" ID="ID_451702794" MODIFIED="1567594149642" TEXT="remove">
<icon BUILTIN="button_ok"/>
<node CREATED="1564500174410" ID="ID_451702794" MODIFIED="1579775533831" TEXT="remove">
<node CREATED="1569928809165" ID="ID_1093915022" MODIFIED="1574330359950" TEXT="Removes model from project"/>
</node>
</node>
<node COLOR="#669900" CREATED="1562593748114" ID="ID_970814064" MODIFIED="1562594652591" POSITION="right" STYLE="fork" TEXT="inference">
<node CREATED="1562593758235" ID="ID_1984980861" MODIFIED="1563443545700" STYLE="fork" TEXT="run">
<node CREATED="1562593758235" ID="ID_1984980861" MODIFIED="1579775533831" STYLE="fork" TEXT="run">
<node CREATED="1562593765978" ID="ID_918840812" MODIFIED="1574330356630" TEXT="Executes network for inference"/>
</node>
<node CREATED="1564500277834" ID="ID_1264946351" MODIFIED="1564500279953" TEXT="parse">
<node CREATED="1569927270764" ID="ID_1995847022" MODIFIED="1569927285793" TEXT="Parses training log file"/>
</node>
</node>
<node COLOR="#669900" CREATED="1562594817022" ID="ID_133277273" MODIFIED="1562674963173" POSITION="right" TEXT="item">
<font NAME="SansSerif" SIZE="12"/>
<node CREATED="1562594955691" ID="ID_1344471806" MODIFIED="1569928758939" TEXT="export"/>
<node CREATED="1562594960747" ID="ID_1898276667" MODIFIED="1562594963201" TEXT="stats"/>
<node CREATED="1562594983907" ID="ID_218343857" MODIFIED="1562594985561" TEXT="diff"/>
<node CREATED="1562595454823" ID="ID_1649071450" MODIFIED="1562595456796" TEXT="edit"/>
</node>
<node CREATED="1562594240501" ID="ID_1530017548" MODIFIED="1567594340403" POSITION="right" STYLE="fork" TEXT="create">
<icon BUILTIN="button_ok"/>
<node CREATED="1562594240501" ID="ID_1530017548" MODIFIED="1579775542142" POSITION="right" STYLE="fork" TEXT="create">
<node CREATED="1562594244868" ID="ID_1309935216" MODIFIED="1562594591882" TEXT="Calls project create"/>
</node>
<node CREATED="1562594254667" ID="ID_190882752" MODIFIED="1567594344740" POSITION="right" STYLE="fork" TEXT="add">
<icon BUILTIN="button_ok"/>
<node CREATED="1562594262484" ID="ID_949937557" MODIFIED="1569929674939" TEXT="Calls source add / import"/>
<node CREATED="1562594254667" ID="ID_190882752" MODIFIED="1579775542142" POSITION="right" STYLE="fork" TEXT="add">
<node CREATED="1562594262484" ID="ID_949937557" MODIFIED="1579701784334" TEXT="Calls source add"/>
</node>
<node CREATED="1562594276540" ID="ID_1430572506" MODIFIED="1567594350421" POSITION="right" STYLE="fork" TEXT="remove">
<icon BUILTIN="button_ok"/>
<node CREATED="1562594276540" ID="ID_1430572506" MODIFIED="1579775542142" POSITION="right" STYLE="fork" TEXT="remove">
<node CREATED="1562594281180" ID="ID_124160415" MODIFIED="1562594591248" TEXT="Calls source remove"/>
</node>
<node CREATED="1562594289395" ID="ID_1608995178" MODIFIED="1574330539766" POSITION="right" STYLE="fork" TEXT="export">
<node CREATED="1562594293699" ID="ID_199067242" MODIFIED="1569930927620" TEXT="[project arg, default]">
<node CREATED="1562594313250" ID="ID_1243481155" MODIFIED="1569927804137" TEXT="Calls project export"/>
</node>
<node CREATED="1562594323035" ID="ID_1281657568" MODIFIED="1569930961981" TEXT="[source/item arg]">
<node CREATED="1562594338482" ID="ID_1085162426" MODIFIED="1569930968180" TEXT="Calls source/item export"/>
<node CREATED="1562594289395" ID="ID_1608995178" MODIFIED="1579775542142" POSITION="right" STYLE="fork" TEXT="export">
<node CREATED="1579775617910" ID="ID_1698217727" MODIFIED="1579775622801" TEXT="Calls project export"/>
</node>
<node CREATED="1562594360266" ID="ID_840060495" MODIFIED="1562594590793" TEXT="[external dataset arg]">
<node CREATED="1562594370348" ID="ID_778378456" MODIFIED="1569927504041" TEXT="Project import + project export"/>
</node>
</node>
<node CREATED="1562594703543" ID="ID_210248464" MODIFIED="1562594705685" POSITION="right" TEXT="diff">
<node CREATED="1569927601316" ID="ID_920307385" MODIFIED="1569927934921" TEXT="[2 item/project/source/ext.dataset args]">
<node CREATED="1569927624724" ID="ID_1503422177" MODIFIED="1569927985130" TEXT="Import + project diff"/>
</node>
</node>
<node CREATED="1569929167198" ID="ID_1583130184" MODIFIED="1569929169274" POSITION="right" TEXT="show"/>
<node CREATED="1569929169942" ID="ID_912693725" MODIFIED="1569929174043" POSITION="right" TEXT="info"/>
<node CREATED="1567594310257" ID="ID_995434490" MODIFIED="1567594363999" POSITION="right" TEXT="explain">
<icon BUILTIN="button_ok"/>
<node CREATED="1567594310257" ID="ID_995434490" MODIFIED="1579775542141" POSITION="right" TEXT="explain">
<node CREATED="1567594365942" ID="ID_1529218756" MODIFIED="1567594404172" TEXT="Runs inference explanation"/>
</node>
<node CREATED="1562593914751" ID="ID_925304191" MODIFIED="1569927316928" POSITION="right" TEXT="extract">
<node CREATED="1562593918968" ID="ID_1746788348" MODIFIED="1569929409897" TEXT="Extracts subproject by filter"/>
</node>
<node CREATED="1569928239212" ID="ID_874360504" MODIFIED="1569928241378" POSITION="right" TEXT="merge">
<node CREATED="1569928465766" ID="ID_332142804" MODIFIED="1569928867634" TEXT="Adds new items to project"/>
</node>
<node CREATED="1562593031995" ID="ID_1818638085" MODIFIED="1569930889221" POSITION="right" STYLE="fork" TEXT="stats">
<icon BUILTIN="button_ok"/>
<node CREATED="1562593043258" ID="ID_280465436" MODIFIED="1562594682163" STYLE="fork" TEXT="[project arg, default]">
<node CREATED="1562593064794" ID="ID_1859975421" MODIFIED="1562594682163" STYLE="fork" TEXT="Calls project stats"/>
</node>
<node CREATED="1562593187881" ID="ID_815427730" MODIFIED="1569930976940" STYLE="fork" TEXT="[source/item arg]">
<node CREATED="1562593203687" ID="ID_1958444123" MODIFIED="1569930985172" STYLE="fork" TEXT="Calls source/item stats"/>
</node>
<node CREATED="1562593537868" ID="ID_1000873843" MODIFIED="1562594682163" STYLE="fork" TEXT="[external dataset arg]">
<node CREATED="1562593695074" ID="ID_1931687508" MODIFIED="1569930999660" STYLE="fork" TEXT="Project import + project stats"/>
</node>
</node>
</node>
</map>

@ -5,7 +5,6 @@
## Table of contents
- [Concept](#concept)
- [Design](#design)
- [RC 1 vision](#rc-1-vision)
## Concept
@ -70,53 +69,6 @@ Datumaro is:
- guidance for key frame selection for tracking ([paper](https://arxiv.org/abs/1903.11779))
Use case: more effective annotation, better predictions
## Design
### Command-line
Docker is used as an example. Basically, the interface is partitioned
into contexts and shortcuts. Contexts are semantically grouped commands
related to a single topic or target. Shortcuts are handy shorter
alternatives for the most used commands, as well as special commands
that are hard to fit into a specific context.
![cli-design-image](images/cli_design.png)
- [FreeMind tool link](http://freemind.sourceforge.net/wiki/index.php/Main_Page)
### High-level architecture
- Using MVVM UI pattern
![mvvm-image](images/mvvm.png)
### Datumaro project and environment structure
<!--lint disable fenced-code-flag-->
```
├── [datumaro module]
└── [project folder]
├── .datumaro/
│   ├── config.yml
│   ├── .git/
│   ├── importers/
│   │   ├── custom_format_importer1.py
│   │   └── ...
│   ├── statistics/
│   │   ├── custom_statistic1.py
│   │   └── ...
│   ├── visualizers/
│   │ ├── custom_visualizer1.py
│   │ └── ...
│   └── extractors/
│   ├── custom_extractor1.py
│   └── ...
└── sources/
├── source1
└── ...
```
<!--lint enable fenced-code-flag-->
## RC 1 vision
In the first version Datumaro should be a project manager for CVAT.
@ -139,18 +91,20 @@ can be downloaded by user to be operated on with Datumaro CLI.
### Interfaces
- [x] Python API for user code
- [ ] Installation as a package
- [x] Installation as a package
- [x] A command-line tool for dataset manipulations
### Features
- Dataset format support (reading, exporting)
- Dataset format support (reading, writing)
- [x] Own format
- [x] CVAT
- [x] COCO
- [x] PASCAL VOC
- [x] YOLO
- [x] TF Detection API
- [ ] Cityscapes
- [ ] ImageNet
- [ ] CVAT
- Dataset visualization (`show`)
- [ ] Ability to visualize a dataset
@ -199,6 +153,7 @@ can be downloaded by user to be operated on with Datumaro CLI.
- export the task
- convert to a training format
- train a DL model
- [x] Use case "annotate - reannotate problematic images - merge"
- [ ] Use case "annotate and estimate quality"
- create a task
- annotate

Binary image file not shown (before: 91 KiB, after: 35 KiB)

@ -1,325 +0,0 @@
# Quick start guide
## Installation
### Prerequisites
- Python (3.5+)
- OpenVINO (optional)
### Installation steps
Download the project to any directory.
Set up a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
while read -r p; do pip install $p; done < requirements.txt
```
## Usage
The directory containing the project should be in the
`PYTHONPATH` environment variable; alternatively, invoke
commands from that directory.
As a python module:
``` bash
python -m datumaro --help
```
As a standalone python script:
``` bash
python datum.py --help
```
As a python library:
``` python
import datumaro
```
## Workflow
> **Note**: command invocation **syntax is subject to change, refer to --help output**
The key object is the project. It can be created or imported with
`project create` and `project import` commands. The project is a combination of
dataset and environment.
If you want to interact with models, you should add them to the project first.
Implemented commands ([CLI design doc](images/cli_design.png)):
- project create
- project import
- project diff
- project transform
- source add
- explain
### Create a project
Usage:
``` bash
python datum.py project create --help
python datum.py project create \
-d <project_dir>
```
Example:
``` bash
python datum.py project create -d /home/my_dataset
```
### Import a project
This command creates a project from an existing dataset. Supported formats:
- MS COCO
- Custom formats via custom `importers` and `extractors`
Usage:
``` bash
python -m datumaro project import --help
python -m datumaro project import \
<dataset_path> \
-d <project_dir> \
-t <format>
```
Example:
``` bash
python -m datumaro project import \
/home/coco_dir \
-d /home/project_dir \
-t ms_coco
```
An _MS COCO_-like dataset should have the following directory structure:
<!--lint disable fenced-code-flag-->
```
COCO/
├── annotations/
│   ├── instances_val2017.json
│   ├── instances_train2017.json
├── images/
│   ├── val2017
│   ├── train2017
```
<!--lint enable fenced-code-flag-->
Everything after the last `_` is considered a subset name.
### Register a model
Supported models:
- OpenVINO
- Custom models via custom `launchers`
Usage:
``` bash
python -m datumaro model add --help
```
Example: register an OpenVINO model
A model consists of a graph description and weights. There is also a script
used to convert model outputs to internal data structures.
``` bash
python -m datumaro model add \
<model_name> openvino \
-d <path_to_xml> -w <path_to_bin> -i <path_to_interpretation_script>
```
Interpretation script for an OpenVINO detection model (`convert.py`):
``` python
from datumaro.components.extractor import *
max_det = 10
conf_thresh = 0.1
def process_outputs(inputs, outputs):
# inputs = model input, array of images, shape = (N, C, H, W)
# outputs = model output, shape = (N, 1, K, 7)
# results = conversion result, [ [ Annotation, ... ], ... ]
results = []
for input, output in zip(inputs, outputs):
input_height, input_width = input.shape[:2]
detections = output[0]
image_results = []
for i, det in enumerate(detections):
label = int(det[1])
conf = det[2]
if conf <= conf_thresh:
continue
x = max(int(det[3] * input_width), 0)
y = max(int(det[4] * input_height), 0)
w = min(int(det[5] * input_width - x), input_width)
h = min(int(det[6] * input_height - y), input_height)
image_results.append(BboxObject(x, y, w, h,
label=label, attributes={'score': conf} ))
results.append(image_results[:max_det])
return results
def get_categories():
# Optionally, provide output categories - label map etc.
# Example:
label_categories = LabelCategories()
label_categories.add('person')
label_categories.add('car')
return { AnnotationType.label: label_categories }
```
### Run a model inference
This command creates a new project from the current project. The new
project's annotations are the model outputs.
Usage:
``` bash
python -m datumaro project transform --help
python -m datumaro project transform \
-m <model_name> \
-d <save_dir>
```
Example:
``` bash
python -m datumaro project import <...>
python -m datumaro model add mymodel <...>
python -m datumaro project transform -m mymodel -d ../mymodel_inference
```
### Compare datasets
The command compares two datasets and saves the results in the
specified directory. The current project is considered to be
"ground truth".
``` bash
python -m datumaro project diff --help
python -m datumaro project diff <other_project_dir> -d <save_dir>
```
Example: compare a dataset with model inference
``` bash
python -m datumaro project import <...>
python -m datumaro model add mymodel <...>
python -m datumaro project transform <...> -d ../inference
python -m datumaro project diff ../inference -d ../diff
```
### Run inference explanation
Usage:
``` bash
python -m datumaro explain --help
python -m datumaro explain \
-m <model_name> \
-d <save_dir> \
-t <target> \
<method> \
<method_params>
```
Example: run inference explanation on a single image with visualization
``` bash
python -m datumaro project create <...>
python -m datumaro model add mymodel <...>
python -m datumaro explain \
-m mymodel \
-t 'image.png' \
rise \
-s 1000 --progressive
```
### Extract data subset based on filter
This command allows creating a subproject from a project, which will
include only the items satisfying some condition. XPath is used as the
query format.
Usage:
``` bash
python -m datumaro project extract --help
python -m datumaro project extract \
-p <source_project> \
-d <destination dir> \
-f '<filter expression>'
```
Example:
``` bash
python -m datumaro project extract \
-p ../test_project \
-d ../test_project-extract \
-f '/item[image/width < image/height]'
```
Item representation:
``` xml
<item>
<id>290768</id>
<subset>minival2014</subset>
<image>
<width>612</width>
<height>612</height>
<depth>3</depth>
</image>
<annotation>
<id>80154</id>
<type>bbox</type>
<label_id>39</label_id>
<x>264.59</x>
<y>150.25</y>
<w>11.199999999999989</w>
<h>42.31</h>
<area>473.87199999999956</area>
</annotation>
<annotation>
<id>669839</id>
<type>bbox</type>
<label_id>41</label_id>
<x>163.58</x>
<y>191.75</y>
<w>76.98999999999998</w>
<h>73.63</h>
<area>5668.773699999998</area>
</annotation>
...
</item>
```
## Links
- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)
- [How to convert model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html)
- [Model convert script for this model](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23)

@ -0,0 +1,563 @@
# Quick start guide
## Contents
- [Installation](#installation)
- [Interfaces](#interfaces)
- [Supported dataset formats and annotations](#formats-support)
- [Command line workflow](#command-line-workflow)
- [Create a project](#create-project)
- [Add and remove data](#add-and-remove-data)
- [Import a project](#import-project)
- [Extract a subproject](#extract-subproject)
- [Merge projects](#merge-project)
- [Export a project](#export-project)
- [Compare projects](#compare-projects)
- [Get project info](#get-project-info)
- [Register a model](#register-model)
- [Run inference](#run-inference)
- [Run inference explanation](#explain-inference)
- [Links](#links)
## Installation
### Prerequisites
- Python (3.5+)
- OpenVINO (optional)
### Installation steps
Optionally, set up a virtual environment:
``` bash
python -m pip install virtualenv
python -m virtualenv venv
. venv/bin/activate
```
Install Datumaro:
``` bash
pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
```
> You can change the installation branch with `.../cvat@<branch_name>#egg...`
> Also note the `--force-reinstall` parameter in this case.
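> For example, installing from the `develop` branch:
> `pip install --force-reinstall 'git+https://github.com/opencv/cvat@develop#egg=datumaro&subdirectory=datumaro'`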
## Interfaces
As a standalone tool:
``` bash
datum --help
```
As a python module:
> The directory containing Datumaro should be in the `PYTHONPATH`
> environment variable or `cvat/datumaro/` should be the current directory.
``` bash
python -m datumaro --help
python datumaro/ --help
python datum.py --help
```
As a python library:
``` python
import datumaro
```
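A minimal sketch of the library usage, assuming a COCO-like dataset at the
hypothetical path `/home/coco_dir` (the calls mirror the ones used in this
repository's tests):
``` python
# a minimal sketch; /home/coco_dir is a hypothetical dataset location
from datumaro.components.project import Project

project = Project.import_from('/home/coco_dir', 'coco')
dataset = project.make_dataset()

print(sorted(dataset.subsets()))
for item in dataset:
    # every dataset item carries an id and a list of annotations
    print(item.id, len(item.annotations))
```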
## Formats support
List of supported formats:
- COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
- [Format specification](http://cocodataset.org/#format-data)
- `labels` are our extension - like `instances` with only `category_id`
- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
- [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
- YOLO (`bboxes`)
- [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
- TF Detection API (`bboxes`, `masks`)
- Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
- CVAT
- [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
List of supported annotation types:
- Labels
- Bounding boxes
- Polygons
- Polylines
- (Key-)Points
- Captions
- Masks
## Command line workflow
> **Note**: command invocation syntax is subject to change,
> **always refer to command --help output**
The key object is the Project. The Project is a combination of
a Project's own dataset, a number of external data sources and an environment.
An empty Project can be created with the `project create` command;
an existing dataset can be imported with the `project import` command.
A typical way to obtain projects is to export tasks in the CVAT UI.
Available CLI commands:
![CLI design doc](images/cli_design.png)
If you want to interact with models, you need to add them to the project first.
### Import project
This command creates a Project from an existing dataset.
Supported formats are listed in the command help.
In Datumaro, dataset formats are supported by Extractors and Importers.
An Extractor produces a list of dataset items corresponding
to the dataset. An Importer creates a Project from the
data source location. It is possible to add a custom Extractor and Importer.
To do this, put the Extractor and Importer implementation scripts into
`<project_dir>/.datumaro/extractors` and `<project_dir>/.datumaro/importers`.
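For illustration, a minimal custom Extractor sketch (the file and format
names are hypothetical; the `Extractor` and `DatasetItem` API follows the
test code in this repository):
``` python
# <project_dir>/.datumaro/extractors/my_format.py (hypothetical file name)
import numpy as np

from datumaro.components.extractor import Extractor, DatasetItem

class MyFormatExtractor(Extractor): # hypothetical format name
    def __iter__(self):
        # a real extractor would read items from the data source location;
        # here two dummy items with blank images are produced
        return iter([
            DatasetItem(id=1, image=np.ones((10, 6, 3))),
            DatasetItem(id=2, image=np.ones((5, 4, 3))),
        ])
```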
Usage:
``` bash
datum project import --help
datum project import \
-i <dataset_path> \
-o <project_dir> \
-f <format>
```
Example: create a project from a COCO-like dataset
``` bash
datum project import \
-i /home/coco_dir \
-o /home/project_dir \
-f coco
```
An _MS COCO_-like dataset should have the following directory structure:
<!--lint disable fenced-code-flag-->
```
COCO/
├── annotations/
│   ├── instances_val2017.json
│   ├── instances_train2017.json
├── images/
│   ├── val2017
│   ├── train2017
```
<!--lint enable fenced-code-flag-->
Everything after the last `_` is considered a subset name in the COCO format.
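For example, `instances_train2017.json` is loaded as the `train2017` subset.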
### Create project
The command creates an empty project. Once a Project is created, there are
a few options to interact with it.
Usage:
``` bash
datum project create --help
datum project create \
-o <project_dir>
```
Example: create an empty project `my_dataset`
``` bash
datum project create -o my_dataset/
```
### Add and remove data
A Project can be attached to a number of external Data Sources. Each Source
describes a way to produce dataset items. A Project combines dataset items from
all the sources and its own dataset into one composite dataset. You can manage
project sources by commands in the `source` command line context.
Datasets come in a wide variety of formats. Each dataset
format defines its own data structure and rules on how to
interpret the data. For example, the following data structure
is used in COCO format:
<!--lint disable fenced-code-flag-->
```
/dataset/
- /images/<id>.jpg
- /annotations/
```
<!--lint enable fenced-code-flag-->
In Datumaro, dataset formats are supported by Extractors.
An Extractor produces a list of dataset items corresponding
to the dataset. It is possible to add a custom Extractor.
To do this, put an Extractor
definition script into `<project_dir>/.datumaro/extractors`.
Usage:
``` bash
datum source add --help
datum source remove --help
datum source add \
path <path> \
-p <project dir> \
-n <name>
datum source remove \
-p <project dir> \
-n <name>
```
Example: create a project from a bunch of different annotations and images,
and generate a TFRecord dataset for the TF Detection API for model training
``` bash
datum project create
# 'default' is the name of the subset below
datum source add path <path/to/coco/instances_default.json> -f coco_instances
datum source add path <path/to/cvat/default.xml> -f cvat
datum source add path <path/to/voc> -f voc_detection
datum source add path <path/to/datumaro/default.json> -f datumaro
datum source add path <path/to/images/dir> -f image_dir
datum project export -f tf_detection_api
```
### Extract subproject
This command allows creating a sub-Project from a Project. The new project
includes only the items satisfying some condition. [XPath](https://devhints.io/xpath)
is used as the query format.
There are several filtering modes available ('-m/--mode' parameter).
Supported modes:
- 'i', 'items'
- 'a', 'annotations'
- 'i+a', 'a+i', 'items+annotations', 'annotations+items'
When filtering annotations, use the 'items+annotations'
mode to specify that annotation-less dataset items should be
removed. To select an annotation, write an XPath that
returns 'annotation' elements (see examples).
Usage:
``` bash
datum project extract --help
datum project extract \
-p <project dir> \
-o <output dir> \
-e '<xpath filter expression>'
```
Example: extract a dataset with only images whose width < height
``` bash
datum project extract \
-p test_project \
-o test_project-extract \
-e '/item[image/width < image/height]'
```
Example: extract a dataset with only large annotations of class `cat` and any non-`person` annotations
``` bash
datum project extract \
-p test_project \
-o test_project-extract \
--mode annotations -e '/item/annotation[(label="cat" and area > 999.5) or label!="person"]'
```
Example: extract a dataset with only occluded annotations, remove empty images
``` bash
datum project extract \
-p test_project \
-o test_project-extract \
-m i+a -e '/item/annotation[occluded="True"]'
```
Item representations are available with the `--dry-run` parameter:
``` xml
<item>
<id>290768</id>
<subset>minival2014</subset>
<image>
<width>612</width>
<height>612</height>
<depth>3</depth>
</image>
<annotation>
<id>80154</id>
<type>bbox</type>
<label_id>39</label_id>
<x>264.59</x>
<y>150.25</y>
<w>11.199999999999989</w>
<h>42.31</h>
<area>473.87199999999956</area>
</annotation>
<annotation>
<id>669839</id>
<type>bbox</type>
<label_id>41</label_id>
<x>163.58</x>
<y>191.75</y>
<w>76.98999999999998</w>
<h>73.63</h>
<area>5668.773699999998</area>
</annotation>
...
</item>
```
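Such a representation can also be probed outside Datumaro; a rough sketch
using the standard library (note that `xml.etree.ElementTree` implements
only a small XPath subset, so numeric comparisons like `area > 999.5`
will not work there):
``` python
# approximate check of a simple filter expression against an item
# representation; ElementTree supports only a subset of XPath
import xml.etree.ElementTree as ET

item_xml = '''
<item>
  <id>290768</id>
  <annotation><type>bbox</type><label_id>39</label_id></annotation>
  <annotation><type>bbox</type><label_id>41</label_id></annotation>
</item>
'''
item = ET.fromstring(item_xml)
# select 'annotation' elements by a child's text, as in annotations mode
print(len(item.findall('./annotation[label_id="39"]')))  # prints 1
```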
### Merge projects
This command combines multiple Projects into one.
Usage:
``` bash
datum project merge --help
datum project merge \
-p <project dir> \
-o <output dir> \
<other project dir>
```
Example: update annotations in the `first_project` with annotations
from the `second_project` and save the result as `merged_project`
``` bash
datum project merge \
-p first_project \
-o merged_project \
second_project
```
### Export project
This command exports a Project in some format.
Supported formats are listed in the command help.
In Datumaro, dataset formats are supported by Converters.
A Converter produces a dataset of a specific format
from dataset items. It is possible to add a custom Converter.
To do this, put a Converter
definition script into `<project_dir>/.datumaro/converters`.
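For illustration, a minimal custom Converter sketch (the file and format
names are hypothetical, and the callable signature, an extractor plus a
save directory, is an assumption rather than a documented interface):
``` python
# <project_dir>/.datumaro/converters/my_format.py (hypothetical file name)
import os
import os.path as osp

class MyFormatConverter: # hypothetical format name
    # assumed signature: dataset items come from an extractor,
    # the output dataset is written under save_dir
    def __call__(self, extractor, save_dir):
        os.makedirs(save_dir, exist_ok=True)
        with open(osp.join(save_dir, 'items.txt'), 'w') as f:
            for item in extractor:
                # one line per dataset item: id and annotation count
                f.write('%s %s\n' % (item.id, len(item.annotations)))
```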
Usage:
``` bash
datum project export --help
datum project export \
-p <project dir> \
-o <output dir> \
-f <format> \
[-- <additional format parameters>]
```
Example: save the project as a VOC-like dataset, including images
``` bash
datum project export \
-p test_project \
-o test_project-export \
-f voc \
-- --save-images
```
### Get project info
This command outputs project status information.
Usage:
``` bash
datum project info --help
datum project info \
-p <project dir>
```
Example:
``` bash
datum project info -p /test_project
Project:
name: test_project2
location: /test_project
Sources:
source 'instances_minival2014':
format: coco_instances
url: /coco_like/annotations/instances_minival2014.json
Dataset:
length: 5000
categories: label
label:
count: 80
labels: person, bicycle, car, motorcycle (and 76 more)
subsets: minival2014
subset 'minival2014':
length: 5000
categories: label
label:
count: 80
labels: person, bicycle, car, motorcycle (and 76 more)
```
### Register model
Supported models:
- OpenVINO
- Custom models via custom `launchers`
Usage:
``` bash
datum model add --help
```
Example: register an OpenVINO model
A model consists of a graph description and weights. There is also a script
used to convert model outputs to internal data structures.
``` bash
datum project create
datum model add \
-n <model_name> openvino \
-d <path_to_xml> -w <path_to_bin> -i <path_to_interpretation_script>
```
Interpretation script for an OpenVINO detection model (`convert.py`):
``` python
from datumaro.components.extractor import *
max_det = 10
conf_thresh = 0.1
def process_outputs(inputs, outputs):
# inputs = model input, array of images, shape = (N, C, H, W)
# outputs = model output, shape = (N, 1, K, 7)
# results = conversion result, [ [ Annotation, ... ], ... ]
results = []
for input, output in zip(inputs, outputs):
input_height, input_width = input.shape[:2]
detections = output[0]
image_results = []
for i, det in enumerate(detections):
label = int(det[1])
conf = det[2]
if conf <= conf_thresh:
continue
x = max(int(det[3] * input_width), 0)
y = max(int(det[4] * input_height), 0)
w = min(int(det[5] * input_width - x), input_width)
h = min(int(det[6] * input_height - y), input_height)
image_results.append(BboxObject(x, y, w, h,
label=label, attributes={'score': conf} ))
results.append(image_results[:max_det])
return results
def get_categories():
# Optionally, provide output categories - label map etc.
# Example:
label_categories = LabelCategories()
label_categories.add('person')
label_categories.add('car')
return { AnnotationType.label: label_categories }
```
### Run model
This command applies a model to the dataset images and produces a new project.
Usage:
``` bash
datum model run --help
datum model run \
-p <project dir> \
-m <model_name> \
-o <save_dir>
```
Example: launch inference on a dataset
``` bash
datum project import <...>
datum model add mymodel <...>
datum model run -m mymodel -o inference
```
### Compare projects
The command compares two datasets and saves the results in the
specified directory. The current project is considered to be
"ground truth".
``` bash
datum project diff --help
datum project diff <other_project_dir> -o <save_dir>
```
Example: compare a dataset with model inference
``` bash
datum project import <...>
datum model add mymodel <...>
datum model run -m mymodel -o inference
datum project diff inference -o diff
```
### Explain inference
Usage:
``` bash
datum explain --help
datum explain \
-m <model_name> \
-o <save_dir> \
-t <target> \
<method> \
<method_params>
```
Example: run inference explanation on a single image with visualization
``` bash
datum project create <...>
datum model add mymodel <...>
datum explain \
-m mymodel \
-t 'image.png' \
rise \
-s 1000 --progressive
```
## Links
- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)
- [How to convert model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html)
- [Model conversion script example](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23)

@ -62,7 +62,7 @@ setuptools.setup(
],
entry_points={
'console_scripts': [
'datum=datumaro:main',
'datum=datumaro.cli.__main__:main',
],
},
)

@ -1,5 +0,0 @@
import unittest
if __name__ == '__main__':
unittest.main()

@ -12,7 +12,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem,
BboxObject, CaptionObject,
LabelCategories, PointsCategories
)
from datumaro.components.converters.ms_coco import (
from datumaro.components.converters.coco import (
CocoConverter,
CocoImageInfoConverter,
CocoCaptionsConverter,
@ -112,7 +112,7 @@ class CocoImporterTest(TestCase):
def test_can_import(self):
with TestDir() as temp_dir:
self.COCO_dataset_generate(temp_dir.path)
project = Project.import_from(temp_dir.path, 'ms_coco')
project = Project.import_from(temp_dir.path, 'coco')
dataset = project.make_dataset()
self.assertListEqual(['val'], sorted(dataset.subsets()))
@ -142,7 +142,7 @@ class CocoConverterTest(TestCase):
if not importer_params:
importer_params = {}
project = Project.import_from(test_dir.path, 'ms_coco',
project = Project.import_from(test_dir.path, 'coco',
**importer_params)
parsed_dataset = project.make_dataset()

@ -14,7 +14,7 @@ from datumaro.components.converters.cvat import CvatConverter
from datumaro.components.project import Project
import datumaro.components.formats.cvat as Cvat
from datumaro.util.image import save_image
from datumaro.util.test_utils import TestDir
from datumaro.util.test_utils import TestDir, item_to_str
class CvatExtractorTest(TestCase):
@ -108,7 +108,7 @@ class CvatExtractorTest(TestCase):
BboxObject(0, 2, 4, 2, label=0,
attributes={
'occluded': True, 'z_order': 1,
'a1': 'true', 'a2': 'v3'
'a1': True, 'a2': 'v3'
}),
PolyLineObject([1, 2, 3, 4, 5, 6, 7, 8],
attributes={'occluded': False, 'z_order': 0}),
@ -175,7 +175,8 @@ class CvatConverterTest(TestCase):
self.assertEqual(len(source_subset), len(parsed_subset))
for idx, (item_a, item_b) in enumerate(
zip(source_subset, parsed_subset)):
self.assertEqual(item_a, item_b, str(idx))
self.assertEqual(item_a, item_b, '%s:\n%s\nvs.\n%s\n' % \
(idx, item_to_str(item_a), item_to_str(item_b)))
def test_can_save_and_load(self):
label_categories = LabelCategories()
@ -209,12 +210,12 @@ class CvatConverterTest(TestCase):
]
),
DatasetItem(id=0, subset='s2', image=np.zeros((5, 10, 3)),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
PolygonObject([0, 0, 4, 0, 4, 4],
label=3, group=4,
attributes={ 'z_order': 1, 'occluded': False }),
PolyLineObject([5, 0, 9, 0, 5, 5]), # will be skipped
PolyLineObject([5, 0, 9, 0, 5, 5]), # will be skipped as no label
]
),
])
@ -236,7 +237,7 @@ class CvatConverterTest(TestCase):
PointsObject([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'z_order': 0, 'occluded': False,
'a1': 'x', 'a2': '42' }),
'a1': 'x', 'a2': 42 }),
]
),
DatasetItem(id=1, subset='s1',
@ -250,7 +251,7 @@ class CvatConverterTest(TestCase):
]
),
DatasetItem(id=0, subset='s2', image=np.zeros((5, 10, 3)),
DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
annotations=[
PolygonObject([0, 0, 4, 0, 4, 4],
label=3, group=4,

@ -9,7 +9,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem,
LabelCategories, MaskCategories, PointsCategories
)
from datumaro.components.converters.datumaro import DatumaroConverter
from datumaro.util.test_utils import TestDir
from datumaro.util.test_utils import TestDir, item_to_str
from datumaro.util.mask_tools import generate_colormap
@ -26,7 +26,7 @@ class DatumaroConverterTest(TestCase):
'y': '2',
}),
BboxObject(1, 2, 3, 4, label=4, id=4, attributes={
'score': 10.0,
'score': 1.0,
}),
BboxObject(5, 6, 7, 8, id=5, group=5),
PointsObject([1, 2, 2, 0, 1, 1], label=0, id=5),
@ -92,7 +92,8 @@ class DatumaroConverterTest(TestCase):
self.assertEqual(len(source_subset), len(parsed_subset))
for idx, (item_a, item_b) in enumerate(
zip(source_subset, parsed_subset)):
self.assertEqual(item_a, item_b, str(idx))
self.assertEqual(item_a, item_b, '%s:\n%s\nvs.\n%s\n' % \
(idx, item_to_str(item_a), item_to_str(item_b)))
self.assertEqual(
source_dataset.categories(),

@ -31,7 +31,7 @@ class ImageTest(TestCase):
image_module._IMAGE_BACKEND = load_backend
dst_image = image_module.load_image(path)
self.assertTrue(np.all(src_image == dst_image),
self.assertTrue(np.array_equal(src_image, dst_image),
'save: %s, load: %s' % (save_backend, load_backend))
def test_encode_and_decode_backends(self):
@ -48,5 +48,5 @@ class ImageTest(TestCase):
image_module._IMAGE_BACKEND = load_backend
dst_image = image_module.decode_image(buffer)
self.assertTrue(np.all(src_image == dst_image),
self.assertTrue(np.array_equal(src_image, dst_image),
'save: %s, load: %s' % (save_backend, load_backend))

@ -0,0 +1,48 @@
import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.project import Project
from datumaro.components.extractor import Extractor, DatasetItem
from datumaro.util.test_utils import TestDir
from datumaro.util.image import save_image
class ImageDirFormatTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((10, 6, 3))),
DatasetItem(id=2, image=np.ones((5, 4, 3))),
])
def test_can_load(self):
with TestDir() as test_dir:
source_dataset = self.TestExtractor()
for item in source_dataset:
save_image(osp.join(test_dir.path, '%s.jpg' % item.id),
item.image)
project = Project.import_from(test_dir.path, 'image_dir')
parsed_dataset = project.make_dataset()
self.assertListEqual(
sorted(source_dataset.subsets()),
sorted(parsed_dataset.subsets()),
)
self.assertEqual(len(source_dataset), len(parsed_dataset))
for subset_name in source_dataset.subsets():
source_subset = source_dataset.get_subset(subset_name)
parsed_subset = parsed_dataset.get_subset(subset_name)
self.assertEqual(len(source_subset), len(parsed_subset))
for idx, (item_a, item_b) in enumerate(
zip(source_subset, parsed_subset)):
self.assertEqual(item_a, item_b, str(idx))
self.assertEqual(
source_dataset.categories(),
parsed_dataset.categories())

@ -353,6 +353,7 @@ class DatasetFilterTest(TestCase):
BboxObject(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }),
BboxObject(5, 6, 7, 8, id=5, group=5),
PointsObject([1, 2, 2, 0, 1, 1], label=0, id=5),
MaskObject(id=5, image=np.ones((3, 2))),
MaskObject(label=3, id=5, image=np.ones((2, 3))),
PolyLineObject([1, 2, 3, 4, 5, 6, 7, 8], id=11),
PolygonObject([1, 2, 3, 4, 5, 6, 7, 8]),

@ -141,9 +141,9 @@ def generate_dummy_voc(path):
obj2head_elem = ET.SubElement(obj2_elem, 'part')
ET.SubElement(obj2head_elem, 'name').text = VOC.VocBodyPart(1).name
obj2headbb_elem = ET.SubElement(obj2head_elem, 'bndbox')
ET.SubElement(obj2headbb_elem, 'xmin').text = '5'
ET.SubElement(obj2headbb_elem, 'xmin').text = '5.5'
ET.SubElement(obj2headbb_elem, 'ymin').text = '6'
ET.SubElement(obj2headbb_elem, 'xmax').text = '7'
ET.SubElement(obj2headbb_elem, 'xmax').text = '7.5'
ET.SubElement(obj2headbb_elem, 'ymax').text = '8'
obj2act_elem = ET.SubElement(obj2_elem, 'actions')
for act in VOC.VocAction:
@ -328,7 +328,7 @@ class VocExtractorTest(TestCase):
lambda x: x.type == AnnotationType.bbox and \
get_label(extractor, x.label) == VOC.VocBodyPart(1).name)
self.assertTrue(obj2.id == obj2head.group)
self.assertListEqual([5, 6, 2, 2], obj2head.get_bbox())
self.assertListEqual([5.5, 6, 2, 2], obj2head.get_bbox())
self.assertEqual(2, len(item.annotations))

@ -1,2 +1,2 @@
Pillow==6.2.0
requests==2.20.1
Pillow>=6.2.0
requests>=2.20.1
