From 4dbfa3bfdf67372c237a5993a9e00a7a956614e2 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 4 Sep 2020 12:39:08 +0300 Subject: [PATCH] [Datumaro] Update docs (#2125) * Update docs, add type hints, rename extract * Add developer guide * Update license headers, add license text * Update developer_guide.md Co-authored-by: Nikita Manovich --- datumaro/CONTRIBUTING.md | 122 +---------- datumaro/LICENSE | 22 ++ datumaro/datumaro/__init__.py | 2 +- datumaro/datumaro/__main__.py | 2 +- datumaro/datumaro/cli/__init__.py | 2 +- datumaro/datumaro/cli/__main__.py | 2 +- datumaro/datumaro/cli/commands/__init__.py | 2 +- datumaro/datumaro/cli/commands/add.py | 2 +- datumaro/datumaro/cli/commands/convert.py | 2 +- datumaro/datumaro/cli/commands/create.py | 2 +- datumaro/datumaro/cli/commands/explain.py | 2 +- datumaro/datumaro/cli/commands/export.py | 2 +- datumaro/datumaro/cli/commands/remove.py | 2 +- datumaro/datumaro/cli/contexts/__init__.py | 2 +- .../datumaro/cli/contexts/item/__init__.py | 2 +- .../datumaro/cli/contexts/model/__init__.py | 2 +- .../datumaro/cli/contexts/project/__init__.py | 17 +- .../datumaro/cli/contexts/project/diff.py | 2 +- .../datumaro/cli/contexts/source/__init__.py | 2 +- datumaro/datumaro/cli/util/__init__.py | 2 +- datumaro/datumaro/cli/util/project.py | 2 +- datumaro/datumaro/components/__init__.py | 2 +- .../components/algorithms/__init__.py | 2 +- .../datumaro/components/algorithms/rise.py | 2 +- datumaro/datumaro/components/config.py | 2 +- datumaro/datumaro/components/config_model.py | 2 +- datumaro/datumaro/components/converter.py | 2 +- .../datumaro/components/dataset_filter.py | 2 +- datumaro/datumaro/components/extractor.py | 29 ++- datumaro/datumaro/components/launcher.py | 2 +- datumaro/datumaro/components/project.py | 105 ++++----- .../accuracy_checker_plugin/__init__.py | 4 + .../datumaro/plugins/coco_format/extractor.py | 2 +- .../datumaro/plugins/coco_format/format.py | 2 +- .../datumaro/plugins/coco_format/importer.py | 
2 +- .../datumaro/plugins/cvat_format/converter.py | 2 +- .../datumaro/plugins/cvat_format/extractor.py | 2 +- .../datumaro/plugins/cvat_format/format.py | 2 +- .../datumaro/plugins/cvat_format/importer.py | 2 +- .../plugins/datumaro_format/converter.py | 2 +- .../plugins/datumaro_format/extractor.py | 2 +- .../plugins/datumaro_format/format.py | 2 +- .../plugins/datumaro_format/importer.py | 2 +- datumaro/datumaro/plugins/image_dir.py | 2 +- datumaro/datumaro/plugins/labelme_format.py | 1 - datumaro/datumaro/plugins/mot_format.py | 3 +- .../datumaro/plugins/openvino_launcher.py | 2 +- .../tf_detection_api_format/converter.py | 2 +- .../tf_detection_api_format/extractor.py | 2 +- .../plugins/tf_detection_api_format/format.py | 2 +- .../tf_detection_api_format/importer.py | 2 +- datumaro/datumaro/plugins/transforms.py | 1 - .../datumaro/plugins/voc_format/extractor.py | 2 +- .../datumaro/plugins/voc_format/format.py | 2 +- .../datumaro/plugins/voc_format/importer.py | 2 +- .../datumaro/plugins/yolo_format/converter.py | 2 +- .../datumaro/plugins/yolo_format/extractor.py | 2 +- .../datumaro/plugins/yolo_format/format.py | 2 +- .../datumaro/plugins/yolo_format/importer.py | 2 +- datumaro/datumaro/util/__init__.py | 2 +- datumaro/datumaro/util/annotation_util.py | 1 - datumaro/datumaro/util/attrs_util.py | 1 - datumaro/datumaro/util/command_targets.py | 2 +- datumaro/datumaro/util/image.py | 2 +- datumaro/datumaro/util/image_cache.py | 4 + datumaro/datumaro/util/mask_tools.py | 2 +- datumaro/datumaro/util/test_utils.py | 2 +- datumaro/datumaro/util/tf_util.py | 2 +- datumaro/docs/developer_guide.md | 200 ++++++++++++++++++ datumaro/docs/user_manual.md | 64 ++++-- datumaro/setup.py | 4 +- .../assets/pytorch_launcher/samplenet.py | 2 +- datumaro/tests/test_project.py | 2 +- 73 files changed, 401 insertions(+), 293 deletions(-) create mode 100644 datumaro/LICENSE create mode 100644 datumaro/docs/developer_guide.md diff --git a/datumaro/CONTRIBUTING.md 
b/datumaro/CONTRIBUTING.md index 97373b28..f9a1afc1 100644 --- a/datumaro/CONTRIBUTING.md +++ b/datumaro/CONTRIBUTING.md @@ -72,124 +72,4 @@ python manage.py test datumaro/ ## Design and code structure - [Design document](docs/design.md) - -### Command-line - -Use [Docker](https://www.docker.com/) as an example. Basically, -the interface is divided on contexts and single commands. -Contexts are semantically grouped commands, -related to a single topic or target. Single commands are handy shorter -alternatives for the most used commands and also special commands, -which are hard to be put into any specific context. - -![cli-design-image](docs/images/cli_design.png) - -- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page) - -Model-View-ViewModel (MVVM) UI pattern is used. - -![mvvm-image](docs/images/mvvm.png) - -### Datumaro project and environment structure - - -``` -├── [datumaro module] -└── [project folder] - ├── .datumaro/ - | ├── config.yml - │   ├── .git/ - │   ├── models/ - │   └── plugins/ - │   ├── plugin1/ - │   | ├── file1.py - │   | └── file2.py - │   ├── plugin2.py - │   ├── custom_extractor1.py - │   └── ... - ├── dataset/ - └── sources/ - ├── source1 - └── ... -``` - - -### Plugins - -Plugins are optional components, which extend the project. In Datumaro there are -several types of plugins, which include: -- `extractor` - produces dataset items from data source -- `importer` - recognizes dataset type and creates project -- `converter` - exports dataset to a specific format -- `transformation` - modifies dataset items or other properties -- `launcher` - executes models - -Plugins reside in plugin directories: -- `datumaro/plugins` for builtin components -- `/.datumaro/plugins` for project-specific components - -A plugin is a python file or package with any name, which exports some symbols. -To export a symbol, put it to `exports` list of the module like this: - -``` python -class MyComponent1: ... 
-class MyComponent2: ... -exports = [MyComponent1, MyComponent2] -``` - -or inherit it from one of special classes: -``` python -from datumaro.components.extractor import Importer, SourceExtractor, Transform -from datumaro.components.launcher import Launcher -from datumaro.components.converter import Converter -``` - -There is an additional class to modify plugin appearance at command line: - -``` python -from datumaro.components.cli_plugin import CliPlugin -``` - -Plugin example: - - - -``` -datumaro/plugins/ -- my_plugin1/file1.py -- my_plugin1/file2.py -- my_plugin2.py -``` - - - -`my_plugin1/file2.py` contents: - -``` python -from datumaro.components.extractor import Transform, CliPlugin -from .file1 import something, useful - -class MyTransform(Transform, CliPlugin): - NAME = "custom_name" - """ - Some description. - """ - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('-q', help="Some help") - return parser - ... -``` - -`my_plugin2.py` contents: - -``` python -from datumaro.components.extractor import SourceExtractor - -class MyFormat: ... -class MyFormatExtractor(SourceExtractor): ... 
- -exports = [MyFormat] # explicit exports declaration -# MyFormatExtractor won't be exported -``` +- [Developer guide](docs/developer_guide.md) \ No newline at end of file diff --git a/datumaro/LICENSE b/datumaro/LICENSE new file mode 100644 index 00000000..ae9cf710 --- /dev/null +++ b/datumaro/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (C) 2019-2020 Intel Corporation +  +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: +  +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. +  +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. 
+  diff --git a/datumaro/datumaro/__init__.py b/datumaro/datumaro/__init__.py index cd825f56..eb864e52 100644 --- a/datumaro/datumaro/__init__.py +++ b/datumaro/datumaro/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/__main__.py b/datumaro/datumaro/__main__.py index 27148356..be1cb092 100644 --- a/datumaro/datumaro/__main__.py +++ b/datumaro/datumaro/__main__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/__init__.py b/datumaro/datumaro/cli/__init__.py index cd825f56..eb864e52 100644 --- a/datumaro/datumaro/cli/__init__.py +++ b/datumaro/datumaro/cli/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/__main__.py b/datumaro/datumaro/cli/__main__.py index fabe43f8..80a8805f 100644 --- a/datumaro/datumaro/cli/__main__.py +++ b/datumaro/datumaro/cli/__main__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/__init__.py b/datumaro/datumaro/cli/commands/__init__.py index 7249842e..fe74bc2b 100644 --- a/datumaro/datumaro/cli/commands/__init__.py +++ b/datumaro/datumaro/cli/commands/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/add.py b/datumaro/datumaro/cli/commands/add.py index b2864039..288d7c04 100644 --- a/datumaro/datumaro/cli/commands/add.py +++ b/datumaro/datumaro/cli/commands/add.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT 
diff --git a/datumaro/datumaro/cli/commands/convert.py b/datumaro/datumaro/cli/commands/convert.py index d867614d..6398bac7 100644 --- a/datumaro/datumaro/cli/commands/convert.py +++ b/datumaro/datumaro/cli/commands/convert.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/create.py b/datumaro/datumaro/cli/commands/create.py index 16f6737c..97e3c9b4 100644 --- a/datumaro/datumaro/cli/commands/create.py +++ b/datumaro/datumaro/cli/commands/create.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/explain.py b/datumaro/datumaro/cli/commands/explain.py index a0a5f1cc..4d5d16b2 100644 --- a/datumaro/datumaro/cli/commands/explain.py +++ b/datumaro/datumaro/cli/commands/explain.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/export.py b/datumaro/datumaro/cli/commands/export.py index afeb73cd..be47245d 100644 --- a/datumaro/datumaro/cli/commands/export.py +++ b/datumaro/datumaro/cli/commands/export.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/commands/remove.py b/datumaro/datumaro/cli/commands/remove.py index 0e0d076f..7b9c0d3a 100644 --- a/datumaro/datumaro/cli/commands/remove.py +++ b/datumaro/datumaro/cli/commands/remove.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/contexts/__init__.py b/datumaro/datumaro/cli/contexts/__init__.py index 95019b7b..433efe9b 100644 --- a/datumaro/datumaro/cli/contexts/__init__.py +++ 
b/datumaro/datumaro/cli/contexts/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/contexts/item/__init__.py b/datumaro/datumaro/cli/contexts/item/__init__.py index 1df66809..8f74826d 100644 --- a/datumaro/datumaro/cli/contexts/item/__init__.py +++ b/datumaro/datumaro/cli/contexts/item/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/contexts/model/__init__.py b/datumaro/datumaro/cli/contexts/model/__init__.py index 0c4f2018..69b7da1e 100644 --- a/datumaro/datumaro/cli/contexts/model/__init__.py +++ b/datumaro/datumaro/cli/contexts/model/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/contexts/project/__init__.py b/datumaro/datumaro/cli/contexts/project/__init__.py index 8915086b..bab5da6f 100644 --- a/datumaro/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/datumaro/cli/contexts/project/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT @@ -278,7 +278,7 @@ def build_export_parser(parser_ctor=argparse.ArgumentParser): parser = parser_ctor(help="Export project", description=""" Exports the project dataset in some format. Optionally, a filter - can be passed, check 'extract' command description for more info. + can be passed, check 'filter' command description for more info. Each dataset format has its own options, which are passed after '--' separator (see examples), pass '-- -h' for more info. 
If not stated otherwise, by default @@ -362,7 +362,7 @@ def export_command(args): return 0 -def build_extract_parser(parser_ctor=argparse.ArgumentParser): +def build_filter_parser(parser_ctor=argparse.ArgumentParser): parser = parser_ctor(help="Extract subproject", description=""" Extracts a subproject that contains only items matching filter. @@ -414,11 +414,11 @@ def build_extract_parser(parser_ctor=argparse.ArgumentParser): help="Overwrite existing files in the save directory") parser.add_argument('-p', '--project', dest='project_dir', default='.', help="Directory of the project to operate on (default: current dir)") - parser.set_defaults(command=extract_command) + parser.set_defaults(command=filter_command) return parser -def extract_command(args): +def filter_command(args): project = load_project(args.project_dir) if not args.dry_run: @@ -437,7 +437,7 @@ def extract_command(args): filter_args = FilterModes.make_filter_args(args.mode) if args.dry_run: - dataset = dataset.extract(filter_expr=args.filter, **filter_args) + dataset = dataset.filter(expr=args.filter, **filter_args) for item in dataset: encoded_item = DatasetItemEncoder.encode(item, dataset.categories()) xml_item = DatasetItemEncoder.to_string(encoded_item) @@ -447,8 +447,7 @@ def extract_command(args): if not args.filter: raise CliException("Expected a filter expression ('-e' argument)") - dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter, - **filter_args) + dataset.filter_project(save_dir=dst_dir, expr=args.filter, **filter_args) log.info("Subproject has been extracted to '%s'" % dst_dir) @@ -816,7 +815,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser): add_subparser(subparsers, 'create', build_create_parser) add_subparser(subparsers, 'import', build_import_parser) add_subparser(subparsers, 'export', build_export_parser) - add_subparser(subparsers, 'extract', build_extract_parser) + add_subparser(subparsers, 'filter', build_filter_parser) add_subparser(subparsers, 'merge', 
build_merge_parser) add_subparser(subparsers, 'diff', build_diff_parser) add_subparser(subparsers, 'ediff', build_ediff_parser) diff --git a/datumaro/datumaro/cli/contexts/project/diff.py b/datumaro/datumaro/cli/contexts/project/diff.py index 571908f6..358f3860 100644 --- a/datumaro/datumaro/cli/contexts/project/diff.py +++ b/datumaro/datumaro/cli/contexts/project/diff.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/contexts/source/__init__.py b/datumaro/datumaro/cli/contexts/source/__init__.py index ef9edafb..45dbdb1b 100644 --- a/datumaro/datumaro/cli/contexts/source/__init__.py +++ b/datumaro/datumaro/cli/contexts/source/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/util/__init__.py b/datumaro/datumaro/cli/util/__init__.py index 3884b156..4ee0b72b 100644 --- a/datumaro/datumaro/cli/util/__init__.py +++ b/datumaro/datumaro/cli/util/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/cli/util/project.py b/datumaro/datumaro/cli/util/project.py index 75013053..56590a4d 100644 --- a/datumaro/datumaro/cli/util/project.py +++ b/datumaro/datumaro/cli/util/project.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/__init__.py b/datumaro/datumaro/components/__init__.py index a9773073..5a1ec10f 100644 --- a/datumaro/datumaro/components/__init__.py +++ b/datumaro/datumaro/components/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git 
a/datumaro/datumaro/components/algorithms/__init__.py b/datumaro/datumaro/components/algorithms/__init__.py index a9773073..5a1ec10f 100644 --- a/datumaro/datumaro/components/algorithms/__init__.py +++ b/datumaro/datumaro/components/algorithms/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/algorithms/rise.py b/datumaro/datumaro/components/algorithms/rise.py index 2f65c8cf..3fb9a895 100644 --- a/datumaro/datumaro/components/algorithms/rise.py +++ b/datumaro/datumaro/components/algorithms/rise.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/config.py b/datumaro/datumaro/components/config.py index ca66eff8..a79cda15 100644 --- a/datumaro/datumaro/components/config.py +++ b/datumaro/datumaro/components/config.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/config_model.py b/datumaro/datumaro/components/config_model.py index f46682d2..c6f65179 100644 --- a/datumaro/datumaro/components/config_model.py +++ b/datumaro/datumaro/components/config_model.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/converter.py b/datumaro/datumaro/components/converter.py index a7c6e101..05dedb48 100644 --- a/datumaro/datumaro/components/converter.py +++ b/datumaro/datumaro/components/converter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/dataset_filter.py b/datumaro/datumaro/components/dataset_filter.py index e9fc5e35..2fe1443d 100644 
--- a/datumaro/datumaro/components/dataset_filter.py +++ b/datumaro/datumaro/components/dataset_filter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index 0473a250..b213b623 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT @@ -27,26 +27,25 @@ AnnotationType = Enum('AnnotationType', _COORDINATE_ROUNDING_DIGITS = 2 -@attrs +@attrs(kw_only=True) class Annotation: - id = attrib(default=0, validator=default_if_none(int), kw_only=True) - attributes = attrib(factory=dict, validator=default_if_none(dict), kw_only=True) - group = attrib(default=0, validator=default_if_none(int), kw_only=True) + id = attrib(default=0, validator=default_if_none(int)) + attributes = attrib(factory=dict, validator=default_if_none(dict)) + group = attrib(default=0, validator=default_if_none(int)) def __attrs_post_init__(self): assert isinstance(self.type, AnnotationType) @property - def type(self): + def type(self) -> AnnotationType: return self._type # must be set in subclasses - def wrap(item, **kwargs): - return attr.evolve(item, **kwargs) + def wrap(self, **kwargs): + return attr.evolve(self, **kwargs) -@attrs +@attrs(kw_only=True) class Categories: - attributes = attrib(factory=set, validator=default_if_none(set), - kw_only=True, eq=False) + attributes = attrib(factory=set, validator=default_if_none(set), eq=False) @attrs class LabelCategories(Categories): @@ -92,7 +91,7 @@ class LabelCategories(Categories): indices[item.name] = index self._indices = indices - def add(self, name, parent=None, attributes=None): + def add(self, name: str, parent: str = None, attributes: dict = None): assert name not in self._indices, name 
if attributes is None: attributes = set() @@ -109,7 +108,7 @@ class LabelCategories(Categories): self._indices[name] = index return index - def find(self, name): + def find(self, name: str): index = self._indices.get(name) if index is not None: return index, self.items[index] @@ -601,7 +600,7 @@ class SourceExtractor(Extractor): def get_subset(self, name): if name != self._subset: - return None + raise Exception("Unknown subset '%s' requested" % name) return self class Importer: @@ -629,5 +628,5 @@ class Transform(Extractor): def categories(self): return self._extractor.categories() - def transform_item(self, item): + def transform_item(self, item: DatasetItem) -> DatasetItem: raise NotImplementedError() diff --git a/datumaro/datumaro/components/launcher.py b/datumaro/datumaro/components/launcher.py index b66bf237..adc31fb5 100644 --- a/datumaro/datumaro/components/launcher.py +++ b/datumaro/datumaro/components/launcher.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index 8ac3ceb0..07f8f019 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -1,12 +1,12 @@ - -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT from collections import OrderedDict, defaultdict from functools import reduce -import git from glob import glob +from typing import Iterable, Union, Dict, List +import git import importlib import inspect import logging as log @@ -19,7 +19,7 @@ from datumaro.components.config import Config, DEFAULT_FORMAT from datumaro.components.config_model import (Model, Source, PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA) from datumaro.components.extractor import Extractor, LabelCategories,\ - AnnotationType + AnnotationType, DatasetItem from datumaro.components.launcher import 
ModelTransform from datumaro.components.dataset_filter import \ XPathDatasetFilter, XPathAnnotationsFilter @@ -304,50 +304,40 @@ class Environment: self.models.unregister(name) -class Subset(Extractor): - def __init__(self, parent): - self._parent = parent - self.items = OrderedDict() +class Dataset(Extractor): + class Subset(Extractor): + def __init__(self, parent): + self.parent = parent + self.items = OrderedDict() - def __iter__(self): - for item in self.items.values(): - yield item + def __iter__(self): + yield from self.items.values() - def __len__(self): - return len(self.items) + def __len__(self): + return len(self.items) - def categories(self): - return self._parent.categories() + def categories(self): + return self.parent.categories() -class Dataset(Extractor): @classmethod - def from_iterable(cls, iterable, categories=None): - """Generation of Dataset from iterable object - - Args: - iterable: Iterable object contains DatasetItems - categories (dict, optional): You can pass dict of categories or - you can pass list of names. It'll interpreted as list of names of - LabelCategories. Defaults to {}. 
- - Returns: - Dataset: Dataset object - """ - + def from_iterable(cls, iterable: Iterable[DatasetItem], + categories: Union[Dict, List[str]] = None): if isinstance(categories, list): - categories = {AnnotationType.label : LabelCategories.from_iterable(categories)} + categories = { AnnotationType.label: + LabelCategories.from_iterable(categories) + } if not categories: categories = {} - class tmpExtractor(Extractor): + class _extractor(Extractor): def __iter__(self): return iter(iterable) def categories(self): return categories - return cls.from_extractors(tmpExtractor()) + return cls.from_extractors(_extractor()) @classmethod def from_extractors(cls, *sources): @@ -355,7 +345,7 @@ class Dataset(Extractor): dataset = Dataset(categories=categories) # merge items - subsets = defaultdict(lambda: Subset(dataset)) + subsets = defaultdict(lambda: cls.Subset(dataset)) for source in sources: for item in source: existing_item = subsets[item.subset].items.get(item.id) @@ -416,20 +406,19 @@ class Dataset(Extractor): if subset is None: subset = item.subset - item = item.wrap(path=None, annotations=item.annotations) - if item.subset not in self._subsets: - self._subsets[item.subset] = Subset(self) + item = item.wrap(id=item_id, subset=subset, path=None) + if subset not in self._subsets: + self._subsets[subset] = self.Subset(self) self._subsets[subset].items[item_id] = item self._length = None return item - def extract(self, filter_expr, filter_annotations=False, remove_empty=False): + def filter(self, expr, filter_annotations=False, remove_empty=False): if filter_annotations: - return self.transform(XPathAnnotationsFilter, filter_expr, - remove_empty) + return self.transform(XPathAnnotationsFilter, expr, remove_empty) else: - return self.transform(XPathDatasetFilter, filter_expr) + return self.transform(XPathDatasetFilter, expr) def update(self, items): for item in items: @@ -500,17 +489,14 @@ class ProjectDataset(Dataset): sources = {} for s_name, source in 
config.sources.items(): - s_format = source.format - if not s_format: - s_format = env.PROJECT_EXTRACTOR_NAME + s_format = source.format or env.PROJECT_EXTRACTOR_NAME options = {} options.update(source.options) url = source.url if not source.url: url = osp.join(config.project_dir, config.sources_dir, s_name) - sources[s_name] = env.make_extractor(s_format, - url, **options) + sources[s_name] = env.make_extractor(s_format, url, **options) self._sources = sources own_source = None @@ -531,7 +517,7 @@ class ProjectDataset(Dataset): self._categories = categories # merge items - subsets = defaultdict(lambda: Subset(self)) + subsets = defaultdict(lambda: self.Subset(self)) for source_name, source in self._sources.items(): log.debug("Loading '%s' source contents..." % source_name) for item in source: @@ -548,11 +534,8 @@ class ProjectDataset(Dataset): # NOTE: consider imported sources as our own dataset path = None else: - path = item.path - if path is None: - path = [] - path = [source_name] + path - item = item.wrap(path=path, annotations=item.annotations) + path = [source_name] + (item.path or []) + item = item.wrap(path=path) subsets[item.subset].items[item.id] = item @@ -563,8 +546,7 @@ class ProjectDataset(Dataset): existing_item = subsets[item.subset].items.get(item.id) if existing_item is not None: item = item.wrap(path=None, - image=self._merge_images(existing_item, item), - annotations=item.annotations) + image=self._merge_images(existing_item, item)) subsets[item.subset].items[item.id] = item @@ -590,6 +572,7 @@ class ProjectDataset(Dataset): def put(self, item, item_id=None, subset=None, path=None): if path is None: path = item.path + if path: source = path[0] rest_path = path[1:] @@ -602,9 +585,9 @@ class ProjectDataset(Dataset): if subset is None: subset = item.subset - item = item.wrap(path=path, annotations=item.annotations) - if item.subset not in self._subsets: - self._subsets[item.subset] = Subset(self) + item = item.wrap(path=path) + if subset not in 
self._subsets: + self._subsets[subset] = self.Subset(self) self._subsets[subset].items[item_id] = item self._length = None @@ -713,7 +696,7 @@ class ProjectDataset(Dataset): # NOTE: probably this function should be in the ViewModel layer dataset = self if filter_expr: - dataset = dataset.extract(filter_expr, + dataset = dataset.filter(filter_expr, filter_annotations=filter_annotations, remove_empty=remove_empty) @@ -727,15 +710,15 @@ class ProjectDataset(Dataset): shutil.rmtree(save_dir) raise - def extract_project(self, filter_expr, filter_annotations=False, + def filter_project(self, filter_expr, filter_annotations=False, save_dir=None, remove_empty=False): # NOTE: probably this function should be in the ViewModel layer - filtered = self + dataset = self if filter_expr: - filtered = self.extract(filter_expr, + dataset = dataset.filter(filter_expr, filter_annotations=filter_annotations, remove_empty=remove_empty) - self._save_branch_project(filtered, save_dir=save_dir) + self._save_branch_project(dataset, save_dir=save_dir) class Project: @classmethod diff --git a/datumaro/datumaro/plugins/accuracy_checker_plugin/__init__.py b/datumaro/datumaro/plugins/accuracy_checker_plugin/__init__.py index e69de29b..fdd6d291 100644 --- a/datumaro/datumaro/plugins/accuracy_checker_plugin/__init__.py +++ b/datumaro/datumaro/plugins/accuracy_checker_plugin/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + diff --git a/datumaro/datumaro/plugins/coco_format/extractor.py b/datumaro/datumaro/plugins/coco_format/extractor.py index 8bb6e464..73e78820 100644 --- a/datumaro/datumaro/plugins/coco_format/extractor.py +++ b/datumaro/datumaro/plugins/coco_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/coco_format/format.py b/datumaro/datumaro/plugins/coco_format/format.py index 
6db04f0c..5129d49d 100644 --- a/datumaro/datumaro/plugins/coco_format/format.py +++ b/datumaro/datumaro/plugins/coco_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/coco_format/importer.py b/datumaro/datumaro/plugins/coco_format/importer.py index 4c32064b..3896b725 100644 --- a/datumaro/datumaro/plugins/coco_format/importer.py +++ b/datumaro/datumaro/plugins/coco_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/cvat_format/converter.py b/datumaro/datumaro/plugins/cvat_format/converter.py index 37751703..4849619b 100644 --- a/datumaro/datumaro/plugins/cvat_format/converter.py +++ b/datumaro/datumaro/plugins/cvat_format/converter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/cvat_format/extractor.py b/datumaro/datumaro/plugins/cvat_format/extractor.py index 75a3e5d8..7e37c2dd 100644 --- a/datumaro/datumaro/plugins/cvat_format/extractor.py +++ b/datumaro/datumaro/plugins/cvat_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/cvat_format/format.py b/datumaro/datumaro/plugins/cvat_format/format.py index c73fd467..e5572a89 100644 --- a/datumaro/datumaro/plugins/cvat_format/format.py +++ b/datumaro/datumaro/plugins/cvat_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/cvat_format/importer.py b/datumaro/datumaro/plugins/cvat_format/importer.py index 31f8dbd4..a3a83757 100644 --- 
a/datumaro/datumaro/plugins/cvat_format/importer.py +++ b/datumaro/datumaro/plugins/cvat_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index 81c2cd55..2d862094 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/datumaro_format/extractor.py b/datumaro/datumaro/plugins/datumaro_format/extractor.py index 71eb6856..c1ae40d4 100644 --- a/datumaro/datumaro/plugins/datumaro_format/extractor.py +++ b/datumaro/datumaro/plugins/datumaro_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/datumaro_format/format.py b/datumaro/datumaro/plugins/datumaro_format/format.py index ef587b9b..501c100b 100644 --- a/datumaro/datumaro/plugins/datumaro_format/format.py +++ b/datumaro/datumaro/plugins/datumaro_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/datumaro_format/importer.py b/datumaro/datumaro/plugins/datumaro_format/importer.py index ed2f7527..dbb90f86 100644 --- a/datumaro/datumaro/plugins/datumaro_format/importer.py +++ b/datumaro/datumaro/plugins/datumaro_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/image_dir.py b/datumaro/datumaro/plugins/image_dir.py index 410a91f8..062387e1 
100644 --- a/datumaro/datumaro/plugins/image_dir.py +++ b/datumaro/datumaro/plugins/image_dir.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py index 5218e36f..e037afba 100644 --- a/datumaro/datumaro/plugins/labelme_format.py +++ b/datumaro/datumaro/plugins/labelme_format.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/mot_format.py b/datumaro/datumaro/plugins/mot_format.py index f3776078..12d3d07c 100644 --- a/datumaro/datumaro/plugins/mot_format.py +++ b/datumaro/datumaro/plugins/mot_format.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT @@ -20,7 +19,7 @@ from datumaro.components.extractor import (SourceExtractor, from datumaro.components.extractor import Importer from datumaro.components.converter import Converter from datumaro.util import cast -from datumaro.util.image import Image, save_image +from datumaro.util.image import Image MotLabel = Enum('MotLabel', [ diff --git a/datumaro/datumaro/plugins/openvino_launcher.py b/datumaro/datumaro/plugins/openvino_launcher.py index 4e150b03..abdaa0fc 100644 --- a/datumaro/datumaro/plugins/openvino_launcher.py +++ b/datumaro/datumaro/plugins/openvino_launcher.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py index 7ff3569d..a178bdba 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py index f91c8b72..6962d3c0 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/format.py b/datumaro/datumaro/plugins/tf_detection_api_format/format.py index 829a89e4..f4a879a6 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/format.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/importer.py b/datumaro/datumaro/plugins/tf_detection_api_format/importer.py index 169618ba..b3d8a47d 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/importer.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/transforms.py b/datumaro/datumaro/plugins/transforms.py index 82493610..7e7cea8b 100644 --- a/datumaro/datumaro/plugins/transforms.py +++ b/datumaro/datumaro/plugins/transforms.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/voc_format/extractor.py b/datumaro/datumaro/plugins/voc_format/extractor.py index 669d7810..0fe667d3 100644 --- a/datumaro/datumaro/plugins/voc_format/extractor.py +++ b/datumaro/datumaro/plugins/voc_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 
Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/voc_format/format.py b/datumaro/datumaro/plugins/voc_format/format.py index 471866be..a03446d5 100644 --- a/datumaro/datumaro/plugins/voc_format/format.py +++ b/datumaro/datumaro/plugins/voc_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/voc_format/importer.py b/datumaro/datumaro/plugins/voc_format/importer.py index 78dc6cc9..e9354e6c 100644 --- a/datumaro/datumaro/plugins/voc_format/importer.py +++ b/datumaro/datumaro/plugins/voc_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/yolo_format/converter.py b/datumaro/datumaro/plugins/yolo_format/converter.py index a8ed3524..9217c774 100644 --- a/datumaro/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/datumaro/plugins/yolo_format/converter.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/yolo_format/extractor.py b/datumaro/datumaro/plugins/yolo_format/extractor.py index 9e34508c..c8c39c42 100644 --- a/datumaro/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/datumaro/plugins/yolo_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/plugins/yolo_format/format.py b/datumaro/datumaro/plugins/yolo_format/format.py index c88c99d4..02a07669 100644 --- a/datumaro/datumaro/plugins/yolo_format/format.py +++ b/datumaro/datumaro/plugins/yolo_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT 
diff --git a/datumaro/datumaro/plugins/yolo_format/importer.py b/datumaro/datumaro/plugins/yolo_format/importer.py index 344475c6..a040ea4e 100644 --- a/datumaro/datumaro/plugins/yolo_format/importer.py +++ b/datumaro/datumaro/plugins/yolo_format/importer.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/__init__.py b/datumaro/datumaro/util/__init__.py index 010057d5..0a75756b 100644 --- a/datumaro/datumaro/util/__init__.py +++ b/datumaro/datumaro/util/__init__.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/annotation_util.py b/datumaro/datumaro/util/annotation_util.py index 38a2c814..63950a14 100644 --- a/datumaro/datumaro/util/annotation_util.py +++ b/datumaro/datumaro/util/annotation_util.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/attrs_util.py b/datumaro/datumaro/util/attrs_util.py index 15f0c318..e631f35a 100644 --- a/datumaro/datumaro/util/attrs_util.py +++ b/datumaro/datumaro/util/attrs_util.py @@ -1,4 +1,3 @@ - # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/command_targets.py b/datumaro/datumaro/util/command_targets.py index d8035a23..50c854f2 100644 --- a/datumaro/datumaro/util/command_targets.py +++ b/datumaro/datumaro/util/command_targets.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index fc6a113c..626d8499 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/image_cache.py b/datumaro/datumaro/util/image_cache.py index fd1ad0d7..08f02582 100644 --- a/datumaro/datumaro/util/image_cache.py +++ b/datumaro/datumaro/util/image_cache.py @@ -1,3 +1,7 @@ +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + from collections import OrderedDict diff --git a/datumaro/datumaro/util/mask_tools.py b/datumaro/datumaro/util/mask_tools.py index 680093d9..95c8633a 100644 --- a/datumaro/datumaro/util/mask_tools.py +++ b/datumaro/datumaro/util/mask_tools.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/test_utils.py b/datumaro/datumaro/util/test_utils.py index 62973ca5..db2767db 100644 --- a/datumaro/datumaro/util/test_utils.py +++ b/datumaro/datumaro/util/test_utils.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/datumaro/util/tf_util.py b/datumaro/datumaro/util/tf_util.py index f5d70090..9eda97ba 100644 --- a/datumaro/datumaro/util/tf_util.py +++ b/datumaro/datumaro/util/tf_util.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT diff --git a/datumaro/docs/developer_guide.md b/datumaro/docs/developer_guide.md new file mode 100644 index 00000000..e2fd101d --- /dev/null +++ b/datumaro/docs/developer_guide.md @@ -0,0 +1,200 @@ +## Basics + +The center part of the library is the `Dataset` class, which allows to iterate +over its elements. `DatasetItem`, an element of a dataset, represents a single +dataset entry with annotations - an image, video sequence, audio track etc. +It can contain only annotated data or meta information, only annotations, or +all of this. 
+ +Basic library usage and data flow: + +```lang-none +Extractors -> Dataset -> Converter + | + Filtration + Transformations + Statistics + Merging + Inference + Quality Checking + Comparison + ... +``` + +1. Data is read (or produced) by one or many `Extractor`s and merged + into a `Dataset` +1. A dataset is processed in some way +1. A dataset is saved with a `Converter` + +Datumaro has a number of dataset and annotation features: +- iteration over dataset elements +- filtering of datasets and annotations by custom criteria +- working with subsets (e.g. `train`, `val`, `test`) +- computing of dataset statistics +- comparison and merging of datasets +- various annotation operations + +```python +from datumaro.components.project import Environment + +# Import and save a dataset +env = Environment() +dataset = env.make_importer('voc')('src/dir').make_dataset() +env.converters.get('coco').convert(dataset, save_dir='dst/dir') +``` + +## Library contents + +### Dataset Formats + +Dataset reading is supported by `Extractor`s and `Importer`s: +- An `Extractor` produces a list of `DatasetItem`s corresponding +to the dataset. +- An `Importer` creates a project from the data source location. + +It is possible to add custom Extractors and Importers. To do this, you need +to put `Extractor` and `Importer` implementations into a plugin directory. + +Dataset writing is supported by `Converter`s. +A Converter produces a dataset of a specific format from dataset items. +It is possible to add custom `Converter`s. To do this, you need to put a +Converter implementation script into a plugin directory. + +### Dataset Conversions ("Transforms") + +A `Transform` is a function for altering a dataset and producing a new one. +It can update dataset items, annotations, classes, and other properties. +A list of available transforms for dataset conversions can be extended by +adding a `Transform` implementation script into a plugin directory. 
+ +### Model launchers + +A list of available launchers for model execution can be extended by +adding a `Launcher` implementation script into a plugin directory. + +## Plugins + +Datumaro comes with a number of built-in formats and other tools, +but it also can be extended by plugins. Plugins are optional components, +whose dependencies are not installed by default. +In Datumaro there are several types of plugins, which include: +- `extractor` - produces dataset items from data source +- `importer` - recognizes dataset type and creates project +- `converter` - exports dataset to a specific format +- `transformation` - modifies dataset items or other properties +- `launcher` - executes models + +A plugin is a regular Python module. It must be present in a plugin directory: +- `<project_dir>/.datumaro/plugins` for project-specific plugins +- `<datumaro_dir>/plugins` for global plugins + +A plugin can be used either via the `Environment` class instance, +or by regular module importing: + +```python +from datumaro.components.project import Environment, Project +from datumaro.plugins.yolo_format.converter import YoloConverter + +# Import a dataset +dataset = Environment().make_importer('voc')(src_dir).make_dataset() + +# Load an existing project, save the dataset in some project-specific format +project = Project.load('project/dir') +project.env.converters.get('custom_format').convert(dataset, save_dir=dst_dir) + +# Save the dataset in some built-in format +Environment().converters.get('yolo').convert(dataset, save_dir=dst_dir) +YoloConverter.convert(dataset, save_dir=dst_dir) +``` + +### Writing a plugin + +A plugin is a Python module with any name, which exports some symbols. 
+To export a symbol, inherit it from one of the special classes: + +```python +from datumaro.components.extractor import Importer, SourceExtractor, Transform +from datumaro.components.launcher import Launcher +from datumaro.components.converter import Converter +``` + +The `exports` list of the module can be used to override default behaviour: +```python +class MyComponent1: ... +class MyComponent2: ... +exports = [MyComponent2] # exports only MyComponent2 +``` + +There is also an additional class to modify plugin appearance in the command line: + +```python +from datumaro.components.cli_plugin import CliPlugin +``` + +#### Plugin example + + + +``` +datumaro/plugins/ +- my_plugin1/file1.py +- my_plugin1/file2.py +- my_plugin2.py +``` + + + +`my_plugin1/file2.py` contents: + +```python +from datumaro.components.extractor import Transform, CliPlugin +from .file1 import something, useful + +class MyTransform(Transform, CliPlugin): + NAME = "custom_name" # could be generated automatically + + """ + Some description. The text will be displayed in the command line output. + """ + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-q', help="Very useful parameter") + return parser + + def __init__(self, extractor, q): + super().__init__(extractor) + self.q = q + + def transform_item(self, item): + return item +``` + +`my_plugin2.py` contents: + +```python +from datumaro.components.extractor import SourceExtractor + +class MyFormat: ... +class MyFormatExtractor(SourceExtractor): ... + +exports = [MyFormat] # explicit exports declaration +# MyFormatExtractor won't be exported +``` + +## Command-line + +Basically, the interface is divided into contexts and single commands. +Contexts are semantically grouped commands, related to a single topic or target. 
+Single commands are handy shorter alternatives for the most used commands +and also special commands, which are hard to put into any specific context. 
+[Docker](https://www.docker.com/) is an example of similar approach. + +![cli-design-image](images/cli_design.png) + +- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page) + +Model-View-ViewModel (MVVM) UI pattern is used. + +![mvvm-image](images/mvvm.png) diff --git a/datumaro/docs/user_manual.md b/datumaro/docs/user_manual.md index e2e798e2..9e68f8f9 100644 --- a/datumaro/docs/user_manual.md +++ b/datumaro/docs/user_manual.md @@ -6,22 +6,23 @@ - [Interfaces](#interfaces) - [Supported dataset formats and annotations](#supported-formats) - [Command line workflow](#command-line-workflow) + - [Project structure](#project-structure) - [Command reference](#command-reference) - [Convert datasets](#convert-datasets) - - [Create a project](#create-project) + - [Create project](#create-project) - [Add and remove data](#add-and-remove-data) - - [Import a project](#import-project) - - [Extract a subproject](#extract-subproject) + - [Import project](#import-project) + - [Filter project](#filter-project) - [Update project (merge)](#update-project) - [Merge projects](#merge-projects) - - [Export a project](#export-project) + - [Export project](#export-project) - [Compare projects](#compare-projects) - [Obtaining project info](#get-project-info) - [Obtaining project statistics](#get-project-statistics) - - [Register a model](#register-model) + - [Register model](#register-model) - [Run inference](#run-inference) - [Run inference explanation](#explain-inference) - - [Transform a project](#transform-project) + - [Transform project](#transform-project) - [Extending](#extending) - [Links](#links) @@ -111,15 +112,39 @@ List of supported annotation types: ## Command line workflow -The key object is a project, so most CLI commands operate on projects. However, there -are few commands operating on datasets directly. A project is a combination of -a project's own dataset, a number of external data sources and an environment. 
+The key object is a project, so most CLI commands operate on projects. +However, there are few commands operating on datasets directly. +A project is a combination of a project's own dataset, a number of +external data sources and an environment. An empty Project can be created by `project create` command, an existing dataset can be imported with `project import` command. A typical way to obtain projects is to export tasks in CVAT UI. If you want to interact with models, you need to add them to project first. +### Project structure + + +``` +└── project/ + ├── .datumaro/ + | ├── config.yml + │   ├── .git/ + │   ├── models/ + │   └── plugins/ + │   ├── plugin1/ + │   | ├── file1.py + │   | └── file2.py + │   ├── plugin2.py + │   ├── custom_extractor1.py + │   └── ... + ├── dataset/ + └── sources/ + ├── source1 + └── ... +``` + + ## Command reference > **Note**: command invocation syntax is subject to change, @@ -270,11 +295,11 @@ datum source add path -f image_dir datum project export -f tf_detection_api ``` -### Extract subproject +### Filter project This command allows to create a sub-Project from a Project. The new project includes only items satisfying some condition. [XPath](https://devhints.io/xpath) -is used as query format. +is used as a query format. There are several filtering modes available (`-m/--mode` parameter). Supported modes: @@ -290,38 +315,34 @@ returns `annotation` elements (see examples). 
Usage: ``` bash -datum project extract --help +datum project filter --help -datum project extract \ +datum project filter \ -p \ - -o \ -e '' ``` Example: extract a dataset with only images which `width` < `height` ``` bash -datum project extract \ +datum project filter \ -p test_project \ - -o test_project-extract \ -e '/item[image/width < image/height]' ``` Example: extract a dataset with only large annotations of class `cat` and any non-`persons` ``` bash -datum project extract \ +datum project filter \ -p test_project \ - -o test_project-extract \ --mode annotations -e '/item/annotation[(label="cat" and area > 99.5) or label!="person"]' ``` Example: extract a dataset with only occluded annotations, remove empty images ``` bash -datum project extract \ +datum project filter \ -p test_project \ - -o test_project-extract \ -m i+a -e '/item/annotation[occluded="True"]' ``` @@ -362,7 +383,8 @@ Item representations are available with `--dry-run` parameter: ### Update project -This command updates items in a project from another one (check [Merge Projects](#merge-projects) for complex merging). +This command updates items in a project from another one +(check [Merge Projects](#merge-projects) for complex merging). 
Usage: diff --git a/datumaro/setup.py b/datumaro/setup.py index 4ebf1119..cf6d0433 100644 --- a/datumaro/setup.py +++ b/datumaro/setup.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT @@ -36,7 +36,7 @@ setuptools.setup( version=find_version(), author="Intel", author_email="maxim.zhiltsov@intel.com", - description="Dataset Framework", + description="Dataset Management Framework (Datumaro)", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/opencv/cvat/datumaro", diff --git a/datumaro/tests/assets/pytorch_launcher/samplenet.py b/datumaro/tests/assets/pytorch_launcher/samplenet.py index a742a650..7282e43a 100644 --- a/datumaro/tests/assets/pytorch_launcher/samplenet.py +++ b/datumaro/tests/assets/pytorch_launcher/samplenet.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2019 Intel Corporation +Copyright (C) 2019-2020 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/datumaro/tests/test_project.py b/datumaro/tests/test_project.py index ed4ad976..50d21d38 100644 --- a/datumaro/tests/test_project.py +++ b/datumaro/tests/test_project.py @@ -250,7 +250,7 @@ class ProjectTest(TestCase): project.env.extractors.register(e_type, TestExtractor) project.add_source('source', { 'format': e_type }) - dataset = project.make_dataset().extract('/item[id < 5]') + dataset = project.make_dataset().filter('/item[id < 5]') self.assertEqual(5, len(dataset))