[Datumaro] VOC labelmap support (#957)

* Add import result checks and options to skip
* Add label-specific attributes
* Overwrite option for export
* Add labelmap file support in voc
* Add labelmap tests
* Little refactoring
6 years ago · c84daaf2ef
parent 20a0e66238
commit c84daaf2ef
8 changed files with 576 additions and 220 deletions
--- a/datumaro/datumaro/cli/project/init.py
+++ b/datumaro/datumaro/cli/project/init.py
@ -68,6 +68,8 @@ def build_import_parser(parser):
        help="Overwrite existing files in the save directory")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    # parser.add_argument('extra_args', nargs=argparse.REMAINDER,
    #     help="Additional arguments for importer (pass '-- -h' for help)")
    return parser
@ -99,7 +101,9 @@ def import_command(args):
    project.config.project_name = project_name
    project.config.project_dir = project_dir
-    dataset = project.make_dataset()
+    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
@ -127,6 +131,8 @@ def build_export_parser(parser):
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser
@ -135,7 +141,11 @@ def export_command(args):
    project = load_project(args.project_dir)
    dst_dir = osp.abspath(args.dst_dir)
-    os.makedirs(dst_dir, exist_ok=False)
+    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        log.error("Directory '%s' already exists "
            "(pass --overwrite to force creation)" % dst_dir)
        return 1
    os.makedirs(dst_dir, exist_ok=args.overwrite)
    project.make_dataset().export(
        save_dir=dst_dir,
--- a/datumaro/datumaro/cli/source/init.py
+++ b/datumaro/datumaro/cli/source/init.py
@ -62,14 +62,16 @@ def build_import_parser(parser):
    dir_parser.add_argument('url',
        help="Path to the source directory")
    dir_parser.add_argument('--copy', action='store_true',
-        help="Copy data to the project")
+        help="Copy the dataset instead of saving source links")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new source")
    parser.add_argument('-f', '--format', default=None,
        help="Name of the source dataset format (default: 'project')")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the source to be imported")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    return parser
 def import_command(args):
@ -99,6 +101,10 @@ def import_command(args):
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)
        if not args.skip_check:
            log.info("Checking the source...")
            project.make_source_project(name)
        project.save()
        log.info("Source '%s' has been added to the project, location: '%s'" \
@ -131,6 +137,10 @@ def import_command(args):
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)
        if not args.skip_check:
            log.info("Checking the source...")
            project.make_source_project(name)
        project.save()
        log.info("Source '%s' has been added to the project, location: '%s'" \
@ -184,6 +194,8 @@ def build_export_parser(parser):
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser
@ -192,7 +204,11 @@ def export_command(args):
    project = load_project(args.project_dir)
    dst_dir = osp.abspath(args.dst_dir)
-    os.makedirs(dst_dir, exist_ok=False)
+    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        log.error("Directory '%s' already exists "
            "(pass --overwrite to force creation)" % dst_dir)
        return 1
    os.makedirs(dst_dir, exist_ok=args.overwrite)
    source_project = project.make_source_project(args.name)
    source_project.make_dataset().export(
--- a/datumaro/datumaro/components/converters/voc.py
+++ b/datumaro/datumaro/components/converters/voc.py
@ -4,18 +4,23 @@
 # SPDX-License-Identifier: MIT
 from collections import OrderedDict, defaultdict
 from enum import Enum
 from itertools import chain
 import logging as log
 from lxml import etree as ET
 import os
 import os.path as osp
 from datumaro.components.converter import Converter
-from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
+from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
-from datumaro.components.formats.voc import VocLabel, VocAction, \
+    LabelCategories
-    VocBodyPart, VocPose, VocTask, VocPath, VocColormap, VocInstColormap
+)
-from datumaro.util import find
+from datumaro.components.formats.voc import (VocTask, VocPath,
    VocInstColormap, VocPose,
    parse_label_map, make_voc_label_map, make_voc_categories, write_label_map
 )
 from datumaro.util.image import save_image
-from datumaro.util.mask_tools import apply_colormap
+from datumaro.util.mask_tools import apply_colormap, remap_mask
 def _write_xml_bbox(bbox, parent_elem):
@ -27,13 +32,12 @@ def _write_xml_bbox(bbox, parent_elem):
    ET.SubElement(bbox_elem, 'ymax').text = str(y + h)
    return bbox_elem
 class _Converter:
    _LABELS = set([entry.name for entry in VocLabel])
    _BODY_PARTS = set([entry.name for entry in VocBodyPart])
    _ACTIONS = set([entry.name for entry in VocAction])
 LabelmapType = Enum('LabelmapType', ['voc', 'source', 'guess'])
 class _Converter:
    def __init__(self, extractor, save_dir,
-            tasks=None, apply_colormap=True, save_images=False):
+            tasks=None, apply_colormap=True, save_images=False, label_map=None):
        assert tasks is None or isinstance(tasks, (VocTask, list))
        if tasks is None:
            tasks = list(VocTask)
@ -49,14 +53,12 @@ class _Converter:
        self._apply_colormap = apply_colormap
        self._save_images = save_images
-        self._label_categories = extractor.categories() \
+        self._load_categories(label_map)
            .get(AnnotationType.label)
        self._mask_categories = extractor.categories() \
            .get(AnnotationType.mask)
    def convert(self):
        self.init_dirs()
        self.save_subsets()
        self.save_label_map()
    def init_dirs(self):
        save_dir = self._save_dir
@ -94,7 +96,8 @@ class _Converter:
        self._images_dir = images_dir
    def get_label(self, label_id):
-        return self._label_categories.items[label_id].name
+        return self._extractor.categories()[AnnotationType.label] \
            .items[label_id].name
    def save_subsets(self):
        subsets = self._extractor.subsets()
@ -167,56 +170,64 @@ class _Converter:
                    layout_bboxes = []
                    for bbox in bboxes:
                        label = self.get_label(bbox.label)
-                        if label in self._LABELS:
+                        if self._is_part(label):
                            main_bboxes.append(bbox)
                        elif label in self._BODY_PARTS:
                            layout_bboxes.append(bbox)
                        elif self._is_label(label):
                            main_bboxes.append(bbox)
                    for new_obj_id, obj in enumerate(main_bboxes):
                        attr = obj.attributes
                        obj_elem = ET.SubElement(root_elem, 'object')
-                        ET.SubElement(obj_elem, 'name').text = self.get_label(obj.label)
+
                        obj_label =  self.get_label(obj.label)
                        ET.SubElement(obj_elem, 'name').text = obj_label
                        pose = attr.get('pose')
                        if pose is not None:
-                            ET.SubElement(obj_elem, 'pose').text = VocPose[pose].name
+                            pose = VocPose[pose]
                        else:
                            pose = VocPose.Unspecified
                        ET.SubElement(obj_elem, 'pose').text = pose.name
                        truncated = attr.get('truncated')
                        if truncated is not None:
-                            ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated
+                            truncated = int(truncated)
                        else:
                            truncated = 0
                        ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated
                        difficult = attr.get('difficult')
                        if difficult is not None:
-                            ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult
+                            difficult = int(difficult)
                        else:
                            difficult = 0
                        ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult
                        bbox = obj.get_bbox()
                        if bbox is not None:
                            _write_xml_bbox(bbox, obj_elem)
-                        for part in VocBodyPart:
+                        for part_bbox in filter(lambda x: obj.id == x.group,
-                            part_bbox = find(layout_bboxes, lambda x: \
+                                layout_bboxes):
-                                obj.id == x.group and \
+                            part_elem = ET.SubElement(obj_elem, 'part')
-                                self.get_label(x.label) == part.name)
+                            ET.SubElement(part_elem, 'name').text = \
-                            if part_bbox is not None:
+                                self.get_label(part_bbox.label)
-                                part_elem = ET.SubElement(obj_elem, 'part')
+                            _write_xml_bbox(part_bbox.get_bbox(), part_elem)
                                ET.SubElement(part_elem, 'name').text = part.name
                                _write_xml_bbox(part_bbox.get_bbox(), part_elem)
-                                objects_with_parts.append(new_obj_id)
+                            objects_with_parts.append(new_obj_id)
-                        actions = [x for x in labels
+                        actions = {k: v for k, v in obj.attributes.items()
-                            if obj.id == x.group and \
+                            if self._is_action(obj_label, k)}
-                               self.get_label(x.label) in self._ACTIONS]
+                        actions_elem = ET.Element('actions')
-                        if len(actions) != 0:
+                        for action in self._get_actions(obj_label):
-                            actions_elem = ET.SubElement(obj_elem, 'actions')
+                            presented = action in actions and actions[action]
-                            for action in VocAction:
+                            ET.SubElement(actions_elem, action).text = \
-                                presented = find(actions, lambda x: \
+                                '%d' % presented
                                    self.get_label(x.label) == action.name) is not None
                                ET.SubElement(actions_elem, action.name).text = \
                                    '%d' % presented
-                                objects_with_actions[new_obj_id][action] = presented
+                            objects_with_actions[new_obj_id][action] = presented
                        if len(actions) != 0:
                            obj_elem.append(actions_elem)
                    if set(self._tasks) & set([None,
                            VocTask.detection,
@ -232,7 +243,7 @@ class _Converter:
                for label_obj in labels:
                    label = self.get_label(label_obj.label)
-                    if label not in self._LABELS:
+                    if not self._is_label(label):
                        continue
                    class_list = class_lists.get(item_id, set())
                    class_list.add(label_obj.label)
@ -244,7 +255,7 @@ class _Converter:
                    if mask_obj.attributes.get('class') == True:
                        self.save_segm(osp.join(self._segm_dir,
                                item_id + VocPath.SEGM_EXT),
-                            mask_obj, self._mask_categories.colormap)
+                            mask_obj)
                    if mask_obj.attributes.get('instances') == True:
                        self.save_segm(osp.join(self._inst_dir,
                                item_id + VocPath.SEGM_EXT),
@ -284,9 +295,11 @@ class _Converter:
        if len(action_list) == 0:
            return
-        for action in VocAction:
+        all_actions = set(chain(*(self._get_actions(l)
            for l in self._label_map)))
        for action in all_actions:
            ann_file = osp.join(self._action_subsets_dir,
-                '%s_%s.txt' % (action.name, subset_name))
+                '%s_%s.txt' % (action, subset_name))
            with open(ann_file, 'w') as f:
                for item, objs in action_list.items():
                    if not objs:
@ -302,23 +315,17 @@ class _Converter:
        if len(class_lists) == 0:
            return
-        label_cat = self._extractor.categories().get(AnnotationType.label, None)
+        for label in self._label_map:
        if not label_cat:
            log.warn("Unable to save classification task lists "
                "as source does not provide class labels. Skipped.")
            return
        for label in VocLabel:
            ann_file = osp.join(self._cls_subsets_dir,
-                '%s_%s.txt' % (label.name, subset_name))
+                '%s_%s.txt' % (label, subset_name))
            with open(ann_file, 'w') as f:
                for item, item_labels in class_lists.items():
                    if not item_labels:
                        continue
-                    item_labels = [label_cat.items[l].name for l in item_labels]
+                    item_labels = [self._strip_label(self.get_label(l))
-                    presented = label.name in item_labels
+                        for l in item_labels]
-                    f.write('%s % d\n' % \
+                    presented = label in item_labels
-                        (item, 1 if presented else -1))
+                    f.write('%s % d\n' % (item, 1 if presented else -1))
    def save_clsdet_lists(self, subset_name, clsdet_list):
        os.makedirs(self._cls_subsets_dir, exist_ok=True)
@ -348,17 +355,124 @@ class _Converter:
                else:
                    f.write('%s\n' % (item))
-    def save_segm(self, path, annotation, colormap):
+    def save_segm(self, path, annotation, colormap=None):
        data = annotation.image
        if self._apply_colormap:
            if colormap is None:
-                colormap = VocColormap
+                colormap = self._categories[AnnotationType.mask].colormap
            data = self._remap_mask(data)
            data = apply_colormap(data, colormap)
        save_image(path, data)
    def save_label_map(self):
        """Write ``self._label_map`` with ``write_label_map`` to the
        ``VocPath.LABELMAP_FILE`` file located in the save directory."""
        write_label_map(
            osp.join(self._save_dir, VocPath.LABELMAP_FILE),
            self._label_map)
    @staticmethod
    def _strip_label(label):
        return label.lower().strip()
    def _load_categories(self, label_map_source=None):
        """Resolve the requested label map and derive the output categories.

        Each label map entry is a 3-element list. Element 0 receives the
        label color (assigned at the end of this method), element 1 is the
        list searched by ``_is_part`` and element 2 is the collection returned
        by ``_get_actions``.

        Args:
            label_map_source: Selects where the label map comes from:
                - ``'voc'``: the map returned by ``make_voc_label_map()``;
                - ``'source'``: one entry per label of the input dataset;
                - ``'guess'`` or ``None``: the VOC map extended with the
                  input dataset labels;
                - a ``dict``: used directly as the label map (and modified
                  in place when colors are filled in below);
                - a ``str`` naming an existing file: parsed with
                  ``parse_label_map``.

        Raises:
            Exception: If ``label_map_source`` matches none of the above.

        Sets ``self._categories``, ``self._label_map`` and
        ``self._label_id_mapping``.
        """
        if label_map_source == LabelmapType.voc.name:
            # strictly use VOC default labelmap
            label_map = make_voc_label_map()
        elif label_map_source == LabelmapType.source.name:
            # generate colormap from the input dataset
            # NOTE(review): names are used as-is here, while the 'guess'
            # branch normalizes them with _strip_label - confirm intended.
            labels = self._extractor.categories() \
                .get(AnnotationType.label, LabelCategories())
            label_map = OrderedDict(
                (item.name, [None, [], []]) for item in labels.items)
        elif label_map_source in [LabelmapType.guess.name, None]:
            # generate colormap for union of VOC and input dataset labels
            label_map = make_voc_label_map()
            rebuild_colormap = False
            source_labels = self._extractor.categories() \
                .get(AnnotationType.label, LabelCategories())
            for label in source_labels.items:
                label_name = self._strip_label(label.name)
                if label_name not in label_map:
                    rebuild_colormap = True
                # A label carrying attributes replaces an existing entry too,
                # which also resets that entry's element 1 to an empty list.
                if label.attributes or label_name not in label_map:
                    label_map[label_name] = [None, [], label.attributes]
            if rebuild_colormap:
                # At least one label is new: drop every stored color.
                # Presumably make_voc_categories() then generates a fresh
                # colormap - TODO confirm.
                for item in label_map.values():
                    item[0] = None
        elif isinstance(label_map_source, dict):
            label_map = label_map_source
        elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
            label_map = parse_label_map(label_map_source)
        else:
            raise Exception("Wrong labelmap specified, "
                "expected one of %s or a file path" % \
                ', '.join(t.name for t in LabelmapType))
        self._categories = make_voc_categories(label_map)
        self._label_map = label_map
        # Copy the colors of the resulting mask colormap back into the label
        # map entries, so that element 0 of each entry holds its color.
        colormap = self._categories[AnnotationType.mask].colormap
        for label_id, color in colormap.items():
            label_desc = label_map[
                self._categories[AnnotationType.label].items[label_id].name]
            label_desc[0] = color
        # Must run after self._categories is set: _make_label_id_map reads it.
        self._label_id_mapping = self._make_label_id_map()
    def _is_label(self, s):
        return self._label_map.get(self._strip_label(s)) is not None
    def _is_part(self, s):
        s = self._strip_label(s)
        for label_desc in self._label_map.values():
            if s in label_desc[1]:
                return True
        return False
    def _is_action(self, label, s):
        return self._strip_label(s) in self._get_actions(label)
    def _get_actions(self, label):
        label_desc = self._label_map.get(self._strip_label(label))
        if not label_desc:
            return []
        return label_desc[2]
    def _make_label_id_map(self):
        """Build a function translating source label ids to output label ids.

        Source labels are the items of the extractor's label categories; they
        are matched by name against the label categories built for the output
        (``self._categories``). Source labels without a counterpart are sent
        to id 0 ("background") and reported in a single warning.

        Returns:
            A callable ``map_id(src_id)`` returning the output label id.
            Ids that belong to no source label are also mapped to 0.
        """
        # The source may provide no label categories at all (the same case
        # _load_categories tolerates); treat that as "no labels" instead of
        # failing with a KeyError.
        source_categories = self._extractor.categories() \
            .get(AnnotationType.label)
        source_items = source_categories.items \
            if source_categories is not None else []

        source_labels = {
            src_id: item.name for src_id, item in enumerate(source_items)
        }
        target_labels = {
            item.name: dst_id for dst_id, item in
            enumerate(self._categories[AnnotationType.label].items)
        }
        id_mapping = {
            src_id: target_labels.get(src_label, 0)
            for src_id, src_label in source_labels.items()
        }

        void_labels = [src_label for src_label in source_labels.values()
            if src_label not in target_labels]
        if void_labels:
            log.warning("The following labels are remapped to background: %s" %
                ', '.join(void_labels))

        def map_id(src_id):
            # NOTE(review): the consumer may probe ids beyond the source
            # label list (e.g. when building a lookup table over all mask
            # values) - confirm against remap_mask. Unknown ids fall back to
            # background rather than raising.
            return id_mapping.get(src_id, 0)
        return map_id
    def _remap_mask(self, mask):
        """Return the result of ``remap_mask`` applied to ``mask`` with
        ``self._label_id_mapping`` as the id mapping function."""
        id_mapper = self._label_id_mapping
        return remap_mask(mask, id_mapper)
 class VocConverter(Converter):
    def __init__(self,
-            tasks=None, save_images=False, apply_colormap=False,
+            tasks=None, save_images=False, apply_colormap=False, label_map=None,
            cmdline_args=None):
        super().__init__()
@ -366,6 +480,7 @@ class VocConverter(Converter):
            'tasks': tasks,
            'save_images': save_images,
            'apply_colormap': apply_colormap,
            'label_map': label_map,
        }
        if cmdline_args is not None:
@ -375,6 +490,12 @@ class VocConverter(Converter):
    def _split_tasks_string(s):
        """Turn a comma-separated string of task names into a list of the
        ``VocTask`` members looked up by those names.

        Whitespace around each name is ignored.
        """
        task_names = (chunk.strip() for chunk in s.split(','))
        return [VocTask[task_name] for task_name in task_names]
    @staticmethod
    def _get_labelmap(s):
        """Validate a ``--label-map`` command line value.

        Args:
            s: Either a path to an existing file or the name of a
                ``LabelmapType`` member.

        Returns:
            ``s`` unchanged when it names an existing file, otherwise the
            name of the matching ``LabelmapType`` member.

        Raises:
            argparse.ArgumentTypeError: If ``s`` is neither an existing file
                nor a ``LabelmapType`` name.
        """
        if osp.isfile(s):
            return s
        try:
            return LabelmapType[s].name
        except KeyError:
            # This function is used as an argparse ``type=`` callable.
            # argparse reports only ArgumentTypeError, TypeError and
            # ValueError as usage errors; a bare KeyError would surface as a
            # traceback instead.
            import argparse
            raise argparse.ArgumentTypeError(
                "Wrong labelmap '%s', expected a file path or one of %s" % (
                    s, ', '.join(t.name for t in LabelmapType)))
    @classmethod
    def build_cmdline_parser(cls, parser=None):
        import argparse
@ -386,6 +507,9 @@ class VocConverter(Converter):
        parser.add_argument('--apply-colormap', type=bool, default=True,
            help="Use colormap for class and instance masks "
                "(default: %(default)s)")
        parser.add_argument('--label-map', type=cls._get_labelmap, default=None,
            help="Labelmap file path or one of %s" % \
                ', '.join(t.name for t in LabelmapType))
        parser.add_argument('--tasks', type=cls._split_tasks_string,
            default=None,
            help="VOC task filter, comma-separated list of {%s} "
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@ -53,6 +53,11 @@ class Categories:
    def __init__(self, attributes=None):
        if attributes is None:
            attributes = set()
        else:
            if not isinstance(attributes, set):
                attributes = set(attributes)
            for attr in attributes:
                assert isinstance(attr, str)
        self.attributes = attributes
    def __eq__(self, other):
@ -62,7 +67,7 @@ class Categories:
            (self.attributes == other.attributes)
 class LabelCategories(Categories):
-    Category = namedtuple('Category', ['name', 'parent'])
+    Category = namedtuple('Category', ['name', 'parent', 'attributes'])
    def __init__(self, items=None, attributes=None):
        super().__init__(attributes=attributes)
@ -81,11 +86,18 @@ class LabelCategories(Categories):
            indices[item.name] = index
        self._indices = indices
-    def add(self, name, parent=None):
+    def add(self, name, parent=None, attributes=None):
        assert name not in self._indices
        if attributes is None:
            attributes = set()
        else:
            if not isinstance(attributes, set):
                attributes = set(attributes)
            for attr in attributes:
                assert isinstance(attr, str)
        index = len(self.items)
-        self.items.append(self.Category(name, parent))
+        self.items.append(self.Category(name, parent, attributes))
        self._indices[name] = index
    def find(self, name):
--- a/datumaro/datumaro/components/extractors/voc.py
+++ b/datumaro/datumaro/components/extractors/voc.py
@ -4,18 +4,15 @@
 # SPDX-License-Identifier: MIT
 from collections import defaultdict
 from itertools import chain
 import os
 import os.path as osp
 from xml.etree import ElementTree as ET
 from datumaro.components.extractor import (Extractor, DatasetItem,
    AnnotationType, LabelObject, MaskObject, BboxObject,
    LabelCategories, MaskCategories
 )
-from datumaro.components.formats.voc import (VocLabel, VocAction,
+from datumaro.components.formats.voc import (
-    VocBodyPart, VocTask, VocPath, VocColormap, VocInstColormap,
+    VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories
    VocIgnoredLabel
 )
 from datumaro.util import dir_items
 from datumaro.util.image import lazy_image
@ -24,31 +21,6 @@ from datumaro.util.mask_tools import lazy_mask, invert_colormap
 _inverse_inst_colormap = invert_colormap(VocInstColormap)
 # pylint: disable=pointless-statement
 def _make_voc_categories():
    """Build the category dictionary from the built-in VOC definitions.

    Returns a dict with two entries: ``AnnotationType.label`` holds a
    ``LabelCategories`` with one label per member of ``VocLabel``,
    ``VocAction`` and ``VocBodyPart`` (added in that order), and
    ``AnnotationType.mask`` holds a ``MaskCategories`` built from
    ``VocColormap`` with its keys translated by ``label_id``.
    """
    categories = {}
    label_categories = LabelCategories()
    for label in chain(VocLabel, VocAction, VocBodyPart):
        label_categories.add(label.name)
    categories[AnnotationType.label] = label_categories
    def label_id(class_index):
        # Index 0 and the ignored-label index are passed through unchanged.
        if class_index in [0, VocIgnoredLabel]:
            return class_index
        # Other indices are resolved by VocLabel member name; the position
        # found in label_categories is shifted by one.
        class_label = VocLabel(class_index).name
        label_id, _ = label_categories.find(class_label)
        return label_id + 1
    colormap = { label_id(idx): tuple(color) \
        for idx, color in VocColormap.items() }
    mask_categories = MaskCategories(colormap)
    mask_categories.inverse_colormap # force init
    categories[AnnotationType.mask] = mask_categories
    return categories
 # pylint: enable=pointless-statement
 class VocExtractor(Extractor):
    class Subset(Extractor):
        def __init__(self, name, parent):
@ -58,8 +30,8 @@ class VocExtractor(Extractor):
            self.items = []
        def __iter__(self):
-            for item in self.items:
+            for item_id in self.items:
-                yield self._parent._get(item, self._name)
+                yield self._parent._get(item_id, self._name)
        def __len__(self):
            return len(self.items)
@ -87,10 +59,10 @@ class VocExtractor(Extractor):
        label_annotations = defaultdict(list)
        label_anno_files = [s for s in dir_files \
            if '_' in s and s[s.rfind('_') + 1:] in subset_names]
-        for ann_file in label_anno_files:
+        for ann_filename in label_anno_files:
-            with open(osp.join(subsets_dir, ann_file + '.txt'), 'r') as f:
+            with open(osp.join(subsets_dir, ann_filename + '.txt'), 'r') as f:
-                label = ann_file[:ann_file.rfind('_')]
+                label = ann_filename[:ann_filename.rfind('_')]
-                label_id = VocLabel[label].value
+                label_id = self._get_label_id(label)
                for line in f:
                    item, present = line.split()
                    if present == '1':
@ -113,7 +85,11 @@ class VocExtractor(Extractor):
        self._annotations[VocTask.detection] = det_annotations
    def _load_categories(self):
-        self._categories = _make_voc_categories()
+        label_map = None
        label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        self._categories = make_voc_categories(label_map)
    def __init__(self, path, task):
        super().__init__()
@ -146,17 +122,17 @@ class VocExtractor(Extractor):
            for item in subset:
                yield item
-    def _get(self, item, subset_name):
+    def _get(self, item_id, subset_name):
        image = None
        image_path = osp.join(self._path, VocPath.IMAGES_DIR,
-            item + VocPath.IMAGE_EXT)
+            item_id + VocPath.IMAGE_EXT)
        if osp.isfile(image_path):
            image = lazy_image(image_path)
-        annotations = self._get_annotations(item)
+        annotations = self._get_annotations(item_id)
        return DatasetItem(annotations=annotations,
-            id=item, subset=subset_name, image=image)
+            id=item_id, subset=subset_name, image=image)
    def _get_label_id(self, label):
        label_id, _ = self._categories[AnnotationType.label].find(label)
@ -187,11 +163,10 @@ class VocExtractor(Extractor):
        cls_annotations = self._annotations.get(VocTask.classification)
        if cls_annotations is not None and \
-           self._task is VocTask.classification:
+                self._task is VocTask.classification:
            item_labels = cls_annotations.get(item)
            if item_labels is not None:
-                for label in item_labels:
+                for label_id in item_labels:
                    label_id = self._get_label_id(VocLabel(label).name)
                    item_annotations.append(LabelObject(label_id))
        det_annotations = self._annotations.get(VocTask.detection)
@ -215,16 +190,16 @@ class VocExtractor(Extractor):
                    continue
                difficult_elem = object_elem.find('difficult')
-                if difficult_elem is not None:
+                attributes['difficult'] = difficult_elem is not None and \
-                    attributes['difficult'] = (difficult_elem.text == '1')
+                    difficult_elem.text == '1'
                truncated_elem = object_elem.find('truncated')
-                if truncated_elem is not None:
+                attributes['truncated'] = truncated_elem is not None and \
-                    attributes['truncated'] = (truncated_elem.text == '1')
+                    truncated_elem.text == '1'
                occluded_elem = object_elem.find('occluded')
-                if occluded_elem is not None:
+                attributes['occluded'] = occluded_elem is not None and \
-                    attributes['occluded'] = (occluded_elem.text == '1')
+                    occluded_elem.text == '1'
                pose_elem = object_elem.find('pose')
                if pose_elem is not None:
@ -238,34 +213,34 @@ class VocExtractor(Extractor):
                    attributes['point'] = point
                actions_elem = object_elem.find('actions')
-                if actions_elem is not None and \
+                actions = {a: False
-                   self._task is VocTask.action_classification:
+                    for a in self._categories[AnnotationType.label] \
-                    for action in VocAction:
+                        .items[obj_label_id].attributes}
-                        action_elem = actions_elem.find(action.name)
+                if actions_elem is not None:
-                        if action_elem is None or action_elem.text != '1':
+                    for action_elem in actions_elem:
-                            continue
+                        actions[action_elem.tag] = (action_elem.text == '1')
-
+                for action, present in actions.items():
-                        act_label_id = self._get_label_id(action.name)
+                    attributes[action] = present
-                        assert group in [None, obj_id]
+
-                        group = obj_id
+                for part_elem in object_elem.findall('part'):
-                        item_annotations.append(LabelObject(act_label_id,
+                    part = part_elem.find('name').text
-                            group=obj_id))
+                    part_label_id = self._get_label_id(part)
-
+                    bbox = self._parse_bbox(part_elem)
-                if self._task is VocTask.person_layout:
+                    group = obj_id
-                    for part_elem in object_elem.findall('part'):
+
-                        part = part_elem.find('name').text
+                    if self._task is not VocTask.person_layout:
-                        part_label_id = self._get_label_id(part)
+                        break
-                        bbox = self._parse_bbox(part_elem)
+                    item_annotations.append(BboxObject(
-                        group = obj_id
+                        *bbox, label=part_label_id,
-                        item_annotations.append(BboxObject(
+                        group=obj_id))
-                            *bbox, label=part_label_id,
+
-                            group=obj_id))
+                if self._task is VocTask.person_layout and group is None:
-
+                    continue
-                if self._task in [VocTask.action_classification, VocTask.person_layout]:
+                if self._task is VocTask.action_classification and not actions:
-                    if group is None:
+                    continue
-                        continue
+
-
+                item_annotations.append(BboxObject(
-                item_annotations.append(BboxObject(*obj_bbox, label=obj_label_id,
+                    *obj_bbox, label=obj_label_id,
                    attributes=attributes, id=obj_id, group=group))
        return item_annotations
@ -283,58 +258,48 @@ class VocExtractor(Extractor):
            return None
 class VocClassificationExtractor(VocExtractor):
    _ANNO_DIR = 'Main'
    def __init__(self, path):
        super().__init__(path, task=VocTask.classification)
-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets
        self._load_cls_annotations(subsets_dir, subsets)
 class VocDetectionExtractor(VocExtractor):
    _ANNO_DIR = 'Main'
    def __init__(self, path):
        super().__init__(path, task=VocTask.detection)
-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets
        self._load_det_annotations()
 class VocSegmentationExtractor(VocExtractor):
    _ANNO_DIR = 'Segmentation'
    def __init__(self, path):
        super().__init__(path, task=VocTask.segmentation)
-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Segmentation')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets
 class VocLayoutExtractor(VocExtractor):
    _ANNO_DIR = 'Layout'
    def __init__(self, path):
        super().__init__(path, task=VocTask.person_layout)
-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Layout')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets
        self._load_det_annotations()
 class VocActionExtractor(VocExtractor):
    _ANNO_DIR = 'Action'
    def __init__(self, path):
        super().__init__(path, task=VocTask.action_classification)
-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Action')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets
@ -414,7 +379,7 @@ class VocResultsExtractor(Extractor):
            if mark != task_desc['mark']:
                continue
-            label_id = VocLabel[label].value
+            label_id = self._get_label_id(label)
            anns = defaultdict(list)
            with open(osp.join(task_dir, ann_file + ann_ext), 'r') as f:
                for line in f:
@ -441,7 +406,11 @@ class VocResultsExtractor(Extractor):
            VocTask.action_classification)
    def _load_categories(self):
-        self._categories = _make_voc_categories()
+        label_map = None
        label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        self._categories = make_voc_categories(label_map)
    def _get_label_id(self, label):
        label_id = self._categories[AnnotationType.label].find(label)
@ -511,9 +480,8 @@ class VocComp_1_2_Extractor(VocResultsExtractor):
        if cls_ann is not None:
            for desc in cls_ann:
                label_id, conf = desc
                label_id = self._get_label_id(VocLabel(int(label_id)).name)
                annotations.append(LabelObject(
-                    label_id,
+                    int(label_id),
                    attributes={ 'score': float(conf) }
                ))
@ -538,11 +506,10 @@ class VocComp_3_4_Extractor(VocResultsExtractor):
        if det_ann is not None:
            for desc in det_ann:
                label_id, conf, left, top, right, bottom = desc
                label_id = self._get_label_id(VocLabel(int(label_id)).name)
                annotations.append(BboxObject(
                    x=float(left), y=float(top),
                    w=float(right) - float(left), h=float(bottom) - float(top),
-                    label=label_id,
+                    label=int(label_id),
                    attributes={ 'score': float(conf) }
                ))
@ -639,7 +606,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
                conf = float(layout_elem.find('confidence').text)
                parts = []
                for part_elem in layout_elem.findall('part'):
-                    label_id = VocBodyPart[part_elem.find('class').text].value
+                    label_id = self._get_label_id(part_elem.find('class').text)
                    bbox_elem = part_elem.find('bndbox')
                    xmin = float(bbox_elem.find('xmin').text)
                    xmax = float(bbox_elem.find('xmax').text)
@ -671,8 +638,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
                }
                for part in parts:
-                    part_id, bbox = part
+                    label_id, bbox = part
                    label_id = self._get_label_id(VocBodyPart(part_id).name)
                    annotations.append(BboxObject(
                        *bbox, label=label_id,
                        attributes=attributes))
@ -691,6 +657,12 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
        self._subsets = subsets
        self._annotations = dict(annotations)
    def _load_categories(self):
        """Build the categories of this extractor from the VOC action names.

        Every member of ``VocAction`` becomes a label, in enum iteration
        order, with no parts and no actions attached.
        """
        from collections import OrderedDict
        from datumaro.components.formats.voc import VocAction

        # NOTE(review): the color slot is an empty list here rather than None.
        # make_voc_categories() tests colors with `is not None`, so this map
        # counts as having colors defined - confirm this is intended.
        action_map = OrderedDict()
        for action in VocAction:
            action_map[action.name] = [[], [], []]

        self._categories = make_voc_categories(action_map)
    def _get_annotations(self, item, subset_name):
        annotations = []
@ -698,9 +670,8 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
        if action_ann is not None:
            for desc in action_ann:
                action_id, obj_id, conf = desc
                label_id = self._get_label_id(VocAction(int(action_id)).name)
                annotations.append(LabelObject(
-                    label_id,
+                    action_id,
                    attributes={
                        'score': conf,
                        'object_id': int(obj_id),
--- a/datumaro/datumaro/components/formats/voc.py
+++ b/datumaro/datumaro/components/formats/voc.py
@ -5,8 +5,13 @@
 from collections import OrderedDict
 from enum import Enum
 from itertools import chain
 import numpy as np
 from datumaro.components.extractor import (AnnotationType,
    LabelCategories, MaskCategories
 )
 VocTask = Enum('VocTask', [
    'classification',
@ -17,6 +22,7 @@ VocTask = Enum('VocTask', [
 ])
 VocLabel = Enum('VocLabel', [
    ('background', 0),
    ('aeroplane', 1),
    ('bicycle', 2),
    ('bird', 3),
@ -37,10 +43,9 @@ VocLabel = Enum('VocLabel', [
    ('sofa', 18),
    ('train', 19),
    ('tvmonitor', 20),
    ('ignored', 255),
 ])
 VocIgnoredLabel = 255
 VocPose = Enum('VocPose', [
    'Unspecified',
    'Left',
@ -86,7 +91,7 @@ def generate_colormap(length=256):
    )
 VocColormap = {id: color for id, color in generate_colormap(256).items()
-    if id in [l.value for l in VocLabel] + [0, VocIgnoredLabel]}
+    if id in [l.value for l in VocLabel]}
 VocInstColormap = generate_colormap(256)
 class VocPath:
@ -97,6 +102,7 @@ class VocPath:
    SUBSETS_DIR = 'ImageSets'
    IMAGE_EXT = '.jpg'
    SEGM_EXT = '.png'
    LABELMAP_FILE = 'labelmap.txt'
    TASK_DIR = {
        VocTask.classification: 'Main',
@ -105,3 +111,95 @@ class VocPath:
        VocTask.action_classification: 'Action',
        VocTask.person_layout: 'Layout',
    }
 def make_voc_label_map():
    """Create the default VOC label map.

    Returns an OrderedDict that maps each ``VocLabel`` name, ordered by the
    enum value, to a ``[color, parts, actions]`` list. The color is taken
    from ``VocColormap``; parts and actions are empty for every label except
    'person', which gets all ``VocBodyPart`` and ``VocAction`` names.
    """
    label_map = OrderedDict()
    for label in sorted(VocLabel, key=lambda l: l.value):
        label_map[label.name] = [VocColormap[label.value], [], []]

    person_desc = label_map[VocLabel.person.name]
    person_desc[1] = [part.name for part in VocBodyPart]
    person_desc[2] = [action.name for action in VocAction]
    return label_map
 def parse_label_map(path):
    """Read a label map file.

    Each meaningful line has the form ``name:r,g,b:part1,part2:act1,act2``,
    where every field after the name may be empty or missing. Blank lines
    and lines starting with '#' are skipped.

    Returns None when ``path`` is empty, otherwise an OrderedDict mapping
    each label name to ``[color, parts, actions]`` in file order. The color
    is None when not specified, or a tuple of three ints stored in reverse
    of the order written in the file. Parts and actions are lists of
    strings.
    """
    if not path:
        return None

    result = OrderedDict()
    with open(path, 'r') as stream:
        for raw_line in stream:
            entry = raw_line.strip()
            # blank lines and comments carry no label
            if entry == '' or entry.startswith('#'):
                continue

            # name, color, parts, actions; pad the optional trailing fields
            # with empty strings so they can be handled uniformly
            fields = entry.split(':')
            name = fields[0]
            color_field, parts_field, actions_field = \
                (fields[1:4] + ['', '', ''])[:3]

            color = None
            if color_field:
                components = color_field.split(',')
                assert len(components) == 3, \
                    "Label '%s' has wrong color, expected 'r,g,b', got '%s'" % \
                    (name, components)
                color = tuple(reversed([int(c) for c in components]))

            parts = parts_field.split(',') if parts_field else []
            actions = actions_field.split(',') if actions_field else []

            result[name] = [color, parts, actions]
    return result
 def write_label_map(path, label_map):
    """Save a label map to a text file.

    The file starts with a '#' header line, followed by one
    ``name:color:parts:actions`` line per entry of ``label_map``, in
    iteration order. ``label_map`` maps a label name to
    ``[color, parts, actions]``. The color components are written in
    reverse of their stored order, or as an empty field when the color is
    not set. Parts and actions are written as comma-separated lists.
    """
    def _comma_joined(values):
        return ','.join(str(v) for v in values)

    with open(path, 'w') as out:
        out.write('# label:color_rgb:parts:actions\n')
        for name, desc in label_map.items():
            color = desc[0]
            fields = [
                name,
                _comma_joined(color[::-1]) if color else '',
                _comma_joined(desc[1]),
                _comma_joined(desc[2]),
            ]
            out.write(':'.join(fields) + '\n')
 # pylint: disable=pointless-statement
 def make_voc_categories(label_map=None):
    """Build label and mask categories from a VOC label map.

    label_map: a mapping of label name to ``[color, parts, actions]``.
        When None, the default map from make_voc_label_map() is used.

    Returns a dict with two entries:
    - AnnotationType.label: LabelCategories holding every label of the map
      (with its actions passed as the label attributes), followed by every
      distinct part name, in first-seen order.
    - AnnotationType.mask: MaskCategories with the colormap. If no label
      defines a color, the colormap is generated for ``len(label_map)``
      entries; otherwise only the colors that are defined are copied, so
      a partially colored map never puts None into the colormap.
    """
    if label_map is None:
        label_map = make_voc_label_map()

    categories = {}

    label_categories = LabelCategories()
    label_categories.attributes.update(['difficult', 'truncated', 'occluded'])
    for label, desc in label_map.items():
        label_categories.add(label, attributes=desc[2])
    # An OrderedDict is used as an ordered set: each part is registered
    # once, after all the main labels
    for part in OrderedDict((k, None) for k in chain.from_iterable(
            desc[1] for desc in label_map.values())):
        label_categories.add(part)
    categories[AnnotationType.label] = label_categories

    has_colors = any(desc[0] is not None for desc in label_map.values())
    if not has_colors: # generate new colors
        colormap = generate_colormap(len(label_map))
    else: # only copy defined colors
        colormap = { label_categories.find(name)[0]: desc[0]
            for name, desc in label_map.items() if desc[0] is not None }
    mask_categories = MaskCategories(colormap)
    mask_categories.inverse_colormap # force init
    categories[AnnotationType.mask] = mask_categories
    return categories
 # pylint: enable=pointless-statement
--- a/datumaro/datumaro/util/mask_tools.py
+++ b/datumaro/datumaro/util/mask_tools.py
@ -69,6 +69,16 @@ def apply_colormap(mask, colormap=None):
    painted_mask = np.reshape(painted_mask, (*mask.shape, 3))
    return painted_mask.astype(np.float32)
 def remap_mask(mask, map_fn):
    """Change mask elements from one colormap to another.

    mask: a 2-dimensional array.
    map_fn: called once per mask element with a 1-D array holding that
        single element; expected to return the replacement value.

    Returns a new array with the shape of the input mask.
    """
    original_shape = mask.shape
    assert len(original_shape) == 2

    # One single-element row per pixel, so that map_fn is applied per value
    pixels = np.reshape(mask, (-1, 1))
    remapped = np.apply_along_axis(map_fn, 1, pixels)
    return np.reshape(remapped, original_shape)
 def load_mask(path, colormap=None):
    mask = load_image(path)
--- a/datumaro/tests/test_voc_format.py
+++ b/datumaro/tests/test_voc_format.py
@ -1,5 +1,4 @@
 import cv2
 from itertools import zip_longest
 import numpy as np
 import os
 import os.path as osp
@ -235,6 +234,7 @@ class VocExtractorTest(TestCase):
                    {
                        'pose': VOC.VocPose(1).name,
                        'truncated': True,
                        'occluded': False,
                        'difficult': False,
                    },
                    obj1.attributes)
@ -365,16 +365,9 @@ class VocExtractorTest(TestCase):
                self.assertFalse(obj2 is None)
                self.assertListEqual([4, 5, 2, 2], obj2.get_bbox())
                count = 1
                for action in VOC.VocAction:
-                    if action.value % 2 == 1:
+                    attr = obj2.attributes[action.name]
-                        count += 1
+                    self.assertEqual(attr, action.value % 2)
                        ann = find(item.annotations,
                            lambda x: x.type == AnnotationType.label and \
                                get_label(extractor, x.label) == action.name)
                        self.assertFalse(ann is None)
                        self.assertTrue(obj2.id == ann.group)
                self.assertEqual(count, len(item.annotations))
            subset_name = 'test'
            generated_subset = generated_subsets[subset_name]
@ -388,50 +381,66 @@ class VocExtractorTest(TestCase):
                self.assertEqual(0, len(item.annotations))
 class VocConverterTest(TestCase):
-    def _test_can_save_voc(self, extractor_type, converter_type, test_dir):
+    def _test_can_save_voc(self, src_extractor, converter, test_dir,
-        dummy_dir = osp.join(test_dir, 'dummy')
+            target_extractor=None):
-        generate_dummy_voc(dummy_dir)
+        converter(src_extractor, test_dir)
        gen_extractor = extractor_type(dummy_dir)
-        conv_dir = osp.join(test_dir, 'converted')
+        result_extractor = VocImporter()(test_dir).make_dataset()
-        converter = converter_type()
+        if target_extractor is None:
-        converter(gen_extractor, conv_dir)
+            target_extractor = src_extractor
-        conv_extractor = extractor_type(conv_dir)
+        if AnnotationType.label in target_extractor.categories():
-        for item_a, item_b in zip_longest(gen_extractor, conv_extractor):
+            self.assertEqual(
                target_extractor.categories()[AnnotationType.label].items,
                result_extractor.categories()[AnnotationType.label].items)
        if AnnotationType.mask in target_extractor.categories():
            self.assertEqual(
                target_extractor.categories()[AnnotationType.mask].colormap,
                result_extractor.categories()[AnnotationType.mask].colormap)
        self.assertEqual(len(target_extractor), len(result_extractor))
        for item_a, item_b in zip(target_extractor, result_extractor):
            self.assertEqual(item_a.id, item_b.id)
            self.assertEqual(len(item_a.annotations), len(item_b.annotations))
            for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
                self.assertEqual(ann_a.type, ann_b.type)
    def _test_can_save_voc_dummy(self, extractor_type, converter, test_dir):
        # Generates a dummy VOC dataset under <test_dir>/dummy, loads it with
        # the given extractor type and runs the common save check, converting
        # into <test_dir>/converted.
        source_dir = osp.join(test_dir, 'dummy')
        target_dir = osp.join(test_dir, 'converted')

        generate_dummy_voc(source_dir)
        self._test_can_save_voc(extractor_type(source_dir), converter,
            target_dir)
    def test_can_save_voc_cls(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
+            self._test_can_save_voc_dummy(
-                VocClassificationExtractor, VocClassificationConverter,
+                VocClassificationExtractor, VocClassificationConverter(label_map='voc'),
                test_dir.path)
    def test_can_save_voc_det(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
+            self._test_can_save_voc_dummy(
-                VocDetectionExtractor, VocDetectionConverter,
+                VocDetectionExtractor, VocDetectionConverter(label_map='voc'),
                test_dir.path)
    def test_can_save_voc_segm(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
+            self._test_can_save_voc_dummy(
-                VocSegmentationExtractor, VocSegmentationConverter,
+                VocSegmentationExtractor, VocSegmentationConverter(label_map='voc'),
                test_dir.path)
    def test_can_save_voc_layout(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
+            self._test_can_save_voc_dummy(
-                VocLayoutExtractor, VocLayoutConverter,
+                VocLayoutExtractor, VocLayoutConverter(label_map='voc'),
                test_dir.path)
    def test_can_save_voc_action(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
+            self._test_can_save_voc_dummy(
-                VocActionExtractor, VocActionConverter,
+                VocActionExtractor, VocActionConverter(label_map='voc'),
                test_dir.path)
    def test_can_save_dataset_with_no_subsets(self):
@ -451,28 +460,121 @@ class VocConverterTest(TestCase):
                for item in items:
                    yield item
            def categories(self):
                return VOC.make_voc_categories()
        with TestDir() as test_dir:
            self._test_can_save_voc(TestExtractor(), VocConverter(label_map='voc'),
                test_dir.path)
    def test_dataset_with_voc_labelmap(self):
        class SrcExtractor(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                        BboxObject(2, 3, 4, 5, label=0, id=1),
                        BboxObject(1, 2, 3, 4, label=1, id=2),
                    ])
            def categories(self):
                label_cat = LabelCategories()
-                for label in VOC.VocLabel:
+                label_cat.add(VOC.VocLabel(1).name)
-                    label_cat.add(label.name)
+                label_cat.add('non_voc_label')
                return {
                    AnnotationType.label: label_cat,
                }
        class DstExtractor(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                        BboxObject(2, 3, 4, 5, label=0, id=1),
                    ])
            def categories(self):
                return VOC.make_voc_categories()
        with TestDir() as test_dir:
-            src_extractor = TestExtractor()
+            self._test_can_save_voc(
-            converter = VocConverter()
+                SrcExtractor(), VocConverter(label_map='voc'),
                test_dir.path, target_extractor=DstExtractor())
-            converter(src_extractor, test_dir.path)
+    def test_dataset_with_guessed_labelmap(self):
        class SrcExtractor(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                        BboxObject(2, 3, 4, 5, label=0, id=1),
                        BboxObject(1, 2, 3, 4, label=1, id=2),
                    ])
-            dst_extractor = VocImporter()(test_dir.path).make_dataset()
+            def categories(self):
                label_cat = LabelCategories()
                label_cat.add(VOC.VocLabel(1).name)
                label_cat.add('non_voc_label')
                return {
                    AnnotationType.label: label_cat,
                }
        class DstExtractor(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                        BboxObject(2, 3, 4, 5, label=0, id=1),
                        BboxObject(1, 2, 3, 4,
                            label=self.categories()[AnnotationType.label] \
                                .find('non_voc_label')[0], id=2),
                    ])
-            self.assertEqual(len(src_extractor), len(dst_extractor))
+            def categories(self):
-            for item_a, item_b in zip_longest(src_extractor, dst_extractor):
+                label_map = VOC.make_voc_label_map()
-                self.assertEqual(item_a.id, item_b.id)
+                label_map['non_voc_label'] = [None, [], []]
-                self.assertEqual(len(item_a.annotations), len(item_b.annotations))
+                for label_desc in label_map.values():
-                for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
+                    label_desc[0] = None # rebuild colormap
-                    self.assertEqual(ann_a.type, ann_b.type)
+                return VOC.make_voc_categories(label_map)
        with TestDir() as test_dir:
            self._test_can_save_voc(
                SrcExtractor(), VocConverter(label_map='guess'),
                test_dir.path, target_extractor=DstExtractor())
    def test_dataset_with_fixed_labelmap(self):
        # Saves a dataset with an explicitly given label map that knows only
        # 'label', its two parts and its two actions. The expected dataset
        # has no box for 'foreign_label', which is absent from the map, and
        # uses the label indices produced by the map.
        label_map = {
            'label': [None, ['label_part1', 'label_part2'], ['act1', 'act2']]
        }

        class SrcExtractor(Extractor):
            def __iter__(self):
                annotations = [
                    BboxObject(2, 3, 4, 5, label=0, id=1),
                    BboxObject(1, 2, 3, 4, label=1, id=2, group=2,
                        attributes={'act1': True}),
                    BboxObject(2, 3, 4, 5, label=2, id=3, group=2),
                    BboxObject(2, 3, 4, 6, label=3, id=4, group=2),
                ]
                yield DatasetItem(id=1, annotations=annotations)

            def categories(self):
                label_cat = LabelCategories()
                label_cat.add('foreign_label')
                label_cat.add('label', attributes=['act1', 'act2'])
                label_cat.add('label_part1')
                label_cat.add('label_part2')
                return { AnnotationType.label: label_cat }

        class DstExtractor(Extractor):
            def __iter__(self):
                annotations = [
                    BboxObject(1, 2, 3, 4, label=0, id=2, group=2,
                        attributes={'act1': True, 'act2': False}),
                    BboxObject(2, 3, 4, 5, label=1, id=3, group=2),
                    BboxObject(2, 3, 4, 6, label=2, id=4, group=2),
                ]
                yield DatasetItem(id=1, annotations=annotations)

            def categories(self):
                return VOC.make_voc_categories(label_map)

        with TestDir() as test_dir:
            source = SrcExtractor()
            expected = DstExtractor()
            self._test_can_save_voc(
                source, VocConverter(label_map=label_map),
                test_dir.path, target_extractor=expected)
 class VocImporterTest(TestCase):
    def test_can_import(self):
@ -487,3 +589,16 @@ class VocImporterTest(TestCase):
            self.assertEqual(
                sum([len(s) for _, s in subsets.items()]),
                len(dataset))
 class VocFormatTest(TestCase):
    # Checks of the VOC format helper functions.

    def test_can_write_and_parse_labelmap(self):
        # A label map written to a file must be parsed back unchanged,
        # including an extra label that has parts and actions but no color.
        expected = VOC.make_voc_label_map()
        expected['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]

        with TestDir() as test_dir:
            labelmap_path = osp.join(test_dir.path, 'test.txt')

            VOC.write_label_map(labelmap_path, expected)
            actual = VOC.parse_label_map(labelmap_path)

            self.assertEqual(expected, actual)