[Datumaro] VOC labelmap support (#957)

* Add import result checks and options to skip
* Add label-specific attributes
* Overwrite option for export
* Add labelmap file support in VOC
* Add labelmap tests
* Little refactoring
6 years ago · c84daaf2ef
parent 20a0e66238
commit c84daaf2ef
8 changed files with 576 additions and 220 deletions
--- a/datumaro/datumaro/cli/project/init.py
+++ b/datumaro/datumaro/cli/project/init.py
@ -68,6 +68,8 @@ def build_import_parser(parser):
        help="Overwrite existing files in the save directory")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
+    parser.add_argument('--skip-check', action='store_true',
+        help="Skip source checking")
    # parser.add_argument('extra_args', nargs=argparse.REMAINDER,
    #     help="Additional arguments for importer (pass '-- -h' for help)")
    return parser
@ -99,7 +101,9 @@ def import_command(args):
    project.config.project_name = project_name
    project.config.project_dir = project_dir

-    dataset = project.make_dataset()
+    if not args.skip_check or args.copy:
+        log.info("Checking the dataset...")
+        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
@ -127,6 +131,8 @@ def build_export_parser(parser):
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser
@ -135,7 +141,11 @@ def export_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
-    os.makedirs(dst_dir, exist_ok=False)
+    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+        log.error("Directory '%s' already exists "
+            "(pass --overwrite to force creation)" % dst_dir)
+        return 1
+    os.makedirs(dst_dir, exist_ok=args.overwrite)

    project.make_dataset().export(
        save_dir=dst_dir,
--- a/datumaro/datumaro/cli/source/init.py
+++ b/datumaro/datumaro/cli/source/init.py
@ -62,14 +62,16 @@ def build_import_parser(parser):
    dir_parser.add_argument('url',
        help="Path to the source directory")
    dir_parser.add_argument('--copy', action='store_true',
-        help="Copy data to the project")
+        help="Copy the dataset instead of saving source links")

+    parser.add_argument('-n', '--name', default=None,
+        help="Name of the new source")
    parser.add_argument('-f', '--format', default=None,
        help="Name of the source dataset format (default: 'project')")
-    parser.add_argument('-n', '--name', default=None,
-        help="Name of the source to be imported")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
+    parser.add_argument('--skip-check', action='store_true',
+        help="Skip source checking")
    return parser

 def import_command(args):
@ -99,6 +101,10 @@ def import_command(args):
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)
+
+        if not args.skip_check:
+            log.info("Checking the source...")
+            project.make_source_project(name)
        project.save()

        log.info("Source '%s' has been added to the project, location: '%s'" \
@ -131,6 +137,10 @@ def import_command(args):
        if args.format:
            source['format'] = args.format
        project.add_source(name, source)
+
+        if not args.skip_check:
+            log.info("Checking the source...")
+            project.make_source_project(name)
        project.save()

        log.info("Source '%s' has been added to the project, location: '%s'" \
@ -184,6 +194,8 @@ def build_export_parser(parser):
        help="Output format")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    return parser
@ -192,7 +204,11 @@ def export_command(args):
    project = load_project(args.project_dir)

    dst_dir = osp.abspath(args.dst_dir)
-    os.makedirs(dst_dir, exist_ok=False)
+    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+        log.error("Directory '%s' already exists "
+            "(pass --overwrite to force creation)" % dst_dir)
+        return 1
+    os.makedirs(dst_dir, exist_ok=args.overwrite)

    source_project = project.make_source_project(args.name)
    source_project.make_dataset().export(
--- a/datumaro/datumaro/components/converters/voc.py
+++ b/datumaro/datumaro/components/converters/voc.py
@ -4,18 +4,23 @@
 # SPDX-License-Identifier: MIT

 from collections import OrderedDict, defaultdict
+from enum import Enum
+from itertools import chain
 import logging as log
 from lxml import etree as ET
 import os
 import os.path as osp

 from datumaro.components.converter import Converter
-from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
-from datumaro.components.formats.voc import VocLabel, VocAction, \
-    VocBodyPart, VocPose, VocTask, VocPath, VocColormap, VocInstColormap
-from datumaro.util import find
+from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
+    LabelCategories
+)
+from datumaro.components.formats.voc import (VocTask, VocPath,
+    VocInstColormap, VocPose,
+    parse_label_map, make_voc_label_map, make_voc_categories, write_label_map
+)
 from datumaro.util.image import save_image
-from datumaro.util.mask_tools import apply_colormap
+from datumaro.util.mask_tools import apply_colormap, remap_mask


 def _write_xml_bbox(bbox, parent_elem):
@ -27,13 +32,12 @@ def _write_xml_bbox(bbox, parent_elem):
    ET.SubElement(bbox_elem, 'ymax').text = str(y + h)
    return bbox_elem

-class _Converter:
-    _LABELS = set([entry.name for entry in VocLabel])
-    _BODY_PARTS = set([entry.name for entry in VocBodyPart])
-    _ACTIONS = set([entry.name for entry in VocAction])

+LabelmapType = Enum('LabelmapType', ['voc', 'source', 'guess'])
+
+class _Converter:
    def __init__(self, extractor, save_dir,
-            tasks=None, apply_colormap=True, save_images=False):
+            tasks=None, apply_colormap=True, save_images=False, label_map=None):
        assert tasks is None or isinstance(tasks, (VocTask, list))
        if tasks is None:
            tasks = list(VocTask)
@ -49,14 +53,12 @@ class _Converter:
        self._apply_colormap = apply_colormap
        self._save_images = save_images

-        self._label_categories = extractor.categories() \
-            .get(AnnotationType.label)
-        self._mask_categories = extractor.categories() \
-            .get(AnnotationType.mask)
+        self._load_categories(label_map)

    def convert(self):
        self.init_dirs()
        self.save_subsets()
+        self.save_label_map()

    def init_dirs(self):
        save_dir = self._save_dir
@ -94,7 +96,8 @@ class _Converter:
        self._images_dir = images_dir

    def get_label(self, label_id):
-        return self._label_categories.items[label_id].name
+        return self._extractor.categories()[AnnotationType.label] \
+            .items[label_id].name

    def save_subsets(self):
        subsets = self._extractor.subsets()
@ -167,56 +170,64 @@ class _Converter:
                    layout_bboxes = []
                    for bbox in bboxes:
                        label = self.get_label(bbox.label)
-                        if label in self._LABELS:
-                            main_bboxes.append(bbox)
-                        elif label in self._BODY_PARTS:
+                        if self._is_part(label):
                            layout_bboxes.append(bbox)
+                        elif self._is_label(label):
+                            main_bboxes.append(bbox)

                    for new_obj_id, obj in enumerate(main_bboxes):
                        attr = obj.attributes

                        obj_elem = ET.SubElement(root_elem, 'object')
-                        ET.SubElement(obj_elem, 'name').text = self.get_label(obj.label)
+
+                        obj_label =  self.get_label(obj.label)
+                        ET.SubElement(obj_elem, 'name').text = obj_label

                        pose = attr.get('pose')
                        if pose is not None:
-                            ET.SubElement(obj_elem, 'pose').text = VocPose[pose].name
+                            pose = VocPose[pose]
+                        else:
+                            pose = VocPose.Unspecified
+                        ET.SubElement(obj_elem, 'pose').text = pose.name

                        truncated = attr.get('truncated')
                        if truncated is not None:
-                            ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated
+                            truncated = int(truncated)
+                        else:
+                            truncated = 0
+                        ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated

                        difficult = attr.get('difficult')
                        if difficult is not None:
-                            ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult
+                            difficult = int(difficult)
+                        else:
+                            difficult = 0
+                        ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult

                        bbox = obj.get_bbox()
                        if bbox is not None:
                            _write_xml_bbox(bbox, obj_elem)

-                        for part in VocBodyPart:
-                            part_bbox = find(layout_bboxes, lambda x: \
-                                obj.id == x.group and \
-                                self.get_label(x.label) == part.name)
-                            if part_bbox is not None:
-                                part_elem = ET.SubElement(obj_elem, 'part')
-                                ET.SubElement(part_elem, 'name').text = part.name
-                                _write_xml_bbox(part_bbox.get_bbox(), part_elem)
+                        for part_bbox in filter(lambda x: obj.id == x.group,
+                                layout_bboxes):
+                            part_elem = ET.SubElement(obj_elem, 'part')
+                            ET.SubElement(part_elem, 'name').text = \
+                                self.get_label(part_bbox.label)
+                            _write_xml_bbox(part_bbox.get_bbox(), part_elem)

-                                objects_with_parts.append(new_obj_id)
+                            objects_with_parts.append(new_obj_id)

-                        actions = [x for x in labels
-                            if obj.id == x.group and \
-                               self.get_label(x.label) in self._ACTIONS]
-                        if len(actions) != 0:
-                            actions_elem = ET.SubElement(obj_elem, 'actions')
-                            for action in VocAction:
-                                presented = find(actions, lambda x: \
-                                    self.get_label(x.label) == action.name) is not None
-                                ET.SubElement(actions_elem, action.name).text = \
-                                    '%d' % presented
+                        actions = {k: v for k, v in obj.attributes.items()
+                            if self._is_action(obj_label, k)}
+                        actions_elem = ET.Element('actions')
+                        for action in self._get_actions(obj_label):
+                            presented = action in actions and actions[action]
+                            ET.SubElement(actions_elem, action).text = \
+                                '%d' % presented

-                                objects_with_actions[new_obj_id][action] = presented
+                            objects_with_actions[new_obj_id][action] = presented
+                        if len(actions) != 0:
+                            obj_elem.append(actions_elem)

                    if set(self._tasks) & set([None,
                            VocTask.detection,
@ -232,7 +243,7 @@ class _Converter:

                for label_obj in labels:
                    label = self.get_label(label_obj.label)
-                    if label not in self._LABELS:
+                    if not self._is_label(label):
                        continue
                    class_list = class_lists.get(item_id, set())
                    class_list.add(label_obj.label)
@ -244,7 +255,7 @@ class _Converter:
                    if mask_obj.attributes.get('class') == True:
                        self.save_segm(osp.join(self._segm_dir,
                                item_id + VocPath.SEGM_EXT),
-                            mask_obj, self._mask_categories.colormap)
+                            mask_obj)
                    if mask_obj.attributes.get('instances') == True:
                        self.save_segm(osp.join(self._inst_dir,
                                item_id + VocPath.SEGM_EXT),
@ -284,9 +295,11 @@ class _Converter:
        if len(action_list) == 0:
            return

-        for action in VocAction:
+        all_actions = set(chain(*(self._get_actions(l)
+            for l in self._label_map)))
+        for action in all_actions:
            ann_file = osp.join(self._action_subsets_dir,
-                '%s_%s.txt' % (action.name, subset_name))
+                '%s_%s.txt' % (action, subset_name))
            with open(ann_file, 'w') as f:
                for item, objs in action_list.items():
                    if not objs:
@ -302,23 +315,17 @@ class _Converter:
        if len(class_lists) == 0:
            return

-        label_cat = self._extractor.categories().get(AnnotationType.label, None)
-        if not label_cat:
-            log.warn("Unable to save classification task lists "
-                "as source does not provide class labels. Skipped.")
-            return
-
-        for label in VocLabel:
+        for label in self._label_map:
            ann_file = osp.join(self._cls_subsets_dir,
-                '%s_%s.txt' % (label.name, subset_name))
+                '%s_%s.txt' % (label, subset_name))
            with open(ann_file, 'w') as f:
                for item, item_labels in class_lists.items():
                    if not item_labels:
                        continue
-                    item_labels = [label_cat.items[l].name for l in item_labels]
-                    presented = label.name in item_labels
-                    f.write('%s % d\n' % \
-                        (item, 1 if presented else -1))
+                    item_labels = [self._strip_label(self.get_label(l))
+                        for l in item_labels]
+                    presented = label in item_labels
+                    f.write('%s % d\n' % (item, 1 if presented else -1))

    def save_clsdet_lists(self, subset_name, clsdet_list):
        os.makedirs(self._cls_subsets_dir, exist_ok=True)
@ -348,17 +355,124 @@ class _Converter:
                else:
                    f.write('%s\n' % (item))

-    def save_segm(self, path, annotation, colormap):
+    def save_segm(self, path, annotation, colormap=None):
        data = annotation.image
        if self._apply_colormap:
            if colormap is None:
-                colormap = VocColormap
+                colormap = self._categories[AnnotationType.mask].colormap
+            data = self._remap_mask(data)
            data = apply_colormap(data, colormap)
        save_image(path, data)

+    def save_label_map(self):
+        path = osp.join(self._save_dir, VocPath.LABELMAP_FILE)
+        write_label_map(path, self._label_map)
+
+    @staticmethod
+    def _strip_label(label):
+        return label.lower().strip()
+
+    def _load_categories(self, label_map_source=None):
+        if label_map_source == LabelmapType.voc.name:
+            # strictly use VOC default labelmap
+            label_map = make_voc_label_map()
+
+        elif label_map_source == LabelmapType.source.name:
+            # generate colormap from the input dataset
+            labels = self._extractor.categories() \
+                .get(AnnotationType.label, LabelCategories())
+            label_map = OrderedDict(
+                (item.name, [None, [], []]) for item in labels.items)
+
+        elif label_map_source in [LabelmapType.guess.name, None]:
+            # generate colormap for union of VOC and input dataset labels
+            label_map = make_voc_label_map()
+
+            rebuild_colormap = False
+            source_labels = self._extractor.categories() \
+                .get(AnnotationType.label, LabelCategories())
+            for label in source_labels.items:
+                label_name = self._strip_label(label.name)
+                if label_name not in label_map:
+                    rebuild_colormap = True
+                if label.attributes or label_name not in label_map:
+                    label_map[label_name] = [None, [], label.attributes]
+
+            if rebuild_colormap:
+                for item in label_map.values():
+                    item[0] = None
+
+        elif isinstance(label_map_source, dict):
+            label_map = label_map_source
+
+        elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
+            label_map = parse_label_map(label_map_source)
+
+        else:
+            raise Exception("Wrong labelmap specified, "
+                "expected one of %s or a file path" % \
+                ', '.join(t.name for t in LabelmapType))
+
+        self._categories = make_voc_categories(label_map)
+
+        self._label_map = label_map
+        colormap = self._categories[AnnotationType.mask].colormap
+        for label_id, color in colormap.items():
+            label_desc = label_map[
+                self._categories[AnnotationType.label].items[label_id].name]
+            label_desc[0] = color
+
+        self._label_id_mapping = self._make_label_id_map()
+
+    def _is_label(self, s):
+        return self._label_map.get(self._strip_label(s)) is not None
+
+    def _is_part(self, s):
+        s = self._strip_label(s)
+        for label_desc in self._label_map.values():
+            if s in label_desc[1]:
+                return True
+        return False
+
+    def _is_action(self, label, s):
+        return self._strip_label(s) in self._get_actions(label)
+
+    def _get_actions(self, label):
+        label_desc = self._label_map.get(self._strip_label(label))
+        if not label_desc:
+            return []
+        return label_desc[2]
+
+    def _make_label_id_map(self):
+        source_labels = {
+            id: label.name for id, label in
+            enumerate(self._extractor.categories()[AnnotationType.label].items)
+        }
+        target_labels = {
+            label.name: id for id, label in
+            enumerate(self._categories[AnnotationType.label].items)
+        }
+        id_mapping = {
+            src_id: target_labels.get(src_label, 0)
+            for src_id, src_label in source_labels.items()
+        }
+
+        void_labels = [src_label for src_id, src_label in source_labels.items()
+            if src_label not in target_labels]
+        if void_labels:
+            log.warn("The following labels are remapped to background: %s" %
+                ', '.join(void_labels))
+
+        def map_id(src_id):
+            return id_mapping[src_id]
+        return map_id
+
+    def _remap_mask(self, mask):
+        return remap_mask(mask, self._label_id_mapping)
+
 class VocConverter(Converter):
    def __init__(self,
-            tasks=None, save_images=False, apply_colormap=False,
+            tasks=None, save_images=False, apply_colormap=False, label_map=None,
            cmdline_args=None):
        super().__init__()

@ -366,6 +480,7 @@ class VocConverter(Converter):
            'tasks': tasks,
            'save_images': save_images,
            'apply_colormap': apply_colormap,
+            'label_map': label_map,
        }

        if cmdline_args is not None:
@ -375,6 +490,12 @@ class VocConverter(Converter):
    def _split_tasks_string(s):
        return [VocTask[i.strip()] for i in s.split(',')]

+    @staticmethod
+    def _get_labelmap(s):
+        if osp.isfile(s):
+            return s
+        return LabelmapType[s].name
+
    @classmethod
    def build_cmdline_parser(cls, parser=None):
        import argparse
@ -386,6 +507,9 @@ class VocConverter(Converter):
        parser.add_argument('--apply-colormap', type=bool, default=True,
            help="Use colormap for class and instance masks "
                "(default: %(default)s)")
+        parser.add_argument('--label-map', type=cls._get_labelmap, default=None,
+            help="Labelmap file path or one of %s" % \
+                ', '.join(t.name for t in LabelmapType))
        parser.add_argument('--tasks', type=cls._split_tasks_string,
            default=None,
            help="VOC task filter, comma-separated list of {%s} "
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@ -53,6 +53,11 @@ class Categories:
    def __init__(self, attributes=None):
        if attributes is None:
            attributes = set()
+        else:
+            if not isinstance(attributes, set):
+                attributes = set(attributes)
+            for attr in attributes:
+                assert isinstance(attr, str)
        self.attributes = attributes

    def __eq__(self, other):
@ -62,7 +67,7 @@ class Categories:
            (self.attributes == other.attributes)

 class LabelCategories(Categories):
-    Category = namedtuple('Category', ['name', 'parent'])
+    Category = namedtuple('Category', ['name', 'parent', 'attributes'])

    def __init__(self, items=None, attributes=None):
        super().__init__(attributes=attributes)
@ -81,11 +86,18 @@ class LabelCategories(Categories):
            indices[item.name] = index
        self._indices = indices

-    def add(self, name, parent=None):
+    def add(self, name, parent=None, attributes=None):
        assert name not in self._indices
+        if attributes is None:
+            attributes = set()
+        else:
+            if not isinstance(attributes, set):
+                attributes = set(attributes)
+            for attr in attributes:
+                assert isinstance(attr, str)

        index = len(self.items)
-        self.items.append(self.Category(name, parent))
+        self.items.append(self.Category(name, parent, attributes))
        self._indices[name] = index

    def find(self, name):
--- a/datumaro/datumaro/components/extractors/voc.py
+++ b/datumaro/datumaro/components/extractors/voc.py
@ -4,18 +4,15 @@
 # SPDX-License-Identifier: MIT

 from collections import defaultdict
-from itertools import chain
 import os
 import os.path as osp
 from xml.etree import ElementTree as ET

 from datumaro.components.extractor import (Extractor, DatasetItem,
    AnnotationType, LabelObject, MaskObject, BboxObject,
-    LabelCategories, MaskCategories
 )
-from datumaro.components.formats.voc import (VocLabel, VocAction,
-    VocBodyPart, VocTask, VocPath, VocColormap, VocInstColormap,
-    VocIgnoredLabel
+from datumaro.components.formats.voc import (
+    VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories
 )
 from datumaro.util import dir_items
 from datumaro.util.image import lazy_image
@ -24,31 +21,6 @@ from datumaro.util.mask_tools import lazy_mask, invert_colormap

 _inverse_inst_colormap = invert_colormap(VocInstColormap)

-# pylint: disable=pointless-statement
-def _make_voc_categories():
-    categories = {}
-
-    label_categories = LabelCategories()
-    for label in chain(VocLabel, VocAction, VocBodyPart):
-        label_categories.add(label.name)
-    categories[AnnotationType.label] = label_categories
-
-    def label_id(class_index):
-        if class_index in [0, VocIgnoredLabel]:
-            return class_index
-
-        class_label = VocLabel(class_index).name
-        label_id, _ = label_categories.find(class_label)
-        return label_id + 1
-    colormap = { label_id(idx): tuple(color) \
-        for idx, color in VocColormap.items() }
-    mask_categories = MaskCategories(colormap)
-    mask_categories.inverse_colormap # force init
-    categories[AnnotationType.mask] = mask_categories
-
-    return categories
-# pylint: enable=pointless-statement
-
 class VocExtractor(Extractor):
    class Subset(Extractor):
        def __init__(self, name, parent):
@ -58,8 +30,8 @@ class VocExtractor(Extractor):
            self.items = []

        def __iter__(self):
-            for item in self.items:
-                yield self._parent._get(item, self._name)
+            for item_id in self.items:
+                yield self._parent._get(item_id, self._name)

        def __len__(self):
            return len(self.items)
@ -87,10 +59,10 @@ class VocExtractor(Extractor):
        label_annotations = defaultdict(list)
        label_anno_files = [s for s in dir_files \
            if '_' in s and s[s.rfind('_') + 1:] in subset_names]
-        for ann_file in label_anno_files:
-            with open(osp.join(subsets_dir, ann_file + '.txt'), 'r') as f:
-                label = ann_file[:ann_file.rfind('_')]
-                label_id = VocLabel[label].value
+        for ann_filename in label_anno_files:
+            with open(osp.join(subsets_dir, ann_filename + '.txt'), 'r') as f:
+                label = ann_filename[:ann_filename.rfind('_')]
+                label_id = self._get_label_id(label)
                for line in f:
                    item, present = line.split()
                    if present == '1':
@ -113,7 +85,11 @@ class VocExtractor(Extractor):
        self._annotations[VocTask.detection] = det_annotations

    def _load_categories(self):
-        self._categories = _make_voc_categories()
+        label_map = None
+        label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
+        if osp.isfile(label_map_path):
+            label_map = parse_label_map(label_map_path)
+        self._categories = make_voc_categories(label_map)

    def __init__(self, path, task):
        super().__init__()
@ -146,17 +122,17 @@ class VocExtractor(Extractor):
            for item in subset:
                yield item

-    def _get(self, item, subset_name):
+    def _get(self, item_id, subset_name):
        image = None
        image_path = osp.join(self._path, VocPath.IMAGES_DIR,
-            item + VocPath.IMAGE_EXT)
+            item_id + VocPath.IMAGE_EXT)
        if osp.isfile(image_path):
            image = lazy_image(image_path)

-        annotations = self._get_annotations(item)
+        annotations = self._get_annotations(item_id)

        return DatasetItem(annotations=annotations,
-            id=item, subset=subset_name, image=image)
+            id=item_id, subset=subset_name, image=image)

    def _get_label_id(self, label):
        label_id, _ = self._categories[AnnotationType.label].find(label)
@ -187,11 +163,10 @@ class VocExtractor(Extractor):

        cls_annotations = self._annotations.get(VocTask.classification)
        if cls_annotations is not None and \
-           self._task is VocTask.classification:
+                self._task is VocTask.classification:
            item_labels = cls_annotations.get(item)
            if item_labels is not None:
-                for label in item_labels:
-                    label_id = self._get_label_id(VocLabel(label).name)
+                for label_id in item_labels:
                    item_annotations.append(LabelObject(label_id))

        det_annotations = self._annotations.get(VocTask.detection)
@ -215,16 +190,16 @@ class VocExtractor(Extractor):
                    continue

                difficult_elem = object_elem.find('difficult')
-                if difficult_elem is not None:
-                    attributes['difficult'] = (difficult_elem.text == '1')
+                attributes['difficult'] = difficult_elem is not None and \
+                    difficult_elem.text == '1'

                truncated_elem = object_elem.find('truncated')
-                if truncated_elem is not None:
-                    attributes['truncated'] = (truncated_elem.text == '1')
+                attributes['truncated'] = truncated_elem is not None and \
+                    truncated_elem.text == '1'

                occluded_elem = object_elem.find('occluded')
-                if occluded_elem is not None:
-                    attributes['occluded'] = (occluded_elem.text == '1')
+                attributes['occluded'] = occluded_elem is not None and \
+                    occluded_elem.text == '1'

                pose_elem = object_elem.find('pose')
                if pose_elem is not None:
@ -238,34 +213,34 @@ class VocExtractor(Extractor):
                    attributes['point'] = point

                actions_elem = object_elem.find('actions')
-                if actions_elem is not None and \
-                   self._task is VocTask.action_classification:
-                    for action in VocAction:
-                        action_elem = actions_elem.find(action.name)
-                        if action_elem is None or action_elem.text != '1':
-                            continue
-
-                        act_label_id = self._get_label_id(action.name)
-                        assert group in [None, obj_id]
-                        group = obj_id
-                        item_annotations.append(LabelObject(act_label_id,
-                            group=obj_id))
-
-                if self._task is VocTask.person_layout:
-                    for part_elem in object_elem.findall('part'):
-                        part = part_elem.find('name').text
-                        part_label_id = self._get_label_id(part)
-                        bbox = self._parse_bbox(part_elem)
-                        group = obj_id
-                        item_annotations.append(BboxObject(
-                            *bbox, label=part_label_id,
-                            group=obj_id))
-
-                if self._task in [VocTask.action_classification, VocTask.person_layout]:
-                    if group is None:
-                        continue
-
-                item_annotations.append(BboxObject(*obj_bbox, label=obj_label_id,
+                actions = {a: False
+                    for a in self._categories[AnnotationType.label] \
+                        .items[obj_label_id].attributes}
+                if actions_elem is not None:
+                    for action_elem in actions_elem:
+                        actions[action_elem.tag] = (action_elem.text == '1')
+                for action, present in actions.items():
+                    attributes[action] = present
+
+                for part_elem in object_elem.findall('part'):
+                    part = part_elem.find('name').text
+                    part_label_id = self._get_label_id(part)
+                    bbox = self._parse_bbox(part_elem)
+                    group = obj_id
+
+                    if self._task is not VocTask.person_layout:
+                        break
+                    item_annotations.append(BboxObject(
+                        *bbox, label=part_label_id,
+                        group=obj_id))
+
+                if self._task is VocTask.person_layout and group is None:
+                    continue
+                if self._task is VocTask.action_classification and not actions:
+                    continue
+
+                item_annotations.append(BboxObject(
+                    *obj_bbox, label=obj_label_id,
                    attributes=attributes, id=obj_id, group=group))

        return item_annotations
@ -283,58 +258,48 @@ class VocExtractor(Extractor):
            return None

 class VocClassificationExtractor(VocExtractor):
-    _ANNO_DIR = 'Main'
-
    def __init__(self, path):
        super().__init__(path, task=VocTask.classification)

-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets

        self._load_cls_annotations(subsets_dir, subsets)

 class VocDetectionExtractor(VocExtractor):
-    _ANNO_DIR = 'Main'
-
    def __init__(self, path):
        super().__init__(path, task=VocTask.detection)

-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets

        self._load_det_annotations()

 class VocSegmentationExtractor(VocExtractor):
-    _ANNO_DIR = 'Segmentation'
-
    def __init__(self, path):
        super().__init__(path, task=VocTask.segmentation)

-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Segmentation')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets

 class VocLayoutExtractor(VocExtractor):
-    _ANNO_DIR = 'Layout'
-
    def __init__(self, path):
        super().__init__(path, task=VocTask.person_layout)

-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Layout')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets

        self._load_det_annotations()

 class VocActionExtractor(VocExtractor):
-    _ANNO_DIR = 'Action'
-
    def __init__(self, path):
        super().__init__(path, task=VocTask.action_classification)

-        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR)
+        subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Action')
        subsets = self._load_subsets(subsets_dir)
        self._subsets = subsets

@ -414,7 +379,7 @@ class VocResultsExtractor(Extractor):
            if mark != task_desc['mark']:
                continue

-            label_id = VocLabel[label].value
+            label_id = self._get_label_id(label)
            anns = defaultdict(list)
            with open(osp.join(task_dir, ann_file + ann_ext), 'r') as f:
                for line in f:
@ -441,7 +406,11 @@ class VocResultsExtractor(Extractor):
            VocTask.action_classification)

    def _load_categories(self):
-        self._categories = _make_voc_categories()
+        label_map = None
+        label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
+        if osp.isfile(label_map_path):
+            label_map = parse_label_map(label_map_path)
+        self._categories = make_voc_categories(label_map)

    def _get_label_id(self, label):
        label_id = self._categories[AnnotationType.label].find(label)
@ -511,9 +480,8 @@ class VocComp_1_2_Extractor(VocResultsExtractor):
        if cls_ann is not None:
            for desc in cls_ann:
                label_id, conf = desc
-                label_id = self._get_label_id(VocLabel(int(label_id)).name)
                annotations.append(LabelObject(
-                    label_id,
+                    int(label_id),
                    attributes={ 'score': float(conf) }
                ))

@ -538,11 +506,10 @@ class VocComp_3_4_Extractor(VocResultsExtractor):
        if det_ann is not None:
            for desc in det_ann:
                label_id, conf, left, top, right, bottom = desc
-                label_id = self._get_label_id(VocLabel(int(label_id)).name)
                annotations.append(BboxObject(
                    x=float(left), y=float(top),
                    w=float(right) - float(left), h=float(bottom) - float(top),
-                    label=label_id,
+                    label=int(label_id),
                    attributes={ 'score': float(conf) }
                ))

@ -639,7 +606,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
                conf = float(layout_elem.find('confidence').text)
                parts = []
                for part_elem in layout_elem.findall('part'):
-                    label_id = VocBodyPart[part_elem.find('class').text].value
+                    label_id = self._get_label_id(part_elem.find('class').text)
                    bbox_elem = part_elem.find('bndbox')
                    xmin = float(bbox_elem.find('xmin').text)
                    xmax = float(bbox_elem.find('xmax').text)
@ -671,8 +638,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
                }

                for part in parts:
-                    part_id, bbox = part
-                    label_id = self._get_label_id(VocBodyPart(part_id).name)
+                    label_id, bbox = part
                    annotations.append(BboxObject(
                        *bbox, label=label_id,
                        attributes=attributes))
@ -691,6 +657,12 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
        self._subsets = subsets
        self._annotations = dict(annotations)

+    def _load_categories(self):
+        """Build the categories for this extractor from the VOC actions.
+
+        Every ``VocAction`` member becomes a label, in enum order, with no
+        parts and no actions of its own.
+        """
+        from collections import OrderedDict
+        from datumaro.components.formats.voc import VocAction
+
+        # NOTE(review): the color slot is an empty list, not None, so
+        # make_voc_categories() sees it as "has a color" - confirm intended.
+        action_map = OrderedDict()
+        for action in VocAction:
+            action_map[action.name] = [[], [], []]
+        self._categories = make_voc_categories(action_map)
+
    def _get_annotations(self, item, subset_name):
        annotations = []

@ -698,9 +670,8 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
        if action_ann is not None:
            for desc in action_ann:
                action_id, obj_id, conf = desc
-                label_id = self._get_label_id(VocAction(int(action_id)).name)
                annotations.append(LabelObject(
-                    label_id,
+                    action_id,
                    attributes={
                        'score': conf,
                        'object_id': int(obj_id),
--- a/datumaro/datumaro/components/formats/voc.py
+++ b/datumaro/datumaro/components/formats/voc.py
@ -5,8 +5,13 @@

 from collections import OrderedDict
 from enum import Enum
+from itertools import chain
 import numpy as np

+from datumaro.components.extractor import (AnnotationType,
+    LabelCategories, MaskCategories
+)
+

 VocTask = Enum('VocTask', [
    'classification',
@ -17,6 +22,7 @@ VocTask = Enum('VocTask', [
 ])

 VocLabel = Enum('VocLabel', [
+    ('background', 0),
    ('aeroplane', 1),
    ('bicycle', 2),
    ('bird', 3),
@ -37,10 +43,9 @@ VocLabel = Enum('VocLabel', [
    ('sofa', 18),
    ('train', 19),
    ('tvmonitor', 20),
+    ('ignored', 255),
 ])

-VocIgnoredLabel = 255
-
 VocPose = Enum('VocPose', [
    'Unspecified',
    'Left',
@ -86,7 +91,7 @@ def generate_colormap(length=256):
    )

 VocColormap = {id: color for id, color in generate_colormap(256).items()
-    if id in [l.value for l in VocLabel] + [0, VocIgnoredLabel]}
+    if id in [l.value for l in VocLabel]}
 VocInstColormap = generate_colormap(256)

 class VocPath:
@ -97,6 +102,7 @@ class VocPath:
    SUBSETS_DIR = 'ImageSets'
    IMAGE_EXT = '.jpg'
    SEGM_EXT = '.png'
+    LABELMAP_FILE = 'labelmap.txt'

    TASK_DIR = {
        VocTask.classification: 'Main',
@ -104,4 +110,96 @@ class VocPath:
        VocTask.segmentation: 'Segmentation',
        VocTask.action_classification: 'Action',
        VocTask.person_layout: 'Layout',
-    }
+    }
+
+
+def make_voc_label_map():
+    """Return the built-in VOC label map.
+
+    The result is an ordered mapping ``name -> [color, parts, actions]``
+    with one entry per ``VocLabel`` member, sorted by the member value.
+    Colors come from ``VocColormap``; only the 'person' label receives
+    parts (all ``VocBodyPart`` names) and actions (all ``VocAction`` names).
+    """
+    label_map = OrderedDict()
+    for label in sorted(VocLabel, key=lambda l: l.value):
+        label_map[label.name] = [VocColormap[label.value], [], []]
+
+    person_desc = label_map[VocLabel.person.name]
+    person_desc[1] = [part.name for part in VocBodyPart]
+    person_desc[2] = [action.name for action in VocAction]
+    return label_map
+
+def parse_label_map(path):
+    """Read a label map file into an ordered dict.
+
+    Each meaningful line has the form ``name:r,g,b:part,...:action,...``;
+    trailing fields may be omitted or left empty. Blank lines and lines
+    starting with '#' are skipped.
+
+    Returns ``None`` when ``path`` is empty, otherwise an ``OrderedDict``
+    mapping ``name -> [color, parts, actions]`` where ``color`` is a tuple
+    of three ints in reversed file order (or ``None`` when absent) and
+    ``parts`` / ``actions`` are lists of strings.
+
+    A color field without exactly three components fails the assertion.
+    """
+    if not path:
+        return None
+
+    label_map = OrderedDict()
+    with open(path, 'r') as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            # blank lines and '#' comments carry no label
+            if not line or line.startswith('#'):
+                continue
+
+            # fields: name, color, parts, actions
+            fields = line.split(':')
+            name = fields[0]
+            color_field = fields[1] if 1 < len(fields) else ''
+            parts_field = fields[2] if 2 < len(fields) else ''
+            actions_field = fields[3] if 3 < len(fields) else ''
+
+            color = None
+            if color_field:
+                color = color_field.split(',')
+                assert len(color) == 3, \
+                    "Label '%s' has wrong color, expected 'r,g,b', got '%s'" % \
+                    (name, color)
+                # kept in reversed order relative to the 'r,g,b' text
+                color = tuple(reversed([int(c) for c in color]))
+
+            parts = parts_field.split(',') if parts_field else []
+            actions = actions_field.split(',') if actions_field else []
+
+            label_map[name] = [color, parts, actions]
+    return label_map
+
+def write_label_map(path, label_map):
+    """Write a label map to ``path`` as ``label:color_rgb:parts:actions`` lines.
+
+    ``label_map`` maps a label name to ``[color, parts, actions]``. The
+    color components are written in reversed order, comma-separated; a
+    missing (falsy) color yields an empty field. Parts and actions are
+    written as comma-separated lists. A header comment line comes first.
+    """
+    with open(path, 'w') as f:
+        f.write('# label:color_rgb:parts:actions\n')
+        for label_name, label_desc in label_map.items():
+            color = label_desc[0]
+            color_rgb = ','.join(str(c) for c in color[::-1]) if color else ''
+            parts = ','.join(str(p) for p in label_desc[1])
+            actions = ','.join(str(a) for a in label_desc[2])
+
+            fields = [label_name, color_rgb, parts, actions]
+            f.write(':'.join(fields) + '\n')
+
+# pylint: disable=pointless-statement
+def make_voc_categories(label_map=None):
+    """Build label and mask categories from a label map.
+
+    ``label_map`` maps a label name to ``[color, parts, actions]``; when it
+    is ``None`` the built-in VOC label map is used.
+
+    Labels are added in map order with their actions as label attributes,
+    followed by every distinct part name in first-seen order. If no label
+    has a color, a colormap of ``len(label_map)`` entries is generated;
+    otherwise each label index is mapped to that label's own color.
+    NOTE(review): with only some colors given, the remaining labels map to
+    ``None`` - confirm MaskCategories accepts that.
+
+    Returns a dict with ``AnnotationType.label`` and ``AnnotationType.mask``
+    entries.
+    """
+    if label_map is None:
+        label_map = make_voc_label_map()
+
+    label_categories = LabelCategories()
+    label_categories.attributes.update(['difficult', 'truncated', 'occluded'])
+    for name, desc in label_map.items():
+        label_categories.add(name, attributes=desc[2])
+
+    # Ordered-dict keys drop repeated part names but keep first-seen order.
+    unique_parts = OrderedDict.fromkeys(chain.from_iterable(
+        [desc[1] for desc in label_map.values()]))
+    for part in unique_parts:
+        label_categories.add(part)
+
+    if any(desc[0] is not None for desc in label_map.values()):
+        colormap = {}
+        for name, desc in label_map.items():
+            colormap[label_categories.find(name)[0]] = desc[0]
+    else:
+        colormap = generate_colormap(len(label_map))
+
+    mask_categories = MaskCategories(colormap)
+    mask_categories.inverse_colormap # force init
+
+    return {
+        AnnotationType.label: label_categories,
+        AnnotationType.mask: mask_categories,
+    }
+# pylint: enable=pointless-statement
--- a/datumaro/datumaro/util/mask_tools.py
+++ b/datumaro/datumaro/util/mask_tools.py
@ -69,6 +69,16 @@ def apply_colormap(mask, colormap=None):
    painted_mask = np.reshape(painted_mask, (*mask.shape, 3))
    return painted_mask.astype(np.float32)

+def remap_mask(mask, map_fn):
+    """Change mask elements from one colormap to another.
+
+    ``mask`` must be two-dimensional. ``map_fn`` is applied once per mask
+    element and receives that element as a one-item 1-D slice. The mapped
+    values are returned in the original mask shape.
+    """
+    assert len(mask.shape) == 2
+
+    original_shape = mask.shape
+    # One element per row, so apply_along_axis visits elements one by one.
+    column = np.reshape(mask, (-1, 1))
+    remapped = np.apply_along_axis(map_fn, 1, column)
+    return np.reshape(remapped, original_shape)
+

 def load_mask(path, colormap=None):
    mask = load_image(path)
--- a/datumaro/tests/test_voc_format.py
+++ b/datumaro/tests/test_voc_format.py
@ -1,5 +1,4 @@
 import cv2
-from itertools import zip_longest
 import numpy as np
 import os
 import os.path as osp
@ -235,6 +234,7 @@ class VocExtractorTest(TestCase):
                    {
                        'pose': VOC.VocPose(1).name,
                        'truncated': True,
+                        'occluded': False,
                        'difficult': False,
                    },
                    obj1.attributes)
@ -365,16 +365,9 @@ class VocExtractorTest(TestCase):
                self.assertFalse(obj2 is None)
                self.assertListEqual([4, 5, 2, 2], obj2.get_bbox())

-                count = 1
                for action in VOC.VocAction:
-                    if action.value % 2 == 1:
-                        count += 1
-                        ann = find(item.annotations,
-                            lambda x: x.type == AnnotationType.label and \
-                                get_label(extractor, x.label) == action.name)
-                        self.assertFalse(ann is None)
-                        self.assertTrue(obj2.id == ann.group)
-                self.assertEqual(count, len(item.annotations))
+                    attr = obj2.attributes[action.name]
+                    self.assertEqual(attr, action.value % 2)

            subset_name = 'test'
            generated_subset = generated_subsets[subset_name]
@ -388,50 +381,66 @@ class VocExtractorTest(TestCase):
                self.assertEqual(0, len(item.annotations))

 class VocConverterTest(TestCase):
-    def _test_can_save_voc(self, extractor_type, converter_type, test_dir):
-        dummy_dir = osp.join(test_dir, 'dummy')
-        generate_dummy_voc(dummy_dir)
-        gen_extractor = extractor_type(dummy_dir)
+    def _test_can_save_voc(self, src_extractor, converter, test_dir,
+            target_extractor=None):
+        converter(src_extractor, test_dir)

-        conv_dir = osp.join(test_dir, 'converted')
-        converter = converter_type()
-        converter(gen_extractor, conv_dir)
+        result_extractor = VocImporter()(test_dir).make_dataset()
+        if target_extractor is None:
+            target_extractor = src_extractor

-        conv_extractor = extractor_type(conv_dir)
-        for item_a, item_b in zip_longest(gen_extractor, conv_extractor):
+        if AnnotationType.label in target_extractor.categories():
+            self.assertEqual(
+                target_extractor.categories()[AnnotationType.label].items,
+                result_extractor.categories()[AnnotationType.label].items)
+        if AnnotationType.mask in target_extractor.categories():
+            self.assertEqual(
+                target_extractor.categories()[AnnotationType.mask].colormap,
+                result_extractor.categories()[AnnotationType.mask].colormap)
+
+        self.assertEqual(len(target_extractor), len(result_extractor))
+        for item_a, item_b in zip(target_extractor, result_extractor):
            self.assertEqual(item_a.id, item_b.id)
            self.assertEqual(len(item_a.annotations), len(item_b.annotations))
            for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
                self.assertEqual(ann_a.type, ann_b.type)

+    def _test_can_save_voc_dummy(self, extractor_type, converter, test_dir):
+        """Run the save/load check on a freshly generated dummy VOC dataset.
+
+        The dummy dataset is generated under ``<test_dir>/dummy``, read with
+        ``extractor_type`` and converted into ``<test_dir>/converted``.
+        """
+        source_dir = osp.join(test_dir, 'dummy')
+        generate_dummy_voc(source_dir)
+
+        self._test_can_save_voc(
+            extractor_type(source_dir), converter,
+            osp.join(test_dir, 'converted'))
+
    def test_can_save_voc_cls(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
-                VocClassificationExtractor, VocClassificationConverter,
+            self._test_can_save_voc_dummy(
+                VocClassificationExtractor, VocClassificationConverter(label_map='voc'),
                test_dir.path)

    def test_can_save_voc_det(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
-                VocDetectionExtractor, VocDetectionConverter,
+            self._test_can_save_voc_dummy(
+                VocDetectionExtractor, VocDetectionConverter(label_map='voc'),
                test_dir.path)

    def test_can_save_voc_segm(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
-                VocSegmentationExtractor, VocSegmentationConverter,
+            self._test_can_save_voc_dummy(
+                VocSegmentationExtractor, VocSegmentationConverter(label_map='voc'),
                test_dir.path)

    def test_can_save_voc_layout(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
-                VocLayoutExtractor, VocLayoutConverter,
+            self._test_can_save_voc_dummy(
+                VocLayoutExtractor, VocLayoutConverter(label_map='voc'),
                test_dir.path)

    def test_can_save_voc_action(self):
        with TestDir() as test_dir:
-            self._test_can_save_voc(
-                VocActionExtractor, VocActionConverter,
+            self._test_can_save_voc_dummy(
+                VocActionExtractor, VocActionConverter(label_map='voc'),
                test_dir.path)

    def test_can_save_dataset_with_no_subsets(self):
@ -451,28 +460,121 @@ class VocConverterTest(TestCase):
                for item in items:
                    yield item

+            def categories(self):
+                return VOC.make_voc_categories()
+
+        with TestDir() as test_dir:
+            self._test_can_save_voc(TestExtractor(), VocConverter(label_map='voc'),
+                test_dir.path)
+
+    def test_dataset_with_voc_labelmap(self):
+        class SrcExtractor(Extractor):
+            def __iter__(self):
+                yield DatasetItem(id=1, annotations=[
+                        BboxObject(2, 3, 4, 5, label=0, id=1),
+                        BboxObject(1, 2, 3, 4, label=1, id=2),
+                    ])
+
            def categories(self):
                label_cat = LabelCategories()
-                for label in VOC.VocLabel:
-                    label_cat.add(label.name)
+                label_cat.add(VOC.VocLabel(1).name)
+                label_cat.add('non_voc_label')
                return {
                    AnnotationType.label: label_cat,
                }

+        class DstExtractor(Extractor):
+            def __iter__(self):
+                yield DatasetItem(id=1, annotations=[
+                        BboxObject(2, 3, 4, 5, label=0, id=1),
+                    ])
+
+            def categories(self):
+                return VOC.make_voc_categories()
+
        with TestDir() as test_dir:
-            src_extractor = TestExtractor()
-            converter = VocConverter()
+            self._test_can_save_voc(
+                SrcExtractor(), VocConverter(label_map='voc'),
+                test_dir.path, target_extractor=DstExtractor())

-            converter(src_extractor, test_dir.path)
+    def test_dataset_with_guessed_labelmap(self):
+        class SrcExtractor(Extractor):
+            def __iter__(self):
+                yield DatasetItem(id=1, annotations=[
+                        BboxObject(2, 3, 4, 5, label=0, id=1),
+                        BboxObject(1, 2, 3, 4, label=1, id=2),
+                    ])

-            dst_extractor = VocImporter()(test_dir.path).make_dataset()
+            def categories(self):
+                label_cat = LabelCategories()
+                label_cat.add(VOC.VocLabel(1).name)
+                label_cat.add('non_voc_label')
+                return {
+                    AnnotationType.label: label_cat,
+                }
+
+        class DstExtractor(Extractor):
+            def __iter__(self):
+                yield DatasetItem(id=1, annotations=[
+                        BboxObject(2, 3, 4, 5, label=0, id=1),
+                        BboxObject(1, 2, 3, 4,
+                            label=self.categories()[AnnotationType.label] \
+                                .find('non_voc_label')[0], id=2),
+                    ])

-            self.assertEqual(len(src_extractor), len(dst_extractor))
-            for item_a, item_b in zip_longest(src_extractor, dst_extractor):
-                self.assertEqual(item_a.id, item_b.id)
-                self.assertEqual(len(item_a.annotations), len(item_b.annotations))
-                for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
-                    self.assertEqual(ann_a.type, ann_b.type)
+            def categories(self):
+                label_map = VOC.make_voc_label_map()
+                label_map['non_voc_label'] = [None, [], []]
+                for label_desc in label_map.values():
+                    label_desc[0] = None # rebuild colormap
+                return VOC.make_voc_categories(label_map)
+
+        with TestDir() as test_dir:
+            self._test_can_save_voc(
+                SrcExtractor(), VocConverter(label_map='guess'),
+                test_dir.path, target_extractor=DstExtractor())
+
+    def test_dataset_with_fixed_labelmap(self):
+        """Export with an explicit label map and compare with the expectation.
+
+        The source has four labels, but the label map passed to the
+        converter names only 'label' with two parts and two actions. The
+        expected dataset therefore has no 'foreign_label' box, numbers the
+        remaining labels from 0, and carries both actions as attributes.
+        """
+        fixed_label_map = {
+            'label': [None, ['label_part1', 'label_part2'], ['act1', 'act2']]
+        }
+
+        class SrcExtractor(Extractor):
+            def __iter__(self):
+                annotations = [
+                    BboxObject(2, 3, 4, 5, label=0, id=1),
+                    BboxObject(1, 2, 3, 4, label=1, id=2, group=2,
+                        attributes={'act1': True}),
+                    BboxObject(2, 3, 4, 5, label=2, id=3, group=2),
+                    BboxObject(2, 3, 4, 6, label=3, id=4, group=2),
+                ]
+                yield DatasetItem(id=1, annotations=annotations)
+
+            def categories(self):
+                label_cat = LabelCategories()
+                label_cat.add('foreign_label')
+                label_cat.add('label', attributes=['act1', 'act2'])
+                label_cat.add('label_part1')
+                label_cat.add('label_part2')
+                return { AnnotationType.label: label_cat }
+
+        class DstExtractor(Extractor):
+            def __iter__(self):
+                annotations = [
+                    BboxObject(1, 2, 3, 4, label=0, id=2, group=2,
+                        attributes={'act1': True, 'act2': False}),
+                    BboxObject(2, 3, 4, 5, label=1, id=3, group=2),
+                    BboxObject(2, 3, 4, 6, label=2, id=4, group=2),
+                ]
+                yield DatasetItem(id=1, annotations=annotations)
+
+            def categories(self):
+                return VOC.make_voc_categories(fixed_label_map)
+
+        with TestDir() as test_dir:
+            self._test_can_save_voc(
+                SrcExtractor(), VocConverter(label_map=fixed_label_map),
+                test_dir.path, target_extractor=DstExtractor())

 class VocImporterTest(TestCase):
    def test_can_import(self):
@ -486,4 +588,17 @@ class VocImporterTest(TestCase):
            self.assertEqual(set(subsets), set(dataset.subsets()))
            self.assertEqual(
                sum([len(s) for _, s in subsets.items()]),
-                len(dataset))
+                len(dataset))
+
+class VocFormatTest(TestCase):
+    """Tests for the VOC format helpers."""
+
+    def test_can_write_and_parse_labelmap(self):
+        """A label map written to a file is parsed back unchanged."""
+        expected = VOC.make_voc_label_map()
+        # extra label without a color, but with parts and actions
+        expected['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]
+
+        with TestDir() as test_dir:
+            labelmap_path = osp.join(test_dir.path, 'test.txt')
+
+            VOC.write_label_map(labelmap_path, expected)
+            parsed = VOC.parse_label_map(labelmap_path)
+
+            self.assertEqual(expected, parsed)