[Datumaro] VOC labelmap support (#957)

* Add import result checks and options to skip
* Add label-specific attributes
* Overwrite option for export
* Add labelmap file support in voc
* Add labelmap tests
* Little refactoring
main
zhiltsov-max 6 years ago committed by Nikita Manovich
parent 20a0e66238
commit c84daaf2ef

@ -68,6 +68,8 @@ def build_import_parser(parser):
help="Overwrite existing files in the save directory") help="Overwrite existing files in the save directory")
parser.add_argument('--copy', action='store_true', parser.add_argument('--copy', action='store_true',
help="Copy the dataset instead of saving source links") help="Copy the dataset instead of saving source links")
parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
# parser.add_argument('extra_args', nargs=argparse.REMAINDER, # parser.add_argument('extra_args', nargs=argparse.REMAINDER,
# help="Additional arguments for importer (pass '-- -h' for help)") # help="Additional arguments for importer (pass '-- -h' for help)")
return parser return parser
@ -99,7 +101,9 @@ def import_command(args):
project.config.project_name = project_name project.config.project_name = project_name
project.config.project_dir = project_dir project.config.project_dir = project_dir
dataset = project.make_dataset() if not args.skip_check or args.copy:
log.info("Checking the dataset...")
dataset = project.make_dataset()
if args.copy: if args.copy:
log.info("Cloning data...") log.info("Cloning data...")
dataset.save(merge=True, save_images=True) dataset.save(merge=True, save_images=True)
@ -127,6 +131,8 @@ def build_export_parser(parser):
help="Output format") help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.', parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)") help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)") help="Additional arguments for converter (pass '-- -h' for help)")
return parser return parser
@ -135,7 +141,11 @@ def export_command(args):
project = load_project(args.project_dir) project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir) dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False) if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
project.make_dataset().export( project.make_dataset().export(
save_dir=dst_dir, save_dir=dst_dir,

@ -62,14 +62,16 @@ def build_import_parser(parser):
dir_parser.add_argument('url', dir_parser.add_argument('url',
help="Path to the source directory") help="Path to the source directory")
dir_parser.add_argument('--copy', action='store_true', dir_parser.add_argument('--copy', action='store_true',
help="Copy data to the project") help="Copy the dataset instead of saving source links")
parser.add_argument('-n', '--name', default=None,
help="Name of the new source")
parser.add_argument('-f', '--format', default=None, parser.add_argument('-f', '--format', default=None,
help="Name of the source dataset format (default: 'project')") help="Name of the source dataset format (default: 'project')")
parser.add_argument('-n', '--name', default=None,
help="Name of the source to be imported")
parser.add_argument('-p', '--project', dest='project_dir', default='.', parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)") help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--skip-check', action='store_true',
help="Skip source checking")
return parser return parser
def import_command(args): def import_command(args):
@ -99,6 +101,10 @@ def import_command(args):
if args.format: if args.format:
source['format'] = args.format source['format'] = args.format
project.add_source(name, source) project.add_source(name, source)
if not args.skip_check:
log.info("Checking the source...")
project.make_source_project(name)
project.save() project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \ log.info("Source '%s' has been added to the project, location: '%s'" \
@ -131,6 +137,10 @@ def import_command(args):
if args.format: if args.format:
source['format'] = args.format source['format'] = args.format
project.add_source(name, source) project.add_source(name, source)
if not args.skip_check:
log.info("Checking the source...")
project.make_source_project(name)
project.save() project.save()
log.info("Source '%s' has been added to the project, location: '%s'" \ log.info("Source '%s' has been added to the project, location: '%s'" \
@ -184,6 +194,8 @@ def build_export_parser(parser):
help="Output format") help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.', parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)") help="Directory of the project to operate on (default: current dir)")
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)") help="Additional arguments for converter (pass '-- -h' for help)")
return parser return parser
@ -192,7 +204,11 @@ def export_command(args):
project = load_project(args.project_dir) project = load_project(args.project_dir)
dst_dir = osp.abspath(args.dst_dir) dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False) if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % dst_dir)
return 1
os.makedirs(dst_dir, exist_ok=args.overwrite)
source_project = project.make_source_project(args.name) source_project = project.make_source_project(args.name)
source_project.make_dataset().export( source_project.make_dataset().export(

@ -4,18 +4,23 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from enum import Enum
from itertools import chain
import logging as log import logging as log
from lxml import etree as ET from lxml import etree as ET
import os import os
import os.path as osp import os.path as osp
from datumaro.components.converter import Converter from datumaro.components.converter import Converter
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType,
from datumaro.components.formats.voc import VocLabel, VocAction, \ LabelCategories
VocBodyPart, VocPose, VocTask, VocPath, VocColormap, VocInstColormap )
from datumaro.util import find from datumaro.components.formats.voc import (VocTask, VocPath,
VocInstColormap, VocPose,
parse_label_map, make_voc_label_map, make_voc_categories, write_label_map
)
from datumaro.util.image import save_image from datumaro.util.image import save_image
from datumaro.util.mask_tools import apply_colormap from datumaro.util.mask_tools import apply_colormap, remap_mask
def _write_xml_bbox(bbox, parent_elem): def _write_xml_bbox(bbox, parent_elem):
@ -27,13 +32,12 @@ def _write_xml_bbox(bbox, parent_elem):
ET.SubElement(bbox_elem, 'ymax').text = str(y + h) ET.SubElement(bbox_elem, 'ymax').text = str(y + h)
return bbox_elem return bbox_elem
class _Converter:
_LABELS = set([entry.name for entry in VocLabel])
_BODY_PARTS = set([entry.name for entry in VocBodyPart])
_ACTIONS = set([entry.name for entry in VocAction])
LabelmapType = Enum('LabelmapType', ['voc', 'source', 'guess'])
class _Converter:
def __init__(self, extractor, save_dir, def __init__(self, extractor, save_dir,
tasks=None, apply_colormap=True, save_images=False): tasks=None, apply_colormap=True, save_images=False, label_map=None):
assert tasks is None or isinstance(tasks, (VocTask, list)) assert tasks is None or isinstance(tasks, (VocTask, list))
if tasks is None: if tasks is None:
tasks = list(VocTask) tasks = list(VocTask)
@ -49,14 +53,12 @@ class _Converter:
self._apply_colormap = apply_colormap self._apply_colormap = apply_colormap
self._save_images = save_images self._save_images = save_images
self._label_categories = extractor.categories() \ self._load_categories(label_map)
.get(AnnotationType.label)
self._mask_categories = extractor.categories() \
.get(AnnotationType.mask)
def convert(self): def convert(self):
self.init_dirs() self.init_dirs()
self.save_subsets() self.save_subsets()
self.save_label_map()
def init_dirs(self): def init_dirs(self):
save_dir = self._save_dir save_dir = self._save_dir
@ -94,7 +96,8 @@ class _Converter:
self._images_dir = images_dir self._images_dir = images_dir
def get_label(self, label_id): def get_label(self, label_id):
return self._label_categories.items[label_id].name return self._extractor.categories()[AnnotationType.label] \
.items[label_id].name
def save_subsets(self): def save_subsets(self):
subsets = self._extractor.subsets() subsets = self._extractor.subsets()
@ -167,56 +170,64 @@ class _Converter:
layout_bboxes = [] layout_bboxes = []
for bbox in bboxes: for bbox in bboxes:
label = self.get_label(bbox.label) label = self.get_label(bbox.label)
if label in self._LABELS: if self._is_part(label):
main_bboxes.append(bbox)
elif label in self._BODY_PARTS:
layout_bboxes.append(bbox) layout_bboxes.append(bbox)
elif self._is_label(label):
main_bboxes.append(bbox)
for new_obj_id, obj in enumerate(main_bboxes): for new_obj_id, obj in enumerate(main_bboxes):
attr = obj.attributes attr = obj.attributes
obj_elem = ET.SubElement(root_elem, 'object') obj_elem = ET.SubElement(root_elem, 'object')
ET.SubElement(obj_elem, 'name').text = self.get_label(obj.label)
obj_label = self.get_label(obj.label)
ET.SubElement(obj_elem, 'name').text = obj_label
pose = attr.get('pose') pose = attr.get('pose')
if pose is not None: if pose is not None:
ET.SubElement(obj_elem, 'pose').text = VocPose[pose].name pose = VocPose[pose]
else:
pose = VocPose.Unspecified
ET.SubElement(obj_elem, 'pose').text = pose.name
truncated = attr.get('truncated') truncated = attr.get('truncated')
if truncated is not None: if truncated is not None:
ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated truncated = int(truncated)
else:
truncated = 0
ET.SubElement(obj_elem, 'truncated').text = '%d' % truncated
difficult = attr.get('difficult') difficult = attr.get('difficult')
if difficult is not None: if difficult is not None:
ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult difficult = int(difficult)
else:
difficult = 0
ET.SubElement(obj_elem, 'difficult').text = '%d' % difficult
bbox = obj.get_bbox() bbox = obj.get_bbox()
if bbox is not None: if bbox is not None:
_write_xml_bbox(bbox, obj_elem) _write_xml_bbox(bbox, obj_elem)
for part in VocBodyPart: for part_bbox in filter(lambda x: obj.id == x.group,
part_bbox = find(layout_bboxes, lambda x: \ layout_bboxes):
obj.id == x.group and \ part_elem = ET.SubElement(obj_elem, 'part')
self.get_label(x.label) == part.name) ET.SubElement(part_elem, 'name').text = \
if part_bbox is not None: self.get_label(part_bbox.label)
part_elem = ET.SubElement(obj_elem, 'part') _write_xml_bbox(part_bbox.get_bbox(), part_elem)
ET.SubElement(part_elem, 'name').text = part.name
_write_xml_bbox(part_bbox.get_bbox(), part_elem)
objects_with_parts.append(new_obj_id) objects_with_parts.append(new_obj_id)
actions = [x for x in labels actions = {k: v for k, v in obj.attributes.items()
if obj.id == x.group and \ if self._is_action(obj_label, k)}
self.get_label(x.label) in self._ACTIONS] actions_elem = ET.Element('actions')
if len(actions) != 0: for action in self._get_actions(obj_label):
actions_elem = ET.SubElement(obj_elem, 'actions') presented = action in actions and actions[action]
for action in VocAction: ET.SubElement(actions_elem, action).text = \
presented = find(actions, lambda x: \ '%d' % presented
self.get_label(x.label) == action.name) is not None
ET.SubElement(actions_elem, action.name).text = \
'%d' % presented
objects_with_actions[new_obj_id][action] = presented objects_with_actions[new_obj_id][action] = presented
if len(actions) != 0:
obj_elem.append(actions_elem)
if set(self._tasks) & set([None, if set(self._tasks) & set([None,
VocTask.detection, VocTask.detection,
@ -232,7 +243,7 @@ class _Converter:
for label_obj in labels: for label_obj in labels:
label = self.get_label(label_obj.label) label = self.get_label(label_obj.label)
if label not in self._LABELS: if not self._is_label(label):
continue continue
class_list = class_lists.get(item_id, set()) class_list = class_lists.get(item_id, set())
class_list.add(label_obj.label) class_list.add(label_obj.label)
@ -244,7 +255,7 @@ class _Converter:
if mask_obj.attributes.get('class') == True: if mask_obj.attributes.get('class') == True:
self.save_segm(osp.join(self._segm_dir, self.save_segm(osp.join(self._segm_dir,
item_id + VocPath.SEGM_EXT), item_id + VocPath.SEGM_EXT),
mask_obj, self._mask_categories.colormap) mask_obj)
if mask_obj.attributes.get('instances') == True: if mask_obj.attributes.get('instances') == True:
self.save_segm(osp.join(self._inst_dir, self.save_segm(osp.join(self._inst_dir,
item_id + VocPath.SEGM_EXT), item_id + VocPath.SEGM_EXT),
@ -284,9 +295,11 @@ class _Converter:
if len(action_list) == 0: if len(action_list) == 0:
return return
for action in VocAction: all_actions = set(chain(*(self._get_actions(l)
for l in self._label_map)))
for action in all_actions:
ann_file = osp.join(self._action_subsets_dir, ann_file = osp.join(self._action_subsets_dir,
'%s_%s.txt' % (action.name, subset_name)) '%s_%s.txt' % (action, subset_name))
with open(ann_file, 'w') as f: with open(ann_file, 'w') as f:
for item, objs in action_list.items(): for item, objs in action_list.items():
if not objs: if not objs:
@ -302,23 +315,17 @@ class _Converter:
if len(class_lists) == 0: if len(class_lists) == 0:
return return
label_cat = self._extractor.categories().get(AnnotationType.label, None) for label in self._label_map:
if not label_cat:
log.warn("Unable to save classification task lists "
"as source does not provide class labels. Skipped.")
return
for label in VocLabel:
ann_file = osp.join(self._cls_subsets_dir, ann_file = osp.join(self._cls_subsets_dir,
'%s_%s.txt' % (label.name, subset_name)) '%s_%s.txt' % (label, subset_name))
with open(ann_file, 'w') as f: with open(ann_file, 'w') as f:
for item, item_labels in class_lists.items(): for item, item_labels in class_lists.items():
if not item_labels: if not item_labels:
continue continue
item_labels = [label_cat.items[l].name for l in item_labels] item_labels = [self._strip_label(self.get_label(l))
presented = label.name in item_labels for l in item_labels]
f.write('%s % d\n' % \ presented = label in item_labels
(item, 1 if presented else -1)) f.write('%s % d\n' % (item, 1 if presented else -1))
def save_clsdet_lists(self, subset_name, clsdet_list): def save_clsdet_lists(self, subset_name, clsdet_list):
os.makedirs(self._cls_subsets_dir, exist_ok=True) os.makedirs(self._cls_subsets_dir, exist_ok=True)
@ -348,17 +355,124 @@ class _Converter:
else: else:
f.write('%s\n' % (item)) f.write('%s\n' % (item))
def save_segm(self, path, annotation, colormap): def save_segm(self, path, annotation, colormap=None):
data = annotation.image data = annotation.image
if self._apply_colormap: if self._apply_colormap:
if colormap is None: if colormap is None:
colormap = VocColormap colormap = self._categories[AnnotationType.mask].colormap
data = self._remap_mask(data)
data = apply_colormap(data, colormap) data = apply_colormap(data, colormap)
save_image(path, data) save_image(path, data)
def save_label_map(self):
    """Hand the label map in use to write_label_map.

    The target path is VocPath.LABELMAP_FILE inside the save directory.
    """
    write_label_map(
        osp.join(self._save_dir, VocPath.LABELMAP_FILE),
        self._label_map)
@staticmethod
def _strip_label(label):
    """Return label lower-cased, with surrounding whitespace removed."""
    lowered = label.lower()
    return lowered.strip()
# Resolve the label map used for the export and derive the output categories.
#
# label_map_source may be a LabelmapType member name ('voc', 'source',
# 'guess'), None (handled like 'guess'), a ready label map dict, or the path
# of an existing label map file. Any other value raises Exception.
#
# Judging by how this class reads them, label map values are
# [color, parts, actions] lists: slot 0 receives a color at the end of this
# method, slot 1 is searched by _is_part and slot 2 is returned by
# _get_actions. NOTE(review): confirm against make_voc_label_map and
# parse_label_map, which are not visible here.
#
# Sets self._categories, self._label_map and self._label_id_mapping.
def _load_categories(self, label_map_source=None):
if label_map_source == LabelmapType.voc.name:
# strictly use VOC default labelmap
label_map = make_voc_label_map()
elif label_map_source == LabelmapType.source.name:
# build the label map from the input dataset labels only; the color slot
# is left as None and the parts and actions slots are empty.
# NOTE(review): names are stored as given here, while _is_label and
# _get_actions look entries up by the _strip_label-normalized name, and
# label.attributes are not carried over as in the 'guess' branch -
# confirm this is intended.
labels = self._extractor.categories() \
.get(AnnotationType.label, LabelCategories())
label_map = OrderedDict(
(item.name, [None, [], []]) for item in labels.items)
elif label_map_source in [LabelmapType.guess.name, None]:
# build the label map for the union of VOC and input dataset labels
label_map = make_voc_label_map()
# becomes True as soon as one input label is missing from the VOC map
rebuild_colormap = False
source_labels = self._extractor.categories() \
.get(AnnotationType.label, LabelCategories())
for label in source_labels.items:
label_name = self._strip_label(label.name)
if label_name not in label_map:
rebuild_colormap = True
# NOTE(review): an input label that has attributes replaces an existing
# entry of the same name, which drops that entry's color and parts list -
# confirm this is intended.
if label.attributes or label_name not in label_map:
label_map[label_name] = [None, [], label.attributes]
# a new label was added: clear every color so that none of the previous
# colors is passed on to make_voc_categories
if rebuild_colormap:
for item in label_map.values():
item[0] = None
elif isinstance(label_map_source, dict):
# the dict is used as is (not copied), so the color write-back at the end
# of this method modifies the caller's object
label_map = label_map_source
elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
else:
raise Exception("Wrong labelmap specified, "
"expected one of %s or a file path" % \
', '.join(t.name for t in LabelmapType))
self._categories = make_voc_categories(label_map)
self._label_map = label_map
# store each color of the resulting colormap back into the label map entry
# of the label found at the same index of the output label categories
colormap = self._categories[AnnotationType.mask].colormap
for label_id, color in colormap.items():
label_desc = label_map[
self._categories[AnnotationType.label].items[label_id].name]
label_desc[0] = color
# must run last: _make_label_id_map reads self._categories
self._label_id_mapping = self._make_label_id_map()
def _is_label(self, s):
    """Tell whether s, once normalized, has an entry in the label map."""
    key = self._strip_label(s)
    entry = self._label_map.get(key)
    return entry is not None
def _is_part(self, s):
    """Tell whether s, once normalized, is listed as a part of any label."""
    part_name = self._strip_label(s)
    return any(part_name in desc[1] for desc in self._label_map.values())
def _is_action(self, label, s):
    """Tell whether s, once normalized, is one of the actions of label."""
    action_name = self._strip_label(s)
    label_actions = self._get_actions(label)
    return action_name in label_actions
def _get_actions(self, label):
    """Return the actions stored for label, or [] when it has no entry."""
    entry = self._label_map.get(self._strip_label(label))
    return entry[2] if entry else []
def _make_label_id_map(self):
    """Build a function translating source label ids into output label ids.

    Source labels are matched to the output label categories by name. A
    source label without a match is mapped to id 0 and reported in a
    warning.

    Returns:
        A callable taking a source label id and returning the output label
        id. It raises KeyError for an id the source categories do not
        define.
    """
    # The source may declare no label categories at all. Treat that as an
    # empty label list (as _load_categories does) instead of failing with a
    # KeyError while the converter is being constructed.
    source_cat = self._extractor.categories().get(AnnotationType.label)
    source_items = source_cat.items if source_cat is not None else []

    source_labels = {
        src_id: label.name for src_id, label in enumerate(source_items)
    }
    target_labels = {
        label.name: dst_id for dst_id, label in
        enumerate(self._categories[AnnotationType.label].items)
    }
    id_mapping = {
        src_id: target_labels.get(src_label, 0)
        for src_id, src_label in source_labels.items()
    }

    void_labels = [src_label for src_label in source_labels.values()
        if src_label not in target_labels]
    if void_labels:
        # log.warn is a deprecated alias of log.warning
        log.warning("The following labels are remapped to background: %s",
            ', '.join(void_labels))

    def map_id(src_id):
        return id_mapping[src_id]
    return map_id
def _remap_mask(self, mask):
    """Apply remap_mask to mask with the mapping from _make_label_id_map."""
    id_mapping = self._label_id_mapping
    return remap_mask(mask, id_mapping)
class VocConverter(Converter): class VocConverter(Converter):
def __init__(self, def __init__(self,
tasks=None, save_images=False, apply_colormap=False, tasks=None, save_images=False, apply_colormap=False, label_map=None,
cmdline_args=None): cmdline_args=None):
super().__init__() super().__init__()
@ -366,6 +480,7 @@ class VocConverter(Converter):
'tasks': tasks, 'tasks': tasks,
'save_images': save_images, 'save_images': save_images,
'apply_colormap': apply_colormap, 'apply_colormap': apply_colormap,
'label_map': label_map,
} }
if cmdline_args is not None: if cmdline_args is not None:
@ -375,6 +490,12 @@ class VocConverter(Converter):
def _split_tasks_string(s): def _split_tasks_string(s):
return [VocTask[i.strip()] for i in s.split(',')] return [VocTask[i.strip()] for i in s.split(',')]
@staticmethod
def _get_labelmap(s):
    """Parse the value of the --label-map command line option.

    Returns s unchanged when it is the path of an existing file, otherwise
    the name of the LabelmapType member called s.

    Raises:
        argparse.ArgumentTypeError: if s is neither an existing file nor a
            LabelmapType member name. argparse reports this exception as a
            regular usage error, whereas the KeyError raised by the enum
            lookup would escape the parser as a traceback.
    """
    import argparse

    if osp.isfile(s):
        return s
    try:
        return LabelmapType[s].name
    except KeyError:
        raise argparse.ArgumentTypeError(
            "Unknown label map '%s': expected a file path or one of %s" % (
                s, ', '.join(t.name for t in LabelmapType))) from None
@classmethod @classmethod
def build_cmdline_parser(cls, parser=None): def build_cmdline_parser(cls, parser=None):
import argparse import argparse
@ -386,6 +507,9 @@ class VocConverter(Converter):
parser.add_argument('--apply-colormap', type=bool, default=True, parser.add_argument('--apply-colormap', type=bool, default=True,
help="Use colormap for class and instance masks " help="Use colormap for class and instance masks "
"(default: %(default)s)") "(default: %(default)s)")
parser.add_argument('--label-map', type=cls._get_labelmap, default=None,
help="Labelmap file path or one of %s" % \
', '.join(t.name for t in LabelmapType))
parser.add_argument('--tasks', type=cls._split_tasks_string, parser.add_argument('--tasks', type=cls._split_tasks_string,
default=None, default=None,
help="VOC task filter, comma-separated list of {%s} " help="VOC task filter, comma-separated list of {%s} "

@ -53,6 +53,11 @@ class Categories:
def __init__(self, attributes=None): def __init__(self, attributes=None):
if attributes is None: if attributes is None:
attributes = set() attributes = set()
else:
if not isinstance(attributes, set):
attributes = set(attributes)
for attr in attributes:
assert isinstance(attr, str)
self.attributes = attributes self.attributes = attributes
def __eq__(self, other): def __eq__(self, other):
@ -62,7 +67,7 @@ class Categories:
(self.attributes == other.attributes) (self.attributes == other.attributes)
class LabelCategories(Categories): class LabelCategories(Categories):
Category = namedtuple('Category', ['name', 'parent']) Category = namedtuple('Category', ['name', 'parent', 'attributes'])
def __init__(self, items=None, attributes=None): def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes) super().__init__(attributes=attributes)
@ -81,11 +86,18 @@ class LabelCategories(Categories):
indices[item.name] = index indices[item.name] = index
self._indices = indices self._indices = indices
def add(self, name, parent=None): def add(self, name, parent=None, attributes=None):
assert name not in self._indices assert name not in self._indices
if attributes is None:
attributes = set()
else:
if not isinstance(attributes, set):
attributes = set(attributes)
for attr in attributes:
assert isinstance(attr, str)
index = len(self.items) index = len(self.items)
self.items.append(self.Category(name, parent)) self.items.append(self.Category(name, parent, attributes))
self._indices[name] = index self._indices[name] = index
def find(self, name): def find(self, name):

@ -4,18 +4,15 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from collections import defaultdict from collections import defaultdict
from itertools import chain
import os import os
import os.path as osp import os.path as osp
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
from datumaro.components.extractor import (Extractor, DatasetItem, from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, LabelObject, MaskObject, BboxObject, AnnotationType, LabelObject, MaskObject, BboxObject,
LabelCategories, MaskCategories
) )
from datumaro.components.formats.voc import (VocLabel, VocAction, from datumaro.components.formats.voc import (
VocBodyPart, VocTask, VocPath, VocColormap, VocInstColormap, VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories
VocIgnoredLabel
) )
from datumaro.util import dir_items from datumaro.util import dir_items
from datumaro.util.image import lazy_image from datumaro.util.image import lazy_image
@ -24,31 +21,6 @@ from datumaro.util.mask_tools import lazy_mask, invert_colormap
_inverse_inst_colormap = invert_colormap(VocInstColormap) _inverse_inst_colormap = invert_colormap(VocInstColormap)
# pylint: disable=pointless-statement
# Build the categories dict of the VOC extractor. It holds label categories
# with the VocLabel, VocAction and VocBodyPart names (added in that order)
# and mask categories with the VocColormap colors re-keyed by label_id below.
def _make_voc_categories():
categories = {}
label_categories = LabelCategories()
for label in chain(VocLabel, VocAction, VocBodyPart):
label_categories.add(label.name)
categories[AnnotationType.label] = label_categories
# Translate a VocColormap key into a mask colormap key: 0 and VocIgnoredLabel
# are kept unchanged; any other key is resolved as a VocLabel value and
# becomes the first value returned by label_categories.find for that label
# name, plus one.
def label_id(class_index):
if class_index in [0, VocIgnoredLabel]:
return class_index
class_label = VocLabel(class_index).name
label_id, _ = label_categories.find(class_label)
return label_id + 1
colormap = { label_id(idx): tuple(color) \
for idx, color in VocColormap.items() }
mask_categories = MaskCategories(colormap)
# bare attribute access, evaluated only for its side effect (hence the
# pylint pointless-statement suppression around this function)
mask_categories.inverse_colormap # force init
categories[AnnotationType.mask] = mask_categories
return categories
# pylint: enable=pointless-statement
class VocExtractor(Extractor): class VocExtractor(Extractor):
class Subset(Extractor): class Subset(Extractor):
def __init__(self, name, parent): def __init__(self, name, parent):
@ -58,8 +30,8 @@ class VocExtractor(Extractor):
self.items = [] self.items = []
def __iter__(self): def __iter__(self):
for item in self.items: for item_id in self.items:
yield self._parent._get(item, self._name) yield self._parent._get(item_id, self._name)
def __len__(self): def __len__(self):
return len(self.items) return len(self.items)
@ -87,10 +59,10 @@ class VocExtractor(Extractor):
label_annotations = defaultdict(list) label_annotations = defaultdict(list)
label_anno_files = [s for s in dir_files \ label_anno_files = [s for s in dir_files \
if '_' in s and s[s.rfind('_') + 1:] in subset_names] if '_' in s and s[s.rfind('_') + 1:] in subset_names]
for ann_file in label_anno_files: for ann_filename in label_anno_files:
with open(osp.join(subsets_dir, ann_file + '.txt'), 'r') as f: with open(osp.join(subsets_dir, ann_filename + '.txt'), 'r') as f:
label = ann_file[:ann_file.rfind('_')] label = ann_filename[:ann_filename.rfind('_')]
label_id = VocLabel[label].value label_id = self._get_label_id(label)
for line in f: for line in f:
item, present = line.split() item, present = line.split()
if present == '1': if present == '1':
@ -113,7 +85,11 @@ class VocExtractor(Extractor):
self._annotations[VocTask.detection] = det_annotations self._annotations[VocTask.detection] = det_annotations
def _load_categories(self): def _load_categories(self):
self._categories = _make_voc_categories() label_map = None
label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
self._categories = make_voc_categories(label_map)
def __init__(self, path, task): def __init__(self, path, task):
super().__init__() super().__init__()
@ -146,17 +122,17 @@ class VocExtractor(Extractor):
for item in subset: for item in subset:
yield item yield item
def _get(self, item, subset_name): def _get(self, item_id, subset_name):
image = None image = None
image_path = osp.join(self._path, VocPath.IMAGES_DIR, image_path = osp.join(self._path, VocPath.IMAGES_DIR,
item + VocPath.IMAGE_EXT) item_id + VocPath.IMAGE_EXT)
if osp.isfile(image_path): if osp.isfile(image_path):
image = lazy_image(image_path) image = lazy_image(image_path)
annotations = self._get_annotations(item) annotations = self._get_annotations(item_id)
return DatasetItem(annotations=annotations, return DatasetItem(annotations=annotations,
id=item, subset=subset_name, image=image) id=item_id, subset=subset_name, image=image)
def _get_label_id(self, label): def _get_label_id(self, label):
label_id, _ = self._categories[AnnotationType.label].find(label) label_id, _ = self._categories[AnnotationType.label].find(label)
@ -187,11 +163,10 @@ class VocExtractor(Extractor):
cls_annotations = self._annotations.get(VocTask.classification) cls_annotations = self._annotations.get(VocTask.classification)
if cls_annotations is not None and \ if cls_annotations is not None and \
self._task is VocTask.classification: self._task is VocTask.classification:
item_labels = cls_annotations.get(item) item_labels = cls_annotations.get(item)
if item_labels is not None: if item_labels is not None:
for label in item_labels: for label_id in item_labels:
label_id = self._get_label_id(VocLabel(label).name)
item_annotations.append(LabelObject(label_id)) item_annotations.append(LabelObject(label_id))
det_annotations = self._annotations.get(VocTask.detection) det_annotations = self._annotations.get(VocTask.detection)
@ -215,16 +190,16 @@ class VocExtractor(Extractor):
continue continue
difficult_elem = object_elem.find('difficult') difficult_elem = object_elem.find('difficult')
if difficult_elem is not None: attributes['difficult'] = difficult_elem is not None and \
attributes['difficult'] = (difficult_elem.text == '1') difficult_elem.text == '1'
truncated_elem = object_elem.find('truncated') truncated_elem = object_elem.find('truncated')
if truncated_elem is not None: attributes['truncated'] = truncated_elem is not None and \
attributes['truncated'] = (truncated_elem.text == '1') truncated_elem.text == '1'
occluded_elem = object_elem.find('occluded') occluded_elem = object_elem.find('occluded')
if occluded_elem is not None: attributes['occluded'] = occluded_elem is not None and \
attributes['occluded'] = (occluded_elem.text == '1') occluded_elem.text == '1'
pose_elem = object_elem.find('pose') pose_elem = object_elem.find('pose')
if pose_elem is not None: if pose_elem is not None:
@ -238,34 +213,34 @@ class VocExtractor(Extractor):
attributes['point'] = point attributes['point'] = point
actions_elem = object_elem.find('actions') actions_elem = object_elem.find('actions')
if actions_elem is not None and \ actions = {a: False
self._task is VocTask.action_classification: for a in self._categories[AnnotationType.label] \
for action in VocAction: .items[obj_label_id].attributes}
action_elem = actions_elem.find(action.name) if actions_elem is not None:
if action_elem is None or action_elem.text != '1': for action_elem in actions_elem:
continue actions[action_elem.tag] = (action_elem.text == '1')
for action, present in actions.items():
act_label_id = self._get_label_id(action.name) attributes[action] = present
assert group in [None, obj_id]
group = obj_id for part_elem in object_elem.findall('part'):
item_annotations.append(LabelObject(act_label_id, part = part_elem.find('name').text
group=obj_id)) part_label_id = self._get_label_id(part)
bbox = self._parse_bbox(part_elem)
if self._task is VocTask.person_layout: group = obj_id
for part_elem in object_elem.findall('part'):
part = part_elem.find('name').text if self._task is not VocTask.person_layout:
part_label_id = self._get_label_id(part) break
bbox = self._parse_bbox(part_elem) item_annotations.append(BboxObject(
group = obj_id *bbox, label=part_label_id,
item_annotations.append(BboxObject( group=obj_id))
*bbox, label=part_label_id,
group=obj_id)) if self._task is VocTask.person_layout and group is None:
continue
if self._task in [VocTask.action_classification, VocTask.person_layout]: if self._task is VocTask.action_classification and not actions:
if group is None: continue
continue
item_annotations.append(BboxObject(
item_annotations.append(BboxObject(*obj_bbox, label=obj_label_id, *obj_bbox, label=obj_label_id,
attributes=attributes, id=obj_id, group=group)) attributes=attributes, id=obj_id, group=group))
return item_annotations return item_annotations
@ -283,58 +258,48 @@ class VocExtractor(Extractor):
return None return None
class VocClassificationExtractor(VocExtractor): class VocClassificationExtractor(VocExtractor):
_ANNO_DIR = 'Main'
def __init__(self, path): def __init__(self, path):
super().__init__(path, task=VocTask.classification) super().__init__(path, task=VocTask.classification)
subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR) subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
subsets = self._load_subsets(subsets_dir) subsets = self._load_subsets(subsets_dir)
self._subsets = subsets self._subsets = subsets
self._load_cls_annotations(subsets_dir, subsets) self._load_cls_annotations(subsets_dir, subsets)
class VocDetectionExtractor(VocExtractor): class VocDetectionExtractor(VocExtractor):
_ANNO_DIR = 'Main'
def __init__(self, path): def __init__(self, path):
super().__init__(path, task=VocTask.detection) super().__init__(path, task=VocTask.detection)
subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR) subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Main')
subsets = self._load_subsets(subsets_dir) subsets = self._load_subsets(subsets_dir)
self._subsets = subsets self._subsets = subsets
self._load_det_annotations() self._load_det_annotations()
class VocSegmentationExtractor(VocExtractor): class VocSegmentationExtractor(VocExtractor):
_ANNO_DIR = 'Segmentation'
def __init__(self, path): def __init__(self, path):
super().__init__(path, task=VocTask.segmentation) super().__init__(path, task=VocTask.segmentation)
subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR) subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Segmentation')
subsets = self._load_subsets(subsets_dir) subsets = self._load_subsets(subsets_dir)
self._subsets = subsets self._subsets = subsets
class VocLayoutExtractor(VocExtractor): class VocLayoutExtractor(VocExtractor):
_ANNO_DIR = 'Layout'
def __init__(self, path): def __init__(self, path):
super().__init__(path, task=VocTask.person_layout) super().__init__(path, task=VocTask.person_layout)
subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR) subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Layout')
subsets = self._load_subsets(subsets_dir) subsets = self._load_subsets(subsets_dir)
self._subsets = subsets self._subsets = subsets
self._load_det_annotations() self._load_det_annotations()
class VocActionExtractor(VocExtractor): class VocActionExtractor(VocExtractor):
_ANNO_DIR = 'Action'
def __init__(self, path): def __init__(self, path):
super().__init__(path, task=VocTask.action_classification) super().__init__(path, task=VocTask.action_classification)
subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, self._ANNO_DIR) subsets_dir = osp.join(path, VocPath.SUBSETS_DIR, 'Action')
subsets = self._load_subsets(subsets_dir) subsets = self._load_subsets(subsets_dir)
self._subsets = subsets self._subsets = subsets
@ -414,7 +379,7 @@ class VocResultsExtractor(Extractor):
if mark != task_desc['mark']: if mark != task_desc['mark']:
continue continue
label_id = VocLabel[label].value label_id = self._get_label_id(label)
anns = defaultdict(list) anns = defaultdict(list)
with open(osp.join(task_dir, ann_file + ann_ext), 'r') as f: with open(osp.join(task_dir, ann_file + ann_ext), 'r') as f:
for line in f: for line in f:
@ -441,7 +406,11 @@ class VocResultsExtractor(Extractor):
VocTask.action_classification) VocTask.action_classification)
def _load_categories(self): def _load_categories(self):
self._categories = _make_voc_categories() label_map = None
label_map_path = osp.join(self._path, VocPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
self._categories = make_voc_categories(label_map)
def _get_label_id(self, label): def _get_label_id(self, label):
label_id = self._categories[AnnotationType.label].find(label) label_id = self._categories[AnnotationType.label].find(label)
@ -511,9 +480,8 @@ class VocComp_1_2_Extractor(VocResultsExtractor):
if cls_ann is not None: if cls_ann is not None:
for desc in cls_ann: for desc in cls_ann:
label_id, conf = desc label_id, conf = desc
label_id = self._get_label_id(VocLabel(int(label_id)).name)
annotations.append(LabelObject( annotations.append(LabelObject(
label_id, int(label_id),
attributes={ 'score': float(conf) } attributes={ 'score': float(conf) }
)) ))
@ -538,11 +506,10 @@ class VocComp_3_4_Extractor(VocResultsExtractor):
if det_ann is not None: if det_ann is not None:
for desc in det_ann: for desc in det_ann:
label_id, conf, left, top, right, bottom = desc label_id, conf, left, top, right, bottom = desc
label_id = self._get_label_id(VocLabel(int(label_id)).name)
annotations.append(BboxObject( annotations.append(BboxObject(
x=float(left), y=float(top), x=float(left), y=float(top),
w=float(right) - float(left), h=float(bottom) - float(top), w=float(right) - float(left), h=float(bottom) - float(top),
label=label_id, label=int(label_id),
attributes={ 'score': float(conf) } attributes={ 'score': float(conf) }
)) ))
@ -639,7 +606,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
conf = float(layout_elem.find('confidence').text) conf = float(layout_elem.find('confidence').text)
parts = [] parts = []
for part_elem in layout_elem.findall('part'): for part_elem in layout_elem.findall('part'):
label_id = VocBodyPart[part_elem.find('class').text].value label_id = self._get_label_id(part_elem.find('class').text)
bbox_elem = part_elem.find('bndbox') bbox_elem = part_elem.find('bndbox')
xmin = float(bbox_elem.find('xmin').text) xmin = float(bbox_elem.find('xmin').text)
xmax = float(bbox_elem.find('xmax').text) xmax = float(bbox_elem.find('xmax').text)
@ -671,8 +638,7 @@ class VocComp_7_8_Extractor(VocResultsExtractor):
} }
for part in parts: for part in parts:
part_id, bbox = part label_id, bbox = part
label_id = self._get_label_id(VocBodyPart(part_id).name)
annotations.append(BboxObject( annotations.append(BboxObject(
*bbox, label=label_id, *bbox, label=label_id,
attributes=attributes)) attributes=attributes))
@ -691,6 +657,12 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
self._subsets = subsets self._subsets = subsets
self._annotations = dict(annotations) self._annotations = dict(annotations)
def _load_categories(self):
    """Build the categories for action results from the VocAction enum.

    Every action name becomes a label with no parts and no
    label-specific attributes.
    """
    from collections import OrderedDict
    from datumaro.components.formats.voc import VocAction

    # The color slot must be None when no color is defined:
    # make_voc_categories() only checks "is not None", so an empty list
    # would be taken as a real color and end up in the colormap.
    label_map = OrderedDict((a.name, [None, [], []]) for a in VocAction)
    self._categories = make_voc_categories(label_map)
def _get_annotations(self, item, subset_name): def _get_annotations(self, item, subset_name):
annotations = [] annotations = []
@ -698,9 +670,8 @@ class VocComp_9_10_Extractor(VocResultsExtractor):
if action_ann is not None: if action_ann is not None:
for desc in action_ann: for desc in action_ann:
action_id, obj_id, conf = desc action_id, obj_id, conf = desc
label_id = self._get_label_id(VocAction(int(action_id)).name)
annotations.append(LabelObject( annotations.append(LabelObject(
label_id, action_id,
attributes={ attributes={
'score': conf, 'score': conf,
'object_id': int(obj_id), 'object_id': int(obj_id),

@ -5,8 +5,13 @@
from collections import OrderedDict from collections import OrderedDict
from enum import Enum from enum import Enum
from itertools import chain
import numpy as np import numpy as np
from datumaro.components.extractor import (AnnotationType,
LabelCategories, MaskCategories
)
VocTask = Enum('VocTask', [ VocTask = Enum('VocTask', [
'classification', 'classification',
@ -17,6 +22,7 @@ VocTask = Enum('VocTask', [
]) ])
VocLabel = Enum('VocLabel', [ VocLabel = Enum('VocLabel', [
('background', 0),
('aeroplane', 1), ('aeroplane', 1),
('bicycle', 2), ('bicycle', 2),
('bird', 3), ('bird', 3),
@ -37,10 +43,9 @@ VocLabel = Enum('VocLabel', [
('sofa', 18), ('sofa', 18),
('train', 19), ('train', 19),
('tvmonitor', 20), ('tvmonitor', 20),
('ignored', 255),
]) ])
VocIgnoredLabel = 255
VocPose = Enum('VocPose', [ VocPose = Enum('VocPose', [
'Unspecified', 'Unspecified',
'Left', 'Left',
@ -86,7 +91,7 @@ def generate_colormap(length=256):
) )
VocColormap = {id: color for id, color in generate_colormap(256).items() VocColormap = {id: color for id, color in generate_colormap(256).items()
if id in [l.value for l in VocLabel] + [0, VocIgnoredLabel]} if id in [l.value for l in VocLabel]}
VocInstColormap = generate_colormap(256) VocInstColormap = generate_colormap(256)
class VocPath: class VocPath:
@ -97,6 +102,7 @@ class VocPath:
SUBSETS_DIR = 'ImageSets' SUBSETS_DIR = 'ImageSets'
IMAGE_EXT = '.jpg' IMAGE_EXT = '.jpg'
SEGM_EXT = '.png' SEGM_EXT = '.png'
LABELMAP_FILE = 'labelmap.txt'
TASK_DIR = { TASK_DIR = {
VocTask.classification: 'Main', VocTask.classification: 'Main',
@ -105,3 +111,95 @@ class VocPath:
VocTask.action_classification: 'Action', VocTask.action_classification: 'Action',
VocTask.person_layout: 'Layout', VocTask.person_layout: 'Layout',
} }
def make_voc_label_map():
    """Return the default VOC label map, ordered by label value.

    Each entry maps a label name to a ``[color, parts, actions]`` list.
    The color is taken from VocColormap; only the 'person' label gets
    parts (all VocBodyPart names) and actions (all VocAction names).
    """
    label_map = OrderedDict()
    for voc_label in sorted(VocLabel, key=lambda l: l.value):
        label_map[voc_label.name] = [VocColormap[voc_label.value], [], []]

    person_desc = label_map[VocLabel.person.name]
    person_desc[1] = [part.name for part in VocBodyPart]
    person_desc[2] = [action.name for action in VocAction]
    return label_map
def parse_label_map(path):
    """Read a label map file into an ordered mapping.

    Every meaningful line has the form ``name:r,g,b:parts:actions``, where
    ``parts`` and ``actions`` are comma-separated lists. Trailing fields
    may be omitted or left empty. Blank lines and lines starting with
    '#' are skipped.

    Args:
        path: path to the label map file; a falsy value yields None.

    Returns:
        None if ``path`` is falsy, otherwise an OrderedDict of
        ``name -> [color, parts, actions]`` in file order. ``color`` is
        None when absent, otherwise a tuple of ints in reversed
        (b, g, r) order.

    Raises:
        ValueError: if a color does not have exactly three components
            or a component is not an integer.
    """
    if not path:
        return None

    label_map = OrderedDict()
    with open(path, 'r') as f:
        for line in f:
            # skip empty and commented lines
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            # name, color, parts, actions
            label_desc = line.split(':')
            name = label_desc[0]

            color = None
            if 1 < len(label_desc) and label_desc[1]:
                channels = label_desc[1].split(',')
                # Explicit check instead of assert: asserts vanish
                # under "python -O", and int() below already reports
                # bad components with ValueError.
                if len(channels) != 3:
                    raise ValueError(
                        "Label '%s' has wrong color, expected 'r,g,b', "
                        "got '%s'" % (name, label_desc[1]))
                # the file stores r,g,b; keep the reversed order in memory
                color = tuple(int(c) for c in reversed(channels))

            parts = []
            if 2 < len(label_desc) and label_desc[2]:
                parts = label_desc[2].split(',')

            actions = []
            if 3 < len(label_desc) and label_desc[3]:
                actions = label_desc[3].split(',')

            label_map[name] = [color, parts, actions]
    return label_map
def write_label_map(path, label_map):
    """Write a label map to ``path``, one ``name:r,g,b:parts:actions`` line
    per label, preceded by a header comment.

    ``label_map`` maps a label name to ``[color, parts, actions]``. The
    color components are written in reversed order; a falsy color is
    written as an empty field.
    """
    def comma_joined(values):
        return ','.join(str(v) for v in values)

    with open(path, 'w') as out:
        out.write('# label:color_rgb:parts:actions\n')
        for name, desc in label_map.items():
            color = desc[0]
            fields = [
                name,
                comma_joined(color[::-1]) if color else '',
                comma_joined(desc[1]),
                comma_joined(desc[2]),
            ]
            out.write('%s\n' % ':'.join(fields))
# pylint: disable=pointless-statement
def make_voc_categories(label_map=None):
    """Build label and mask categories from a label map.

    Args:
        label_map: mapping of ``name -> [color, parts, actions]``; when
            None, the default map from make_voc_label_map() is used.

    Returns:
        A dict with a LabelCategories under AnnotationType.label and a
        MaskCategories under AnnotationType.mask.
    """
    if label_map is None:
        label_map = make_voc_label_map()

    categories = {}

    label_categories = LabelCategories()
    label_categories.attributes.update(['difficult', 'truncated', 'occluded'])
    # the actions of a label become its label-specific attributes
    for label, desc in label_map.items():
        label_categories.add(label, attributes=desc[2])
    # parts are added after all labels, de-duplicated in first-seen order
    unique_parts = OrderedDict((part, None) for part in
        chain.from_iterable(desc[1] for desc in label_map.values()))
    for part in unique_parts:
        label_categories.add(part)
    categories[AnnotationType.label] = label_categories

    has_colors = any(desc[0] is not None for desc in label_map.values())
    if not has_colors:
        # no colors given at all: generate a fresh colormap
        colormap = generate_colormap(len(label_map))
    else:
        # Copy only the colors that are defined; labels without a color
        # must not put None entries into the colormap.
        # NOTE(review): find() is assumed to return a sequence whose first
        # element is the label index - confirm against LabelCategories.
        colormap = {}
        for name, desc in label_map.items():
            if desc[0] is not None:
                colormap[label_categories.find(name)[0]] = desc[0]
    mask_categories = MaskCategories(colormap)
    mask_categories.inverse_colormap # force init
    categories[AnnotationType.mask] = mask_categories

    return categories
# pylint: enable=pointless-statement

@ -69,6 +69,16 @@ def apply_colormap(mask, colormap=None):
painted_mask = np.reshape(painted_mask, (*mask.shape, 3)) painted_mask = np.reshape(painted_mask, (*mask.shape, 3))
return painted_mask.astype(np.float32) return painted_mask.astype(np.float32)
def remap_mask(mask, map_fn):
    """Change mask elements from one colormap to another.

    Args:
        mask: 2-dimensional numpy array.
        map_fn: callable applied to each element; it receives the element
            wrapped in a 1-element array and returns the new value.

    Returns:
        An array with the same shape as ``mask`` holding the mapped values.
    """
    assert len(mask.shape) == 2

    # np.apply_along_axis raises ValueError when there is nothing to
    # iterate over, so an empty mask is returned as an empty copy.
    if mask.size == 0:
        return mask.copy()

    shape = mask.shape
    flat = np.reshape(mask, (-1, 1))
    remapped = np.apply_along_axis(map_fn, 1, flat)
    return np.reshape(remapped, shape)
def load_mask(path, colormap=None): def load_mask(path, colormap=None):
mask = load_image(path) mask = load_image(path)

@ -1,5 +1,4 @@
import cv2 import cv2
from itertools import zip_longest
import numpy as np import numpy as np
import os import os
import os.path as osp import os.path as osp
@ -235,6 +234,7 @@ class VocExtractorTest(TestCase):
{ {
'pose': VOC.VocPose(1).name, 'pose': VOC.VocPose(1).name,
'truncated': True, 'truncated': True,
'occluded': False,
'difficult': False, 'difficult': False,
}, },
obj1.attributes) obj1.attributes)
@ -365,16 +365,9 @@ class VocExtractorTest(TestCase):
self.assertFalse(obj2 is None) self.assertFalse(obj2 is None)
self.assertListEqual([4, 5, 2, 2], obj2.get_bbox()) self.assertListEqual([4, 5, 2, 2], obj2.get_bbox())
count = 1
for action in VOC.VocAction: for action in VOC.VocAction:
if action.value % 2 == 1: attr = obj2.attributes[action.name]
count += 1 self.assertEqual(attr, action.value % 2)
ann = find(item.annotations,
lambda x: x.type == AnnotationType.label and \
get_label(extractor, x.label) == action.name)
self.assertFalse(ann is None)
self.assertTrue(obj2.id == ann.group)
self.assertEqual(count, len(item.annotations))
subset_name = 'test' subset_name = 'test'
generated_subset = generated_subsets[subset_name] generated_subset = generated_subsets[subset_name]
@ -388,50 +381,66 @@ class VocExtractorTest(TestCase):
self.assertEqual(0, len(item.annotations)) self.assertEqual(0, len(item.annotations))
class VocConverterTest(TestCase): class VocConverterTest(TestCase):
def _test_can_save_voc(self, extractor_type, converter_type, test_dir): def _test_can_save_voc(self, src_extractor, converter, test_dir,
dummy_dir = osp.join(test_dir, 'dummy') target_extractor=None):
generate_dummy_voc(dummy_dir) converter(src_extractor, test_dir)
gen_extractor = extractor_type(dummy_dir)
conv_dir = osp.join(test_dir, 'converted') result_extractor = VocImporter()(test_dir).make_dataset()
converter = converter_type() if target_extractor is None:
converter(gen_extractor, conv_dir) target_extractor = src_extractor
conv_extractor = extractor_type(conv_dir) if AnnotationType.label in target_extractor.categories():
for item_a, item_b in zip_longest(gen_extractor, conv_extractor): self.assertEqual(
target_extractor.categories()[AnnotationType.label].items,
result_extractor.categories()[AnnotationType.label].items)
if AnnotationType.mask in target_extractor.categories():
self.assertEqual(
target_extractor.categories()[AnnotationType.mask].colormap,
result_extractor.categories()[AnnotationType.mask].colormap)
self.assertEqual(len(target_extractor), len(result_extractor))
for item_a, item_b in zip(target_extractor, result_extractor):
self.assertEqual(item_a.id, item_b.id) self.assertEqual(item_a.id, item_b.id)
self.assertEqual(len(item_a.annotations), len(item_b.annotations)) self.assertEqual(len(item_a.annotations), len(item_b.annotations))
for ann_a, ann_b in zip(item_a.annotations, item_b.annotations): for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
self.assertEqual(ann_a.type, ann_b.type) self.assertEqual(ann_a.type, ann_b.type)
def _test_can_save_voc_dummy(self, extractor_type, converter, test_dir):
    """Generate a dummy VOC dataset under ``test_dir`` and run the save
    round-trip check on it with the given converter."""
    source_dir = osp.join(test_dir, 'dummy')
    target_dir = osp.join(test_dir, 'converted')

    generate_dummy_voc(source_dir)

    self._test_can_save_voc(extractor_type(source_dir), converter,
        target_dir)
def test_can_save_voc_cls(self): def test_can_save_voc_cls(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_can_save_voc( self._test_can_save_voc_dummy(
VocClassificationExtractor, VocClassificationConverter, VocClassificationExtractor, VocClassificationConverter(label_map='voc'),
test_dir.path) test_dir.path)
def test_can_save_voc_det(self): def test_can_save_voc_det(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_can_save_voc( self._test_can_save_voc_dummy(
VocDetectionExtractor, VocDetectionConverter, VocDetectionExtractor, VocDetectionConverter(label_map='voc'),
test_dir.path) test_dir.path)
def test_can_save_voc_segm(self): def test_can_save_voc_segm(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_can_save_voc( self._test_can_save_voc_dummy(
VocSegmentationExtractor, VocSegmentationConverter, VocSegmentationExtractor, VocSegmentationConverter(label_map='voc'),
test_dir.path) test_dir.path)
def test_can_save_voc_layout(self): def test_can_save_voc_layout(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_can_save_voc( self._test_can_save_voc_dummy(
VocLayoutExtractor, VocLayoutConverter, VocLayoutExtractor, VocLayoutConverter(label_map='voc'),
test_dir.path) test_dir.path)
def test_can_save_voc_action(self): def test_can_save_voc_action(self):
with TestDir() as test_dir: with TestDir() as test_dir:
self._test_can_save_voc( self._test_can_save_voc_dummy(
VocActionExtractor, VocActionConverter, VocActionExtractor, VocActionConverter(label_map='voc'),
test_dir.path) test_dir.path)
def test_can_save_dataset_with_no_subsets(self): def test_can_save_dataset_with_no_subsets(self):
@ -451,28 +460,121 @@ class VocConverterTest(TestCase):
for item in items: for item in items:
yield item yield item
def categories(self):
return VOC.make_voc_categories()
with TestDir() as test_dir:
self._test_can_save_voc(TestExtractor(), VocConverter(label_map='voc'),
test_dir.path)
def test_dataset_with_voc_labelmap(self):
class SrcExtractor(Extractor):
def __iter__(self):
yield DatasetItem(id=1, annotations=[
BboxObject(2, 3, 4, 5, label=0, id=1),
BboxObject(1, 2, 3, 4, label=1, id=2),
])
def categories(self): def categories(self):
label_cat = LabelCategories() label_cat = LabelCategories()
for label in VOC.VocLabel: label_cat.add(VOC.VocLabel(1).name)
label_cat.add(label.name) label_cat.add('non_voc_label')
return { return {
AnnotationType.label: label_cat, AnnotationType.label: label_cat,
} }
class DstExtractor(Extractor):
def __iter__(self):
yield DatasetItem(id=1, annotations=[
BboxObject(2, 3, 4, 5, label=0, id=1),
])
def categories(self):
return VOC.make_voc_categories()
with TestDir() as test_dir: with TestDir() as test_dir:
src_extractor = TestExtractor() self._test_can_save_voc(
converter = VocConverter() SrcExtractor(), VocConverter(label_map='voc'),
test_dir.path, target_extractor=DstExtractor())
converter(src_extractor, test_dir.path) def test_dataset_with_guessed_labelmap(self):
class SrcExtractor(Extractor):
def __iter__(self):
yield DatasetItem(id=1, annotations=[
BboxObject(2, 3, 4, 5, label=0, id=1),
BboxObject(1, 2, 3, 4, label=1, id=2),
])
dst_extractor = VocImporter()(test_dir.path).make_dataset() def categories(self):
label_cat = LabelCategories()
label_cat.add(VOC.VocLabel(1).name)
label_cat.add('non_voc_label')
return {
AnnotationType.label: label_cat,
}
class DstExtractor(Extractor):
def __iter__(self):
yield DatasetItem(id=1, annotations=[
BboxObject(2, 3, 4, 5, label=0, id=1),
BboxObject(1, 2, 3, 4,
label=self.categories()[AnnotationType.label] \
.find('non_voc_label')[0], id=2),
])
self.assertEqual(len(src_extractor), len(dst_extractor)) def categories(self):
for item_a, item_b in zip_longest(src_extractor, dst_extractor): label_map = VOC.make_voc_label_map()
self.assertEqual(item_a.id, item_b.id) label_map['non_voc_label'] = [None, [], []]
self.assertEqual(len(item_a.annotations), len(item_b.annotations)) for label_desc in label_map.values():
for ann_a, ann_b in zip(item_a.annotations, item_b.annotations): label_desc[0] = None # rebuild colormap
self.assertEqual(ann_a.type, ann_b.type) return VOC.make_voc_categories(label_map)
with TestDir() as test_dir:
self._test_can_save_voc(
SrcExtractor(), VocConverter(label_map='guess'),
test_dir.path, target_extractor=DstExtractor())
def test_dataset_with_fixed_labelmap(self):
    """Saving with an explicit label map keeps only the labels listed in
    the map and fills in the missing action attributes."""
    label_map = {
        'label': [None, ['label_part1', 'label_part2'], ['act1', 'act2']]
    }

    class SrcExtractor(Extractor):
        def __iter__(self):
            source_annotations = [
                BboxObject(2, 3, 4, 5, label=0, id=1),
                BboxObject(1, 2, 3, 4, label=1, id=2, group=2,
                    attributes={'act1': True}),
                BboxObject(2, 3, 4, 5, label=2, id=3, group=2),
                BboxObject(2, 3, 4, 6, label=3, id=4, group=2),
            ]
            yield DatasetItem(id=1, annotations=source_annotations)

        def categories(self):
            source_labels = LabelCategories()
            source_labels.add('foreign_label')
            source_labels.add('label', attributes=['act1', 'act2'])
            source_labels.add('label_part1')
            source_labels.add('label_part2')
            return { AnnotationType.label: source_labels }

    class DstExtractor(Extractor):
        def __iter__(self):
            expected_annotations = [
                BboxObject(1, 2, 3, 4, label=0, id=2, group=2,
                    attributes={'act1': True, 'act2': False}),
                BboxObject(2, 3, 4, 5, label=1, id=3, group=2),
                BboxObject(2, 3, 4, 6, label=2, id=4, group=2),
            ]
            yield DatasetItem(id=1, annotations=expected_annotations)

        def categories(self):
            return VOC.make_voc_categories(label_map)

    with TestDir() as test_dir:
        self._test_can_save_voc(
            SrcExtractor(), VocConverter(label_map=label_map),
            test_dir.path, target_extractor=DstExtractor())
class VocImporterTest(TestCase): class VocImporterTest(TestCase):
def test_can_import(self): def test_can_import(self):
@ -487,3 +589,16 @@ class VocImporterTest(TestCase):
self.assertEqual( self.assertEqual(
sum([len(s) for _, s in subsets.items()]), sum([len(s) for _, s in subsets.items()]),
len(dataset)) len(dataset))
class VocFormatTest(TestCase):
    """Checks of the VOC format helper functions."""

    def test_can_write_and_parse_labelmap(self):
        """A label map written to disk is parsed back unchanged."""
        expected = VOC.make_voc_label_map()
        expected['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]

        with TestDir() as test_dir:
            labelmap_path = osp.join(test_dir.path, 'test.txt')

            VOC.write_label_map(labelmap_path, expected)
            actual = VOC.parse_label_map(labelmap_path)

            self.assertEqual(expected, actual)
Loading…
Cancel
Save