You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
303 lines
11 KiB
Python
303 lines
11 KiB
Python
|
|
# Copyright (C) 2019-2020 Intel Corporation
|
|
#
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
from collections import defaultdict
|
|
import logging as log
|
|
import numpy as np
|
|
import os.path as osp
|
|
from defusedxml import ElementTree
|
|
|
|
from datumaro.components.extractor import (SourceExtractor, DatasetItem,
|
|
AnnotationType, Label, Mask, Bbox, CompiledMask
|
|
)
|
|
from datumaro.util import dir_items
|
|
from datumaro.util.image import Image
|
|
from datumaro.util.mask_tools import lazy_mask, invert_colormap
|
|
|
|
from .format import (
|
|
VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories
|
|
)
|
|
|
|
|
|
_inverse_inst_colormap = invert_colormap(VocInstColormap)
|
|
|
|
class _VocExtractor(SourceExtractor):
    """Base extractor for PASCAL VOC subsets.

    Resolves the dataset root from the subset list file path and loads
    the label/mask categories shared by all VOC task extractors.
    """

    def __init__(self, path):
        assert osp.isfile(path), path
        self._path = path
        # subset file lives at <root>/ImageSets/<task>/<subset>.txt,
        # so the dataset root is three directory levels up
        self._dataset_dir = osp.dirname(osp.dirname(osp.dirname(path)))

        super().__init__(subset=osp.splitext(osp.basename(path))[0])

        self._categories = self._load_categories(self._dataset_dir)

        label_color = lambda label_idx: \
            self._categories[AnnotationType.mask].colormap.get(label_idx, None)
        log.debug("Loaded labels: %s" % ', '.join(
            "'%s' %s" % (l.name, ('(%s, %s, %s)' % c) if c else '')
            for i, l, c in ((i, l, label_color(i)) for i, l in enumerate(
                self._categories[AnnotationType.label].items
            ))
        ))
        self._items = self._load_subset_list(path)

    def categories(self):
        """Return the categories built from the dataset's label map."""
        return self._categories

    def __len__(self):
        return len(self._items)

    def _get_label_id(self, label):
        """Map a label name to its index; the label must be known."""
        label_id, _ = self._categories[AnnotationType.label].find(label)
        assert label_id is not None, label
        return label_id

    @staticmethod
    def _load_categories(dataset_path):
        """Build VOC categories, honoring an optional labelmap file."""
        label_map = None
        label_map_path = osp.join(dataset_path, VocPath.LABELMAP_FILE)
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        return make_voc_categories(label_map)

    @staticmethod
    def _load_subset_list(subset_path):
        """Read item ids from a subset list file, one id per line.

        Only the first whitespace-separated token of each line is kept.
        Blank lines (e.g. a trailing newline at end of file) are skipped;
        previously they raised IndexError from `line.split()[0]`.
        """
        with open(subset_path) as f:
            return [line.split()[0] for line in f if line.strip()]
|
|
|
|
class VocClassificationExtractor(_VocExtractor):
    """Extractor for the VOC classification task.

    Labels are read from per-class '<label>_<subset>.txt' files located
    in the same directory as the subset list file.
    """

    def __iter__(self):
        raw_anns = self._load_annotations()
        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
                item_id + VocPath.IMAGE_EXT)
            anns = self._parse_annotations(raw_anns, item_id)
            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)

    def _load_annotations(self):
        """Collect positive labels for each item from per-class files.

        Returns:
            dict: item id -> list of label ids marked present ('1').
        """
        annotations = defaultdict(list)
        task_dir = osp.dirname(self._path)
        anno_files = [s for s in dir_items(task_dir, '.txt')
            if s.endswith('_' + osp.basename(self._path))]
        for ann_filename in anno_files:
            with open(osp.join(task_dir, ann_filename)) as f:
                # file name pattern is '<label>_<subset>.txt'
                label = ann_filename[:ann_filename.rfind('_')]
                label_id = self._get_label_id(label)
                for line in f:
                    if not line.strip():
                        # tolerate blank/trailing lines; previously the
                        # 2-value unpack below raised ValueError on them
                        continue
                    item, present = line.split()
                    if present == '1':
                        annotations[item].append(label_id)

        return dict(annotations)

    @staticmethod
    def _parse_annotations(raw_anns, item_id):
        """Convert the stored label ids of an item into Label annotations."""
        return [Label(label_id) for label_id in raw_anns.get(item_id, [])]
|
|
|
|
class _VocXmlExtractor(_VocExtractor):
    """Extractor for VOC tasks backed by per-item XML annotation files
    (detection, person layout, action classification).
    """

    def __init__(self, path, task):
        super().__init__(path)
        self._task = task

    def __iter__(self):
        anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR)

        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = item_id + VocPath.IMAGE_EXT
            height, width = 0, 0

            anns = []
            ann_file = osp.join(anno_dir, item_id + '.xml')
            if osp.isfile(ann_file):
                root_elem = ElementTree.parse(ann_file)
                height = root_elem.find('size/height')
                if height is not None:
                    height = int(height.text)
                width = root_elem.find('size/width')
                if width is not None:
                    width = int(width.text)
                # prefer the file name recorded in the XML, if any
                filename_elem = root_elem.find('filename')
                if filename_elem is not None:
                    image = filename_elem.text
                anns = self._parse_annotations(root_elem)

            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image)
            if height and width:
                image = Image(path=image, size=(height, width))

            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)

    def _parse_annotations(self, root_elem):
        """Parse all 'object' elements of an annotation XML into Bbox
        annotations, including difficult/truncated/occluded/pose/point
        attributes, action flags, and (for person_layout) body parts.
        """
        item_annotations = []

        for obj_id, object_elem in enumerate(root_elem.findall('object')):
            obj_id += 1  # VOC object ids are 1-based
            attributes = {}
            group = obj_id

            obj_label_id = None
            label_elem = object_elem.find('name')
            if label_elem is not None:
                obj_label_id = self._get_label_id(label_elem.text)

            obj_bbox = self._parse_bbox(object_elem)

            # objects without a label or a bbox carry no usable annotation
            if obj_label_id is None or obj_bbox is None:
                continue

            difficult_elem = object_elem.find('difficult')
            attributes['difficult'] = difficult_elem is not None and \
                difficult_elem.text == '1'

            truncated_elem = object_elem.find('truncated')
            attributes['truncated'] = truncated_elem is not None and \
                truncated_elem.text == '1'

            occluded_elem = object_elem.find('occluded')
            attributes['occluded'] = occluded_elem is not None and \
                occluded_elem.text == '1'

            pose_elem = object_elem.find('pose')
            if pose_elem is not None:
                attributes['pose'] = pose_elem.text

            point_elem = object_elem.find('point')
            if point_elem is not None:
                point_x = point_elem.find('x')
                point_y = point_elem.find('y')
                point = [float(point_x.text), float(point_y.text)]
                attributes['point'] = point

            actions_elem = object_elem.find('actions')
            # start with every declared action attribute set to False
            actions = {a: False
                for a in self._categories[AnnotationType.label] \
                    .items[obj_label_id].attributes}
            if actions_elem is not None:
                for action_elem in actions_elem:
                    actions[action_elem.tag] = (action_elem.text == '1')
            for action, present in actions.items():
                attributes[action] = present

            has_parts = False
            for part_elem in object_elem.findall('part'):
                part = part_elem.find('name').text
                part_label_id = self._get_label_id(part)
                part_bbox = self._parse_bbox(part_elem)

                # parts are only meaningful for the person_layout task
                if self._task is not VocTask.person_layout:
                    break
                if part_bbox is None:
                    continue
                has_parts = True
                item_annotations.append(Bbox(*part_bbox, label=part_label_id,
                    group=group))

            attributes_elem = object_elem.find('attributes')
            if attributes_elem is not None:
                for attr_elem in attributes_elem.iter('attribute'):
                    attributes[attr_elem.find('name').text] = \
                        attr_elem.find('value').text

            if self._task is VocTask.person_layout and not has_parts:
                continue
            if self._task is VocTask.action_classification and not actions:
                continue

            item_annotations.append(Bbox(*obj_bbox, label=obj_label_id,
                attributes=attributes, id=obj_id, group=group))

        return item_annotations

    @staticmethod
    def _parse_bbox(object_elem):
        """Read a 'bndbox' child element as [x, y, w, h].

        Returns None when the element is absent — callers rely on a
        None return to skip such objects (previously a missing element
        raised AttributeError instead).
        """
        bbox_elem = object_elem.find('bndbox')
        if bbox_elem is None:
            return None
        xmin = float(bbox_elem.find('xmin').text)
        xmax = float(bbox_elem.find('xmax').text)
        ymin = float(bbox_elem.find('ymin').text)
        ymax = float(bbox_elem.find('ymax').text)
        return [xmin, ymin, xmax - xmin, ymax - ymin]
|
|
|
|
class VocDetectionExtractor(_VocXmlExtractor):
    """XML-based extractor preconfigured for the VOC detection task."""

    def __init__(self, path):
        super().__init__(path, task=VocTask.detection)
|
|
|
|
class VocLayoutExtractor(_VocXmlExtractor):
    """XML-based extractor preconfigured for the VOC person layout task."""

    def __init__(self, path):
        super().__init__(path, task=VocTask.person_layout)
|
|
|
|
class VocActionExtractor(_VocXmlExtractor):
    """XML-based extractor preconfigured for the VOC action
    classification task."""

    def __init__(self, path):
        super().__init__(path, task=VocTask.action_classification)
|
|
|
|
class VocSegmentationExtractor(_VocExtractor):
    """Extractor for VOC class and instance segmentation masks."""

    def __iter__(self):
        for item_id in self._items:
            log.debug("Reading item '%s'" % item_id)
            image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR,
                item_id + VocPath.IMAGE_EXT)
            anns = self._load_annotations(item_id)
            yield DatasetItem(id=item_id, subset=self._subset,
                image=image, annotations=anns)

    @staticmethod
    def _lazy_extract_mask(mask, c):
        # bind c in a closure so the boolean mask is computed on demand
        return lambda: mask == c

    def _load_annotations(self, item_id):
        """Load mask annotations for an item, if mask files exist.

        Prefers instance masks (one grouped Mask per object instance);
        falls back to the class-only mask with one Mask per class found.
        """
        item_annotations = []

        class_mask = None
        segm_path = osp.join(self._dataset_dir, VocPath.SEGMENTATION_DIR,
            item_id + VocPath.SEGM_EXT)
        if osp.isfile(segm_path):
            inverse_cls_colormap = \
                self._categories[AnnotationType.mask].inverse_colormap
            class_mask = lazy_mask(segm_path, inverse_cls_colormap)

        instances_mask = None
        inst_path = osp.join(self._dataset_dir, VocPath.INSTANCES_DIR,
            item_id + VocPath.SEGM_EXT)
        if osp.isfile(inst_path):
            instances_mask = lazy_mask(inst_path, _inverse_inst_colormap)

        if instances_mask is not None:
            compiled_mask = CompiledMask(class_mask, instances_mask)

            if class_mask is not None:
                label_cat = self._categories[AnnotationType.label]
                instance_labels = compiled_mask.get_instance_labels()
            else:
                # no class mask: instances exist but labels are unknown
                instance_labels = {i: None
                    for i in range(compiled_mask.instance_count)}

            for instance_id, label_id in instance_labels.items():
                image = compiled_mask.lazy_extract(instance_id)

                attributes = {}
                if label_id is not None:
                    # pre-fill action attributes declared for this label
                    actions = {a: False
                        for a in label_cat.items[label_id].attributes
                    }
                    attributes.update(actions)

                item_annotations.append(Mask(
                    image=image, label=label_id,
                    attributes=attributes, group=instance_id
                ))
        elif class_mask is not None:
            # log.warn is a deprecated alias; use log.warning instead
            log.warning("item '%s': has only class segmentation, "
                "instance masks will not be available" % item_id)
            class_mask = class_mask()
            classes = np.unique(class_mask)
            for label_id in classes:
                image = self._lazy_extract_mask(class_mask, label_id)
                item_annotations.append(Mask(image=image, label=label_id))

        return item_annotations
|