[Datumaro] Add masks to tfrecord format (#1156)

* Employ transforms and item wrapper

* Add image class and tests

* Add image info support to formats

* Fix cli

* Fix merge and voc converter

* Update remote images extractor

* Codacy

* Remove item name, require path in Image

* Merge images of dataset items

* Update tests

* Add image dir converter

* Update Datumaro format

* Update COCO format with image info

* Update CVAT format with image info

* Update TFrecord format with image info

* Update VOC format with image info

* Update YOLO format with image info

* Update dataset manager bindings with image info

* Add image name to id transform

* Fix coco export

* Add masks support for tfrecord

* Refactor coco

* Fix comparison

* Remove dead code

* Extract common code for instances
main
zhiltsov-max 6 years ago committed by GitHub
parent f208cfeaf5
commit b36f402f9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -14,12 +14,13 @@ import pycocotools.mask as mask_utils
from datumaro.components.converter import Converter
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME,
AnnotationType, Points, Mask
AnnotationType, Points
)
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util import find
from datumaro.util.image import save_image
import datumaro.util.mask_tools as mask_tools
import datumaro.util.annotation_tools as anno_tools
from .format import CocoTask, CocoPath
@ -194,7 +195,7 @@ class _InstancesConverter(_TaskConverter):
if inst[1]:
inst[1] = sum(new_segments, [])
else:
mask = cls.merge_masks(new_segments)
mask = mask_tools.merge_masks(new_segments)
inst[2] = mask_tools.mask_to_rle(mask)
return instances
@ -205,8 +206,8 @@ class _InstancesConverter(_TaskConverter):
masks = [a for a in group if a.type == AnnotationType.mask]
anns = boxes + polygons + masks
leader = self.find_group_leader(anns)
bbox = self.compute_bbox(anns)
leader = anno_tools.find_group_leader(anns)
bbox = anno_tools.compute_bbox(anns)
mask = None
polygons = [p.points for p in polygons]
@ -228,68 +229,29 @@ class _InstancesConverter(_TaskConverter):
if masks:
if mask is not None:
masks += [mask]
mask = self.merge_masks(masks)
mask = mask_tools.merge_masks([m.image for m in masks])
if mask is not None:
mask = mask_tools.mask_to_rle(mask)
polygons = []
else:
if masks:
mask = self.merge_masks(masks)
mask = mask_tools.merge_masks([m.image for m in masks])
polygons += mask_tools.mask_to_polygons(mask)
mask = None
return [leader, polygons, mask, bbox]
@staticmethod
def find_group_leader(group):
return max(group, key=lambda x: x.get_area())
@staticmethod
def merge_masks(masks):
if not masks:
return None
def get_mask(m):
if isinstance(m, Mask):
return m.image
else:
return m
binary_mask = get_mask(masks[0])
for m in masks[1:]:
binary_mask |= get_mask(m)
return binary_mask
@staticmethod
def compute_bbox(annotations):
boxes = [ann.get_bbox() for ann in annotations]
x0 = min((b[0] for b in boxes), default=0)
y0 = min((b[1] for b in boxes), default=0)
x1 = max((b[0] + b[2] for b in boxes), default=0)
y1 = max((b[1] + b[3] for b in boxes), default=0)
return [x0, y0, x1 - x0, y1 - y0]
@staticmethod
def find_instance_anns(annotations):
return [a for a in annotations
if a.type in { AnnotationType.bbox, AnnotationType.polygon } or \
a.type == AnnotationType.mask and a.label is not None
if a.type in { AnnotationType.bbox,
AnnotationType.polygon, AnnotationType.mask }
]
@classmethod
def find_instances(cls, annotations):
instance_anns = cls.find_instance_anns(annotations)
ann_groups = []
for g_id, group in groupby(instance_anns, lambda a: a.group):
if not g_id:
ann_groups.extend(([a] for a in group))
else:
ann_groups.append(list(group))
return ann_groups
return anno_tools.find_instances(cls.find_instance_anns(annotations))
def save_annotations(self, item):
instances = self.find_instances(item.annotations)

@ -16,98 +16,34 @@ from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME,
from datumaro.components.converter import Converter
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import encode_image
from datumaro.util.mask_tools import merge_masks
from datumaro.util.annotation_tools import (compute_bbox,
find_group_leader, find_instances)
from datumaro.util.tf_util import import_tf as _import_tf
from .format import DetectionApiPath
tf = _import_tf()
# we need it to filter out non-ASCII characters, otherwise training will crash
# filter out non-ASCII characters, otherwise training will crash
_printable = set(string.printable)
def _make_printable(s):
    """Return `s` with every character that is not in `_printable` removed."""
    return ''.join(ch for ch in s if ch in _printable)
def _make_tf_example(item, get_label_id, get_label, save_images=False):
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
features = {
'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
'image/filename': bytes_feature(
('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')),
}
if not item.has_image:
raise Exception("Failed to export dataset item '%s': "
"item has no image info" % item.id)
height, width = item.image.size
features.update({
'image/height': int64_feature(height),
'image/width': int64_feature(width),
})
features.update({
'image/encoded': bytes_feature(b''),
'image/format': bytes_feature(b'')
})
if save_images:
if item.has_image and item.image.has_data:
fmt = DetectionApiPath.IMAGE_FORMAT
buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT)
features.update({
'image/encoded': bytes_feature(buffer),
'image/format': bytes_feature(fmt.encode('utf-8')),
})
else:
log.warning("Item '%s' has no image" % item.id)
xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box)
ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box)
classes_text = [] # List of string class name of bounding box (1 per box)
classes = [] # List of integer class id of bounding box (1 per box)
boxes = [ann for ann in item.annotations if ann.type is AnnotationType.bbox]
for box in boxes:
box_label = _make_printable(get_label(box.label))
xmins.append(box.points[0] / width)
xmaxs.append(box.points[2] / width)
ymins.append(box.points[1] / height)
ymaxs.append(box.points[3] / height)
classes_text.append(box_label.encode('utf-8'))
classes.append(get_label_id(box.label))
if boxes:
features.update({
'image/object/bbox/xmin': float_list_feature(xmins),
'image/object/bbox/xmax': float_list_feature(xmaxs),
'image/object/bbox/ymin': float_list_feature(ymins),
'image/object/bbox/ymax': float_list_feature(ymaxs),
'image/object/class/text': bytes_list_feature(classes_text),
'image/object/class/label': int64_list_feature(classes),
})
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
tf_example = tf.train.Example(
features=tf.train.Features(feature=features))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
return tf_example
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
class TfDetectionApiConverter(Converter, CliPlugin):
@classmethod
@ -115,16 +51,29 @@ class TfDetectionApiConverter(Converter, CliPlugin):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
parser.add_argument('--save-masks', action='store_true',
help="Include instance masks (default: %(default)s)")
return parser
def __init__(self, save_images=False):
def __init__(self, save_images=False, save_masks=False):
super().__init__()
self._save_images = save_images
self._save_masks = save_masks
def __call__(self, extractor, save_dir):
os.makedirs(save_dir, exist_ok=True)
label_categories = extractor.categories().get(AnnotationType.label,
LabelCategories())
get_label = lambda label_id: label_categories.items[label_id].name \
if label_id is not None else ''
label_ids = OrderedDict((label.name, 1 + idx)
for idx, label in enumerate(label_categories.items))
map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
self._get_label = get_label
self._get_label_id = map_label_id
subsets = extractor.subsets()
if len(subsets) == 0:
subsets = [ None ]
@ -136,14 +85,6 @@ class TfDetectionApiConverter(Converter, CliPlugin):
subset_name = DEFAULT_SUBSET_NAME
subset = extractor
label_categories = subset.categories().get(AnnotationType.label,
LabelCategories())
get_label = lambda label_id: label_categories.items[label_id].name \
if label_id is not None else ''
label_ids = OrderedDict((label.name, 1 + idx)
for idx, label in enumerate(label_categories.items))
map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE)
with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
for label, idx in label_ids.items():
@ -157,10 +98,106 @@ class TfDetectionApiConverter(Converter, CliPlugin):
anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name))
with tf.io.TFRecordWriter(anno_path) as writer:
for item in subset:
tf_example = _make_tf_example(
item,
get_label=get_label,
get_label_id=map_label_id,
save_images=self._save_images,
)
tf_example = self._make_tf_example(item)
writer.write(tf_example.SerializeToString())
@staticmethod
def _find_instances(annotations):
    """Group the bbox and mask annotations of an item into instances.

    Annotations of any other type are dropped before the remaining ones
    are handed to `find_instances`.
    """
    instance_types = {AnnotationType.bbox, AnnotationType.mask}
    candidates = (ann for ann in annotations if ann.type in instance_types)
    return find_instances(candidates)
def _find_instance_parts(self, group, img_width, img_height):
    """Reduce one annotation group to `[leader, mask, bbox]`.

    Args:
        group: annotations belonging to a single instance; only bboxes
            and masks are considered.
        img_width, img_height: accepted but not used by this method.

    Returns:
        A list `[leader, mask, bbox]` where `leader` is the result of
        `find_group_leader`, `bbox` is the result of `compute_bbox`, and
        `mask` is the result of `merge_masks` over the group's mask images
        when `self._save_masks` is set, otherwise `None`.
    """
    def of_type(kind):
        return [ann for ann in group if ann.type == kind]

    box_anns = of_type(AnnotationType.bbox)
    mask_anns = of_type(AnnotationType.mask)
    members = box_anns + mask_anns

    # The leader and bbox are computed before the masks are merged,
    # as merging may touch the underlying mask data.
    leader = find_group_leader(members)
    bbox = compute_bbox(members)

    merged = None
    if self._save_masks:
        merged = merge_masks([ann.image for ann in mask_anns])

    return [leader, merged, bbox]
def _export_instances(self, instances, width, height):
    """Build the per-object features of a TF example.

    Args:
        instances: iterable of `(leader, mask, box)` triples, where `box`
            is indexed as `[x, y, w, h]`.
        width, height: image dimensions used to normalize box coordinates.

    Returns:
        A dict of 'image/object/...' features. It is empty when there are
        no instances; the 'image/object/mask' entry is present only when
        `self._save_masks` is set and at least one instance was exported.
    """
    # All lists below are parallel: one entry per exported instance.
    xmins, xmaxs = [], []  # normalized left / right x coordinates
    ymins, ymaxs = [], []  # normalized top / bottom y coordinates
    classes_text = []  # encoded class names
    classes = []  # class ids
    masks = []  # PNG-encoded instance masks (b'' when there is no mask)

    for leader, mask, box in instances:
        label = _make_printable(self._get_label(leader.label))
        classes_text.append(label.encode('utf-8'))
        classes.append(self._get_label_id(leader.label))

        left, top = box[0], box[1]
        xmins.append(left / width)
        xmaxs.append((left + box[2]) / width)
        ymins.append(top / height)
        ymaxs.append((top + box[3]) / height)

        if self._save_masks:
            masks.append(encode_image(mask, '.png')
                if mask is not None else b'')

    if not classes:
        return {}

    features = {
        'image/object/bbox/xmin': float_list_feature(xmins),
        'image/object/bbox/xmax': float_list_feature(xmaxs),
        'image/object/bbox/ymin': float_list_feature(ymins),
        'image/object/bbox/ymax': float_list_feature(ymaxs),
        'image/object/class/text': bytes_list_feature(classes_text),
        'image/object/class/label': int64_list_feature(classes),
    }
    if masks:
        features['image/object/mask'] = bytes_list_feature(masks)
    return features
def _make_tf_example(self, item):
    """Convert a dataset item into a `tf.train.Example`.

    The example carries the item id, file name, image size, the encoded
    image (only when `self._save_images` is set and image data is
    available, otherwise empty bytes) and the per-object features
    produced by `_export_instances`.

    Raises:
        Exception: if the item has no image info.
    """
    filename = '%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)
    features = {
        'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
        'image/filename': bytes_feature(filename.encode('utf-8')),
    }

    if not item.has_image:
        raise Exception("Failed to export dataset item '%s': "
            "item has no image info" % item.id)
    height, width = item.image.size

    features['image/height'] = int64_feature(height)
    features['image/width'] = int64_feature(width)

    # Empty placeholders are always written; they are replaced below
    # when the image data is actually exported.
    features['image/encoded'] = bytes_feature(b'')
    features['image/format'] = bytes_feature(b'')
    if self._save_images:
        if item.has_image and item.image.has_data:
            fmt = DetectionApiPath.IMAGE_FORMAT
            buffer = encode_image(item.image.data,
                DetectionApiPath.IMAGE_EXT)
            features['image/encoded'] = bytes_feature(buffer)
            features['image/format'] = bytes_feature(fmt.encode('utf-8'))
        else:
            log.warning("Item '%s' has no image" % item.id)

    groups = self._find_instances(item.annotations)
    instances = [self._find_instance_parts(group, width, height)
        for group in groups]
    features.update(self._export_instances(instances, width, height))

    return tf.train.Example(
        features=tf.train.Features(feature=features))

@ -10,7 +10,7 @@ import re
from datumaro.components.extractor import (SourceExtractor,
DEFAULT_SUBSET_NAME, DatasetItem,
AnnotationType, Bbox, LabelCategories
AnnotationType, Bbox, Mask, LabelCategories
)
from datumaro.util.image import Image, decode_image, lazy_image
from datumaro.util.tf_util import import_tf as _import_tf
@ -147,6 +147,8 @@ class TfDetectionApiExtractor(SourceExtractor):
labels = tf.sparse.to_dense(
parsed_record['image/object/class/text'],
default_value=b'').numpy()
masks = tf.sparse.to_dense(
parsed_record['image/object/mask']).numpy()
for label, label_id in zip(labels, label_ids):
label = label.decode('utf-8')
@ -163,15 +165,38 @@ class TfDetectionApiExtractor(SourceExtractor):
item_id = osp.splitext(frame_filename)[0]
annotations = []
for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]:
for shape_id, shape in enumerate(
np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]):
label = shape[0].decode('utf-8')
x = clamp(shape[1] * frame_width, 0, frame_width)
y = clamp(shape[2] * frame_height, 0, frame_height)
w = clamp(shape[3] * frame_width, 0, frame_width) - x
h = clamp(shape[4] * frame_height, 0, frame_height) - y
annotations.append(Bbox(x, y, w, h,
label=dataset_labels.get(label)
))
mask = None
if len(masks) != 0:
mask = masks[shape_id]
if mask is not None:
if isinstance(mask, bytes):
mask = lazy_image(mask, decode_image)
annotations.append(Mask(image=mask,
label=dataset_labels.get(label)
))
else:
x = clamp(shape[1] * frame_width, 0, frame_width)
y = clamp(shape[2] * frame_height, 0, frame_height)
w = clamp(shape[3] * frame_width, 0, frame_width) - x
h = clamp(shape[4] * frame_height, 0, frame_height) - y
annotations.append(Bbox(x, y, w, h,
label=dataset_labels.get(label)
))
image_size = None
if frame_height and frame_width:
image_size = (frame_height, frame_width)
image_params = {}
if frame_image and frame_format:
image_params['data'] = lazy_image(frame_image, decode_image)
if frame_filename and images_dir:
image_params['path'] = osp.join(images_dir, frame_filename)
image_size = None
if frame_height and frame_width:

@ -3,16 +3,16 @@
#
# SPDX-License-Identifier: MIT
from itertools import groupby
import logging as log
import os.path as osp
import pycocotools.mask as mask_utils
from datumaro.components.extractor import (Transform, AnnotationType,
Mask, RleMask, Polygon, Bbox)
RleMask, Polygon, Bbox)
from datumaro.components.cli_plugin import CliPlugin
import datumaro.util.mask_tools as mask_tools
from datumaro.util.annotation_tools import find_group_leader, find_instances
class CropCoveredSegments(Transform, CliPlugin):
@ -125,7 +125,7 @@ class MergeInstanceSegments(Transform, CliPlugin):
if not polygons and not masks:
return []
leader = cls.find_group_leader(polygons + masks)
leader = find_group_leader(polygons + masks)
instance = []
# Build the resulting mask
@ -138,9 +138,10 @@ class MergeInstanceSegments(Transform, CliPlugin):
instance += polygons # keep unused polygons
if masks:
masks = [m.image for m in masks]
if mask is not None:
masks += [mask]
mask = cls.merge_masks(masks)
mask = mask_tools.merge_masks(masks)
if mask is None:
return instance
@ -154,41 +155,10 @@ class MergeInstanceSegments(Transform, CliPlugin):
)
return instance
@staticmethod
def find_group_leader(group):
return max(group, key=lambda x: x.get_area())
@staticmethod
def merge_masks(masks):
if not masks:
return None
def get_mask(m):
if isinstance(m, Mask):
return m.image
else:
return m
binary_mask = get_mask(masks[0])
for m in masks[1:]:
binary_mask |= get_mask(m)
return binary_mask
@staticmethod
def find_instances(annotations):
segment_anns = (a for a in annotations
if a.type in {AnnotationType.polygon, AnnotationType.mask}
)
ann_groups = []
for g_id, group in groupby(segment_anns, lambda a: a.group):
if g_id is None:
ann_groups.extend(([a] for a in group))
else:
ann_groups.append(list(group))
return ann_groups
return find_instances(a for a in annotations
if a.type in {AnnotationType.polygon, AnnotationType.mask})
class PolygonsToMasks(Transform, CliPlugin):
def transform_item(self, item):

@ -0,0 +1,28 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from itertools import groupby
def find_instances(instance_anns):
    """Split annotations into per-instance groups.

    Annotations sharing the same truthy `group` value form one instance,
    even when they are not adjacent in the input. An annotation whose
    `group` is falsy (e.g. `None` or `0`) becomes an instance of its own.

    Args:
        instance_anns: iterable of objects exposing a `group` attribute.
            Truthy group values must be hashable.

    Returns:
        A list of lists of annotations. Instances are ordered by the
        position of their first member in the input, and members keep
        their input order.
    """
    ann_groups = []
    # group id -> the member list already placed in ann_groups.
    # A plain groupby() would only merge *consecutive* equal keys and
    # split an instance whose parts are interleaved with other annotations.
    members_by_group = {}
    for ann in instance_anns:
        g_id = ann.group
        if not g_id:
            ann_groups.append([ann])
            continue

        members = members_by_group.get(g_id)
        if members is None:
            members = []
            members_by_group[g_id] = members
            ann_groups.append(members)
        members.append(ann)

    return ann_groups
def find_group_leader(group):
    """Return the annotation of `group` with the largest `get_area()`.

    On ties the first such annotation is returned. An empty `group`
    raises `ValueError` (from `max`).
    """
    def area_of(ann):
        return ann.get_area()

    return max(group, key=area_of)
def compute_bbox(annotations):
    """Return the `[x, y, w, h]` box enclosing all the annotations.

    Each annotation's `get_bbox()` result is indexed as `[x, y, w, h]`.
    With no annotations the result is `[0, 0, 0, 0]`.
    """
    rects = [ann.get_bbox() for ann in annotations]
    if not rects:
        return [0, 0, 0 - 0, 0 - 0]

    left = min(r[0] for r in rects)
    top = min(r[1] for r in rects)
    right = max(r[0] + r[2] for r in rects)
    bottom = max(r[1] + r[3] for r in rects)
    return [left, top, right - left, bottom - top]

@ -169,8 +169,6 @@ class Image:
if size is not None:
assert len(size) == 2 and 0 < size[0] and 0 < size[1], size
size = tuple(size)
else:
size = None
self._size = size # (H, W)
assert path is None or isinstance(path, str)

@ -3,7 +3,7 @@ import numpy as np
from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories
AnnotationType, Bbox, Mask, LabelCategories
)
from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter
from datumaro.plugins.tf_detection_api_format.extractor import TfDetectionApiExtractor
@ -65,6 +65,35 @@ class TfrecordConverterTest(TestCase):
TestExtractor(), TfDetectionApiConverter(save_images=True),
test_dir)
def test_can_save_masks(self):
    # Exports a single item carrying one labelled mask with
    # TfDetectionApiConverter(save_masks=True) and hands the result to
    # self._test_save_and_load (presumably a save/reload round-trip
    # comparison - its body is not visible here).
    class TestExtractor(Extractor):
        def __iter__(self):
            return iter([
                # NOTE(review): the image is 4x5 (HxW) while the mask below
                # is 4x4 - confirm the size mismatch is intentional.
                DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
                    annotations=[
                        Mask(image=np.array([
                            [1, 0, 0, 1],
                            [0, 1, 1, 0],
                            [0, 1, 1, 0],
                            [1, 0, 0, 1],
                        ]), label=1),
                    ]
                ),
            ])

        def categories(self):
            # Ten labels, 'label_0' .. 'label_9', so that label=1 is valid.
            label_cat = LabelCategories()
            for label in range(10):
                label_cat.add('label_' + str(label))
            return {
                AnnotationType.label: label_cat,
            }

    with TestDir() as test_dir:
        self._test_save_and_load(
            TestExtractor(), TfDetectionApiConverter(save_masks=True),
            test_dir)
def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):

@ -159,8 +159,10 @@ class TransformsTest(TestCase):
[1, 0, 0, 0, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
z_order=0, group=1),
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
z_order=1, group=1),
Polygon([0, 0, 0, 2, 2, 2, 2, 0],
z_order=1),
]
),
@ -178,7 +180,15 @@ class TransformsTest(TestCase):
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0]],
),
z_order=0),
z_order=0, group=1),
Mask(np.array([
[1, 1, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
),
z_order=1),
]
),
])

Loading…
Cancel
Save