Replace mask format support with Datumaro (#1163)

* Add box to mask transform

* Fix 'source' labelmap mode in voc converter

* Import groups

* Replace mask format support

* Update mask format documentation

* codacy

* Fix tests

* Fix dataset

* Fix segments grouping

* Merge instances in mask export

@@ -38,18 +38,17 @@ Format selection is possible after clicking on the Upload annotation / Dump anno
The [Datumaro](datumaro/README.md) dataset framework allows additional dataset transformations
via its command line tool; a short sketch follows the table below.
| Annotation format | Dumper | Loader |
| ---------------------------------------------------------------------------------- | ------ | ------ |
| [CVAT XML v1.1 for images](cvat/apps/documentation/xml_format.md#annotation) | X | X |
| [CVAT XML v1.1 for a video](cvat/apps/documentation/xml_format.md#interpolation) | X | X |
| [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X |
| [YOLO](https://pjreddie.com/darknet/yolo/) | X | X |
| [MS COCO Object Detection](http://cocodataset.org/#format-data) | X | X |
| PNG mask | X | |
| PNG instance mask | X | |
| [TFrecord](https://www.tensorflow.org/tutorials/load_data/tf_records) | X | X |
| [MOT](https://motchallenge.net/) | X | X |
| [LabelMe](http://labelme.csail.mit.edu/Release3.0) | X | X |
| Annotation format | Dumper | Loader |
| ------------------------------------------------------------------------------------------ | ------ | ------ |
| [CVAT XML v1.1 for images](cvat/apps/documentation/xml_format.md#annotation) | X | X |
| [CVAT XML v1.1 for a video](cvat/apps/documentation/xml_format.md#interpolation) | X | X |
| [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X |
| [YOLO](https://pjreddie.com/darknet/yolo/) | X | X |
| [MS COCO Object Detection](http://cocodataset.org/#format-data) | X | X |
| PNG class mask + instance mask as in [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X |
| [TFrecord](https://www.tensorflow.org/tutorials/load_data/tf_records) | X | X |
| [MOT](https://motchallenge.net/) | X | X |
| [LabelMe](http://labelme.csail.mit.edu/Release3.0) | X | X |
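
A minimal sketch of driving such a Datumaro transformation from Python (the command line tool exposes the same transform registry). The dataset path is a placeholder; the importer and transform names are the ones used by the new mask handlers in this patch:

```python
# Minimal sketch, assuming a Pascal VOC dataset at ./voc_dataset (placeholder path).
from datumaro.components.project import Environment
from datumaro.plugins.voc_format.importer import VocImporter

env = Environment()
project = VocImporter()('./voc_dataset')    # build a project from the dataset
dataset = project.make_dataset()            # materialize the dataset
# convert mask annotations to polygons, as the new loader does
dataset = dataset.transform(env.transforms.get('masks_to_polygons'))
```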
## Links
- [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)

@@ -506,18 +506,48 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
- downloaded file: a zip archive with the following structure:
```bash
taskname.zip
├── frame_000001.png
├── frame_000002.png
├── frame_000003.png
├── ...
└── colormap.txt
├── labelmap.txt # optional, required for non-VOC labels
├── ImageSets/
│   └── Segmentation/
│       └── default.txt # list of image names without extension
├── SegmentationClass/ # merged class masks
│   ├── image1.png
│   └── image2.png
└── SegmentationObject/ # merged instance masks
    ├── image1.png
    └── image2.png
```
A mask is a PNG image with 3 (RGB) channels, where each pixel's color corresponds to a label.
Color generation corresponds to the Pascal VOC color generation
[algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap);
(0, 0, 0) is used for the background.
The `colormap.txt` file contains the values of the used colors in RGB format.
The `labelmap.txt` file contains the used labels and their colors in RGB format. The file structure:
```bash
# label:color_rgb:parts:actions
background:0,128,0::
aeroplane:10,10,128::
bicycle:10,128,0::
bird:0,108,128::
boat:108,0,100::
bottle:18,0,8::
bus:12,28,0::
```
- supported shapes: Rectangles, Polygons
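
For reference, a hypothetical helper that reads `labelmap.txt` back into a `{label: (r, g, b)}` mapping; the function name and return shape are illustrative, not part of CVAT's API:

```python
def parse_labelmap(path):
    # Each non-comment line has the form "label:color_rgb:parts:actions".
    labelmap = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            label, color_rgb, _parts, _actions = line.split(':')
            labelmap[label] = tuple(int(c) for c in color_rgb.split(','))
    return labelmap
```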
#### Mask loader description
Not supported
- uploaded file: a zip archive of the following structure:
```bash
name.zip
├── labelmap.txt # optional, required for non-VOC labels
├── ImageSets/
│   └── Segmentation/
│       └── <any_subset_name>.txt
├── SegmentationClass/
│   ├── image1.png
│   └── image2.png
└── SegmentationObject/
    ├── image1.png
    └── image2.png
```
- supported shapes: Polygons
- additional comments: the CVAT task should be created with the full set of labels that can appear in the annotation files

@@ -6,130 +6,60 @@ format_spec = {
    "name": "MASK",
    "dumpers": [
        {
            "display_name": "{name} (by class) {format} {version}",
            "display_name": "{name} {format} {version}",
            "format": "ZIP",
            "version": "1.0",
            "handler": "dump_by_class"
            "version": "1.1",
            "handler": "dump",
        },
    ],
    "loaders": [
        {
            "display_name": "{name} (by instance) {format} {version}",
            "display_name": "{name} {format} {version}",
            "format": "ZIP",
            "version": "1.0",
            "handler": "dump_by_instance"
            "version": "1.1",
            "handler": "load",
        },
    ],
    "loaders": [
    ],
}
MASK_BY_CLASS = 0
MASK_BY_INSTANCE = 1

def convert_box_to_polygon(shape):
    xtl = shape.points[0]
    ytl = shape.points[1]
    xbr = shape.points[2]
    ybr = shape.points[3]

    return [xtl, ytl, xbr, ytl, xbr, ybr, xtl, ybr]

def create_mask_colorizer(annotations, colorize_type):
    import numpy as np
    from collections import OrderedDict

    class MaskColorizer:

        def __init__(self, annotations, colorize_type):
            if colorize_type == MASK_BY_CLASS:
                self.colors = self.gen_class_mask_colors(annotations)
            elif colorize_type == MASK_BY_INSTANCE:
                self.colors = self.gen_instance_mask_colors()

        def generate_pascal_colormap(self, size=256):
            # RGB format, (0, 0, 0) used for background
            colormap = np.zeros((size, 3), dtype=int)
            ind = np.arange(size, dtype=int)

            for shift in reversed(range(8)):
                for channel in range(3):
                    colormap[:, channel] |= ((ind >> channel) & 1) << shift
                ind >>= 3

            return colormap

        def gen_class_mask_colors(self, annotations):
            colormap = self.generate_pascal_colormap()
            labels = [label[1]["name"] for label in annotations.meta["task"]["labels"] if label[1]["name"] != 'background']
            labels.insert(0, 'background')
            label_colors = OrderedDict((label, colormap[idx]) for idx, label in enumerate(labels))

            return label_colors

        def gen_instance_mask_colors(self):
            colormap = self.generate_pascal_colormap()
            # The first color is black
            instance_colors = OrderedDict((idx, colormap[idx]) for idx in range(len(colormap)))

            return instance_colors

    return MaskColorizer(annotations, colorize_type)

def dump(file_object, annotations, colorize_type):
    from zipfile import ZipFile, ZIP_STORED
    import numpy as np
    import os
    from pycocotools import mask as maskUtils
    import matplotlib.image
    import io

    colorizer = create_mask_colorizer(annotations, colorize_type=colorize_type)
    if colorize_type == MASK_BY_CLASS:
        save_dir = "SegmentationClass"
    elif colorize_type == MASK_BY_INSTANCE:
        save_dir = "SegmentationObject"

    with ZipFile(file_object, "w", ZIP_STORED) as output_zip:
        for frame_annotation in annotations.group_by_frame():
            image_name = frame_annotation.name
            annotation_name = "{}.png".format(os.path.splitext(os.path.basename(image_name))[0])
            width = frame_annotation.width
            height = frame_annotation.height

            shapes = frame_annotation.labeled_shapes
            # convert to mask only rectangles and polygons
            shapes = [shape for shape in shapes if shape.type == 'rectangle' or shape.type == 'polygon']
            if not shapes:
                continue
            shapes = sorted(shapes, key=lambda x: int(x.z_order))
            img_mask = np.zeros((height, width, 3))
            buf_mask = io.BytesIO()
            for shape_index, shape in enumerate(shapes):
                points = shape.points if shape.type != 'rectangle' else convert_box_to_polygon(shape)
                rles = maskUtils.frPyObjects([points], height, width)
                rle = maskUtils.merge(rles)
                mask = maskUtils.decode(rle)
                idx = (mask > 0)
                # get corresponding color
                if colorize_type == MASK_BY_CLASS:
                    color = colorizer.colors[shape.label] / 255
                elif colorize_type == MASK_BY_INSTANCE:
                    color = colorizer.colors[shape_index + 1] / 255

                img_mask[idx] = color

            # write mask
            matplotlib.image.imsave(buf_mask, img_mask, format='png')
            output_zip.writestr(os.path.join(save_dir, annotation_name), buf_mask.getvalue())
        # Store color map for each class
        labels = '\n'.join('{}:{}'.format(label, ','.join(str(i) for i in color)) for label, color in colorizer.colors.items())
        output_zip.writestr('colormap.txt', labels)

def dump_by_class(file_object, annotations):
    return dump(file_object, annotations, MASK_BY_CLASS)

def dump_by_instance(file_object, annotations):
    return dump(file_object, annotations, MASK_BY_INSTANCE)
def dump(file_object, annotations):
    from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
    from cvat.apps.dataset_manager.util import make_zip_archive
    from datumaro.components.project import Environment, Dataset
    from tempfile import TemporaryDirectory

    env = Environment()
    polygons_to_masks = env.transforms.get('polygons_to_masks')
    boxes_to_masks = env.transforms.get('boxes_to_masks')
    merge_instance_segments = env.transforms.get('merge_instance_segments')
    id_from_image = env.transforms.get('id_from_image_name')

    extractor = CvatAnnotationsExtractor('', annotations)
    extractor = extractor.transform(polygons_to_masks)
    extractor = extractor.transform(boxes_to_masks)
    extractor = extractor.transform(merge_instance_segments)
    extractor = extractor.transform(id_from_image)
    extractor = Dataset.from_extractors(extractor) # apply lazy transforms
    converter = env.make_converter('voc_segmentation',
        apply_colormap=True, label_map='source')
    with TemporaryDirectory() as temp_dir:
        converter(extractor, save_dir=temp_dir)
        make_zip_archive(temp_dir, file_object)

def load(file_object, annotations):
    from pyunpack import Archive
    from tempfile import TemporaryDirectory
    from datumaro.plugins.voc_format.importer import VocImporter
    from datumaro.components.project import Environment
    from cvat.apps.dataset_manager.bindings import import_dm_annotations

    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
    with TemporaryDirectory() as tmp_dir:
        Archive(archive_file).extractall(tmp_dir)

        dm_project = VocImporter()(tmp_dir)
        dm_dataset = dm_project.make_dataset()
        masks_to_polygons = Environment().transforms.get('masks_to_polygons')
        dm_dataset = dm_dataset.transform(masks_to_polygons)
        import_dm_annotations(dm_dataset, annotations)
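
A hedged sketch of how these handlers are driven; in CVAT they are invoked by the annotation engine, which supplies the `annotations` wrapper, and the archive name below is a placeholder:

```python
# Sketch only: 'annotations' stands for CVAT's task annotation wrapper object.
with open('task_mask.zip', 'wb') as f:
    dump(f, annotations)   # export class + instance masks in the VOC layout

with open('task_mask.zip', 'rb') as f:
    load(f, annotations)   # import masks back as polygon shapes
```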

@@ -211,6 +211,22 @@ def import_dm_annotations(dm_dataset, cvat_task_anno):
    for item in dm_dataset:
        frame_number = match_frame(item, cvat_task_anno)

        # do not store one-item groups
        group_map = { 0: 0 }
        group_size = { 0: 0 }
        for ann in item.annotations:
            if ann.type in shapes:
                group = group_map.get(ann.group)
                if group is None:
                    group = len(group_map)
                    group_map[ann.group] = group
                    group_size[ann.group] = 1
                else:
                    group_size[ann.group] += 1
        group_map = {g: s for g, s in group_size.items()
            if 1 < s and group_map[g]}
        group_map = {g: i for i, g in enumerate([0] + sorted(group_map))}

        for ann in item.annotations:
            if ann.type in shapes:
                cvat_task_anno.add_shape(cvat_task_anno.LabeledShape(
@@ -219,5 +235,6 @@ def import_dm_annotations(dm_dataset, cvat_task_anno):
                    label=label_cat.items[ann.label].name,
                    points=ann.points,
                    occluded=False,
                    group=group_map.get(ann.group, 0),
                    attributes=[],
                ))
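
To make the remapping concrete, here is a small self-contained walk-through with made-up group ids; singleton groups and the "ungrouped" id 0 collapse to 0, the rest are renumbered consecutively:

```python
incoming = [0, 1, 1, 2, 3, 3, 3]  # hypothetical Datumaro group ids

group_map = {0: 0}
group_size = {0: 0}
for g in incoming:
    if g not in group_map:
        group_map[g] = len(group_map)
        group_size[g] = 1
    else:
        group_size[g] += 1
# keep only groups with more than one member, excluding the "ungrouped" id 0
group_map = {g: s for g, s in group_size.items() if 1 < s and group_map[g]}
group_map = {g: i for i, g in enumerate([0] + sorted(group_map))}

print([group_map.get(g, 0) for g in incoming])  # -> [0, 1, 1, 0, 2, 2, 2]
```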

@@ -2658,9 +2658,9 @@ class TaskAnnotationAPITestCase(JobAnnotationAPITestCase):
            elif annotation_format == "COCO JSON 1.0":
                annotations["shapes"] = polygon_shapes_wo_attrs

            elif annotation_format == "MASK ZIP 1.0":
                annotations["shapes"] = rectangle_shapes_with_attrs + rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs
                annotations["tracks"] = rectangle_tracks_with_attrs + rectangle_tracks_wo_attrs
            elif annotation_format == "MASK ZIP 1.1":
                annotations["shapes"] = rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs
                annotations["tracks"] = rectangle_tracks_wo_attrs

            elif annotation_format == "MOT CSV 1.0":
                annotations["tracks"] = rectangle_tracks_wo_attrs
@@ -2730,6 +2730,8 @@ class TaskAnnotationAPITestCase(JobAnnotationAPITestCase):
            }

            for loader in annotation_format["loaders"]:
                if loader["display_name"] == "MASK ZIP 1.1":
                    continue # can't really predict the result and check
                response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader["display_name"]))
                self.assertEqual(response.status_code, HTTP_202_ACCEPTED)

@@ -321,17 +321,17 @@ class Dataset(Extractor):
        subsets = defaultdict(lambda: Subset(dataset))
        for source in sources:
            for item in source:
                path = None # NOTE: merge everything into our own dataset
                existing_item = subsets[item.subset].items.get(item.id)
                if existing_item is not None:
                    item = self._merge_items(existing_item, item, path=path)
                else:
                    item = item.wrap(path=path, annotations=item.annotations)
                    path = existing_item.path
                    if item.path != path:
                        path = None
                    item = cls._merge_items(existing_item, item, path=path)

                subsets[item.subset].items[item.id] = item

        self._subsets = dict(subsets)
        dataset._subsets = dict(subsets)

        return dataset

    def __init__(self, categories=None):
        super().__init__()

@@ -419,7 +419,7 @@ class Dataset(Extractor):
            image._path = current_item.image.path

        if all([existing_item.image._size, current_item.image._size]):
            assert existing_item.image._size == current_item.image._size, "Image info differs for item '%s'" % item.id
            assert existing_item.image._size == current_item.image._size, "Image info differs for item '%s'" % existing_item.id
        elif existing_item.image._size:
            image._size = existing_item.image._size
        else:

@@ -361,6 +361,7 @@ class _KeypointsConverter(_InstancesConverter):
    @classmethod
    def find_solitary_points(cls, annotations):
        annotations = sorted(annotations, key=lambda a: a.group)
        solitary_points = []

        for g_id, group in groupby(annotations, lambda a: a.group):

@@ -181,6 +181,27 @@ class PolygonsToMasks(Transform, CliPlugin):
        return RleMask(rle=rle, label=polygon.label, z_order=polygon.z_order,
            id=polygon.id, attributes=polygon.attributes, group=polygon.group)

class BoxesToMasks(Transform, CliPlugin):
    def transform_item(self, item):
        annotations = []
        for ann in item.annotations:
            if ann.type == AnnotationType.bbox:
                if not item.has_image:
                    raise Exception("Image info is required for this transform")
                h, w = item.image.size
                annotations.append(self.convert_bbox(ann, h, w))
            else:
                annotations.append(ann)

        return self.wrap_item(item, annotations=annotations)

    @staticmethod
    def convert_bbox(bbox, img_h, img_w):
        rle = mask_utils.frPyObjects([bbox.as_polygon()], img_h, img_w)[0]

        return RleMask(rle=rle, label=bbox.label, z_order=bbox.z_order,
            id=bbox.id, attributes=bbox.attributes, group=bbox.group)

class MasksToPolygons(Transform, CliPlugin):
    def transform_item(self, item):
        annotations = []

@@ -411,8 +411,10 @@ class _Converter:
            # generate colormap from the input dataset
            labels = self._extractor.categories() \
                .get(AnnotationType.label, LabelCategories())
            label_map = OrderedDict(
                (item.name, [None, [], []]) for item in labels.items)
            label_map = OrderedDict()
            label_map['background'] = [None, [], []]
            for item in labels.items:
                label_map[item.name] = [None, [], []]
        elif label_map_source in [LabelmapType.guess.name, None]:
            # generate colormap for union of VOC and input dataset labels

@@ -7,6 +7,7 @@ from itertools import groupby

def find_instances(instance_anns):
    instance_anns = sorted(instance_anns, key=lambda a: a.group)

    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:

@@ -84,7 +84,8 @@ def compare_datasets(test, expected, actual):
    test.assertEqual(sorted(expected.subsets()), sorted(actual.subsets()))

    test.assertEqual(len(expected), len(actual))
    for item_a in expected:
        item_b = find(actual, lambda x: x.id == item_a.id)
        item_b = find(actual, lambda x: x.id == item_a.id and \
            x.subset == item_a.subset)
        test.assertFalse(item_b is None, item_a.id)
        test.assertEqual(len(item_a.annotations), len(item_b.annotations))
        for ann_a in item_a.annotations:

@@ -4,8 +4,8 @@ import os.path as osp

from unittest import TestCase

from datumaro.components.project import Project, Environment
from datumaro.components.project import Source, Model
from datumaro.components.project import Project, Environment, Dataset
from datumaro.components.config_model import Source, Model
from datumaro.components.launcher import Launcher, InferenceWrapper
from datumaro.components.converter import Converter
from datumaro.components.extractor import (Extractor, DatasetItem,

@@ -372,7 +372,7 @@ class DatasetFilterTest(TestCase):
        self.assertEqual(2, len(filtered))

    def test_annotations_filter_can_be_applied(self):
        class SrcTestExtractor(Extractor):
        class SrcExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),

@@ -386,7 +386,7 @@ class DatasetFilterTest(TestCase):
                    ]),
                ])

        class DstTestExtractor(Extractor):
        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),

@@ -398,15 +398,15 @@ class DatasetFilterTest(TestCase):
                    ]),
                ])

        extractor = SrcTestExtractor()
        extractor = SrcExtractor()
        filtered = XPathAnnotationsFilter(extractor,
            '/item/annotation[label_id = 0]')
        self.assertListEqual(list(filtered), list(DstTestExtractor()))
        self.assertListEqual(list(filtered), list(DstExtractor()))

    def test_annotations_filter_can_remove_empty_items(self):
        class SrcTestExtractor(Extractor):
        class SrcExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),

@@ -420,7 +420,7 @@ class DatasetFilterTest(TestCase):
                    ]),
                ])

        class DstTestExtractor(Extractor):
        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=2, annotations=[

@@ -428,12 +428,12 @@ class DatasetFilterTest(TestCase):
                    ]),
                ])

        extractor = SrcTestExtractor()
        extractor = SrcExtractor()
        filtered = XPathAnnotationsFilter(extractor,
            '/item/annotation[label_id = 2]', remove_empty=True)
        self.assertListEqual(list(filtered), list(DstTestExtractor()))
        self.assertListEqual(list(filtered), list(DstExtractor()))

class ConfigTest(TestCase):
    def test_can_produce_multilayer_config_from_dict(self):
@@ -492,6 +492,46 @@ class ExtractorTest(TestCase):
        compare_datasets(self, CustomExtractor(), dataset)

class DatasetTest(TestCase):
    def test_create_from_extractors(self):
        class SrcExtractor1(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='train', annotations=[
                        Bbox(1, 2, 3, 4),
                        Label(4),
                    ]),
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(4),
                    ]),
                ])

        class SrcExtractor2(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(5),
                    ]),
                ])

        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='train', annotations=[
                        Bbox(1, 2, 3, 4),
                        Label(4),
                    ]),
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(4),
                        Label(5),
                    ]),
                ])

        dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2())

        compare_datasets(self, DstExtractor(), dataset)

class DatasetItemTest(TestCase):
    def test_ctor_requires_id(self):
        has_error = False

@@ -271,3 +271,52 @@ class TransformsTest(TestCase):
        actual = transforms.IdFromImageName(SrcExtractor())
        compare_datasets(self, DstExtractor(), actual)

    def test_boxes_to_masks(self):
        class SrcExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, image=np.zeros((5, 5, 3)),
                        annotations=[
                            Bbox(0, 0, 3, 3, z_order=1),
                            Bbox(0, 0, 3, 1, z_order=2),
                            Bbox(0, 2, 3, 1, z_order=3),
                        ]
                    ),
                ])

        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, image=np.zeros((5, 5, 3)),
                        annotations=[
                            Mask(np.array([
                                    [1, 1, 1, 0, 0],
                                    [1, 1, 1, 0, 0],
                                    [1, 1, 1, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0]],
                                ),
                                z_order=1),
                            Mask(np.array([
                                    [1, 1, 1, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0]],
                                ),
                                z_order=2),
                            Mask(np.array([
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [1, 1, 1, 0, 0],
                                    [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0]],
                                ),
                                z_order=3),
                        ]
                    ),
                ])

        actual = transforms.BoxesToMasks(SrcExtractor())
        compare_datasets(self, DstExtractor(), actual)

@@ -1,3 +1,4 @@
from collections import OrderedDict
import numpy as np
import os
import os.path as osp

@@ -643,6 +644,53 @@ class VocConverterTest(TestCase):
            SrcExtractor(), VocConverter(label_map='guess'),
            test_dir, target_dataset=DstExtractor())

    def test_dataset_with_source_labelmap(self):
        class SrcExtractor(TestExtractorBase):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                    Bbox(2, 3, 4, 5, label=0, id=1),
                    Bbox(1, 2, 3, 4, label=1, id=2),
                ])

            def categories(self):
                label_cat = LabelCategories()
                label_cat.add('label_1')
                label_cat.add('label_2')
                return {
                    AnnotationType.label: label_cat,
                }

        class DstExtractor(TestExtractorBase):
            def __iter__(self):
                yield DatasetItem(id=1, annotations=[
                    Bbox(2, 3, 4, 5, label=self._label('label_1'), id=1,
                        attributes={
                            'truncated': False,
                            'difficult': False,
                            'occluded': False,
                        }
                    ),
                    Bbox(1, 2, 3, 4, label=self._label('label_2'), id=2,
                        attributes={
                            'truncated': False,
                            'difficult': False,
                            'occluded': False,
                        }
                    ),
                ])

            def categories(self):
                label_map = OrderedDict()
                label_map['background'] = [None, [], []]
                label_map['label_1'] = [None, [], []]
                label_map['label_2'] = [None, [], []]
                return VOC.make_voc_categories(label_map)

        with TestDir() as test_dir:
            self._test_save_and_load(
                SrcExtractor(), VocConverter(label_map='source'),
                test_dir, target_dataset=DstExtractor())

    def test_dataset_with_fixed_labelmap(self):
        class SrcExtractor(TestExtractorBase):
            def __iter__(self):
