Coco converter updates (#864)

main
zhiltsov-max 6 years ago committed by Nikita Manovich
parent 394de98979
commit 50c40ba70d

@ -11,7 +11,7 @@ from datumaro.components.config import Config, \
SOURCE_SCHEMA = _SchemaBuilder() \ SOURCE_SCHEMA = _SchemaBuilder() \
.add('url', str) \ .add('url', str) \
.add('format', str) \ .add('format', str) \
.add('options', str) \ .add('options', dict) \
.build() .build()
class Source(Config): class Source(Config):

@ -121,32 +121,88 @@ class _InstancesConverter(_TaskConverter):
}) })
def save_annotations(self, item): def save_annotations(self, item):
for ann in item.annotations: annotations = item.annotations.copy()
if ann.type != AnnotationType.bbox:
while len(annotations) != 0:
ann = annotations.pop()
if ann.type == AnnotationType.bbox and ann.label is not None:
pass
elif ann.type == AnnotationType.polygon and ann.label is not None:
pass
elif ann.type == AnnotationType.mask and ann.label is not None:
pass
else:
continue continue
is_crowd = ann.attributes.get('is_crowd', False) bbox = None
segmentation = None segmentation = None
if ann.group is not None:
if ann.type == AnnotationType.bbox:
is_crowd = ann.attributes.get('is_crowd', False)
bbox = ann.get_bbox()
elif ann.type == AnnotationType.polygon:
is_crowd = ann.attributes.get('is_crowd', False)
elif ann.type == AnnotationType.mask:
is_crowd = ann.attributes.get('is_crowd', True)
if is_crowd: if is_crowd:
segmentation = find(item.annotations, lambda x: \ segmentation = ann
x.group == ann.group and x.type == AnnotationType.mask) area = None
if segmentation is not None:
binary_mask = np.array(segmentation.image, dtype=np.bool) # If ann in a group, try to find corresponding annotations in
binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8) # this group, otherwise try to infer them.
segmentation = mask_utils.encode(binary_mask)
area = mask_utils.area(segmentation) if bbox is None and ann.group is not None:
segmentation = mask_tools.convert_mask_to_rle(binary_mask) bbox = find(annotations, lambda x: \
else: x.group == ann.group and \
segmentation = find(item.annotations, lambda x: \ x.type == AnnotationType.bbox and \
x.group == ann.group and x.type == AnnotationType.polygon) x.label == ann.label)
if segmentation is not None: if bbox is not None:
area = ann.area() bbox = bbox.get_bbox()
segmentation = [segmentation.get_points()]
if is_crowd:
# is_crowd=True means there should be a mask
if segmentation is None and ann.group is not None:
segmentation = find(annotations, lambda x: \
x.group == ann.group and \
x.type == AnnotationType.mask and \
x.label == ann.label)
if segmentation is not None:
binary_mask = np.array(segmentation.image, dtype=np.bool)
binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8)
segmentation = mask_utils.encode(binary_mask)
area = mask_utils.area(segmentation)
segmentation = mask_tools.convert_mask_to_rle(binary_mask)
else:
# is_crowd=False means there are some polygons
polygons = []
if ann.type == AnnotationType.polygon:
polygons = [ ann ]
if ann.group is not None:
# A single object can consist of several polygons
polygons += [p for p in annotations
if p.group == ann.group and \
p.type == AnnotationType.polygon and \
p.label == ann.label]
if polygons:
segmentation = [p.get_points() for p in polygons]
h, w, _ = item.image.shape
rles = mask_utils.frPyObjects(segmentation, h, w)
rle = mask_utils.merge(rles)
area = mask_utils.area(rle)
if ann.group is not None:
# Mark the group as visited to prevent repeats
for a in annotations[:]:
if a.group == ann.group:
annotations.remove(a)
if segmentation is None: if segmentation is None:
is_crowd = False is_crowd = False
segmentation = [ann.get_polygon()] segmentation = [ann.get_polygon()]
area = ann.area() area = ann.area()
if bbox is None:
bbox = ann.get_bbox()
elem = { elem = {
'id': self._get_ann_id(ann), 'id': self._get_ann_id(ann),
@ -154,7 +210,7 @@ class _InstancesConverter(_TaskConverter):
'category_id': _cast(ann.label, int, -1) + 1, 'category_id': _cast(ann.label, int, -1) + 1,
'segmentation': segmentation, 'segmentation': segmentation,
'area': float(area), 'area': float(area),
'bbox': ann.get_bbox(), 'bbox': bbox,
'iscrowd': int(is_crowd), 'iscrowd': int(is_crowd),
} }
if 'score' in ann.attributes: if 'score' in ann.attributes:

@ -271,6 +271,14 @@ class PolygonObject(ShapeObject):
def get_polygon(self): def get_polygon(self):
return self.get_points() return self.get_points()
def area(self):
import pycocotools.mask as mask_utils
_, _, w, h = self.get_bbox()
rle = mask_utils.frPyObjects([self.get_points()], h, w)
area = mask_utils.area(rle)
return area
class BboxObject(ShapeObject): class BboxObject(ShapeObject):
# pylint: disable=redefined-builtin # pylint: disable=redefined-builtin
def __init__(self, x=0, y=0, w=0, h=0, def __init__(self, x=0, y=0, w=0, h=0,

@ -61,7 +61,7 @@ class CocoExtractor(Extractor):
def categories(self): def categories(self):
return self._parent.categories() return self._parent.categories()
def __init__(self, path, task): def __init__(self, path, task, merge_instance_polygons=False):
super().__init__() super().__init__()
rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0] rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0]
@ -80,6 +80,8 @@ class CocoExtractor(Extractor):
self._load_categories() self._load_categories()
self._merge_instance_polygons = merge_instance_polygons
@staticmethod @staticmethod
def _make_subset_loader(path): def _make_subset_loader(path):
# COCO API has an 'unclosed file' warning # COCO API has an 'unclosed file' warning
@ -212,20 +214,22 @@ class CocoExtractor(Extractor):
segmentation = ann.get('segmentation') segmentation = ann.get('segmentation')
if segmentation is not None: if segmentation is not None:
group = ann_id group = ann_id
rle = None
if isinstance(segmentation, list): if isinstance(segmentation, list):
# polygon -- a single object might consist of multiple parts # polygon - a single object can consist of multiple parts
for polygon_points in segmentation: for polygon_points in segmentation:
parsed_annotations.append(PolygonObject( parsed_annotations.append(PolygonObject(
points=polygon_points, label=label_id, points=polygon_points, label=label_id,
group=group id=ann_id, group=group, attributes=attributes
)) ))
# we merge all parts into one mask RLE code if self._merge_instance_polygons:
img_h = image_info['height'] # merge all parts into a single mask RLE
img_w = image_info['width'] img_h = image_info['height']
rles = mask_utils.frPyObjects(segmentation, img_h, img_w) img_w = image_info['width']
rle = mask_utils.merge(rles) rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
rle = mask_utils.merge(rles)
elif isinstance(segmentation['counts'], list): elif isinstance(segmentation['counts'], list):
# uncompressed RLE # uncompressed RLE
img_h, img_w = segmentation['size'] img_h, img_w = segmentation['size']
@ -234,9 +238,10 @@ class CocoExtractor(Extractor):
# compressed RLE # compressed RLE
rle = segmentation rle = segmentation
parsed_annotations.append(RleMask(rle=rle, label=label_id, if rle is not None:
group=group parsed_annotations.append(RleMask(rle=rle, label=label_id,
)) id=ann_id, group=group, attributes=attributes
))
parsed_annotations.append( parsed_annotations.append(
BboxObject(x, y, w, h, label=label_id, BboxObject(x, y, w, h, label=label_id,
@ -277,21 +282,22 @@ class CocoExtractor(Extractor):
return parsed_annotations return parsed_annotations
class CocoImageInfoExtractor(CocoExtractor): class CocoImageInfoExtractor(CocoExtractor):
def __init__(self, path): def __init__(self, path, **kwargs):
super().__init__(path, task=CocoAnnotationType.image_info) super().__init__(path, task=CocoAnnotationType.image_info, **kwargs)
class CocoCaptionsExtractor(CocoExtractor): class CocoCaptionsExtractor(CocoExtractor):
def __init__(self, path): def __init__(self, path, **kwargs):
super().__init__(path, task=CocoAnnotationType.captions) super().__init__(path, task=CocoAnnotationType.captions, **kwargs)
class CocoInstancesExtractor(CocoExtractor): class CocoInstancesExtractor(CocoExtractor):
def __init__(self, path): def __init__(self, path, **kwargs):
super().__init__(path, task=CocoAnnotationType.instances) super().__init__(path, task=CocoAnnotationType.instances, **kwargs)
class CocoPersonKeypointsExtractor(CocoExtractor): class CocoPersonKeypointsExtractor(CocoExtractor):
def __init__(self, path): def __init__(self, path, **kwargs):
super().__init__(path, task=CocoAnnotationType.person_keypoints) super().__init__(path, task=CocoAnnotationType.person_keypoints,
**kwargs)
class CocoLabelsExtractor(CocoExtractor): class CocoLabelsExtractor(CocoExtractor):
def __init__(self, path): def __init__(self, path, **kwargs):
super().__init__(path, task=CocoAnnotationType.labels) super().__init__(path, task=CocoAnnotationType.labels, **kwargs)

@ -22,7 +22,7 @@ class CocoImporter:
def __init__(self, task_filter=None): def __init__(self, task_filter=None):
self._task_filter = task_filter self._task_filter = task_filter
def __call__(self, path): def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import from datumaro.components.project import Project # cyclic import
project = Project() project = Project()
@ -37,6 +37,7 @@ class CocoImporter:
project.add_source(source_name, { project.add_source(source_name, {
'url': ann_file, 'url': ann_file,
'format': self._COCO_EXTRACTORS[ann_type], 'format': self._COCO_EXTRACTORS[ann_type],
'options': extra_params,
}) })
return project return project

@ -34,12 +34,12 @@ class CocoImporterTest(TestCase):
'info': {}, 'info': {},
'categories': [], 'categories': [],
'images': [], 'images': [],
'annotations': [] 'annotations': [],
} }
annotation['licenses'].append({ annotation['licenses'].append({
'name': '', 'name': '',
'id': 0, 'id': 0,
'url': '' 'url': '',
}) })
annotation['info'] = { annotation['info'] = {
'contributor': '', 'contributor': '',
@ -47,37 +47,41 @@ class CocoImporterTest(TestCase):
'description': '', 'description': '',
'url': '', 'url': '',
'version': '', 'version': '',
'year': '' 'year': '',
} }
annotation['licenses'].append({ annotation['licenses'].append({
'name': '', 'name': '',
'id': 0, 'id': 0,
'url': '' 'url': '',
})
annotation['categories'].append({
'id': 1,
'name': 'TEST',
'supercategory': '',
}) })
annotation['categories'].append({'id': 0, 'name': 'TEST', 'supercategory': ''})
annotation['images'].append({ annotation['images'].append({
"id": 0, "id": 1,
"width": 10, "width": 10,
"height": 5, "height": 5,
"file_name": '000000000001.jpg', "file_name": '000000000001.jpg',
"license": 0, "license": 0,
"flickr_url": '', "flickr_url": '',
"coco_url": '', "coco_url": '',
"date_captured": 0 "date_captured": 0,
}) })
annotation['annotations'].append({ annotation['annotations'].append({
"id": 0, "id": 1,
"image_id": 0, "image_id": 1,
"category_id": 0, "category_id": 1,
"segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]], "segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]],
"area": 2, "area": 2,
"bbox": [0, 0, 1, 2], "bbox": [0, 0, 1, 2],
"iscrowd": 0 "iscrowd": 0,
}) })
annotation['annotations'].append({ annotation['annotations'].append({
"id": 1, "id": 2,
"image_id": 0, "image_id": 1,
"category_id": 0, "category_id": 1,
"segmentation": { "segmentation": {
"counts": [ "counts": [
0, 10, 0, 10,
@ -88,7 +92,7 @@ class CocoImporterTest(TestCase):
"size": [10, 5]}, "size": [10, 5]},
"area": 30, "area": 30,
"bbox": [0, 0, 10, 4], "bbox": [0, 0, 10, 4],
"iscrowd": 0 "iscrowd": 1,
}) })
return annotation return annotation
@ -115,29 +119,30 @@ class CocoImporterTest(TestCase):
item = next(iter(dataset)) item = next(iter(dataset))
self.assertTrue(item.has_image) self.assertTrue(item.has_image)
self.assertEqual(5, len(item.annotations)) self.assertEqual(4, len(item.annotations))
ann_0 = find(item.annotations, lambda x: x.id == 0)
ann_0_poly = find(item.annotations, lambda x: \
x.group == ann_0.id and x.type == AnnotationType.polygon)
ann_0_mask = find(item.annotations, lambda x: \
x.group == ann_0.id and x.type == AnnotationType.mask)
self.assertFalse(ann_0 is None)
self.assertFalse(ann_0_poly is None)
self.assertFalse(ann_0_mask is None)
ann_1 = find(item.annotations, lambda x: x.id == 1) ann_1 = find(item.annotations, lambda x: x.id == 1)
ann_1_mask = find(item.annotations, lambda x: \ ann_1_poly = find(item.annotations, lambda x: \
x.group == ann_1.id and x.type == AnnotationType.mask) x.group == ann_1.id and x.type == AnnotationType.polygon)
self.assertFalse(ann_1 is None) self.assertFalse(ann_1 is None)
self.assertFalse(ann_1_mask is None) self.assertFalse(ann_1_poly is None)
ann_2 = find(item.annotations, lambda x: x.id == 2)
ann_2_mask = find(item.annotations, lambda x: \
x.group == ann_2.id and x.type == AnnotationType.mask)
self.assertFalse(ann_2 is None)
self.assertFalse(ann_2_mask is None)
class CocoConverterTest(TestCase): class CocoConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter_type, test_dir): def _test_save_and_load(self, source_dataset, converter_type, test_dir,
importer_params=None):
converter = converter_type() converter = converter_type()
converter(source_dataset, test_dir.path) converter(source_dataset, test_dir.path)
project = Project.import_from(test_dir.path, 'ms_coco') if not importer_params:
importer_params = {}
project = Project.import_from(test_dir.path, 'ms_coco',
**importer_params)
parsed_dataset = project.make_dataset() parsed_dataset = project.make_dataset()
source_subsets = [s if s else DEFAULT_SUBSET_NAME source_subsets = [s if s else DEFAULT_SUBSET_NAME
@ -155,9 +160,9 @@ class CocoConverterTest(TestCase):
self.assertEqual(len(item_a.annotations), len(item_b.annotations)) self.assertEqual(len(item_a.annotations), len(item_b.annotations))
for ann_a in item_a.annotations: for ann_a in item_a.annotations:
ann_b = find(item_b.annotations, lambda x: \ ann_b = find(item_b.annotations, lambda x: \
x.id == ann_a.id if ann_a.id else \ x.id == ann_a.id and \
x.type == ann_a.type and x.group == ann_a.group) x.type == ann_a.type and x.group == ann_a.group)
self.assertEqual(ann_a, ann_b) self.assertEqual(ann_a, ann_b, 'id: ' + str(ann_a.id))
def test_can_save_and_load_captions(self): def test_can_save_and_load_captions(self):
class TestExtractor(Extractor): class TestExtractor(Extractor):
@ -194,34 +199,35 @@ class CocoConverterTest(TestCase):
items = [ items = [
DatasetItem(id=0, subset='train', image=np.ones((4, 4, 3)), DatasetItem(id=0, subset='train', image=np.ones((4, 4, 3)),
annotations=[ annotations=[
BboxObject(0, 1, 2, 3, label=2, group=1, # Bbox + single polygon
attributes={ 'is_crowd': False }, id=1), BboxObject(0, 1, 2, 3, label=2, group=1, id=1,
attributes={ 'is_crowd': False }),
PolygonObject([0, 1, 2, 1, 2, 3, 0, 3], PolygonObject([0, 1, 2, 1, 2, 3, 0, 3],
label=2, group=1), attributes={ 'is_crowd': False },
MaskObject(np.array([[0, 0, 0, 0], [1, 1, 0, 0], label=2, group=1, id=1),
[1, 1, 0, 0], [0, 0, 0, 0]],
# does not include lower row
dtype=np.bool),
label=2, group=1),
]), ]),
DatasetItem(id=1, subset='train', DatasetItem(id=1, subset='train',
annotations=[ annotations=[
BboxObject(0, 1, 3, 3, label=4, group=3, # Mask + bbox
attributes={ 'is_crowd': True }, id=3),
MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0], MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0],
[1, 1, 0, 0], [0, 0, 1, 0]], [1, 1, 0, 0], [0, 0, 1, 0]],
dtype=np.bool), dtype=np.bool),
label=4, group=3), attributes={ 'is_crowd': True },
label=4, group=3, id=3),
BboxObject(0, 1, 3, 3, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
]), ]),
DatasetItem(id=2, subset='val', DatasetItem(id=3, subset='val',
annotations=[ annotations=[
BboxObject(0, 1, 3, 2, label=4, group=3, # Bbox + mask
attributes={ 'is_crowd': True }, id=3), BboxObject(0, 1, 3, 2, label=4, group=3, id=3,
attributes={ 'is_crowd': True }),
MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0], MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0],
[1, 1, 0, 0], [0, 0, 0, 0]], [1, 1, 0, 0], [0, 0, 0, 0]],
dtype=np.bool), dtype=np.bool),
label=4, group=3), attributes={ 'is_crowd': True },
label=4, group=3, id=3),
]), ]),
] ]
return iter(items) return iter(items)
@ -241,6 +247,49 @@ class CocoConverterTest(TestCase):
self._test_save_and_load(TestExtractor(), self._test_save_and_load(TestExtractor(),
CocoInstancesConverter, test_dir) CocoInstancesConverter, test_dir)
def test_can_save_and_load_instances_with_mask_conversion(self):
class TestExtractor(Extractor):
def __iter__(self):
items = [
DatasetItem(id=0, image=np.zeros((5, 5, 3)), subset='train',
annotations=[
BboxObject(0, 0, 5, 5, label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
PolygonObject([0, 0, 4, 0, 4, 4],
label=3, id=4, group=4,
attributes={ 'is_crowd': False }),
MaskObject(np.array([
[0, 1, 1, 1, 0],
[0, 0, 1, 1, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
dtype=np.bool),
attributes={ 'is_crowd': False },
label=3, id=4, group=4),
]
),
]
return iter(items)
def subsets(self):
return ['train']
def categories(self):
label_categories = LabelCategories()
for i in range(10):
label_categories.add(str(i))
return {
AnnotationType.label: label_categories,
}
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
CocoInstancesConverter, test_dir,
{'merge_instance_polygons': True})
def test_can_save_and_load_images(self): def test_can_save_and_load_images(self):
class TestExtractor(Extractor): class TestExtractor(Extractor):
def __iter__(self): def __iter__(self):
@ -356,20 +405,10 @@ class CocoConverterTest(TestCase):
DatasetItem(id=2, image=np.zeros((5, 5, 3)), annotations=[ DatasetItem(id=2, image=np.zeros((5, 5, 3)), annotations=[
LabelObject(3, id=3), LabelObject(3, id=3),
BboxObject(0, 0, 5, 5, label=3, BboxObject(0, 0, 5, 5, label=3, id=4, group=4,
attributes={ 'is_crowd': False }, id=4, group=4), attributes={ 'is_crowd': False }),
PolygonObject([0, 0, 4, 0, 4, 4], PolygonObject([0, 0, 4, 0, 4, 4], label=3, id=4, group=4,
label=3, group=4), attributes={ 'is_crowd': False }),
MaskObject(np.array([
[0, 1, 1, 1, 0],
[0, 0, 1, 1, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
# only internal fragment (without the border),
# but not everywhere...
dtype=np.bool),
label=3, group=4),
]), ]),
] ]

Loading…
Cancel
Save