You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
260 lines
8.1 KiB
Python
260 lines
8.1 KiB
Python
|
|
# Copyright (C) 2019 Intel Corporation
|
|
#
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
# pylint: disable=no-self-use
|
|
|
|
import json
|
|
import numpy as np
|
|
import os
|
|
import os.path as osp
|
|
|
|
from datumaro.components.converter import Converter
|
|
from datumaro.components.extractor import (
|
|
DEFAULT_SUBSET_NAME, Annotation, _Shape,
|
|
Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption,
|
|
LabelCategories, MaskCategories, PointsCategories
|
|
)
|
|
from datumaro.util import cast
|
|
import pycocotools.mask as mask_utils
|
|
|
|
from .format import DatumaroPath
|
|
|
|
|
|
class _SubsetWriter:
|
|
def __init__(self, name, context):
|
|
self._name = name
|
|
self._context = context
|
|
|
|
self._data = {
|
|
'info': {},
|
|
'categories': {},
|
|
'items': [],
|
|
}
|
|
|
|
@property
|
|
def categories(self):
|
|
return self._data['categories']
|
|
|
|
@property
|
|
def items(self):
|
|
return self._data['items']
|
|
|
|
def write_item(self, item):
|
|
annotations = []
|
|
item_desc = {
|
|
'id': item.id,
|
|
'annotations': annotations,
|
|
}
|
|
if item.attributes:
|
|
item_desc['attr'] = item.attributes
|
|
if item.path:
|
|
item_desc['path'] = item.path
|
|
if item.has_image:
|
|
path = item.image.path
|
|
if self._context._save_images:
|
|
path = self._context._save_image(item)
|
|
|
|
item_desc['image'] = {
|
|
'size': item.image.size,
|
|
'path': path,
|
|
}
|
|
self.items.append(item_desc)
|
|
|
|
for ann in item.annotations:
|
|
if isinstance(ann, Label):
|
|
converted_ann = self._convert_label_object(ann)
|
|
elif isinstance(ann, Mask):
|
|
converted_ann = self._convert_mask_object(ann)
|
|
elif isinstance(ann, Points):
|
|
converted_ann = self._convert_points_object(ann)
|
|
elif isinstance(ann, PolyLine):
|
|
converted_ann = self._convert_polyline_object(ann)
|
|
elif isinstance(ann, Polygon):
|
|
converted_ann = self._convert_polygon_object(ann)
|
|
elif isinstance(ann, Bbox):
|
|
converted_ann = self._convert_bbox_object(ann)
|
|
elif isinstance(ann, Caption):
|
|
converted_ann = self._convert_caption_object(ann)
|
|
else:
|
|
raise NotImplementedError()
|
|
annotations.append(converted_ann)
|
|
|
|
def write_categories(self, categories):
|
|
for ann_type, desc in categories.items():
|
|
if isinstance(desc, LabelCategories):
|
|
converted_desc = self._convert_label_categories(desc)
|
|
elif isinstance(desc, MaskCategories):
|
|
converted_desc = self._convert_mask_categories(desc)
|
|
elif isinstance(desc, PointsCategories):
|
|
converted_desc = self._convert_points_categories(desc)
|
|
else:
|
|
raise NotImplementedError()
|
|
self.categories[ann_type.name] = converted_desc
|
|
|
|
def write(self, save_dir):
|
|
with open(osp.join(save_dir, '%s.json' % (self._name)), 'w') as f:
|
|
json.dump(self._data, f)
|
|
|
|
def _convert_annotation(self, obj):
|
|
assert isinstance(obj, Annotation)
|
|
|
|
ann_json = {
|
|
'id': cast(obj.id, int),
|
|
'type': cast(obj.type.name, str),
|
|
'attributes': obj.attributes,
|
|
'group': cast(obj.group, int, 0),
|
|
}
|
|
return ann_json
|
|
|
|
def _convert_label_object(self, obj):
|
|
converted = self._convert_annotation(obj)
|
|
|
|
converted.update({
|
|
'label_id': cast(obj.label, int),
|
|
})
|
|
return converted
|
|
|
|
def _convert_mask_object(self, obj):
|
|
converted = self._convert_annotation(obj)
|
|
|
|
if isinstance(obj, RleMask):
|
|
rle = obj.rle
|
|
else:
|
|
rle = mask_utils.encode(
|
|
np.require(obj.image, dtype=np.uint8, requirements='F'))
|
|
|
|
converted.update({
|
|
'label_id': cast(obj.label, int),
|
|
'rle': {
|
|
# serialize as compressed COCO mask
|
|
'counts': rle['counts'].decode('ascii'),
|
|
'size': list(int(c) for c in rle['size']),
|
|
},
|
|
'z_order': obj.z_order,
|
|
})
|
|
return converted
|
|
|
|
def _convert_shape_object(self, obj):
|
|
assert isinstance(obj, _Shape)
|
|
converted = self._convert_annotation(obj)
|
|
|
|
converted.update({
|
|
'label_id': cast(obj.label, int),
|
|
'points': [float(p) for p in obj.points],
|
|
'z_order': obj.z_order,
|
|
})
|
|
return converted
|
|
|
|
def _convert_polyline_object(self, obj):
|
|
return self._convert_shape_object(obj)
|
|
|
|
def _convert_polygon_object(self, obj):
|
|
return self._convert_shape_object(obj)
|
|
|
|
def _convert_bbox_object(self, obj):
|
|
converted = self._convert_shape_object(obj)
|
|
converted.pop('points', None)
|
|
converted['bbox'] = [float(p) for p in obj.get_bbox()]
|
|
return converted
|
|
|
|
def _convert_points_object(self, obj):
|
|
converted = self._convert_shape_object(obj)
|
|
|
|
converted.update({
|
|
'visibility': [int(v.value) for v in obj.visibility],
|
|
})
|
|
return converted
|
|
|
|
def _convert_caption_object(self, obj):
|
|
converted = self._convert_annotation(obj)
|
|
|
|
converted.update({
|
|
'caption': cast(obj.caption, str),
|
|
})
|
|
return converted
|
|
|
|
def _convert_label_categories(self, obj):
|
|
converted = {
|
|
'labels': [],
|
|
}
|
|
for label in obj.items:
|
|
converted['labels'].append({
|
|
'name': cast(label.name, str),
|
|
'parent': cast(label.parent, str),
|
|
})
|
|
return converted
|
|
|
|
def _convert_mask_categories(self, obj):
|
|
converted = {
|
|
'colormap': [],
|
|
}
|
|
for label_id, color in obj.colormap.items():
|
|
converted['colormap'].append({
|
|
'label_id': int(label_id),
|
|
'r': int(color[0]),
|
|
'g': int(color[1]),
|
|
'b': int(color[2]),
|
|
})
|
|
return converted
|
|
|
|
def _convert_points_categories(self, obj):
|
|
converted = {
|
|
'items': [],
|
|
}
|
|
for label_id, item in obj.items.items():
|
|
converted['items'].append({
|
|
'label_id': int(label_id),
|
|
'labels': [cast(label, str) for label in item.labels],
|
|
'joints': [list(map(int, j)) for j in item.joints],
|
|
})
|
|
return converted
|
|
|
|
class DatumaroConverter(Converter):
|
|
DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT
|
|
|
|
def apply(self):
|
|
os.makedirs(self._save_dir, exist_ok=True)
|
|
|
|
images_dir = osp.join(self._save_dir, DatumaroPath.IMAGES_DIR)
|
|
os.makedirs(images_dir, exist_ok=True)
|
|
self._images_dir = images_dir
|
|
|
|
annotations_dir = osp.join(self._save_dir, DatumaroPath.ANNOTATIONS_DIR)
|
|
os.makedirs(annotations_dir, exist_ok=True)
|
|
self._annotations_dir = annotations_dir
|
|
|
|
subsets = self._extractor.subsets() or [None]
|
|
subsets = [n or DEFAULT_SUBSET_NAME for n in subsets]
|
|
subsets = { name: _SubsetWriter(name, self) for name in subsets }
|
|
|
|
for subset, writer in subsets.items():
|
|
writer.write_categories(self._extractor.categories())
|
|
|
|
for item in self._extractor:
|
|
subset = item.subset or DEFAULT_SUBSET_NAME
|
|
writer = subsets[subset]
|
|
|
|
writer.write_item(item)
|
|
|
|
for subset, writer in subsets.items():
|
|
writer.write(annotations_dir)
|
|
|
|
def _save_image(self, item, path=None):
|
|
super()._save_image(item,
|
|
osp.join(self._images_dir, self._make_image_filename(item)))
|
|
|
|
class DatumaroProjectConverter(Converter):
|
|
@classmethod
|
|
def convert(cls, extractor, save_dir, **kwargs):
|
|
os.makedirs(save_dir, exist_ok=True)
|
|
|
|
from datumaro.components.project import Project
|
|
project = Project.generate(save_dir,
|
|
config=kwargs.pop('project_config', None))
|
|
|
|
DatumaroConverter.convert(extractor,
|
|
save_dir=osp.join(
|
|
project.config.project_dir, project.config.dataset_dir),
|
|
**kwargs) |