You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
480 lines
19 KiB
Python
480 lines
19 KiB
Python
from functools import partial
|
|
import numpy as np
|
|
import os.path as osp
|
|
|
|
from unittest import TestCase
|
|
|
|
from datumaro.components.project import Project, Dataset
|
|
from datumaro.components.extractor import (DatasetItem,
|
|
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
|
|
LabelCategories, PointsCategories
|
|
)
|
|
from datumaro.plugins.coco_format.converter import (
|
|
CocoConverter,
|
|
CocoImageInfoConverter,
|
|
CocoCaptionsConverter,
|
|
CocoInstancesConverter,
|
|
CocoPersonKeypointsConverter,
|
|
CocoLabelsConverter,
|
|
)
|
|
from datumaro.plugins.coco_format.importer import CocoImporter
|
|
from datumaro.util.image import Image
|
|
from datumaro.util.test_utils import TestDir, compare_datasets
|
|
|
|
|
|
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
|
|
|
|
class CocoImporterTest(TestCase):
|
|
def test_can_import(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
|
|
subset='val', attributes={'id': 1},
|
|
annotations=[
|
|
Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
|
|
id=1, group=1, attributes={'is_crowd': False}),
|
|
Mask(np.array(
|
|
[[1, 0, 0, 1, 0]] * 5 +
|
|
[[1, 1, 1, 1, 0]] * 5
|
|
), label=0,
|
|
id=2, group=2, attributes={'is_crowd': True}),
|
|
]
|
|
),
|
|
], categories=['TEST',])
|
|
|
|
dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
|
|
.make_dataset()
|
|
|
|
compare_datasets(self, expected_dataset, dataset)
|
|
|
|
def test_can_detect(self):
|
|
self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
|
|
|
|
class CocoConverterTest(TestCase):
|
|
def _test_save_and_load(self, source_dataset, converter, test_dir,
|
|
target_dataset=None, importer_args=None):
|
|
converter(source_dataset, test_dir)
|
|
|
|
if importer_args is None:
|
|
importer_args = {}
|
|
parsed_dataset = CocoImporter()(test_dir, **importer_args).make_dataset()
|
|
|
|
if target_dataset is None:
|
|
target_dataset = source_dataset
|
|
|
|
compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
|
|
|
|
def test_can_save_and_load_captions(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train',
|
|
annotations=[
|
|
Caption('hello', id=1, group=1),
|
|
Caption('world', id=2, group=2),
|
|
], attributes={'id': 1}),
|
|
DatasetItem(id=2, subset='train',
|
|
annotations=[
|
|
Caption('test', id=3, group=3),
|
|
], attributes={'id': 2}),
|
|
|
|
DatasetItem(id=3, subset='val',
|
|
annotations=[
|
|
Caption('word', id=1, group=1),
|
|
], attributes={'id': 1}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
CocoCaptionsConverter.convert, test_dir)
|
|
|
|
def test_can_save_and_load_instances(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
# Bbox + single polygon
|
|
Bbox(0, 1, 2, 2,
|
|
label=2, group=1, id=1,
|
|
attributes={ 'is_crowd': False }),
|
|
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
|
|
attributes={ 'is_crowd': False },
|
|
label=2, group=1, id=1),
|
|
], attributes={'id': 1}),
|
|
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
# Mask + bbox
|
|
Mask(np.array([
|
|
[0, 1, 0, 0],
|
|
[0, 1, 0, 0],
|
|
[0, 1, 1, 1],
|
|
[0, 0, 0, 0]],
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=4, group=3, id=3),
|
|
Bbox(1, 0, 2, 2, label=4, group=3, id=3,
|
|
attributes={ 'is_crowd': True }),
|
|
], attributes={'id': 2}),
|
|
|
|
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
# Bbox + mask
|
|
Bbox(0, 1, 2, 2, label=4, group=3, id=3,
|
|
attributes={ 'is_crowd': True }),
|
|
Mask(np.array([
|
|
[0, 0, 0, 0],
|
|
[1, 1, 1, 0],
|
|
[1, 1, 0, 0],
|
|
[0, 0, 0, 0]],
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=4, group=3, id=3),
|
|
], attributes={'id': 1}),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
Polygon([0, 1, 2, 1, 2, 3, 0, 3],
|
|
attributes={ 'is_crowd': False },
|
|
label=2, group=1, id=1),
|
|
], attributes={'id': 1}),
|
|
DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 1, 0, 0],
|
|
[0, 1, 0, 0],
|
|
[0, 1, 1, 1],
|
|
[0, 0, 0, 0]],
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=4, group=3, id=3),
|
|
], attributes={'id': 2}),
|
|
|
|
DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 0, 0, 0],
|
|
[1, 1, 1, 0],
|
|
[1, 1, 0, 0],
|
|
[0, 0, 0, 0]],
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=4, group=3, id=3),
|
|
], attributes={'id': 1})
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
CocoInstancesConverter.convert, test_dir,
|
|
target_dataset=target_dataset)
|
|
|
|
def test_can_merge_polygons_on_loading(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
|
|
annotations=[
|
|
Polygon([0, 0, 4, 0, 4, 4],
|
|
label=3, id=4, group=4),
|
|
Polygon([5, 0, 9, 0, 5, 5],
|
|
label=3, id=4, group=4),
|
|
]
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
|
|
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
|
|
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
|
|
# only internal fragment (without the border),
|
|
# but not everywhere...
|
|
),
|
|
label=3, id=4, group=4,
|
|
attributes={ 'is_crowd': False }),
|
|
], attributes={'id': 1}
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
CocoInstancesConverter.convert, test_dir,
|
|
importer_args={'merge_instance_polygons': True},
|
|
target_dataset=target_dataset)
|
|
|
|
def test_can_crop_covered_segments(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 0, 1, 1, 1],
|
|
[0, 0, 1, 1, 1],
|
|
[1, 1, 0, 1, 1],
|
|
[1, 1, 1, 0, 0],
|
|
[1, 1, 1, 0, 0]],
|
|
),
|
|
label=2, id=1, z_order=0),
|
|
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
|
|
label=1, id=2, z_order=1),
|
|
]
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((5, 5, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 0, 1, 1, 1],
|
|
[0, 0, 0, 0, 1],
|
|
[1, 0, 0, 0, 1],
|
|
[1, 0, 0, 0, 0],
|
|
[1, 1, 1, 0, 0]],
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=2, id=1, group=1),
|
|
|
|
Polygon([1, 1, 4, 1, 4, 4, 1, 4],
|
|
label=1, id=2, group=2,
|
|
attributes={ 'is_crowd': False }),
|
|
], attributes={'id': 1}
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
partial(CocoInstancesConverter.convert, crop_covered=True),
|
|
test_dir, target_dataset=target_dataset)
|
|
|
|
def test_can_convert_polygons_to_mask(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
|
|
annotations=[
|
|
Polygon([0, 0, 4, 0, 4, 4],
|
|
label=3, id=4, group=4),
|
|
Polygon([5, 0, 9, 0, 5, 5],
|
|
label=3, id=4, group=4),
|
|
]
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((6, 10, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
|
|
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
|
|
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
|
|
# only internal fragment (without the border),
|
|
# but not everywhere...
|
|
),
|
|
attributes={ 'is_crowd': True },
|
|
label=3, id=4, group=4),
|
|
], attributes={'id': 1}
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
partial(CocoInstancesConverter.convert, segmentation_mode='mask'),
|
|
test_dir, target_dataset=target_dataset)
|
|
|
|
def test_can_convert_masks_to_polygons(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
|
|
annotations=[
|
|
Mask(np.array([
|
|
[0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
|
|
[0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
|
|
[0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
|
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
|
]),
|
|
label=3, id=4, group=4),
|
|
]
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.zeros((5, 10, 3)),
|
|
annotations=[
|
|
Polygon(
|
|
[3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
|
|
label=3, id=4, group=4,
|
|
attributes={ 'is_crowd': False }),
|
|
Polygon(
|
|
[5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
|
|
label=3, id=4, group=4,
|
|
attributes={ 'is_crowd': False }),
|
|
], attributes={'id': 1}
|
|
),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
partial(CocoInstancesConverter.convert, segmentation_mode='polygons'),
|
|
test_dir,
|
|
target_dataset=target_dataset)
|
|
|
|
def test_can_save_and_load_images(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train', attributes={'id': 1}),
|
|
DatasetItem(id=2, subset='train', attributes={'id': 2}),
|
|
|
|
DatasetItem(id=2, subset='val', attributes={'id': 2}),
|
|
DatasetItem(id=3, subset='val', attributes={'id': 3}),
|
|
DatasetItem(id=4, subset='val', attributes={'id': 4}),
|
|
|
|
DatasetItem(id=5, subset='test', attributes={'id': 1}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
CocoImageInfoConverter.convert, test_dir)
|
|
|
|
def test_can_save_and_load_labels(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train',
|
|
annotations=[
|
|
Label(4, id=1, group=1),
|
|
Label(9, id=2, group=2),
|
|
], attributes={'id': 1}),
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
CocoLabelsConverter.convert, test_dir)
|
|
|
|
def test_can_save_and_load_keypoints(self):
|
|
source_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
|
|
annotations=[
|
|
# Full instance annotations: polygon + keypoints
|
|
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
|
|
label=3, group=1, id=1),
|
|
Polygon([0, 0, 4, 0, 4, 4],
|
|
label=3, group=1, id=1),
|
|
|
|
# Full instance annotations: bbox + keypoints
|
|
Points([1, 2, 3, 4, 2, 3], group=2, id=2),
|
|
Bbox(1, 2, 2, 2, group=2, id=2),
|
|
|
|
# Solitary keypoints
|
|
Points([1, 2, 0, 2, 4, 1], label=5, id=3),
|
|
|
|
# Some other solitary annotations (bug #1387)
|
|
Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),
|
|
|
|
# Solitary keypoints with no label
|
|
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
|
|
]),
|
|
], categories={
|
|
AnnotationType.label: LabelCategories.from_iterable(
|
|
str(i) for i in range(10)),
|
|
AnnotationType.points: PointsCategories.from_iterable(
|
|
(i, None, [[0, 1], [1, 2]]) for i in range(10)
|
|
),
|
|
})
|
|
|
|
target_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
|
|
annotations=[
|
|
Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
|
|
label=3, group=1, id=1,
|
|
attributes={'is_crowd': False}),
|
|
Polygon([0, 0, 4, 0, 4, 4],
|
|
label=3, group=1, id=1,
|
|
attributes={'is_crowd': False}),
|
|
|
|
Points([1, 2, 3, 4, 2, 3],
|
|
group=2, id=2,
|
|
attributes={'is_crowd': False}),
|
|
Bbox(1, 2, 2, 2,
|
|
group=2, id=2,
|
|
attributes={'is_crowd': False}),
|
|
|
|
Points([1, 2, 0, 2, 4, 1],
|
|
label=5, group=3, id=3,
|
|
attributes={'is_crowd': False}),
|
|
Bbox(0, 1, 4, 1,
|
|
label=5, group=3, id=3,
|
|
attributes={'is_crowd': False}),
|
|
|
|
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
|
|
group=5, id=5,
|
|
attributes={'is_crowd': False}),
|
|
Bbox(1, 2, 2, 2,
|
|
group=5, id=5,
|
|
attributes={'is_crowd': False}),
|
|
], attributes={'id': 1}),
|
|
], categories={
|
|
AnnotationType.label: LabelCategories.from_iterable(
|
|
str(i) for i in range(10)),
|
|
AnnotationType.points: PointsCategories.from_iterable(
|
|
(i, None, [[0, 1], [1, 2]]) for i in range(10)
|
|
),
|
|
})
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(source_dataset,
|
|
CocoPersonKeypointsConverter.convert, test_dir,
|
|
target_dataset=target_dataset)
|
|
|
|
def test_can_save_dataset_with_no_subsets(self):
|
|
test_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, attributes={'id': 1}),
|
|
DatasetItem(id=2, attributes={'id': 2}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(test_dataset,
|
|
CocoConverter.convert, test_dir)
|
|
|
|
def test_can_save_dataset_with_image_info(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
|
|
attributes={'id': 1}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
CocoImageInfoConverter.convert, test_dir)
|
|
|
|
def test_relative_paths(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id='1', image=np.ones((4, 2, 3)),
|
|
attributes={'id': 1}),
|
|
DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
|
|
attributes={'id': 2}),
|
|
DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
|
|
attributes={'id': 3}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
|
|
|
|
def test_preserve_coco_ids(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id='some/name1', image=np.ones((4, 2, 3)),
|
|
attributes={'id': 40}),
|
|
])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
partial(CocoImageInfoConverter.convert, save_images=True), test_dir)
|
|
|
|
def test_annotation_attributes(self):
|
|
expected_dataset = Dataset.from_iterable([
|
|
DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[
|
|
Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1,
|
|
attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}),
|
|
], attributes={'id': 1})
|
|
], categories=[str(i) for i in range(10)])
|
|
|
|
with TestDir() as test_dir:
|
|
self._test_save_and_load(expected_dataset,
|
|
CocoConverter.convert, test_dir)
|