[Datumaro] Add tests for dataset examples (#1648)

* add dataset examples

* update docs

* update yolo tests

* join voc format test classes

* replace voc extractor tests with import test

* update tfrecord format tests

* update mot tests

* update labelme tests

* update image dir tests
main
zhiltsov-max 6 years ago committed by GitHub
parent c4e769d5cf
commit 306e91c3d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -9,16 +9,16 @@ from datumaro.util.test_utils import TestDir, compare_datasets
class ImageDirFormatTest(TestCase):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((10, 6, 3))),
DatasetItem(id=2, image=np.ones((5, 4, 3))),
])
def test_can_load(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, image=np.ones((10, 6, 3))),
DatasetItem(id=2, image=np.ones((5, 4, 3))),
])
with TestDir() as test_dir:
source_dataset = self.TestExtractor()
source_dataset = TestExtractor()
ImageDirConverter()(source_dataset, save_dir=test_dir)

@ -6,8 +6,8 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, Polygon, LabelCategories
)
from datumaro.components.project import Dataset
from datumaro.plugins.labelme_format import LabelMeExtractor, LabelMeImporter, \
from datumaro.components.project import Project
from datumaro.plugins.labelme_format import LabelMeImporter, \
LabelMeConverter
from datumaro.util.test_utils import TestDir, compare_datasets
@ -111,8 +111,11 @@ class LabelMeConverterTest(TestCase):
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
class LabelMeExtractorTest(TestCase):
def test_can_load(self):
class LabelMeImporterTest(TestCase):
def test_can_detect(self):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
img1 = np.ones((77, 102, 3)) * 255
@ -208,13 +211,6 @@ class LabelMeExtractorTest(TestCase):
AnnotationType.label: label_cat,
}
parsed = Dataset.from_extractors(LabelMeExtractor(DUMMY_DATASET_DIR))
compare_datasets(self, expected=DstExtractor(), actual=parsed)
class LabelMeImporterTest(TestCase):
def test_can_detect(self):
self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
parsed = LabelMeImporter()(DUMMY_DATASET_DIR).make_dataset()
self.assertEqual(1, len(parsed))
parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
.make_dataset()
compare_datasets(self, expected=DstExtractor(), actual=parsed)

@ -1,10 +1,12 @@
import numpy as np
import os.path as osp
from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories
)
from datumaro.components.project import Project
from datumaro.plugins.mot_format import MotSeqGtConverter, MotSeqImporter
from datumaro.util.test_utils import TestDir, compare_datasets
@ -116,15 +118,25 @@ class MotConverterTest(TestCase):
SrcExtractor(), MotSeqGtConverter(save_images=True),
test_dir, target_dataset=DstExtractor())
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
class MotImporterTest(TestCase):
def test_can_detect(self):
class TestExtractor(Extractor):
self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
DatasetItem(id=1,
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
Bbox(0, 4, 4, 8, label=2, attributes={
'occluded': False,
'visibility': 1.0,
'ignored': False,
}),
]
),
])
@ -137,10 +149,7 @@ class MotImporterTest(TestCase):
AnnotationType.label: label_cat,
}
def generate_dummy_dataset(path):
MotSeqGtConverter()(TestExtractor(), save_dir=path)
with TestDir() as test_dir:
generate_dummy_dataset(test_dir)
dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
.make_dataset()
self.assertTrue(MotSeqImporter.detect(test_dir))
compare_datasets(self, DstExtractor(), dataset)

@ -1,10 +1,12 @@
import numpy as np
import os.path as osp
from unittest import TestCase, skipIf
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, Mask, LabelCategories
)
from datumaro.components.project import Project
from datumaro.util.image import Image
from datumaro.util.test_utils import TestDir, compare_datasets
from datumaro.util.tf_util import check_import
@ -56,17 +58,6 @@ class TfrecordConverterTest(TestCase):
Bbox(2, 4, 4, 4),
]
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
]
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
])
def categories(self):
@ -188,17 +179,37 @@ class TfrecordConverterTest(TestCase):
self.assertEqual(expected, parsed)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__),
'assets', 'tf_detection_api_dataset')
@skipIf(import_failed, "Failed to import tensorflow")
class TfrecordImporterTest(TestCase):
def test_can_detect(self):
class TestExtractor(Extractor):
self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=np.ones((16, 16, 3)),
annotations=[
Bbox(0, 4, 4, 8, label=2),
]
Bbox(0, 4, 4, 4, label=3),
Bbox(2, 4, 4, 4),
],
),
DatasetItem(id=2, subset='val',
image=np.ones((8, 8, 3)),
annotations=[
Bbox(1, 2, 4, 2, label=3),
],
),
DatasetItem(id=3, subset='test',
image=np.ones((5, 4, 3)) * 3,
),
])
@ -210,10 +221,7 @@ class TfrecordImporterTest(TestCase):
AnnotationType.label: label_cat,
}
def generate_dummy_tfrecord(path):
TfDetectionApiConverter()(TestExtractor(), save_dir=path)
with TestDir() as test_dir:
generate_dummy_tfrecord(test_dir)
dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
.make_dataset()
self.assertTrue(TfDetectionApiImporter.detect(test_dir))
compare_datasets(self, DstExtractor(), dataset)

@ -1,9 +1,6 @@
from collections import OrderedDict
import numpy as np
import os
import os.path as osp
from xml.etree import ElementTree as ET
import shutil
from unittest import TestCase
@ -11,13 +8,6 @@ from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Label, Bbox, Mask, LabelCategories,
)
import datumaro.plugins.voc_format.format as VOC
from datumaro.plugins.voc_format.extractor import (
VocClassificationExtractor,
VocDetectionExtractor,
VocSegmentationExtractor,
VocLayoutExtractor,
VocActionExtractor,
)
from datumaro.plugins.voc_format.converter import (
VocConverter,
VocClassificationConverter,
@ -28,11 +18,11 @@ from datumaro.plugins.voc_format.converter import (
)
from datumaro.plugins.voc_format.importer import VocImporter
from datumaro.components.project import Project
from datumaro.util.image import save_image, Image
from datumaro.util.image import Image
from datumaro.util.test_utils import TestDir, compare_datasets
class VocTest(TestCase):
class VocFormatTest(TestCase):
def test_colormap_generator(self):
reference = np.array([
[ 0, 0, 0],
@ -61,115 +51,18 @@ class VocTest(TestCase):
self.assertTrue(np.array_equal(reference, list(VOC.VocColormap.values())))
def get_label(extractor, label_id):
return extractor.categories()[AnnotationType.label].items[label_id].name
def generate_dummy_voc(path):
cls_subsets_dir = osp.join(path, 'ImageSets', 'Main')
action_subsets_dir = osp.join(path, 'ImageSets', 'Action')
layout_subsets_dir = osp.join(path, 'ImageSets', 'Layout')
segm_subsets_dir = osp.join(path, 'ImageSets', 'Segmentation')
ann_dir = osp.join(path, 'Annotations')
img_dir = osp.join(path, 'JPEGImages')
segm_dir = osp.join(path, 'SegmentationClass')
inst_dir = osp.join(path, 'SegmentationObject')
os.makedirs(cls_subsets_dir)
os.makedirs(ann_dir)
os.makedirs(img_dir)
os.makedirs(segm_dir)
os.makedirs(inst_dir)
subsets = {
'train': ['2007_000001'],
'test': ['2007_000002'],
}
# Subsets
for subset_name, subset in subsets.items():
for item in subset:
with open(osp.join(cls_subsets_dir, subset_name + '.txt'), 'w') as f:
for item in subset:
f.write('%s\n' % item)
shutil.copytree(cls_subsets_dir, action_subsets_dir)
shutil.copytree(cls_subsets_dir, layout_subsets_dir)
shutil.copytree(cls_subsets_dir, segm_subsets_dir)
# Classification
subset_name = 'train'
subset = subsets[subset_name]
for label in VOC.VocLabel:
with open(osp.join(cls_subsets_dir, '%s_%s.txt' % \
(label.name, subset_name)), 'w') as f:
for item in subset:
presence = label.value % 2
f.write('%s %2d\n' % (item, 1 if presence else -1))
# Detection + Action + Layout
subset_name = 'train'
subset = subsets[subset_name]
for item in subset:
root_elem = ET.Element('annotation')
ET.SubElement(root_elem, 'folder').text = 'VOC' + item.split('_')[0]
ET.SubElement(root_elem, 'filename').text = item + '.jpg'
size_elem = ET.SubElement(root_elem, 'size')
ET.SubElement(size_elem, 'width').text = '10'
ET.SubElement(size_elem, 'height').text = '20'
ET.SubElement(size_elem, 'depth').text = '3'
ET.SubElement(root_elem, 'segmented').text = '1'
obj1_elem = ET.SubElement(root_elem, 'object')
ET.SubElement(obj1_elem, 'name').text = 'cat'
ET.SubElement(obj1_elem, 'pose').text = VOC.VocPose(1).name
ET.SubElement(obj1_elem, 'truncated').text = '1'
ET.SubElement(obj1_elem, 'difficult').text = '0'
obj1bb_elem = ET.SubElement(obj1_elem, 'bndbox')
ET.SubElement(obj1bb_elem, 'xmin').text = '1'
ET.SubElement(obj1bb_elem, 'ymin').text = '2'
ET.SubElement(obj1bb_elem, 'xmax').text = '3'
ET.SubElement(obj1bb_elem, 'ymax').text = '4'
obj2_elem = ET.SubElement(root_elem, 'object')
ET.SubElement(obj2_elem, 'name').text = 'person'
obj2bb_elem = ET.SubElement(obj2_elem, 'bndbox')
ET.SubElement(obj2bb_elem, 'xmin').text = '4'
ET.SubElement(obj2bb_elem, 'ymin').text = '5'
ET.SubElement(obj2bb_elem, 'xmax').text = '6'
ET.SubElement(obj2bb_elem, 'ymax').text = '7'
obj2head_elem = ET.SubElement(obj2_elem, 'part')
ET.SubElement(obj2head_elem, 'name').text = VOC.VocBodyPart(1).name
obj2headbb_elem = ET.SubElement(obj2head_elem, 'bndbox')
ET.SubElement(obj2headbb_elem, 'xmin').text = '5.5'
ET.SubElement(obj2headbb_elem, 'ymin').text = '6'
ET.SubElement(obj2headbb_elem, 'xmax').text = '7.5'
ET.SubElement(obj2headbb_elem, 'ymax').text = '8'
obj2act_elem = ET.SubElement(obj2_elem, 'actions')
for act in VOC.VocAction:
ET.SubElement(obj2act_elem, act.name).text = '%s' % (act.value % 2)
with open(osp.join(ann_dir, item + '.xml'), 'w') as f:
f.write(ET.tostring(root_elem, encoding='unicode'))
# Segmentation + Instances
subset_name = 'train'
subset = subsets[subset_name]
for item in subset:
save_image(osp.join(segm_dir, item + '.png'),
np.tile(VOC.VocColormap[2][::-1], (5, 10, 1))
)
save_image(osp.join(inst_dir, item + '.png'),
np.tile(1, (5, 10, 1)))
# Test images
subset_name = 'test'
subset = subsets[subset_name]
for item in subset:
save_image(osp.join(img_dir, item + '.jpg'),
np.ones([10, 20, 3]))
return subsets
def test_can_write_and_parse_labelmap(self):
src_label_map = VOC.make_voc_label_map()
src_label_map['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]
src_label_map['ww'] = [(10, 20, 30), [], ['act3']]
with TestDir() as test_dir:
file_path = osp.join(test_dir, 'test.txt')
VOC.write_label_map(file_path, src_label_map)
dst_label_map = VOC.parse_label_map(file_path)
self.assertEqual(src_label_map, dst_label_map)
class TestExtractorBase(Extractor):
def _label(self, voc_label):
@ -178,32 +71,20 @@ class TestExtractorBase(Extractor):
def categories(self):
return VOC.make_voc_categories()
class VocExtractorTest(TestCase):
def test_can_load_voc_cls(self):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id='2007_000001', subset='train',
annotations=[
Label(self._label(l.name))
for l in VOC.VocLabel if l.value % 2 == 1
]
),
])
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
parsed_train = VocClassificationExtractor(
osp.join(test_dir, 'ImageSets', 'Main', 'train.txt'))
compare_datasets(self, DstExtractor(), parsed_train)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'voc_dataset')
def test_can_load_voc_det(self):
class VocImportTest(TestCase):
def test_can_import(self):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id='2007_000001', subset='train',
image=Image(path='2007_000001.jpg', size=(20, 10)),
annotations=[
Label(self._label(l.name))
for l in VOC.VocLabel if l.value % 2 == 1
] + [
Bbox(1, 2, 2, 2, label=self._label('cat'),
attributes={
'pose': VOC.VocPose(1).name,
@ -224,102 +105,27 @@ class VocExtractorTest(TestCase):
}
},
id=2, group=2,
# TODO: Actions and group should be excluded
# as soon as correct merge is implemented
),
]
),
])
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
parsed_train = VocDetectionExtractor(
osp.join(test_dir, 'ImageSets', 'Main', 'train.txt'))
compare_datasets(self, DstExtractor(), parsed_train)
def test_can_load_voc_segm(self):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id='2007_000001', subset='train',
annotations=[
Bbox(5.5, 6, 2, 2, label=self._label(
VOC.VocBodyPart(1).name),
group=2
),
Mask(image=np.ones([5, 10]),
label=self._label(VOC.VocLabel(2).name),
group=1,
),
]
),
DatasetItem(id='2007_000002', subset='test',
image=np.zeros((20, 10, 3))),
])
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
parsed_train = VocSegmentationExtractor(
osp.join(test_dir, 'ImageSets', 'Segmentation', 'train.txt'))
compare_datasets(self, DstExtractor(), parsed_train)
dataset = Project.import_from(DUMMY_DATASET_DIR, 'voc').make_dataset()
def test_can_load_voc_layout(self):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id='2007_000001', subset='train',
annotations=[
Bbox(4, 5, 2, 2, label=self._label('person'),
attributes={
'truncated': False,
'difficult': False,
'occluded': False,
**{
a.name: a.value % 2 == 1
for a in VOC.VocAction
}
},
id=2, group=2,
# TODO: Actions should be excluded
# as soon as correct merge is implemented
),
Bbox(5.5, 6, 2, 2, label=self._label(
VOC.VocBodyPart(1).name),
group=2
)
]
),
])
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
parsed_train = VocLayoutExtractor(
osp.join(test_dir, 'ImageSets', 'Layout', 'train.txt'))
compare_datasets(self, DstExtractor(), parsed_train)
compare_datasets(self, DstExtractor(), dataset)
def test_can_load_voc_action(self):
class DstExtractor(TestExtractorBase):
def __iter__(self):
return iter([
DatasetItem(id='2007_000001', subset='train',
annotations=[
Bbox(4, 5, 2, 2, label=self._label('person'),
attributes={
'truncated': False,
'difficult': False,
'occluded': False,
**{
a.name: a.value % 2 == 1
for a in VOC.VocAction
}
# TODO: group should be excluded
# as soon as correct merge is implemented
},
id=2, group=2,
),
]
),
])
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
parsed_train = VocActionExtractor(
osp.join(test_dir, 'ImageSets', 'Action', 'train.txt'))
compare_datasets(self, DstExtractor(), parsed_train)
def test_can_detect_voc(self):
self.assertTrue(VocImporter.detect(DUMMY_DATASET_DIR))
class VocConverterTest(TestCase):
def _test_save_and_load(self, source_dataset, converter, test_dir,
@ -860,39 +666,3 @@ class VocConverterTest(TestCase):
with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
VocConverter(label_map='voc'), test_dir)
class VocImportTest(TestCase):
def test_can_import(self):
with TestDir() as test_dir:
subsets = generate_dummy_voc(test_dir)
dataset = Project.import_from(test_dir, 'voc').make_dataset()
self.assertEqual(len(VOC.VocTask) * len(subsets),
len(dataset.sources))
self.assertEqual(set(subsets), set(dataset.subsets()))
self.assertEqual(
sum([len(s) for _, s in subsets.items()]),
len(dataset))
def test_can_detect_voc(self):
with TestDir() as test_dir:
generate_dummy_voc(test_dir)
dataset_found = VocImporter.detect(test_dir)
self.assertTrue(dataset_found)
class VocFormatTest(TestCase):
def test_can_write_and_parse_labelmap(self):
src_label_map = VOC.make_voc_label_map()
src_label_map['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]
src_label_map['ww'] = [(10, 20, 30), [], ['act3']]
with TestDir() as test_dir:
file_path = osp.join(test_dir, 'test.txt')
VOC.write_label_map(file_path, src_label_map)
dst_label_map = VOC.parse_label_map(file_path)
self.assertEqual(src_label_map, dst_label_map)

@ -6,6 +6,7 @@ from unittest import TestCase
from datumaro.components.extractor import (Extractor, DatasetItem,
AnnotationType, Bbox, LabelCategories,
)
from datumaro.components.project import Project
from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.image import Image, save_image
@ -115,13 +116,19 @@ class YoloFormatTest(TestCase):
compare_datasets(self, source_dataset, parsed_dataset)
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'yolo_dataset')
class YoloImporterTest(TestCase):
def test_can_detect(self):
class TestExtractor(Extractor):
self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
def test_can_import(self):
class DstExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(path='1.jpg', size=(10, 15)),
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
@ -136,7 +143,7 @@ class YoloImporterTest(TestCase):
AnnotationType.label: label_categories,
}
with TestDir() as test_dir:
YoloConverter()(TestExtractor(), save_dir=test_dir)
dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
.make_dataset()
self.assertTrue(YoloImporter.detect(test_dir))
compare_datasets(self, DstExtractor(), dataset)

Loading…
Cancel
Save