[Datumaro] Add YOLO converter (#906)
* Add YOLO converter
* Add YOLO extractor
* Add YOLO format test
* Add YOLO export in UI
parent 1af9105bb1
commit 5458de7464
@ -0,0 +1,110 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

from collections import OrderedDict
import logging as log
import os
import os.path as osp

from datumaro.components.converter import Converter
from datumaro.components.extractor import AnnotationType
from datumaro.components.formats.yolo import YoloPath
from datumaro.util.image import save_image


def _make_yolo_bbox(img_size, box):
    # https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
    # <x> <y> <width> <height> - values relative to the width and height of the image
    # <x> <y> - the center of the rectangle
    x = (box[0] + box[2]) / 2 / img_size[0]
    y = (box[1] + box[3]) / 2 / img_size[1]
    w = (box[2] - box[0]) / img_size[0]
    h = (box[3] - box[1]) / img_size[1]
    return x, y, w, h


class YoloConverter(Converter):
    # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects

    def __init__(self, task=None, save_images=False, apply_colormap=False):
        super().__init__()
        self._task = task
        self._save_images = save_images
        self._apply_colormap = apply_colormap

    def __call__(self, extractor, save_dir):
        os.makedirs(save_dir, exist_ok=True)

        label_categories = extractor.categories()[AnnotationType.label]
        label_ids = {label.name: idx
            for idx, label in enumerate(label_categories.items)}
        with open(osp.join(save_dir, 'obj.names'), 'w') as f:
            f.writelines('%s\n' % l[0]
                for l in sorted(label_ids.items(), key=lambda x: x[1]))

        subsets = extractor.subsets()
        if len(subsets) == 0:
            subsets = [ None ]

        subset_lists = OrderedDict()

        for subset_name in subsets:
            if subset_name and subset_name in YoloPath.SUBSET_NAMES:
                subset = extractor.get_subset(subset_name)
            elif not subset_name:
                subset_name = YoloPath.DEFAULT_SUBSET_NAME
                subset = extractor
            else:
                log.warning("Skipping subset export '%s'. "
                    "If specified, the only valid names are %s" % \
                    (subset_name, ', '.join(
                        "'%s'" % s for s in YoloPath.SUBSET_NAMES)))
                continue

            subset_dir = osp.join(save_dir, 'obj_%s_data' % subset_name)
            os.makedirs(subset_dir, exist_ok=True)

            image_paths = OrderedDict()

            for item in subset:
                image_name = '%s.jpg' % item.id
                image_paths[item.id] = osp.join('data',
                    osp.basename(subset_dir), image_name)

                if self._save_images:
                    image_path = osp.join(subset_dir, image_name)
                    if not osp.exists(image_path):
                        save_image(image_path, item.image)

                height, width, _ = item.image.shape

                yolo_annotation = ''
                for bbox in item.annotations:
                    if bbox.type is not AnnotationType.bbox:
                        continue
                    if bbox.label is None:
                        continue

                    yolo_bb = _make_yolo_bbox((width, height), bbox.points)
                    yolo_bb = ' '.join('%.6f' % p for p in yolo_bb)
                    yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb)

                annotation_path = osp.join(subset_dir, '%s.txt' % item.id)
                with open(annotation_path, 'w') as f:
                    f.write(yolo_annotation)

            subset_list_name = '%s.txt' % subset_name
            subset_lists[subset_name] = subset_list_name
            with open(osp.join(save_dir, subset_list_name), 'w') as f:
                f.writelines('%s\n' % s for s in image_paths.values())

        with open(osp.join(save_dir, 'obj.data'), 'w') as f:
            f.write('classes = %s\n' % len(label_ids))

            for subset_name, subset_list_name in subset_lists.items():
                f.write('%s = %s\n' % (subset_name,
                    osp.join('data', subset_list_name)))

            f.write('names = %s\n' % osp.join('data', 'obj.names'))
            f.write('backup = backup/\n')
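For orientation, a minimal, hypothetical usage sketch of the converter added above; `my_extractor` and `out_dir` are placeholders (any extractor with bbox annotations and a writable directory), while the call signature itself matches the test at the end of this change.

from datumaro.components.converters.yolo import YoloConverter

# 'my_extractor' is assumed to be an existing Extractor with bbox annotations;
# 'out_dir' is any writable directory. Both are placeholders, not part of this diff.
out_dir = 'yolo_export'
YoloConverter(save_images=True)(my_extractor, out_dir)

# Resulting layout, as produced by the code above:
#   out_dir/obj.names            one label name per line
#   out_dir/obj.data             classes, per-subset list paths, names, backup
#   out_dir/<subset>.txt         image list for each exported subset
#   out_dir/obj_<subset>_data/   <item id>.jpg images and <item id>.txt annotations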
@ -0,0 +1,162 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

from collections import OrderedDict
import os.path as osp
import re

from datumaro.components.extractor import (Extractor, DatasetItem,
    AnnotationType, BboxObject, LabelCategories
)
from datumaro.components.formats.yolo import YoloPath
from datumaro.util.image import lazy_image


class YoloExtractor(Extractor):
    class Subset(Extractor):
        def __init__(self, name, parent):
            super().__init__()
            self._name = name
            self._parent = parent
            self.items = OrderedDict()

        def __iter__(self):
            for item_id in self.items:
                yield self._parent._get(item_id, self._name)

        def __len__(self):
            return len(self.items)

        def categories(self):
            return self._parent.categories()

    def __init__(self, config_path):
        super().__init__()

        if not osp.isfile(config_path):
            raise Exception("Can't read dataset descriptor file '%s'" % \
                config_path)

        rootpath = osp.dirname(config_path)
        self._path = rootpath

        with open(config_path, 'r') as f:
            config_lines = f.readlines()

        subsets = OrderedDict()
        names_path = None

        for line in config_lines:
            match = re.match(r'(\w+)\s*=\s*(.+)$', line)
            if not match:
                continue

            key = match.group(1)
            value = match.group(2)
            if key == 'names':
                names_path = value
            elif key in YoloPath.SUBSET_NAMES:
                subsets[key] = value
            else:
                continue

        if not names_path:
            raise Exception("Failed to parse labels path from '%s'" % \
                config_path)

        for subset_name, list_path in subsets.items():
            list_path = self._make_local_path(list_path)
            if not osp.isfile(list_path):
                raise Exception("Not found '%s' subset list file" % subset_name)

            subset = YoloExtractor.Subset(subset_name, self)
            with open(list_path, 'r') as f:
                subset.items = OrderedDict(
                    (osp.splitext(osp.basename(p))[0], p.strip()) for p in f)

            for image_path in subset.items.values():
                image_path = self._make_local_path(image_path)
                if not osp.isfile(image_path):
                    raise Exception("Can't find image '%s'" % image_path)

            subsets[subset_name] = subset

        self._subsets = subsets

        self._categories = {
            AnnotationType.label:
                self._load_categories(self._make_local_path(names_path))
        }

    def _make_local_path(self, path):
        default_base = osp.join('data', '')
        if path.startswith(default_base): # default path
            path = path[len(default_base) : ]
        return osp.join(self._path, path) # relative or absolute path

    def _get(self, item_id, subset_name):
        subset = self._subsets[subset_name]
        item = subset.items[item_id]

        if isinstance(item, str):
            image_path = self._make_local_path(item)
            image = lazy_image(image_path)
            h, w, _ = image().shape
            anno_path = osp.splitext(image_path)[0] + '.txt'
            annotations = self._parse_annotations(anno_path, w, h)

            item = DatasetItem(id=item_id, subset=subset_name,
                image=image, annotations=annotations)
            subset.items[item_id] = item

        return item

    @staticmethod
    def _parse_annotations(anno_path, image_width, image_height):
        with open(anno_path, 'r') as f:
            annotations = []
            for line in f:
                label_id, xc, yc, w, h = line.strip().split()
                label_id = int(label_id)
                w = float(w)
                h = float(h)
                x = float(xc) - w * 0.5
                y = float(yc) - h * 0.5
                annotations.append(BboxObject(
                    x * image_width, y * image_height,
                    w * image_width, h * image_height,
                    label=label_id
                ))
        return annotations

    @staticmethod
    def _load_categories(names_path):
        label_categories = LabelCategories()

        with open(names_path, 'r') as f:
            for label in f:
                label_categories.add(label.strip())

        return label_categories

    def categories(self):
        return self._categories

    def __iter__(self):
        for subset in self._subsets.values():
            for item in subset:
                yield item

    def __len__(self):
        length = 0
        for subset in self._subsets.values():
            length += len(subset)
        return length

    def subsets(self):
        return list(self._subsets)

    def get_subset(self, name):
        return self._subsets[name]
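For context, the extractor's __init__ parses a Darknet-style .data descriptor like the illustrative sample below, matching what YoloConverter writes (the class count and subset list files depend on the dataset):

classes = 10
train = data/train.txt
valid = data/valid.txt
names = data/obj.names
backup = backup/

Note that _make_local_path() strips the leading 'data/' prefix, so these entries resolve relative to the directory that contains the descriptor itself.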
@ -0,0 +1,9 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT


class YoloPath:
    DEFAULT_SUBSET_NAME = 'train'
    SUBSET_NAMES = ['train', 'valid']
@ -0,0 +1,32 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os.path as osp
from datumaro.util import dir_items


class YoloImporter:
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.exists(path):
            raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

        configs = []
        if osp.isfile(path):
            configs = [ path ]
        elif osp.isdir(path):
            configs = [osp.join(path, p) for p in dir_items(path, '.data')]

        for config_path in configs:
            source_name = osp.splitext(osp.basename(config_path))[0]
            project.add_source(source_name, {
                'url': config_path,
                'format': 'yolo',
                'options': extra_params,
            })

        return project
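A minimal, hypothetical sketch of driving the importer on an exported directory; 'yolo_export' is a placeholder path, while make_dataset() and subsets() are the same calls exercised by the test below.

from datumaro.components.importers.yolo import YoloImporter

# The importer registers every *.data descriptor it finds as a 'yolo' source.
project = YoloImporter()('yolo_export')
dataset = project.make_dataset()  # items are read lazily through YoloExtractor
print(len(dataset), 'items in subsets', sorted(dataset.subsets()))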
@ -0,0 +1,69 @@
import numpy as np

from unittest import TestCase

from datumaro.components.extractor import (Extractor, DatasetItem,
    AnnotationType, BboxObject, LabelCategories,
)
from datumaro.components.importers.yolo import YoloImporter
from datumaro.components.converters.yolo import YoloConverter
from datumaro.util.test_utils import TestDir


class YoloFormatTest(TestCase):
    def test_can_save_and_load(self):
        class TestExtractor(Extractor):
            def __iter__(self):
                items = [
                    DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
                        annotations=[
                            BboxObject(0, 2, 4, 2, label=2),
                            BboxObject(0, 1, 2, 3, label=4),
                        ]),
                    DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
                        annotations=[
                            BboxObject(0, 2, 4, 2, label=2),
                            BboxObject(3, 3, 2, 3, label=4),
                            BboxObject(2, 1, 2, 3, label=4),
                        ]),

                    DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
                        annotations=[
                            BboxObject(0, 1, 5, 2, label=2),
                            BboxObject(0, 2, 3, 2, label=5),
                            BboxObject(0, 2, 4, 2, label=6),
                            BboxObject(0, 7, 3, 2, label=7),
                        ]),
                ]
                return iter(items)

            def categories(self):
                label_categories = LabelCategories()
                for i in range(10):
                    label_categories.add('label_' + str(i))
                return {
                    AnnotationType.label: label_categories,
                }

        with TestDir() as test_dir:
            source_dataset = TestExtractor()

            YoloConverter(save_images=True)(source_dataset, test_dir.path)
            parsed_dataset = YoloImporter()(test_dir.path).make_dataset()

            self.assertListEqual(
                sorted(source_dataset.subsets()),
                sorted(parsed_dataset.subsets()),
            )
            self.assertEqual(len(source_dataset), len(parsed_dataset))
            for subset_name in source_dataset.subsets():
                source_subset = source_dataset.get_subset(subset_name)
                parsed_subset = parsed_dataset.get_subset(subset_name)
                for item_a, item_b in zip(source_subset, parsed_subset):
                    self.assertEqual(len(item_a.annotations), len(item_b.annotations))
                    for ann_a, ann_b in zip(item_a.annotations, item_b.annotations):
                        self.assertEqual(ann_a.type, ann_b.type)
                        self.assertAlmostEqual(ann_a.x, ann_b.x)
                        self.assertAlmostEqual(ann_a.y, ann_b.y)
                        self.assertAlmostEqual(ann_a.w, ann_b.w)
                        self.assertAlmostEqual(ann_a.h, ann_b.h)