[Datumaro] Fixes (#1953)

* Add absolute image path in rest api extractor

* Add default split for random split

* Fix image path in datumaro format

* Preserve bboxes in coco format

* Update changelog

Co-authored-by: Nikita Manovich <40690625+nmanovic@users.noreply.github.com>
Authored by zhiltsov-max, committed via GitHub
parent e7585b8ce9
commit 0062ecdec3

@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Smaller object details (<https://github.com/opencv/cvat/pull/1877>)
- `COCO` format does not convert bboxes to polygons on export (<https://github.com/opencv/cvat/pull/1953>)
- It is impossible to submit a DL model in OpenVINO format using UI. Now you can deploy new models on the server using serverless functions (<https://github.com/opencv/cvat/pull/1767>)
- Files and folders under share path are now alphabetically sorted

@@ -100,7 +100,7 @@ class cvat_rest_api_task_images(SourceExtractor):
if entry.get('height') and entry.get('width'):
size = (entry['height'], entry['width'])
image = Image(data=self._make_image_loader(item_id),
path=item_filename, size=size)
path=self._image_local_path(item_id), size=size)
item = DatasetItem(id=item_id, image=image)
items.append((item.id, item))

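For context on the first fix: the extractor previously recorded only the bare item filename, while the new call asks `self._image_local_path(item_id)` for a path that can actually be resolved. A hedged sketch of what such a helper could look like; the method name comes from the diff above, but the `_images_dir` attribute and the file-naming scheme are assumptions, not the real CVAT implementation:

import os.path as osp

# Hypothetical sketch only: resolve an item id to an absolute path inside the
# extractor's local image directory. `self._images_dir` and the '.jpg' suffix
# are assumptions made for illustration.
def _image_local_path(self, item_id):
    return osp.abspath(osp.join(self._images_dir, item_id + '.jpg'))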
@@ -298,8 +298,8 @@ class _InstancesConverter(_TaskConverter):
rles = mask_utils.merge(rles)
area = mask_utils.area(rles)
else:
x, y, w, h = bbox
segmentation = [[x, y, x + w, y, x + w, y + h, x, y + h]]
_, _, w, h = bbox
segmentation = []
area = w * h
elem = {

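With this change a plain bounding box is written to COCO as a box only: `segmentation` stays empty and `area` is just `w * h`, rather than synthesizing a rectangle polygon from the box corners. A sketch of the resulting annotation entry; only `bbox`, `segmentation`, and `area` follow from the diff, the other field values are illustrative:

# Illustrative COCO annotation entry for Bbox(x=1, y=2, w=3, h=4);
# id/image_id/category_id are placeholder values.
elem = {
    'id': 1,
    'image_id': 1,
    'category_id': 5,
    'bbox': [1, 2, 3, 4],   # x, y, w, h preserved as-is
    'segmentation': [],     # no rectangle polygon is generated any more
    'area': 3 * 4,          # w * h
    'iscrowd': 0,
}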
@@ -54,7 +54,8 @@ class _SubsetWriter:
if item.has_image:
path = item.image.path
if self._context._save_images:
path = self._context._save_image(item)
path = self._context._make_image_filename(item)
self._context._save_image(item, path)
item_desc['image'] = {
'size': item.image.size,

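The writer now asks the converter context for the target filename first and then saves the image under exactly that name, so the path recorded in the dataset description matches the file actually written. A hypothetical sketch of what the filename helper might do; the method name is taken from the diff, its body is an assumption:

import os.path as osp

# Assumed behaviour for illustration: reuse the source image extension when
# it is known, otherwise fall back to '.jpg'.
def _make_image_filename(self, item):
    ext = ''
    if item.has_image and item.image.path:
        ext = osp.splitext(item.image.path)[1]
    return item.id + (ext or '.jpg')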
@@ -322,6 +322,7 @@ class RandomSplit(Transform, CliPlugin):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('-s', '--subset', action='append',
type=cls._split_arg, dest='splits',
default=[('train', 0.67), ('test', 0.33)],
help="Subsets in the form of: '<subset>:<ratio>' (repeatable)")
parser.add_argument('--seed', type=int, help="Random seed")
return parser

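The practical effect is that running the transform without any `-s`/`--subset` argument now yields a 67/33 train/test split instead of leaving the splits undefined. A small standalone sketch of the parser behaviour, using plain argparse to mirror the arguments added above:

import argparse

def split_arg(s):
    # '<subset>:<ratio>' -> ('<subset>', <ratio>), mirroring cls._split_arg
    subset, ratio = s.split(':')
    return (subset, float(ratio))

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--subset', action='append', type=split_arg,
    dest='splits', default=[('train', 0.67), ('test', 0.33)])

print(parser.parse_args([]).splits)
# [('train', 0.67), ('test', 0.33)]  -- the default when no -s is passed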
@@ -4,8 +4,8 @@ import os.path as osp
from unittest import TestCase
from datumaro.components.project import (Project, Dataset)
from datumaro.components.extractor import (Extractor, DatasetItem,
from datumaro.components.project import Project, Dataset
from datumaro.components.extractor import (DatasetItem,
AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
LabelCategories, PointsCategories
)
@@ -26,7 +26,6 @@ DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset')
class CocoImporterTest(TestCase):
def test_can_import(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
subset='val', attributes={'id': 1},
@@ -349,7 +348,6 @@ class CocoConverterTest(TestCase):
CocoLabelsConverter.convert, test_dir)
def test_can_save_and_load_keypoints(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
annotations=[
@@ -373,11 +371,11 @@
Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
]),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
target_dataset = Dataset.from_iterable([
@@ -393,30 +391,30 @@
Points([1, 2, 3, 4, 2, 3],
group=2, id=2,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
Bbox(1, 2, 2, 2,
group=2, id=2,
attributes={'is_crowd': False}),
Points([1, 2, 0, 2, 4, 1],
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Polygon([0, 1, 4, 1, 4, 2, 0, 2],
Bbox(0, 1, 4, 1,
label=5, group=3, id=3,
attributes={'is_crowd': False}),
Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
group=5, id=5,
attributes={'is_crowd': False}),
Polygon([1, 2, 3, 2, 3, 4, 1, 4],
Bbox(1, 2, 2, 2,
group=5, id=5,
attributes={'is_crowd': False}),
], attributes={'id': 1}),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
str(i) for i in range(10)),
AnnotationType.points: PointsCategories.from_iterable(
(i, None, [[0, 1], [1, 2]]) for i in range(10)
),
})
with TestDir() as test_dir:

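The updated expectations mirror the converter change above: the box attached to each keypoints group now round-trips as a `Bbox` with the same geometry instead of a rectangle `Polygon`. A quick check that the old and new expectations describe the same region:

# Polygon([1, 2, 3, 2, 3, 4, 1, 4]) lists the rectangle corners
# (1, 2), (3, 2), (3, 4), (1, 4); the equivalent box is:
xs, ys = [1, 3, 3, 1], [2, 2, 4, 4]
x, y = min(xs), min(ys)              # 1, 2
w, h = max(xs) - x, max(ys) - y      # 2, 2
assert (x, y, w, h) == (1, 2, 2, 2)  # matches the Bbox(1, 2, 2, 2) above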
@@ -32,56 +32,58 @@ class DatumaroConverterTest(TestCase):
compare_datasets_strict(self,
expected=target_dataset, actual=parsed_dataset)
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
test_dataset = Dataset.from_iterable([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
})
@property
def test_dataset(self):
label_categories = LabelCategories()
for i in range(5):
label_categories.add('cat' + str(i))
mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
return Dataset.from_iterable([
DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
annotations=[
Caption('hello', id=1),
Caption('world', id=2, group=5),
Label(2, id=3, attributes={
'x': 1,
'y': '2',
}),
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
]),
DatasetItem(id=21, subset='train',
annotations=[
Caption('test'),
Label(2),
Bbox(1, 2, 3, 4, 5, id=42, group=42)
]),
DatasetItem(id=2, subset='val',
annotations=[
PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
]),
DatasetItem(id=42, subset='test',
attributes={'a1': 5, 'a2': '42'}),
DatasetItem(id=42),
DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
AnnotationType.mask: mask_categories,
AnnotationType.points: points_categories,
})
def test_can_save_and_load(self):
with TestDir() as test_dir:

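Rebuilding the dataset inside a property means every test that reads `self.test_dataset` gets a freshly constructed fixture, so one test cannot mutate the data another test relies on. A minimal illustration of the pattern; the class and attribute names are illustrative, not Datumaro API:

class FixtureStyle:
    # Shared class attribute: every test sees (and can corrupt) the same list.
    shared = [1, 2, 3]

    # Property: a new list is built on each access, so mutations do not leak
    # between tests.
    @property
    def fresh(self):
        return [1, 2, 3]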