Az/pascal voc loader (#613)

main
Andrey Zhavoronkov 7 years ago committed by Nikita Manovich
parent 85ae933af5
commit f17847ff33

@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to create custom extractors for unsupported media types
- Added in PDF extractor
- Added in a command line model manager tester
- Pascal VOC format support
### Changed
- Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)

@ -27,6 +27,14 @@ CVAT is free, online, interactive video and image annotation tool for computer v
- [Tutorial for polygons](https://www.youtube.com/watch?v=XTwfXDh4clI)
- [Semi-automatic segmentation](https://www.youtube.com/watch?v=vnqXZ-Z-VTQ)
## Supported formats
| Annotation format | Dumper | Loader |
| ------------------------- | ------ | ------ |
| CVAT XML v1.1 for images | X | X |
| CVAT XML v1.1 for a video | X | X |
| Pascal VOC | X | X |
## Links
- [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)
- [Intel Software: Computer Vision Annotation Tool: A Universal Approach to Data Annotation](https://software.intel.com/en-us/articles/computer-vision-annotation-tool-a-universal-approach-to-data-annotation)

@ -37,7 +37,7 @@ It allows to download and upload annotations in different formats and easily add
- **name** - unique name for each format
- **dumpers and loaders** - lists of objects that describe the exposed dumpers and loaders; each object must
have the following keys:
1. display_name - **unique** string used as ID for a dumpers and loaders.
1. display_name - **unique** string used as ID for dumpers and loaders.
This string is also displayed in the CVAT UI.
Named placeholders can be used, as in Python's `str.format` function
(only the name, format and version variables are supported).
@ -49,16 +49,13 @@ It allows to download and upload annotations in different formats and easily add
def dump_handler(file_object, annotations):
```
Inside of the script environment 3 variables are available:
- file_object - python's standard file object returned by open() function and exposing a file-oriented API
Inside of the script environment 2 variables are available:
- **file_object** - python's standard file object returned by open() function and exposing a file-oriented API
(with methods such as read() or write()) to an underlying resource.
- **annotations** - instance of [Annotation](annotation.py#L106) class.
- **spec** - string with name of the requested specification
(if the annotation format defines them).
It may be useful if one script implements more than one format support.
The Annotation class exposes an API and some additional pre-defined types that allow getting/adding shapes inside
a parser/dumper code.
a loader/dumper code.
Short description of the public methods:
- **Annotation.shapes** - property, returns a generator of Annotation.LabeledShape objects
@ -115,7 +112,7 @@ It allows to download and upload annotations in different formats and easily add
file_object.write(...)
...
```
Pseudocode for a parser code
Pseudocode for loader code
```python
...
#read file_object

@ -408,3 +408,7 @@ class Annotation:
track_len += len(track['shapes'])
return len(self._annotation_ir.tags) + len(self._annotation_ir.shapes) + track_len
@property
def frame_info(self):
    """Return the per-frame metadata stored in ``self._frame_info``.

    NOTE(review): loaders iterate this as ``{frame_number: info}`` and read
    ``info['path']`` - confirm against the code that fills ``_frame_info``.
    """
    return self._frame_info

@ -4,7 +4,9 @@
from cvat.apps.annotation import models
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from cvat.apps.annotation.serializers import AnnotationFormatSerializer
from django.core.files import File
import os
from copy import deepcopy
@ -19,16 +21,20 @@ def register_format(format_file):
raise Exception("Could not find \'format_spec\' definition in format file specification")
format_spec = deepcopy(global_vars["format_spec"])
format_spec["handler_file"] = File(open(format_file))
for spec in format_spec["loaders"] + format_spec["dumpers"]:
spec["display_name"] = spec["display_name"].format(
name=format_spec["name"],
format=spec["format"],
version=spec["version"],
)
if not models.AnnotationFormat.objects.filter(name=format_spec["name"]).exists():
format_spec["handler_file"] = os.path.relpath(format_file, settings.BASE_DIR)
for spec in format_spec["loaders"] + format_spec["dumpers"]:
spec["display_name"] = spec["display_name"].format(
name=format_spec["name"],
format=spec["format"],
version=spec["version"],
)
try:
annotation_format = models.AnnotationFormat.objects.get(name=format_spec["name"])
serializer = AnnotationFormatSerializer(annotation_format, data=format_spec)
if serializer.is_valid(raise_exception=True):
serializer.save()
except ObjectDoesNotExist:
serializer = AnnotationFormatSerializer(data=format_spec)
if serializer.is_valid(raise_exception=True):
serializer.save()

@ -0,0 +1,74 @@
# Generated by Django 2.1.9 on 2019-08-05 06:27
import cvat.apps.engine.models
from django.db import migrations, models
import django.db.models.deletion
def split_handlers(apps, schema_editor):
    """Copy every AnnotationHandler row into AnnotationDumper or AnnotationLoader.

    Rows whose ``type`` is ``"dumper"`` become AnnotationDumper rows; all other
    rows become AnnotationLoader rows. Each source row is deleted once its copy
    has been saved.

    Args:
        apps: historical app registry passed in by ``migrations.RunPython``.
        schema_editor: schema editor passed in by ``migrations.RunPython``;
            only ``connection.alias`` is read from it.
    """
    # Run every query against the database that is being migrated, not the
    # default one, so the migration is correct in multi-database setups.
    db_alias = schema_editor.connection.alias
    handler_model = apps.get_model('annotation', 'AnnotationHandler')
    dumper_model = apps.get_model('annotation', 'AnnotationDumper')
    loader_model = apps.get_model('annotation', 'AnnotationLoader')

    for db_handler in handler_model.objects.using(db_alias).all():
        target_model = dumper_model if db_handler.type == "dumper" else loader_model
        new_handler = target_model(
            display_name=db_handler.display_name,
            format=db_handler.format,
            version=db_handler.version,
            handler=db_handler.handler,
            annotation_format=db_handler.annotation_format,
        )
        new_handler.save(using=db_alias)
        db_handler.delete(using=db_alias)
def _handler_table(model_name):
    """Build the CreateModel operation shared by both concrete handler tables."""
    return migrations.CreateModel(
        name=model_name,
        fields=[
            ('display_name', cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)),
            ('format', models.CharField(max_length=16)),
            ('version', models.CharField(max_length=16)),
            ('handler', models.CharField(max_length=256)),
            ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')),
        ],
        options={
            'abstract': False,
            'default_permissions': (),
        },
    )


class Migration(migrations.Migration):
    """Replace the AnnotationHandler table with AnnotationDumper and AnnotationLoader."""

    dependencies = [('annotation', '0001_initial')]

    operations = [
        # 1. Create the two new tables with the same columns.
        _handler_table('AnnotationDumper'),
        _handler_table('AnnotationLoader'),
        # 2. Move the existing rows into them.
        migrations.RunPython(code=split_handlers),
        # 3. Drop the old table.
        migrations.RemoveField(model_name='annotationhandler', name='annotation_format'),
        migrations.DeleteModel(name='AnnotationHandler'),
    ]

@ -3,30 +3,17 @@
# SPDX-License-Identifier: MIT
import os
from enum import Enum
from django.db import models
from django.conf import settings
from django.core.files.storage import FileSystemStorage
from cvat.apps.engine.models import SafeCharField
from django.contrib.auth.models import User
from cvat.apps.engine.models import SafeCharField
def upload_file_handler(instance, filename):
    """Return the relative path ``formats/<instance.id>/<filename>``."""
    instance_dir = str(instance.id)
    return os.path.join('formats', instance_dir, filename)
class HandlerType(str, Enum):
DUMPER = 'dumper'
LOADER = 'loader'
@classmethod
def choices(self):
return tuple((x.value, x.name) for x in self)
def __str__(self):
return self.value
class AnnotationFormat(models.Model):
name = SafeCharField(max_length=256)
owner = models.ForeignKey(User, null=True, blank=True,
@ -42,8 +29,6 @@ class AnnotationFormat(models.Model):
default_permissions = ()
class AnnotationHandler(models.Model):
type = models.CharField(max_length=16,
choices=HandlerType.choices())
display_name = SafeCharField(max_length=256, primary_key=True)
format = models.CharField(max_length=16)
version = models.CharField(max_length=16)
@ -52,3 +37,10 @@ class AnnotationHandler(models.Model):
class Meta:
default_permissions = ()
abstract = True
class AnnotationDumper(AnnotationHandler):
    """Concrete model for a dumper; it adds nothing to AnnotationHandler."""
class AnnotationLoader(AnnotationHandler):
    """Concrete model for a loader; it adds nothing to AnnotationHandler."""

@ -8,11 +8,70 @@ format_spec = {
"handler": "dump"
},
],
"loaders": [],
"loaders": [
{
"display_name": "{name} {format} {version}",
"format": "ZIP",
"version": "1.0",
"handler": "load"
},
],
}
def load(file_object, annotations, spec):
raise NotImplementedError
def load(file_object, annotations):
    """Import Pascal VOC bounding boxes from an archive into ``annotations``.

    The archive is unpacked into a temporary directory and every ``.xml``
    file found in it is parsed. Each ``<object>`` element becomes a rectangle
    shape whose frame is resolved from the ``<filename>`` element of the XML
    file that contains it.

    Args:
        file_object: path to the archive, or an object whose ``name``
            attribute is that path.
        annotations: object providing ``frame_info``, ``LabeledShape`` and
            ``add_shape``.

    Raises:
        Exception: if the image file name of an annotated XML file neither
            matches a frame in ``annotations.frame_info`` nor contains
            exactly one number.
    """
    from pyunpack import Archive
    import os
    import re
    from tempfile import TemporaryDirectory
    import xml.etree.ElementTree as ET

    def get_basename(path):
        # File name without directory and extension.
        return os.path.splitext(os.path.basename(path))[0]

    # Build the name -> frame lookup once instead of scanning frame_info for
    # every object. setdefault keeps the first frame when names repeat, which
    # is what a front-to-back scan would return.
    frame_by_name = {}
    for frame_number, info in annotations.frame_info.items():
        frame_by_name.setdefault(get_basename(info['path']), frame_number)

    def match_frame(filename):
        # First try to match by file name.
        pascal_name = get_basename(filename)
        if pascal_name in frame_by_name:
            return frame_by_name[pascal_name]

        # Otherwise take the frame number from the name, but only when the
        # name contains exactly one number.
        numbers = re.findall(r'\d+', filename)
        if len(numbers) == 1:
            return int(numbers[0])

        raise Exception('Cannot match filename or determinate framenumber for {} filename'.format(filename))

    def parse_xml_file(annotation_file):
        root = ET.parse(annotation_file).getroot()
        filename = root.find('filename').text
        frame = None

        for obj_tag in root.iter('object'):
            bbox_tag = obj_tag.find("bndbox")
            label = obj_tag.find('name').text
            xmin = float(bbox_tag.find('xmin').text)
            ymin = float(bbox_tag.find('ymin').text)
            xmax = float(bbox_tag.find('xmax').text)
            ymax = float(bbox_tag.find('ymax').text)

            # All objects of one XML file share a frame. Resolve it on the
            # first object so files without objects never raise.
            if frame is None:
                frame = match_frame(filename)

            annotations.add_shape(annotations.LabeledShape(
                type='rectangle',
                frame=frame,
                label=label,
                points=[xmin, ymin, xmax, ymax],
                occluded=False,
                attributes=[],
            ))

    archive_file = file_object if isinstance(file_object, str) else file_object.name
    with TemporaryDirectory() as tmp_dir:
        # NOTE(review): the archive presumably comes from a user upload -
        # confirm that extraction cannot write outside tmp_dir.
        Archive(archive_file).extractall(tmp_dir)

        for dirpath, _, filenames in os.walk(tmp_dir):
            for _file in filenames:
                if '.xml' == os.path.splitext(_file)[1]:
                    parse_xml_file(os.path.join(dirpath, _file))
def dump(file_object, annotations):
from pascal_voc_writer import Writer

@ -5,55 +5,72 @@
from rest_framework import serializers
from cvat.apps.annotation import models
class AnnotationHandlerSerializer(serializers.ModelSerializer):
class AnnotationDumperSerializer(serializers.ModelSerializer):
class Meta:
model = models.AnnotationHandler
model = models.AnnotationDumper
exclude = ('annotation_format',)
# https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers
extra_kwargs = {
'display_name': {
'validators': [],
},
}
class AnnotationLoaderSerializer(serializers.ModelSerializer):
    """Serialize AnnotationLoader rows, leaving out their annotation_format link."""

    class Meta:
        model = models.AnnotationLoader
        exclude = ('annotation_format',)
        # Remove the default validators from display_name, see
        # https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers
        extra_kwargs = {'display_name': {'validators': []}}
class AnnotationFormatSerializer(serializers.ModelSerializer):
handlers = AnnotationHandlerSerializer(many=True, source='annotationhandler_set')
dumpers = AnnotationDumperSerializer(many=True, source="annotationdumper_set")
loaders = AnnotationLoaderSerializer(many=True, source="annotationloader_set")
class Meta:
model = models.AnnotationFormat
exclude = ("handler_file", )
fields = "__all__"
# pylint: disable=no-self-use
def create(self, validated_data):
handlers = validated_data.pop('handlers')
dumpers = validated_data.pop("annotationdumper_set")
loaders = validated_data.pop("annotationloader_set")
annotation_format = models.AnnotationFormat.objects.create(**validated_data)
annotation_format = models.AnnotationFormat()
annotation_format.name = validated_data["name"]
annotation_format.handler_file = validated_data["handler_file"].name
annotation_format.save()
handlers = [models.AnnotationHandler(annotation_format=annotation_format, **handler) for handler in handlers]
models.AnnotationHandler.objects.bulk_create(handlers)
for dumper in dumpers:
models.AnnotationDumper(annotation_format=annotation_format, **dumper).save()
for loader in loaders:
models.AnnotationLoader(annotation_format=annotation_format, **loader).save()
return annotation_format
# pylint: disable=no-self-use
def to_internal_value(self, data):
_data = data.copy()
_data["handlers"] = []
for d in _data.pop("dumpers"):
d["type"] = models.HandlerType.DUMPER
_data["handlers"].append(d)
for l in _data.pop("loaders"):
l["type"] = models.HandlerType.LOADER
_data["handlers"].append(l)
return _data
def to_representation(self, instance):
data = super().to_representation(instance)
data['dumpers'] = []
data['loaders'] = []
for handler in data.pop("handlers"):
handler_type = handler.pop("type")
if handler_type == models.HandlerType.DUMPER:
data["dumpers"].append(handler)
else:
data["loaders"].append(handler)
return data
def update(self, instance, validated_data):
    """Synchronise the dumpers and loaders of ``instance`` with ``validated_data``.

    Handlers whose display name is absent from the new data are deleted;
    every handler in the new data is then saved against ``instance``.
    Returns ``instance`` after saving it.
    """
    dumper_specs = validated_data["annotationdumper_set"]
    loader_specs = validated_data["annotationloader_set"]
    kept_dumpers = [spec["display_name"] for spec in dumper_specs]
    kept_loaders = [spec["display_name"] for spec in loader_specs]

    # Collect all stale rows first, then remove them.
    stale_handlers = []
    for db_dumper in instance.annotationdumper_set.all():
        if db_dumper.display_name not in kept_dumpers:
            stale_handlers.append(db_dumper)
    for db_loader in instance.annotationloader_set.all():
        if db_loader.display_name not in kept_loaders:
            stale_handlers.append(db_loader)
    for stale in stale_handlers:
        stale.delete()

    for spec in dumper_specs:
        models.AnnotationDumper(annotation_format=instance, **spec).save()
    for spec in loader_specs:
        models.AnnotationLoader(annotation_format=instance, **spec).save()

    instance.save()
    return instance
class AnnotationFileSerializer(serializers.Serializer):
annotation_file = serializers.FileField()

@ -76,9 +76,8 @@ class TaskView {
});
}
_upload(uploadAnnotationButton) {
_upload(uploadAnnotationButton, format) {
const button = $(uploadAnnotationButton);
const CVATformat = this._annotationFormats.find(el => el.name === 'CVAT');
$('<input type="file" accept="text/xml">').on('change', async (onChangeEvent) => {
const file = onChangeEvent.target.files[0];
$(onChangeEvent.target).remove();
@ -88,8 +87,7 @@ class TaskView {
const annotationData = new FormData();
annotationData.append('annotation_file', file);
try {
await uploadTaskAnnotationRequest(this._task.id, annotationData,
CVATformat.loaders[0].display_name);
await uploadTaskAnnotationRequest(this._task.id, annotationData, format);
} catch (error) {
showMessage(error.message);
} finally {
@ -133,25 +131,40 @@ class TaskView {
const buttonsContainer = $('<div class="dashboardButtonsUI"> </div>').appendTo(this._UI);
const downloadButton = $('<button class="regular dashboardButtonUI"> Dump Annotation </button>');
const dropdownMenu = $('<ul class="dropdown-content hidden"></ul>');
const dropdownDownloadMenu = $('<ul class="dropdown-content hidden"></ul>');
const uploadButton = $('<button class="regular dashboardButtonUI"> Upload Annotation </button>');
const dropdownUploadMenu = $('<ul class="dropdown-content hidden"></ul>');
for (const format of this._annotationFormats) {
for (const dumpSpec of format.dumpers) {
dropdownMenu.append($(`<li>${dumpSpec.display_name}</li>`).on('click', () => {
dropdownMenu.addClass('hidden');
this._dump(downloadButton[0], dumpSpec.display_name);
for (const dumper of format.dumpers) {
dropdownDownloadMenu.append($(`<li>${dumper.display_name}</li>`).on('click', () => {
dropdownDownloadMenu.addClass('hidden');
this._dump(downloadButton[0], dumper.display_name);
}));
}
for (const loader of format.loaders) {
dropdownUploadMenu.append($(`<li>${loader.display_name}</li>`).on('click', () => {
dropdownUploadMenu.addClass('hidden');
userConfirm('The current annotation will be lost. Are you sure?', () => {
this._upload(uploadButton, loader.display_name);
});
}));
}
}
$('<div class="dropdown"></div>').append(
downloadButton.on('click', () => {
dropdownMenu.toggleClass('hidden');
dropdownDownloadMenu.toggleClass('hidden');
}),
).append(dropdownMenu).appendTo(buttonsContainer);
).append(dropdownDownloadMenu).appendTo(buttonsContainer);
$('<button class="regular dashboardButtonUI"> Upload Annotation </button>').on('click', (e) => {
userConfirm('The current annotation will be lost. Are you sure?', () => this._upload(e.target));
}).appendTo(buttonsContainer);
$('<div class="dropdown"></div>').append(
uploadButton.on('click', () => {
dropdownUploadMenu.toggleClass('hidden');
}),
).append(dropdownUploadMenu).appendTo(buttonsContainer);
$('<button class="regular dashboardButtonUI"> Update Task </button>').on('click', () => {
this._update();

@ -65,7 +65,7 @@ function blurAllElements() {
}
function uploadAnnotation(jobId, shapeCollectionModel, historyModel, annotationSaverModel,
uploadAnnotationButton, formatId, parseSpec) {
uploadAnnotationButton, format) {
$('#annotationFileSelector').one('change', async (changedFileEvent) => {
const file = changedFileEvent.target.files['0'];
changedFileEvent.target.value = '';
@ -75,7 +75,7 @@ function uploadAnnotation(jobId, shapeCollectionModel, historyModel, annotationS
const annotationData = new FormData();
annotationData.append('annotation_file', file);
try {
await uploadJobAnnotationRequest(jobId, annotationData, formatId, parseSpec);
await uploadJobAnnotationRequest(jobId, annotationData, format);
historyModel.empty();
shapeCollectionModel.empty();
const data = await $.get(`/api/v1/jobs/${jobId}/annotations`);
@ -401,6 +401,30 @@ function setupMenu(job, task, shapeCollectionModel,
}
}).appendTo('#downloadDropdownMenu');
}
// Add one entry to the upload menu for every loader of the current format.
for (const loader of format.loaders) {
    $(`<li>${loader.display_name}</li>`).on('click', () => {
        $('#uploadDropdownMenu').addClass('hidden');
        // Errors are handled inside the confirm callback. A try/catch around
        // the userConfirm() call itself cannot see a rejection raised later
        // by the async callback. The button is no longer disabled here: it
        // was disabled and re-enabled within the same synchronous handler.
        // NOTE(review): assumes userConfirm() returns before the user
        // answers - confirm against its implementation.
        userConfirm('Current annotation will be removed from the client. Continue?',
            async () => {
                try {
                    await uploadAnnotation(
                        job.id,
                        shapeCollectionModel,
                        historyModel,
                        annotationSaverModel,
                        $('#uploadAnnotationButton'),
                        loader.display_name,
                    );
                } catch (error) {
                    showMessage(error.message);
                }
            });
    }).appendTo('#uploadDropdownMenu');
}
}
$('#downloadAnnotationButton').on('click', () => {
@ -408,19 +432,7 @@ function setupMenu(job, task, shapeCollectionModel,
});
$('#uploadAnnotationButton').on('click', () => {
hide();
const CVATformat = annotationFormats.find(el => el.name === 'CVAT');
userConfirm('Current annotation will be removed from the client. Continue?',
() => {
uploadAnnotation(
job.id,
shapeCollectionModel,
historyModel,
annotationSaverModel,
$('#uploadAnnotationButton'),
CVATformat.loaders[0].display_name,
);
});
$('#uploadDropdownMenu').toggleClass('hidden');
});
$('#removeAnnotationButton').on('click', () => {

@ -334,7 +334,10 @@
<button id="downloadAnnotationButton" class="menuButton semiBold h2"> Dump Annotation </button>
<ul id="downloadDropdownMenu" class="dropdown-content hidden"></ul>
</div>
<button id="uploadAnnotationButton" class="menuButton semiBold h2"> Upload Annotation </button>
<div class="dropdown">
<button id="uploadAnnotationButton" class="menuButton semiBold h2"> Upload Annotation </button>
<ul id="uploadDropdownMenu" class="dropdown-content hidden"></ul>
</div>
<button id="removeAnnotationButton" class="menuButton semiBold h2"> Remove Annotation </button>
<button id="settingsButton" class="menuButton semiBold h2"> Settings </button>
<button id="fullScreenButton" class="menuButton semiBold h2"> Fullscreen Player </button>

@ -14,7 +14,6 @@ from django.contrib.auth.models import User, Group
from cvat.apps.engine.models import (Task, Segment, Job, StatusChoice,
AttributeType)
from cvat.apps.annotation.models import AnnotationFormat
from cvat.apps.annotation.models import HandlerType
from unittest import mock
import io
import xml.etree.ElementTree as ET
@ -2025,7 +2024,7 @@ class TaskAnnotationAPITestCase(JobAnnotationAPITestCase):
self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
cvat_format = AnnotationFormat.objects.get(name="CVAT")
for annotation_handler in cvat_format.annotationhandler_set.filter(type=HandlerType.DUMPER):
for annotation_handler in cvat_format.annotationdumper_set.all():
response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator,
"format={}".format(annotation_handler.display_name))
self.assertEqual(response.status_code, HTTP_202_ACCEPTED)

@ -42,7 +42,7 @@ from django.contrib.auth.models import User
from django.core.exceptions import ObjectDoesNotExist
from cvat.apps.authentication import auth
from rest_framework.permissions import SAFE_METHODS
from cvat.apps.annotation.models import AnnotationHandler
from cvat.apps.annotation.models import AnnotationDumper, AnnotationLoader
from cvat.apps.annotation.format import get_annotation_formats
# Server REST API
@ -276,7 +276,7 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
dump_format = request.query_params.get("format", "")
try:
db_dumper = AnnotationHandler.objects.get(display_name=dump_format)
db_dumper = AnnotationDumper.objects.get(display_name=dump_format)
except ObjectDoesNotExist:
raise serializers.ValidationError(
"Please specify a correct 'format' parameter for the request")
@ -507,7 +507,7 @@ def load_data_proxy(request, rq_id, rq_func, pk):
serializer = AnnotationFileSerializer(data=request.data)
if serializer.is_valid(raise_exception=True):
try:
db_parser = AnnotationHandler.objects.get(pk=upload_format)
db_parser = AnnotationLoader.objects.get(pk=upload_format)
except ObjectDoesNotExist:
raise serializers.ValidationError(
"Please specify a correct 'format' parameter for the upload request")

@ -10,7 +10,7 @@ from cvat.apps.engine.models import Task, Job, User
from cvat.apps.engine.annotation import dump_task_data
from cvat.apps.engine.plugins import add_plugin
from cvat.apps.git.models import GitStatusChoice
from cvat.apps.annotation.models import AnnotationHandler
from cvat.apps.annotation.models import AnnotationDumper
from cvat.apps.git.models import GitData
from collections import OrderedDict
@ -264,7 +264,7 @@ class Git:
timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
display_name = "CVAT XML 1.1"
display_name += " for images" if self._task_mode == "annotation" else " for videos"
cvat_dumper = AnnotationHandler.objects.get(display_name=display_name)
cvat_dumper = AnnotationDumper.objects.get(display_name=display_name)
dump_name = os.path.join(db_task.get_task_dirname(),
"git_annotation_{}.".format(timestamp) + "dump")
dump_task_data(

Loading…
Cancel
Save