diff --git a/CHANGELOG.md b/CHANGELOG.md index 41419f59..ae557f0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ability to create a custom extractors for unsupported media types - Added in PDF extractor - Added in a command line model manager tester +- Pascal VOC format support ### Changed - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before) diff --git a/README.md b/README.md index 18f9267b..bd0cdab7 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,14 @@ CVAT is free, online, interactive video and image annotation tool for computer v - [Tutorial for polygons](https://www.youtube.com/watch?v=XTwfXDh4clI) - [Semi-automatic segmentation](https://www.youtube.com/watch?v=vnqXZ-Z-VTQ) +## Supported formats + +| Annotation format | Dumper | Loader | +| ------------------------- | ------ | ------ | +| CVAT XML v1.1 for images | X | X | +| CVAT XML v1.1 for a video | X | X | +| Pascal VOC | X | X | + ## Links - [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat) - [Intel Software: Computer Vision Annotation Tool: A Universal Approach to Data Annotation](https://software.intel.com/en-us/articles/computer-vision-annotation-tool-a-universal-approach-to-data-annotation) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 9788ea8b..c1a317b4 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -37,7 +37,7 @@ It allows to download and upload annotations in different formats and easily add - **name** - unique name for each format - **dumpers and loaders** - lists of objects that describes exposed dumpers and loaders and must have following keys: - 1. display_name - **unique** string used as ID for a dumpers and loaders. + 1. 
display_name - **unique** string used as ID for dumpers and loaders. Also this string is displayed in CVAT UI. Possible to use a named placeholders like the python format function (supports only name, format and version variables). @@ -49,16 +49,13 @@ It allows to download and upload annotations in different formats and easily add def dump_handler(file_object, annotations): ``` - Inside of the script environment 3 variables are available: - - file_object - python's standard file object returned by open() function and exposing a file-oriented API + Inside of the script environment 2 variables are available: + - **file_object** - python's standard file object returned by open() function and exposing a file-oriented API (with methods such as read() or write()) to an underlying resource. - **annotations** - instance of [Annotation](annotation.py#L106) class. - - **spec** - string with name of the requested specification - (if the annotation format defines them). - It may be useful if one script implements more than one format support. Annotation class expose API and some additional pre-defined types that allow to get/add shapes inside - a parser/dumper code. + loader/dumper code. Short description of the public methods: - **Annotation.shapes** - property, returns a generator of Annotation.LabeledShape objects @@ -115,7 +112,7 @@ It allows to download and upload annotations in different formats and easily add file_object.write(...) ... ``` - Pseudocode for a parser code + Pseudocode for loader code ```python ... 
#read file_object diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py index 27c1ed9e..2d06ec7d 100644 --- a/cvat/apps/annotation/annotation.py +++ b/cvat/apps/annotation/annotation.py @@ -408,3 +408,7 @@ class Annotation: track_len += len(track['shapes']) return len(self._annotation_ir.tags) + len(self._annotation_ir.shapes) + track_len + + @property + def frame_info(self): + return self._frame_info diff --git a/cvat/apps/annotation/format.py b/cvat/apps/annotation/format.py index d32f6277..497c3812 100644 --- a/cvat/apps/annotation/format.py +++ b/cvat/apps/annotation/format.py @@ -4,7 +4,9 @@ from cvat.apps.annotation import models from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist from cvat.apps.annotation.serializers import AnnotationFormatSerializer +from django.core.files import File import os from copy import deepcopy @@ -19,16 +21,20 @@ def register_format(format_file): raise Exception("Could not find \'format_spec\' definition in format file specification") format_spec = deepcopy(global_vars["format_spec"]) + format_spec["handler_file"] = File(open(format_file)) + for spec in format_spec["loaders"] + format_spec["dumpers"]: + spec["display_name"] = spec["display_name"].format( + name=format_spec["name"], + format=spec["format"], + version=spec["version"], + ) - if not models.AnnotationFormat.objects.filter(name=format_spec["name"]).exists(): - format_spec["handler_file"] = os.path.relpath(format_file, settings.BASE_DIR) - for spec in format_spec["loaders"] + format_spec["dumpers"]: - spec["display_name"] = spec["display_name"].format( - name=format_spec["name"], - format=spec["format"], - version=spec["version"], - ) - + try: + annotation_format = models.AnnotationFormat.objects.get(name=format_spec["name"]) + serializer = AnnotationFormatSerializer(annotation_format, data=format_spec) + if serializer.is_valid(raise_exception=True): + serializer.save() + except ObjectDoesNotExist: 
serializer = AnnotationFormatSerializer(data=format_spec) if serializer.is_valid(raise_exception=True): serializer.save() diff --git a/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py b/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py new file mode 100644 index 00000000..6b289864 --- /dev/null +++ b/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py @@ -0,0 +1,74 @@ +# Generated by Django 2.1.9 on 2019-08-05 06:27 + +import cvat.apps.engine.models +from django.db import migrations, models +import django.db.models.deletion + +def split_handlers(apps, schema_editor): + db_alias = schema_editor.connection.alias + handler_model = apps.get_model('annotation', 'AnnotationHandler') + dumper_model = apps.get_model('annotation', "AnnotationDumper") + loader_model = apps.get_model('annotation', 'AnnotationLoader') + + + for db_handler in handler_model.objects.all(): + if db_handler.type == "dumper": + new_handler = dumper_model() + else: + new_handler = loader_model() + + new_handler.display_name = db_handler.display_name + new_handler.format = db_handler.format + new_handler.version = db_handler.version + new_handler.handler = db_handler.handler + new_handler.annotation_format = db_handler.annotation_format + + new_handler.save() + db_handler.delete() + +class Migration(migrations.Migration): + + dependencies = [ + ('annotation', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='AnnotationDumper', + fields=[ + ('display_name', cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)), + ('format', models.CharField(max_length=16)), + ('version', models.CharField(max_length=16)), + ('handler', models.CharField(max_length=256)), + ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')), + ], + options={ + 'abstract': False, + 'default_permissions': (), + }, + ), + migrations.CreateModel( + name='AnnotationLoader', + fields=[ + 
('display_name', cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)), + ('format', models.CharField(max_length=16)), + ('version', models.CharField(max_length=16)), + ('handler', models.CharField(max_length=256)), + ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')), + ], + options={ + 'abstract': False, + 'default_permissions': (), + }, + ), + migrations.RunPython( + code=split_handlers, + ), + migrations.RemoveField( + model_name='annotationhandler', + name='annotation_format', + ), + migrations.DeleteModel( + name='AnnotationHandler', + ), + ] diff --git a/cvat/apps/annotation/models.py b/cvat/apps/annotation/models.py index 7c6fe616..3595327d 100644 --- a/cvat/apps/annotation/models.py +++ b/cvat/apps/annotation/models.py @@ -3,30 +3,17 @@ # SPDX-License-Identifier: MIT import os -from enum import Enum from django.db import models from django.conf import settings from django.core.files.storage import FileSystemStorage - -from cvat.apps.engine.models import SafeCharField from django.contrib.auth.models import User +from cvat.apps.engine.models import SafeCharField def upload_file_handler(instance, filename): return os.path.join('formats', str(instance.id), filename) -class HandlerType(str, Enum): - DUMPER = 'dumper' - LOADER = 'loader' - - @classmethod - def choices(self): - return tuple((x.value, x.name) for x in self) - - def __str__(self): - return self.value - class AnnotationFormat(models.Model): name = SafeCharField(max_length=256) owner = models.ForeignKey(User, null=True, blank=True, @@ -42,8 +29,6 @@ class AnnotationFormat(models.Model): default_permissions = () class AnnotationHandler(models.Model): - type = models.CharField(max_length=16, - choices=HandlerType.choices()) display_name = SafeCharField(max_length=256, primary_key=True) format = models.CharField(max_length=16) version = models.CharField(max_length=16) @@ -52,3 +37,10 @@ class 
AnnotationHandler(models.Model): class Meta: default_permissions = () + abstract = True + +class AnnotationDumper(AnnotationHandler): + pass + +class AnnotationLoader(AnnotationHandler): + pass diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/annotation/pascal_voc.py index dd54e828..3541cb61 100644 --- a/cvat/apps/annotation/pascal_voc.py +++ b/cvat/apps/annotation/pascal_voc.py @@ -8,11 +8,70 @@ format_spec = { "handler": "dump" }, ], - "loaders": [], + "loaders": [ + { + "display_name": "{name} {format} {version}", + "format": "ZIP", + "version": "1.0", + "handler": "load" + }, + ], } -def load(file_object, annotations, spec): - raise NotImplementedError +def load(file_object, annotations): + from pyunpack import Archive + import os + import re + from tempfile import TemporaryDirectory + + def match_frame(frame_info, filename): + def get_filename(path): + return os.path.splitext(os.path.basename(path))[0] + + # try to match by filename + pascal_filename = get_filename(filename) + for frame_number, info in frame_info.items(): + cvat_filename = get_filename(info['path']) + if cvat_filename == pascal_filename: + return frame_number + + # try to extract framenumber from filename + numbers = re.findall(r'\d+', filename) + if numbers and len(numbers) == 1: + return int(numbers[0]) + + raise Exception('Cannot match filename or determinate framenumber for {} filename'.format(filename)) + + def parse_xml_file(annotation_file): + import xml.etree.ElementTree as ET + root = ET.parse(annotation_file).getroot() + filename = root.find('filename').text + + for obj_tag in root.iter('object'): + bbox_tag = obj_tag.find("bndbox") + label = obj_tag.find('name').text + xmin = float(bbox_tag.find('xmin').text) + ymin = float(bbox_tag.find('ymin').text) + xmax = float(bbox_tag.find('xmax').text) + ymax = float(bbox_tag.find('ymax').text) + + annotations.add_shape(annotations.LabeledShape( + type='rectangle', + frame=match_frame(annotations.frame_info, filename), + 
label=label, + points=[xmin, ymin, xmax, ymax], + occluded=False, + attributes=[], + )) + + archive_file = file_object if isinstance(file_object, str) else getattr(file_object, 'name') + with TemporaryDirectory() as tmp_dir: + Archive(archive_file).extractall(tmp_dir) + + for dirpath, _, filenames in os.walk(tmp_dir): + for _file in filenames: + if '.xml' == os.path.splitext(_file)[1]: + parse_xml_file(os.path.join(dirpath, _file)) def dump(file_object, annotations): from pascal_voc_writer import Writer diff --git a/cvat/apps/annotation/serializers.py b/cvat/apps/annotation/serializers.py index a72d7f2f..8fa8b345 100644 --- a/cvat/apps/annotation/serializers.py +++ b/cvat/apps/annotation/serializers.py @@ -5,55 +5,72 @@ from rest_framework import serializers from cvat.apps.annotation import models -class AnnotationHandlerSerializer(serializers.ModelSerializer): +class AnnotationDumperSerializer(serializers.ModelSerializer): class Meta: - model = models.AnnotationHandler + model = models.AnnotationDumper exclude = ('annotation_format',) + # https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers + extra_kwargs = { + 'display_name': { + 'validators': [], + }, + } +class AnnotationLoaderSerializer(serializers.ModelSerializer): + class Meta: + model = models.AnnotationLoader + exclude = ('annotation_format',) + # https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers + extra_kwargs = { + 'display_name': { + 'validators': [], + }, + } class AnnotationFormatSerializer(serializers.ModelSerializer): - handlers = AnnotationHandlerSerializer(many=True, source='annotationhandler_set') + dumpers = AnnotationDumperSerializer(many=True, source="annotationdumper_set") + loaders = AnnotationLoaderSerializer(many=True, source="annotationloader_set") class Meta: model = models.AnnotationFormat - exclude = ("handler_file", ) + fields = "__all__" # pylint: disable=no-self-use def create(self, validated_data): - handlers 
= validated_data.pop('handlers') + dumpers = validated_data.pop("annotationdumper_set") + loaders = validated_data.pop("annotationloader_set") - annotation_format = models.AnnotationFormat.objects.create(**validated_data) + annotation_format = models.AnnotationFormat() + annotation_format.name = validated_data["name"] + annotation_format.handler_file = validated_data["handler_file"].name + annotation_format.save() - handlers = [models.AnnotationHandler(annotation_format=annotation_format, **handler) for handler in handlers] - models.AnnotationHandler.objects.bulk_create(handlers) + for dumper in dumpers: + models.AnnotationDumper(annotation_format=annotation_format, **dumper).save() + + for loader in loaders: + models.AnnotationLoader(annotation_format=annotation_format, **loader).save() return annotation_format # pylint: disable=no-self-use - def to_internal_value(self, data): - _data = data.copy() - _data["handlers"] = [] - for d in _data.pop("dumpers"): - d["type"] = models.HandlerType.DUMPER - _data["handlers"].append(d) - - for l in _data.pop("loaders"): - l["type"] = models.HandlerType.LOADER - _data["handlers"].append(l) - return _data - - def to_representation(self, instance): - data = super().to_representation(instance) - data['dumpers'] = [] - data['loaders'] = [] - for handler in data.pop("handlers"): - handler_type = handler.pop("type") - if handler_type == models.HandlerType.DUMPER: - data["dumpers"].append(handler) - else: - data["loaders"].append(handler) - - return data + def update(self, instance, validated_data): + dumper_names = [handler["display_name"] for handler in validated_data["annotationdumper_set"]] + loader_names = [handler["display_name"] for handler in validated_data["annotationloader_set"]] + + handlers_to_delete = [d for d in instance.annotationdumper_set.all() if d.display_name not in dumper_names] + \ + [l for l in instance.annotationloader_set.all() if l.display_name not in loader_names] + + for db_handler in handlers_to_delete: + 
db_handler.delete() + + for dumper in validated_data["annotationdumper_set"]: + models.AnnotationDumper(annotation_format=instance, **dumper).save() + for loader in validated_data["annotationloader_set"]: + models.AnnotationLoader(annotation_format=instance, **loader).save() + + instance.save() + return instance class AnnotationFileSerializer(serializers.Serializer): annotation_file = serializers.FileField() diff --git a/cvat/apps/dashboard/static/dashboard/js/dashboard.js b/cvat/apps/dashboard/static/dashboard/js/dashboard.js index 11f3ddb1..da74d893 100644 --- a/cvat/apps/dashboard/static/dashboard/js/dashboard.js +++ b/cvat/apps/dashboard/static/dashboard/js/dashboard.js @@ -76,9 +76,8 @@ class TaskView { }); } - _upload(uploadAnnotationButton) { + _upload(uploadAnnotationButton, format) { const button = $(uploadAnnotationButton); - const CVATformat = this._annotationFormats.find(el => el.name === 'CVAT'); $('').on('change', async (onChangeEvent) => { const file = onChangeEvent.target.files[0]; $(onChangeEvent.target).remove(); @@ -88,8 +87,7 @@ class TaskView { const annotationData = new FormData(); annotationData.append('annotation_file', file); try { - await uploadTaskAnnotationRequest(this._task.id, annotationData, - CVATformat.loaders[0].display_name); + await uploadTaskAnnotationRequest(this._task.id, annotationData, format); } catch (error) { showMessage(error.message); } finally { @@ -133,25 +131,40 @@ class TaskView { const buttonsContainer = $('