From 1454ec7ecce10eba5dfbab2cede17a0f3f8234c2 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com>
Date: Tue, 6 Aug 2019 11:25:20 +0300
Subject: [PATCH] Az/yolo format support (#619)

* added yolo loader/dumper
* changed format_spec
* updated readme, changelog
* Used bold font for default dump format
---
 CHANGELOG.md                                  |   2 +-
 README.md                                     |   4 +-
 cvat/apps/annotation/annotation.py            |   4 +-
 cvat/apps/annotation/pascal_voc.py            |   6 +-
 cvat/apps/annotation/settings.py              |   1 +
 cvat/apps/annotation/yolo.py                  | 136 ++++++++++++++++++
 cvat/apps/auto_annotation/model_manager.py    |  25 +---
 .../static/dashboard/js/dashboard.js          |  10 +-
 cvat/apps/engine/annotation.py                |  15 +-
 .../engine/static/engine/js/annotationUI.js   |   9 +-
 cvat/apps/engine/static/engine/js/base.js     |   6 +
 .../{utils/import_modules.py => utils.py}     |  20 +++
 cvat/apps/engine/views.py                     |   6 +-
 13 files changed, 204 insertions(+), 40 deletions(-)
 create mode 100644 cvat/apps/annotation/yolo.py
 rename cvat/apps/engine/{utils/import_modules.py => utils.py} (58%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae557f0d..0dc92d9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to create a custom extractors for unsupported media types
 - Added in PDF extractor
 - Added in a command line model manager tester
-- Pascal VOC format support
+- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO)
 
 ### Changed
 - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)
diff --git a/README.md b/README.md
index bd0cdab7..42397a8d 100644
--- a/README.md
+++ b/README.md
@@ -27,13 +27,15 @@ CVAT is free, online, interactive video and image annotation tool for computer v
 - [Tutorial for polygons](https://www.youtube.com/watch?v=XTwfXDh4clI)
 - [Semi-automatic segmentation](https://www.youtube.com/watch?v=vnqXZ-Z-VTQ)
 
-## Supported formats
+## Supported annotation formats
 
+Format selection is possible after clicking on the Upload annotation / Dump annotation button.
 | Annotation format         | Dumper | Loader |
 | ------------------------- | ------ | ------ |
 | CVAT XML v1.1 for images  | X      | X      |
 | CVAT XML v1.1 for a video | X      | X      |
 | Pascal VOC                | X      | X      |
+| YOLO                      | X      | X      |
 
 ## Links
 - [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)
diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py
index 2d06ec7d..a142eaeb 100644
--- a/cvat/apps/annotation/annotation.py
+++ b/cvat/apps/annotation/annotation.py
@@ -119,9 +119,9 @@ class Annotation:
         self._create_callback=create_callback
         self._MAX_ANNO_SIZE=30000
 
-        db_labels = self._db_task.label_set.all().prefetch_related('attributespec_set')
+        db_labels = self._db_task.label_set.all().prefetch_related('attributespec_set').order_by('pk')
 
-        self._label_mapping = {db_label.id: db_label for db_label in db_labels}
+        self._label_mapping = OrderedDict((db_label.id, db_label) for db_label in db_labels)
 
         self._attribute_mapping = {
             'mutable': {},
diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/annotation/pascal_voc.py
index 3541cb61..b711e018 100644
--- a/cvat/apps/annotation/pascal_voc.py
+++ b/cvat/apps/annotation/pascal_voc.py
@@ -45,7 +45,7 @@ def load(file_object, annotations):
     def parse_xml_file(annotation_file):
         import xml.etree.ElementTree as ET
         root = ET.parse(annotation_file).getroot()
-        filename = root.find('filename').text
+        frame_number = match_frame(annotations.frame_info, root.find('filename').text)
 
         for obj_tag in root.iter('object'):
             bbox_tag = obj_tag.find("bndbox")
@@ -57,14 +57,14 @@ def load(file_object, annotations):
 
             annotations.add_shape(annotations.LabeledShape(
                 type='rectangle',
-                frame=match_frame(annotations.frame_info, filename),
+                frame=frame_number,
                 label=label,
                 points=[xmin, ymin, xmax, ymax],
                 occluded=False,
                 attributes=[],
             ))
 
-    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, 'name')
+    archive_file = getattr(file_object, 'name')
     with TemporaryDirectory() as tmp_dir:
         Archive(archive_file).extractall(tmp_dir)
 
diff --git a/cvat/apps/annotation/settings.py b/cvat/apps/annotation/settings.py
index 7d4f5ebb..e973f5fd 100644
--- a/cvat/apps/annotation/settings.py
+++ b/cvat/apps/annotation/settings.py
@@ -8,4 +8,5 @@ path_prefix = os.path.join('cvat', 'apps', 'annotation')
 BUILTIN_FORMATS = (
     os.path.join(path_prefix, 'cvat.py'),
     os.path.join(path_prefix,'pascal_voc.py'),
+    os.path.join(path_prefix,'yolo.py'),
 )
diff --git a/cvat/apps/annotation/yolo.py b/cvat/apps/annotation/yolo.py
new file mode 100644
index 00000000..34010865
--- /dev/null
+++ b/cvat/apps/annotation/yolo.py
@@ -0,0 +1,136 @@
+format_spec = {  # describes the YOLO dumpers and loaders implemented by this module
+    "name": "YOLO",
+    "dumpers": [
+        {
+            "display_name": "{name} {format} {version}",  # template; presumably filled from name/format/version by the caller - TODO confirm
+            "format": "ZIP",
+            "version": "1.0",
+            "handler": "dump"  # name of the function in this file that is called as handler(file_object, annotations)
+        },
+    ],
+    "loaders": [
+        {
+            "display_name": "{name} {format} {version}",  # same template as for the dumper
+            "format": "ZIP",
+            "version": "1.0",
+            "handler": "load"  # name of the function in this file that is called as handler(file_object, annotations)
+        },
+    ],
+}
+
+def get_filename(path):
+    from os.path import basename, splitext  # function-scope import, as in the other functions of this file
+    return splitext(basename(path))[0]  # base name without its directory and final extension
+
+def load(file_object, annotations):
+    """Add a rectangle shape to annotations for every object line of every '*.txt' file in the uploaded YOLO archive."""
+    from pyunpack import Archive
+    import os
+    from tempfile import TemporaryDirectory
+    from glob import glob
+
+    def convert_from_yolo(img_size, box):
+        # conversion formulas are based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
+        # <x> <y> <width> <height> - float values relative to width and height of image
+        # <x> <y> - are center of rectangle
+        def clamp(value, _min, _max):
+            return max(min(_max, value), _min)
+        xtl = clamp(img_size[0] * (box[0] - box[2] / 2), 0, img_size[0])
+        ytl = clamp(img_size[1] * (box[1] - box[3] / 2), 0, img_size[1])
+        xbr = clamp(img_size[0] * (box[0] + box[2] / 2), 0, img_size[0])
+        ybr = clamp(img_size[1] * (box[1] + box[3] / 2), 0, img_size[1])
+        return [xtl, ytl, xbr, ybr]
+
+    def parse_yolo_obj(img_size, obj):
+        # split() without a separator also accepts tabs, repeated spaces and trailing whitespace
+        label_id, x, y, w, h = obj.split()
+        return int(label_id), convert_from_yolo(img_size, (float(x), float(y), float(w), float(h)))
+
+    def match_frame(frame_info, filename):
+        import re
+        # try to match by filename
+        yolo_filename = get_filename(filename)
+        for frame_number, info in frame_info.items():
+            cvat_filename = get_filename(info["path"])
+            if cvat_filename == yolo_filename:
+                return frame_number
+
+        # try to extract frame number from the base name only: the temporary directory in the path may contain digits
+        numbers = re.findall(r"\d+", yolo_filename)
+        if len(numbers) == 1:
+            return int(numbers[0])
+
+        raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename))
+
+    def parse_yolo_file(annotation_file, labels_mapping):
+        frame_number = match_frame(annotations.frame_info, annotation_file)
+        with open(annotation_file, "r") as fp:
+            # blank lines (e.g. a trailing empty line) describe no object and cannot be unpacked, so skip them
+            for line in filter(str.strip, fp):
+                frame_info = annotations.frame_info[frame_number]
+                label_id, points = parse_yolo_obj((frame_info["width"], frame_info["height"]), line)
+                annotations.add_shape(annotations.LabeledShape(
+                    type="rectangle",
+                    frame=frame_number,
+                    label=labels_mapping[label_id],
+                    points=points,
+                    occluded=False,
+                    attributes=[],
+                ))
+
+    def load_labels(labels_file):
+        with open(labels_file, "r") as f:
+            return {idx: label.strip() for idx, label in enumerate(f.readlines()) if label.strip()}
+
+    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
+    with TemporaryDirectory() as tmp_dir:
+        Archive(archive_file).extractall(tmp_dir)
+
+        labels_file = glob(os.path.join(tmp_dir, "*.names"))
+        if not labels_file:
+            raise Exception("Could not find '*.names' file with labels in uploaded archive")
+        elif len(labels_file) == 1:
+            labels_mapping = load_labels(labels_file[0])
+        else:
+            raise Exception("Too many '*.names' files in uploaded archive: {}".format(labels_file))
+
+        for dirpath, _, filenames in os.walk(tmp_dir):
+            for file in filenames:
+                if ".txt" == os.path.splitext(file)[1]:
+                    parse_yolo_file(os.path.join(dirpath, file), labels_mapping)
+
+def dump(file_object, annotations):
+    """Dump rectangle shapes as a YOLO ZIP archive.
+
+    Writes one '<image name>.txt' entry per frame and an 'obj.names' entry listing the label names.
+    """
+    from zipfile import ZipFile
+
+    # conversion formulas are based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
+    # <x> <y> <width> <height> - float values relative to width and height of image
+    # <x> <y> - are center of rectangle
+    def convert_to_yolo(img_size, box):
+        img_width, img_height = img_size
+        center_x = (box[0] + box[2]) / 2 / img_width
+        center_y = (box[1] + box[3]) / 2 / img_height
+        rel_width = (box[2] - box[0]) / img_width
+        rel_height = (box[3] - box[1]) / img_height
+        return center_x, center_y, rel_width, rel_height
+
+    labels_ids = {label[1]["name"]: idx for idx, label in enumerate(annotations.meta["task"]["labels"])}
+
+    with ZipFile(file_object, "w") as output_zip:
+        for frame_annotation in annotations.group_by_frame():
+            annotation_name = "{}.txt".format(get_filename(frame_annotation.name))
+            img_size = (frame_annotation.width, frame_annotation.height)
+            rows = []
+            for shape in frame_annotation.labeled_shapes:
+                # only rectangles are exported; every other shape type is skipped
+                if shape.type == "rectangle":
+                    coords = " ".join("{:.6f}".format(p) for p in convert_to_yolo(img_size, shape.points))
+                    rows.append("{} {}\n".format(labels_ids[shape.label], coords))
+
+            output_zip.writestr(annotation_name, "".join(rows))
+        # label names ordered by their index, so that line N of obj.names is the label with id N
+        names_by_id = sorted(labels_ids, key=labels_ids.get)
+        output_zip.writestr("obj.names", "\n".join(names_by_id))
diff --git a/cvat/apps/auto_annotation/model_manager.py b/cvat/apps/auto_annotation/model_manager.py
index 2fee3c2b..a2b73333 100644
--- a/cvat/apps/auto_annotation/model_manager.py
+++ b/cvat/apps/auto_annotation/model_manager.py
@@ -10,8 +10,6 @@ import rq
 import shutil
 import tempfile
 import itertools
-import sys
-import traceback
 
 from django.db import transaction
 from django.utils import timezone
@@ -26,7 +24,8 @@ from cvat.apps.engine.annotation import put_task_data, patch_task_data
 from .models import AnnotationModel, FrameworkChoice
 from .model_loader import ModelLoader, load_labelmap
 from .image_loader import ImageLoader
-from cvat.apps.engine.utils.import_modules import import_modules
+from cvat.apps.engine.utils import import_modules, execute_python_code
+
 
 
 def _remove_old_file(model_file_field):
@@ -269,9 +268,6 @@ class Results():
             "attributes": attributes or {},
         }
 
-class InterpreterError(Exception):
-    pass
-
 def _process_detections(detections, path_to_conv_script, restricted=True):
     results = Results()
     local_vars = {
@@ -296,21 +292,10 @@ def _process_detections(detections, path_to_conv_script, restricted=True):
         imports = import_modules(source_code)
         global_vars.update(imports)
 
-    try:
-        exec(source_code, global_vars, local_vars)
-    except SyntaxError as err:
-        error_class = err.__class__.__name__
-        detail = err.args[0]
-        line_number = err.lineno
-    except Exception as err:
-        error_class = err.__class__.__name__
-        detail = err.args[0]
-        cl, exc, tb = sys.exc_info()
-        line_number = traceback.extract_tb(tb)[-1][1]
-    else:
-        return results
 
-    raise InterpreterError("%s at line %d: %s" % (error_class, line_number, detail))
+    execute_python_code(source_code, global_vars, local_vars)
+
+    return results
 
 def run_inference_engine_annotation(data, model_file, weights_file,
        labels_mapping, attribute_spec, convertation_file, job=None, update_progress=None, restricted=True):
diff --git a/cvat/apps/dashboard/static/dashboard/js/dashboard.js b/cvat/apps/dashboard/static/dashboard/js/dashboard.js
index da74d893..bd504ff1 100644
--- a/cvat/apps/dashboard/static/dashboard/js/dashboard.js
+++ b/cvat/apps/dashboard/static/dashboard/js/dashboard.js
@@ -11,6 +11,7 @@
     LabelsInfo:false
     showMessage:false
     showOverlay:false
+    isDefaultFormat:false
 */
 
 class TaskView {
@@ -138,10 +139,15 @@ class TaskView {
 
         for (const format of this._annotationFormats) {
             for (const dumper of format.dumpers) {
-                dropdownDownloadMenu.append($(`<li>${dumper.display_name}</li>`).on('click', () => {
+                const listItem = $(`<li>${dumper.display_name}</li>`).on('click', () => {
                     dropdownDownloadMenu.addClass('hidden');
                     this._dump(downloadButton[0], dumper.display_name);
-                }));
+                });
+
+                if (isDefaultFormat(dumper.display_name, this._task.mode)) {
+                    listItem.addClass('bold');
+                }
+                dropdownDownloadMenu.append(listItem);
             }
 
             for (const loader of format.loaders) {
diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py
index 8f5fc696..b85b51d9 100644
--- a/cvat/apps/engine/annotation.py
+++ b/cvat/apps/engine/annotation.py
@@ -13,12 +13,12 @@ from django.db import transaction
 from cvat.apps.profiler import silk_profile
 from cvat.apps.engine.plugins import plugin_decorator
 from cvat.apps.annotation.annotation import AnnotationIR, Annotation
+from cvat.apps.engine.utils import execute_python_code, import_modules
 
 from . import models
 from .data_manager import DataManager
 from .log import slogger
 from . import serializers
-from .utils.import_modules import import_modules
 
 class PatchAction(str, Enum):
     CREATE = "create"
@@ -593,12 +593,13 @@ class JobAnnotation:
             global_vars = globals()
             imports = import_modules(source_code)
             global_vars.update(imports)
-            exec(source_code, global_vars)
+
+            execute_python_code(source_code, global_vars)
 
             global_vars["file_object"] = file_object
             global_vars["annotations"] = annotation_importer
 
-            exec("{}(file_object, annotations)".format(loader.handler), global_vars)
+            execute_python_code("{}(file_object, annotations)".format(loader.handler), global_vars)
         self.create(annotation_importer.data.slice(self.start_frame, self.stop_frame).serialize())
 
 class TaskAnnotation:
@@ -679,11 +680,11 @@ class TaskAnnotation:
             global_vars = globals()
             imports = import_modules(source_code)
             global_vars.update(imports)
-            exec(source_code, global_vars)
+            execute_python_code(source_code, global_vars)
             global_vars["file_object"] = dump_file
             global_vars["annotations"] = anno_exporter
 
-            exec("{}(file_object, annotations)".format(dumper.handler), global_vars)
+            execute_python_code("{}(file_object, annotations)".format(dumper.handler), global_vars)
 
     def upload(self, annotation_file, loader):
         annotation_importer = Annotation(
@@ -698,12 +699,12 @@ class TaskAnnotation:
             global_vars = globals()
             imports = import_modules(source_code)
             global_vars.update(imports)
-            exec(source_code, global_vars)
+            execute_python_code(source_code, global_vars)
 
             global_vars["file_object"] = file_object
             global_vars["annotations"] = annotation_importer
 
-            exec("{}(file_object, annotations)".format(loader.handler), global_vars)
+            execute_python_code("{}(file_object, annotations)".format(loader.handler), global_vars)
         self.create(annotation_importer.data.serialize())
 
     @property
diff --git a/cvat/apps/engine/static/engine/js/annotationUI.js b/cvat/apps/engine/static/engine/js/annotationUI.js
index de33ada7..346fad9b 100644
--- a/cvat/apps/engine/static/engine/js/annotationUI.js
+++ b/cvat/apps/engine/static/engine/js/annotationUI.js
@@ -46,6 +46,7 @@
     buildAnnotationSaver:false
     LabelsInfo:false
     uploadJobAnnotationRequest:false
+    isDefaultFormat:false
 */
 
 async function initLogger(jobID) {
@@ -389,7 +390,7 @@ function setupMenu(job, task, shapeCollectionModel,
 
     for (const format of annotationFormats) {
         for (const dumpSpec of format.dumpers) {
-            $(`<li>${dumpSpec.display_name}</li>`).on('click', async () => {
+            const listItem = $(`<li>${dumpSpec.display_name}</li>`).on('click', async () => {
                 $('#downloadAnnotationButton')[0].disabled = true;
                 $('#downloadDropdownMenu').addClass('hidden');
                 try {
@@ -399,7 +400,11 @@ function setupMenu(job, task, shapeCollectionModel,
                 } finally {
                     $('#downloadAnnotationButton')[0].disabled = false;
                 }
-            }).appendTo('#downloadDropdownMenu');
+            });
+            if (isDefaultFormat(dumpSpec.display_name, task.mode)) {
+                listItem.addClass('bold');
+            }
+            $('#downloadDropdownMenu').append(listItem);
         }
 
         for (const loader of format.loaders) {
diff --git a/cvat/apps/engine/static/engine/js/base.js b/cvat/apps/engine/static/engine/js/base.js
index 31054ef7..434892ac 100644
--- a/cvat/apps/engine/static/engine/js/base.js
+++ b/cvat/apps/engine/static/engine/js/base.js
@@ -11,6 +11,7 @@
     showOverlay
     uploadJobAnnotationRequest
     uploadTaskAnnotationRequest
+    isDefaultFormat
 */
 
 /* global
@@ -221,3 +222,8 @@ $(document).ready(() => {
         height: `${window.screen.height * 0.95}px`,
     });
 });
+
+function isDefaultFormat(dumperName, taskMode) { // is dumperName the CVAT XML 1.1 dumper matching taskMode?
+    return (dumperName === 'CVAT XML 1.1 for videos' && taskMode === 'interpolation')
+    || (dumperName === 'CVAT XML 1.1 for images' && taskMode === 'annotation');
+}
diff --git a/cvat/apps/engine/utils/import_modules.py b/cvat/apps/engine/utils.py
similarity index 58%
rename from cvat/apps/engine/utils/import_modules.py
rename to cvat/apps/engine/utils.py
index 89d8daee..a0cedbde 100644
--- a/cvat/apps/engine/utils/import_modules.py
+++ b/cvat/apps/engine/utils.py
@@ -1,6 +1,8 @@
 import ast
 from collections import namedtuple
 import importlib
+import sys
+import traceback
 
 Import = namedtuple("Import", ["module", "name", "alias"])
 
@@ -34,3 +36,21 @@ def import_modules(source_code: str):
             results[import_.name] = loaded_module
 
     return results
+
+class InterpreterError(Exception):
+    """Raised by execute_python_code when the executed source fails to compile or run."""
+
+def execute_python_code(source_code, global_vars=None, local_vars=None):
+    """Exec source_code; any failure is re-raised as InterpreterError("<class> at line <n>: <details>")."""
+    try:
+        exec(source_code, global_vars, local_vars)
+    except Exception as err:
+        if isinstance(err, SyntaxError):
+            line_number = err.lineno  # a SyntaxError carries the offending line itself
+        else:
+            # otherwise report the line of the innermost traceback frame
+            line_number = traceback.extract_tb(sys.exc_info()[2])[-1][1]
+        # err.args is empty for e.g. `raise ValueError()`; indexing it blindly would raise IndexError
+        details = err.args[0] if err.args else str(err)
+        error_class = err.__class__.__name__
+        raise InterpreterError("{} at line {}: {}".format(error_class, line_number, details)) from err
diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py
index f2fde49e..9823f31e 100644
--- a/cvat/apps/engine/views.py
+++ b/cvat/apps/engine/views.py
@@ -306,8 +306,9 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
                     finally:
                         rq_job.delete()
             elif rq_job.is_failed:
+                exc_info = str(rq_job.exc_info)
                 rq_job.delete()
-                return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+                return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
             else:
                 return Response(status=status.HTTP_202_ACCEPTED)
 
@@ -534,7 +535,8 @@ def load_data_proxy(request, rq_id, rq_func, pk):
         elif rq_job.is_failed:
             os.close(rq_job.meta['tmp_file_descriptor'])
             os.remove(rq_job.meta['tmp_file'])
+            exc_info = str(rq_job.exc_info)
             rq_job.delete()
-            return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+            return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
     return Response(status=status.HTTP_202_ACCEPTED)