Replace YOLO format support in CVAT with Datumaro (#1151)

* Employ transforms and item wrapper * Add image class and tests * Add image info support to formats * Fix cli * Fix merge and voc converter * Update remote images extractor * Codacy * Remove item name, require path in Image * Merge images of dataset items * Update tests * Add image dir converter * Update Datumaro format * Update COCO format with image info * Update CVAT format with image info * Update TFrecord format with image info * Update VOC format with image info * Update YOLO format with image info * Update dataset manager bindings with image info * Add image name to id transform * Replace YOLO export and import in CVAT with Datumaro
6 years ago · 3ce1a6ffe9
parent b36f402f9c
commit 3ce1a6ffe9
1 changed files with 34 additions and 86 deletions
--- a/cvat/apps/annotation/yolo.py
+++ b/cvat/apps/annotation/yolo.py
@@ -24,98 +24,46 @@ format_spec = {
 def load(file_object, annotations):
    from pyunpack import Archive
-    import os
+    import os.path as osp
    from tempfile import TemporaryDirectory
    from glob import glob
-
+    from datumaro.plugins.yolo_format.importer import YoloImporter
-    def convert_from_yolo(img_size, box):
+    from cvat.apps.dataset_manager.bindings import import_dm_annotations
        # convertation formulas are based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
        # <x> <y> <width> <height> - float values relative to width and height of image
        # <x> <y> - are center of rectangle
        def clamp(value, _min, _max):
            return max(min(_max, value), _min)
        xtl = clamp(img_size[0] * (box[0] - box[2] / 2), 0, img_size[0])
        ytl = clamp(img_size[1] * (box[1] - box[3] / 2), 0, img_size[1])
        xbr = clamp(img_size[0] * (box[0] + box[2] / 2), 0, img_size[0])
        ybr = clamp(img_size[1] * (box[1] + box[3] / 2), 0, img_size[1])
        return [xtl, ytl, xbr, ybr]
    def parse_yolo_obj(img_size, obj):
        label_id, x, y, w, h = obj.split(" ")
        return int(label_id), convert_from_yolo(img_size, (float(x), float(y), float(w), float(h)))
    def parse_yolo_file(annotation_file, labels_mapping):
        frame_number = annotations.match_frame(annotation_file)
        with open(annotation_file, "r") as fp:
            line = fp.readline()
            while line:
                frame_info = annotations.frame_info[frame_number]
                label_id, points = parse_yolo_obj((frame_info["width"], frame_info["height"]), line)
                annotations.add_shape(annotations.LabeledShape(
                    type="rectangle",
                    frame=frame_number,
                    label=labels_mapping[label_id],
                    points=points,
                    occluded=False,
                    attributes=[],
                ))
                line = fp.readline()
    def load_labels(labels_file):
        with open(labels_file, "r") as f:
            return {idx: label.strip() for idx, label in enumerate(f.readlines()) if label.strip()}
    archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
    with TemporaryDirectory() as tmp_dir:
        Archive(archive_file).extractall(tmp_dir)
-        labels_file = glob(os.path.join(tmp_dir, "*.names"))
+        image_info = {}
-        if not labels_file:
+        anno_files = glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)
-            raise Exception("Could not find '*.names' file with labels in uploaded archive")
+        for filename in anno_files:
-        elif len(labels_file) == 1:
+            filename = osp.basename(filename)
-            labels_mapping = load_labels(labels_file[0])
+            frame_info = None
-        else:
+            try:
-            raise Exception("Too many '*.names' files in uploaded archive: {}".format(labels_file))
+                frame_info = annotations.frame_info[
-
+                    int(osp.splitext(filename)[0])]
-        for dirpath, _, filenames in os.walk(tmp_dir):
+            except Exception:
-            for file in filenames:
+                pass
-                if ".txt" == os.path.splitext(file)[1]:
+            try:
-                    parse_yolo_file(os.path.join(dirpath, file), labels_mapping)
+                frame_info = annotations.match_frame(filename)
                frame_info = annotations.frame_info[frame_info]
            except Exception:
                pass
            if frame_info is not None:
                image_info[osp.splitext(filename)[0]] = \
                    (frame_info['height'], frame_info['width'])
        dm_project = YoloImporter()(tmp_dir, image_info=image_info)
        dm_dataset = dm_project.make_dataset()
        import_dm_annotations(dm_dataset, annotations)
 def dump(file_object, annotations):
-    from zipfile import ZipFile
+    from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
-    import os
+    from cvat.apps.dataset_manager.util import make_zip_archive
-
+    from datumaro.components.project import Environment
-    # convertation formulas are based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
+    from tempfile import TemporaryDirectory
-    # <x> <y> <width> <height> - float values relative to width and height of image
+    extractor = CvatAnnotationsExtractor('', annotations)
-    # <x> <y> - are center of rectangle
+    converter = Environment().make_converter('yolo')
-    def convert_to_yolo(img_size, box):
+    with TemporaryDirectory() as temp_dir:
-        x = (box[0] + box[2]) / 2 / img_size[0]
+        converter(extractor, save_dir=temp_dir)
-        y = (box[1] + box[3]) / 2 / img_size[1]
+        make_zip_archive(temp_dir, file_object)
        w = (box[2] - box[0]) / img_size[0]
        h = (box[3] - box[1]) / img_size[1]
        return x, y, w, h
    labels_ids = {label[1]["name"]: idx for idx, label in enumerate(annotations.meta["task"]["labels"])}
    with ZipFile(file_object, "w") as output_zip:
        for frame_annotation in annotations.group_by_frame():
            image_name = frame_annotation.name
            annotation_name = "{}.txt".format(os.path.splitext(os.path.basename(image_name))[0])
            width = frame_annotation.width
            height = frame_annotation.height
            yolo_annotation = ""
            for shape in frame_annotation.labeled_shapes:
                if shape.type != "rectangle":
                    continue
                label = shape.label
                yolo_bb = convert_to_yolo((width, height), shape.points)
                yolo_bb = " ".join("{:.6f}".format(p) for p in yolo_bb)
                yolo_annotation += "{} {}\n".format(labels_ids[label], yolo_bb)
            output_zip.writestr(annotation_name, yolo_annotation)
        output_zip.writestr("obj.names", "\n".join(l[0] for l in sorted(labels_ids.items(), key=lambda x:x[1])))