From c2bf8eda01c17a505f8f65273b0ac7d856221a21 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com>
Date: Fri, 9 Aug 2019 18:28:41 +0300
Subject: [PATCH] Az/coco loader format support (#630)

* added coco dumper
* added license note
* added coco loader
---
 CHANGELOG.md                 |   2 +-
 README.md                    |  13 ++-
 cvat/apps/annotation/coco.py | 220 +++++++++++++++++++++++++----------
 3 files changed, 167 insertions(+), 68 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dc92d9e..e9b05595 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to create a custom extractors for unsupported media types
 - Added in PDF extractor
 - Added in a command line model manager tester
-- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO)
+- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO, MS COCO)
 
 ### Changed
 - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)
diff --git a/README.md b/README.md
index 053d6058..1a672e90 100644
--- a/README.md
+++ b/README.md
@@ -31,12 +31,13 @@ CVAT is free, online, interactive video and image annotation tool for computer v
 
 Format selection is possible after clicking on the Upload annotation / Dump annotation button.
 
-| Annotation format         | Dumper | Loader |
-| ------------------------- | ------ | ------ |
-| CVAT XML v1.1 for images  | X      | X      |
-| CVAT XML v1.1 for a video | X      | X      |
-| Pascal VOC                | X      | X      |
-| YOLO                      | X      | X      |
+| Annotation format                                                                  | Dumper | Loader |
+| ---------------------------------------------------------------------------------- | ------ | ------ |
+| [CVAT XML v1.1 for images](cvat/apps/documentation/xml_format.md#annotation)       | X      | X      |
+| [CVAT XML v1.1 for a video](cvat/apps/documentation/xml_format.md#interpolation)   | X      | X      |
+| [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/)                              | X      | X      |
+| [YOLO](https://pjreddie.com/darknet/yolo/)                                         | X      | X      |
+| [MS COCO Object Detection](http://cocodataset.org/#format-data)                    | X      | X      |
 
 ## Links
 - [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)
diff --git a/cvat/apps/annotation/coco.py b/cvat/apps/annotation/coco.py
index dfce6dfc..50f83d39 100644
--- a/cvat/apps/annotation/coco.py
+++ b/cvat/apps/annotation/coco.py
@@ -13,48 +13,58 @@ format_spec = {
         },
     ],
     "loaders": [
+        {
+            "display_name": "{name} {format} {version}",
+            "format": "JSON",
+            "version": "1.0",
+            "handler": "load"
+        },
     ],
 }
 
+def mask_to_polygon(mask, tolerance=1.0, area_threshold=1):
+    """Convert object's mask to polygon [[x1,y1, x2,y2 ...], [...]]
+    Args:
+        mask: object's mask presented as 2D array of 0 and 1
+        tolerance: maximum distance from original points of polygon to approximated
+        area_threshold: polygons whose area is not greater than this value are removed
+    Returns:
+        list of polygons, each a flat list [x1, y1, x2, y2, ...] of Python floats
+    """
+    from skimage import measure
+    from pycocotools import mask as mask_util
+    import numpy as np
+
+    polygons = []
+    # pad mask with 0 around borders
+    padded_mask = np.pad(mask, pad_width=1, mode='constant', constant_values=0)
+    contours = measure.find_contours(padded_mask, 0.5)
+    for contour in contours:
+        # Fix coordinates after padding. Shift each contour on its own: contours differ
+        # in length, so the list cannot be turned into one regular array and shifted at once
+        contour = contour - 1
+        if not np.array_equal(contour[0], contour[-1]):
+            contour = np.vstack((contour, contour[0]))
+        contour = measure.approximate_polygon(contour, tolerance)
+        if len(contour) <= 2:
+            continue
+        # swap (row, col) to (x, y), clamp negative coordinates to 0 and flatten
+        xy = np.maximum(np.flip(contour, axis=1), 0)
+        reshaped_contour = xy.ravel().tolist()
+        # Check if area of a polygon is enough
+        rle = mask_util.frPyObjects([reshaped_contour], mask.shape[0], mask.shape[1])
+        area = mask_util.area(rle)
+        if sum(area) > area_threshold:
+            polygons.append(reshaped_contour)
+    return polygons
+
 def dump(file_object, annotations):
     import numpy as np
     import json
-    from skimage import measure
+    from collections import OrderedDict
     from pycocotools import mask as mask_util
     from pycocotools import coco as coco_loader
 
-    def mask_to_polygon(mask, tolerance=1.0, area_threshold=1):
-        """Convert object's mask to polygon [[x1,y1, x2,y2 ...], [...]]
-        Args:
-            mask: object's mask presented as 2D array of 0 and 1
-            tolerance: maximum distance from original points of polygon to approximated
-            area_threshold: if area of a polygon is less than this value, remove this small object
-        """
-        polygons = []
-        # pad mask with 0 around borders
-        padded_mask = np.pad(mask, pad_width=1, mode='constant', constant_values=0)
-        contours = measure.find_contours(padded_mask, 0.5)
-        # Fix coordinates after padding
-        contours = np.subtract(contours, 1)
-        for contour in contours:
-            if not np.array_equal(contour[0], contour[-1]):
-                contour = np.vstack((contour, contour[0]))
-            contour = measure.approximate_polygon(contour, tolerance)
-            if len(contour) > 2:
-                contour = np.flip(contour, axis=1)
-                reshaped_contour = []
-                for xy in contour:
-                    reshaped_contour.append(xy[0])
-                    reshaped_contour.append(xy[1])
-                for rcontour in reshaped_contour:
-                    if rcontour < 0:
-                        rcontour = 0
-                # Check if area of a polygon is enough
-                rle = mask_util.frPyObjects([reshaped_contour], mask.shape[0], mask.shape[1])
-                area = mask_util.area(rle)
-                if sum(area) > area_threshold:
-                    polygons.append(reshaped_contour)
-        return polygons
 
     def fix_segments_intersections(polygons, height, width, img_name,
                                 threshold=0.0, ratio_tolerance=0.001, area_threshold=1):
@@ -143,11 +153,11 @@ def dump(file_object, annotations):
         Args:
             result_annotation: output annotation in COCO representation
         """
-        result_annotation['licenses'].append({
-            'name': '',
-            'id': 0,
-            'url': ''
-        })
+        result_annotation['licenses'].append(OrderedDict([
+            ('name', ''),
+            ('id', 0),
+            ('url', ''),
+        ]))
 
 
     def insert_info_data(annotations, result_annotation):
@@ -161,14 +171,14 @@ def dump(file_object, annotations):
         date = annotations.meta['dumped']
         date = date.split(' ')[0]
         year = date.split('-')[0]
-        result_annotation['info'] = {
-            'contributor': '',
-            'date_created': date,
-            'description': description,
-            'url': '',
-            'version': version,
-            'year': year
-        }
+        result_annotation['info'] = OrderedDict([
+            ('contributor', ''),
+            ('date_created', date),
+            ('description', description),
+            ('url', ''),
+            ('version', version),
+            ('year', year),
+        ])
 
 
     def insert_categories_data(annotations, result_annotation):
@@ -188,12 +198,14 @@ def dump(file_object, annotations):
             cat_id = 0
             for name in names:
                 category_map[name] = cat_id
-                categories.append({'id': cat_id, 'name': name, 'supercategory': ''})
+                categories.append(OrderedDict([
+                    ('id', cat_id),
+                    ('name', name),
+                    ('supercategory', ''),
+                ]))
                 cat_id += 1
             return category_map, categories
 
-        categories = []
-        category_map = {}
         label_names = [label[1]["name"] for label in annotations.meta['task']['labels']]
 
         category_map, categories = get_categories(label_names, sort=True)
@@ -208,7 +220,7 @@ def dump(file_object, annotations):
             image: dictionary with data for image from original annotation
             result_annotation: output annotation in COCO representation
         """
-        new_img = {}
+        new_img = OrderedDict()
         new_img['coco_url'] = ''
         new_img['date_captured'] = ''
         new_img['flickr_url'] = ''
@@ -229,7 +241,7 @@ def dump(file_object, annotations):
             obj: includes data for the object [label, polygon]
             result_annotation: output annotation in COCO representation
         """
-        new_anno = {}
+        new_anno = OrderedDict()
         new_anno['category_id'] = category_map[obj['label']]
         new_anno['id'] = segm_id
         new_anno['image_id'] = image.frame
@@ -240,19 +252,18 @@ def dump(file_object, annotations):
         new_anno['bbox'] = bbox
         result_annotation['annotations'].append(new_anno)
 
-    result_annotation = {
-        'licenses': [],
-        'info': {},
-        'categories': [],
-        'images': [],
-        'annotations': []
-    }
+    result_annotation = OrderedDict([
+        ('licenses', []),
+        ('info', {}),
+        ('categories', []),
+        ('images', []),
+        ('annotations', []),
+    ])
 
     insert_license_data(result_annotation)
     insert_info_data(annotations, result_annotation)
     category_map = insert_categories_data(annotations, result_annotation)
 
-    segm_id = 0
     for img in annotations.group_by_frame():
         polygons = []
 
@@ -262,6 +273,7 @@ def dump(file_object, annotations):
                     'label': shape.label,
                     'points': shape.points,
                     'z_order': shape.z_order,
+                    'group': shape.group,
                 }
 
                 if shape.type == 'rectangle':
@@ -270,6 +282,7 @@ def dump(file_object, annotations):
                     xbr = polygon['points'][2]
                     ybr = polygon['points'][3]
                     polygon['points'] = [xtl, ytl, xbr, ytl, xbr, ybr, xtl, ybr]
+
                 polygons.append(polygon)
 
         polygons.sort(key=lambda x: int(x['z_order']))
@@ -278,10 +291,27 @@ def dump(file_object, annotations):
         insert_image_data(img, result_annotation)
         polygons = fix_segments_intersections(polygons, img.height, img.width, img.name)
 
+        # combine grouped polygons with the same label
+        grouped_poligons = OrderedDict()
+        ungrouped_poligons = []
+        for polygon in polygons:
+            group_id = polygon['group']
+            label = polygon['label']
+            if group_id != 0:
+                if group_id not in grouped_poligons:
+                    grouped_poligons[group_id] = OrderedDict()
+
+                if label not in grouped_poligons[group_id]:
+                    grouped_poligons[group_id][label] = polygon
+                else:
+                    grouped_poligons[group_id][label]['points'].extend(polygon['points'])
+            else:
+                ungrouped_poligons.append(polygon)
+        polygons = ungrouped_poligons + [poly for group in grouped_poligons.values() for poly in group.values()]
+
         # Create new annotation for this image
-        for poly in polygons:
+        for segm_id, poly in enumerate(polygons, start=len(result_annotation['annotations'])):  # keep ids unique across frames
             insert_annotation_data(img, category_map, segm_id, poly, result_annotation)
-            segm_id += 1
 
     file_object.write(json.dumps(result_annotation, indent=2).encode())
     file_object.flush()
@@ -291,3 +321,71 @@ def dump(file_object, annotations):
         coco_loader.COCO(file_object.name)
     except:
         raise
+
+def load(file_object, annotations):
+    """Load MS COCO JSON annotations and add them to annotations as polygon shapes
+    Args:
+        file_object: file with COCO JSON; its name is handed to pycocotools for parsing
+        annotations: target collection; gets one LabeledShape per polygon via add_shape
+    """
+    from pycocotools import coco as coco_loader
+    from pycocotools import mask as mask_utils
+    import numpy as np
+
+    def get_filename(path):
+        import os
+        return os.path.splitext(os.path.basename(path))[0]
+
+    def match_frame(frame_info, filename):
+        import re
+        # try to match by filename
+        coco_filename = get_filename(filename)
+        for frame_number, info in frame_info.items():
+            if get_filename(info["path"]) == coco_filename:
+                return frame_number
+
+        # try to extract frame number from filename
+        numbers = re.findall(r"\d+", filename)
+        if len(numbers) == 1:
+            return int(numbers[0])
+
+        raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename))
+
+    def get_polygons(ann, img):
+        segmentation = ann.get('segmentation')
+        if not segmentation:
+            return []
+        # polygon: already a list of flat [x1, y1, x2, y2, ...] lists
+        if isinstance(segmentation, list):
+            return segmentation
+        # mask: RLE whose 'counts' is a plain list is converted by frPyObjects before decoding
+        if isinstance(segmentation['counts'], list):
+            rle = mask_utils.frPyObjects([segmentation], img['height'], img['width'])
+        else:
+            rle = [segmentation]
+        mask = np.array(mask_utils.decode(rle), dtype=np.uint8)
+        mask = np.sum(mask, axis=2)
+        mask = np.array(mask > 0, dtype=np.uint8)
+        return mask_to_polygon(mask)
+
+    coco = coco_loader.COCO(file_object.name)
+    labels={cat['id']: cat['name'] for cat in coco.loadCats(coco.getCatIds())}
+
+    group_idx = 0
+    for img_id in coco.getImgIds():
+        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
+        img = coco.loadImgs(ids=img_id)[0]
+        frame_number = match_frame(annotations.frame_info, img['file_name'])
+        for ann in anns:
+            label_name = labels[ann['category_id']]
+            polygons = get_polygons(ann, img)
+            # several polygons of one annotation share a fresh non-zero group id
+            group = 0
+            if len(polygons) > 1:
+                group_idx += 1
+                group = group_idx
+            for polygon in polygons:
+                annotations.add_shape(annotations.LabeledShape(
+                    type='polygon', frame=frame_number, label=label_name,
+                    points=polygon, occluded=False, attributes=[], group=group,
+                ))