[Datumaro] Optimize mask operations (#1232)

* Optimize mask to rle
* Optimize mask operations
* Fix dm format cmdline
* Use RLE masks in datumaro format
6 years ago · 24130cda41
parent be5577d013
commit 24130cda41
6 changed files with 76 additions and 63 deletions
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@@ -200,14 +200,12 @@ class Mask(Annotation):
        return self._z_order
    def as_class_mask(self, label_id=None):
        from datumaro.util.mask_tools import make_index_mask
        if label_id is None:
            label_id = self.label
-        return make_index_mask(self.image, label_id)
+        return self.image * label_id
    def as_instance_mask(self, instance_id):
-        from datumaro.util.mask_tools import make_index_mask
+        return self.image * instance_id
        return make_index_mask(self.image, instance_id)
    def get_area(self):
        return np.count_nonzero(self.image)
--- a/datumaro/datumaro/plugins/coco_format/converter.py
+++ b/datumaro/datumaro/plugins/coco_format/converter.py
@@ -278,7 +278,10 @@ class _InstancesConverter(_TaskConverter):
        is_crowd = mask is not None
        if is_crowd:
-            segmentation = mask
+            segmentation = {
                'counts': list(int(c) for c in mask['counts']),
                'size': list(int(c) for c in mask['size'])
            }
        else:
            segmentation = [list(map(float, p)) for p in polygons]
--- a/datumaro/datumaro/plugins/datumaro_format/converter.py
+++ b/datumaro/datumaro/plugins/datumaro_format/converter.py
@@ -6,16 +6,18 @@
 # pylint: disable=no-self-use
 import json
 import numpy as np
 import os
 import os.path as osp
 from datumaro.components.converter import Converter
 from datumaro.components.extractor import (
    DEFAULT_SUBSET_NAME, Annotation,
-    Label, Mask, Points, Polygon, PolyLine, Bbox, Caption,
+    Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption,
    LabelCategories, MaskCategories, PointsCategories
 )
 from datumaro.util.image import save_image
 import pycocotools.mask as mask_utils
 from datumaro.components.cli_plugin import CliPlugin
 from .format import DatumaroPath
@@ -40,8 +42,6 @@ class _SubsetWriter:
            'items': [],
        }
        self._next_mask_id = 1
    @property
    def categories(self):
        return self._data['categories']
@@ -123,33 +123,22 @@ class _SubsetWriter:
        })
        return converted
    def _save_mask(self, mask):
        mask_id = None
        if mask is None:
            return mask_id
        mask_id = self._next_mask_id
        self._next_mask_id += 1
        filename = '%d%s' % (mask_id, DatumaroPath.MASK_EXT)
        masks_dir = osp.join(self._context._annotations_dir,
            DatumaroPath.MASKS_DIR)
        os.makedirs(masks_dir, exist_ok=True)
        path = osp.join(masks_dir, filename)
        save_image(path, mask)
        return mask_id
    def _convert_mask_object(self, obj):
        converted = self._convert_annotation(obj)
-        mask = obj.image
+        if isinstance(obj, RleMask):
-        mask_id = None
+            rle = obj.rle
-        if mask is not None:
+        else:
-            mask_id = self._save_mask(mask)
+            rle = mask_utils.encode(
                np.require(obj.image, dtype=np.uint8, requirements='F'))
        converted.update({
            'label_id': _cast(obj.label, int),
-            'mask_id': _cast(mask_id, int),
+            'rle': {
                # serialize as compressed COCO mask
                'counts': rle['counts'].decode('ascii'),
                'size': list(int(c) for c in rle['size']),
            }
        })
        return converted
@@ -289,6 +278,7 @@ class _Converter:
 class DatumaroConverter(Converter, CliPlugin):
    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--save-images', action='store_true',
            help="Save images (default: %(default)s)")
        return parser
--- a/datumaro/datumaro/plugins/datumaro_format/extractor.py
+++ b/datumaro/datumaro/plugins/datumaro_format/extractor.py
@@ -4,16 +4,14 @@
 # SPDX-License-Identifier: MIT
 import json
 import logging as log
 import os.path as osp
 from datumaro.components.extractor import (SourceExtractor,
    DEFAULT_SUBSET_NAME, DatasetItem,
-    AnnotationType, Label, Mask, Points, Polygon, PolyLine, Bbox, Caption,
+    AnnotationType, Label, RleMask, Points, Polygon, PolyLine, Bbox, Caption,
    LabelCategories, MaskCategories, PointsCategories
 )
 from datumaro.util.image import Image
 from datumaro.util.mask_tools import lazy_mask
 from .format import DatumaroPath
@@ -127,19 +125,9 @@ class DatumaroExtractor(SourceExtractor):
            elif ann_type == AnnotationType.mask:
                label_id = ann.get('label_id')
-                mask_id = str(ann.get('mask_id'))
+                rle = ann['rle']
-
+                rle['counts'] = rle['counts'].encode('ascii')
-                mask_path = osp.join(self._path, DatumaroPath.ANNOTATIONS_DIR,
+                loaded.append(RleMask(rle=rle, label=label_id,
                    DatumaroPath.MASKS_DIR, mask_id + DatumaroPath.MASK_EXT)
                mask = None
                if osp.isfile(mask_path):
                    mask = lazy_mask(mask_path)
                else:
                    log.warn("Not found mask image file '%s', skipped." % \
                        mask_path)
                loaded.append(Mask(label=label_id, image=mask,
                    id=ann_id, attributes=attributes, group=group))
            elif ann_type == AnnotationType.polyline:
--- a/datumaro/datumaro/util/mask_tools.py
+++ b/datumaro/datumaro/util/mask_tools.py
@@ -111,15 +111,20 @@ def load_mask(path, inverse_colormap=None):
 def lazy_mask(path, inverse_colormap=None):
    return lazy_image(path, lambda path: load_mask(path, inverse_colormap))
 def mask_to_rle(binary_mask):
-    counts = []
+    # walk in column-major (Fortran) order as COCO format specifies
-    for i, (value, elements) in enumerate(
+    bounded = binary_mask.ravel(order='F')
-            groupby(binary_mask.ravel(order='F'))):
+
-        # decoding starts from 0
+    # add borders to sequence
-        if i == 0 and value == 1:
+    # find boundary positions for sequences and compute their lengths
-            counts.append(0)
+    difs = np.diff(bounded, prepend=[1 - bounded[0]], append=[1 - bounded[-1]])
-        counts.append(len(list(elements)))
+    counts, = np.where(difs != 0)
    # start RLE encoding from 0 as COCO format specifies
    if bounded[0] != 0:
        counts = np.diff(counts, prepend=[0])
    else:
        counts = np.diff(counts)
    return {
        'counts': counts,
@@ -267,7 +272,7 @@ def find_mask_bbox(mask):
 def merge_masks(masks):
    """
-        Merges masks into one, mask order is resposible for z order.
+        Merges masks into one, mask order is responsible for z order.
    """
    if not masks:
        return None
--- a/datumaro/tests/test_masks.py
+++ b/datumaro/tests/test_masks.py
@@ -68,15 +68,7 @@ class PolygonConversionsTest(TestCase):
            self.assertTrue(np.array_equal(e_mask, c_mask),
                '#%s: %s\n%s\n' % (i, e_mask, c_mask))
-    def test_mask_to_rle(self):
+    def _test_mask_to_rle(self, source_mask):
        source_mask = np.array([
            [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
            [0, 0, 1, 1, 0, 1, 0, 1, 0, 0],
            [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ])
        rle_uncompressed = mask_tools.mask_to_rle(source_mask)
        from pycocotools import mask as mask_utils
@@ -87,6 +79,43 @@ class PolygonConversionsTest(TestCase):
        self.assertTrue(np.array_equal(source_mask, resulting_mask),
            '%s\n%s\n' % (source_mask, resulting_mask))
    def test_mask_to_rle_multi(self):
        cases = [
            np.array([
                [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
                [0, 0, 1, 1, 0, 1, 0, 1, 0, 0],
                [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
                [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            ]),
            np.array([
                [0]
            ]),
            np.array([
                [1]
            ]),
            np.array([
                [1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                [0, 0, 1, 1, 1, 0, 0, 0, 0, 0],
                [1, 0, 1, 0, 1, 1, 1, 0, 0, 0],
                [1, 1, 0, 1, 0, 1, 1, 1, 1, 0],
                [1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
                [1, 0, 0, 1, 0, 0, 0, 1, 0, 1],
                [1, 1, 0, 0, 1, 1, 0, 0, 0, 1],
                [0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
                [1, 1, 0, 0, 0, 0, 0, 1, 0, 0],
                [1, 1, 1, 1, 1, 0, 1, 0, 1, 0],
                [0, 1, 0, 1, 1, 1, 1, 1, 0, 0],
                [0, 1, 0, 0, 0, 1, 0, 0, 1, 0],
                [1, 1, 0, 1, 0, 0, 1, 1, 1, 1],
            ])
        ]
        for case in cases:
            self._test_mask_to_rle(case)
 class ColormapOperationsTest(TestCase):
    def test_can_paint_mask(self):
        mask = np.zeros((1, 3), dtype=np.uint8)