Raw image copying in dataset export (#2229)

* Raw image copying in dataset export
* use byteimage
* use opencv, switch frame data type for videos
* Fix image reading
* update dm dependency
5 years ago · 84b8a85a20
parent 572b5f2610
commit 84b8a85a20
5 changed files with 50 additions and 23 deletions
--- a/cvat/apps/dataset_manager/bindings.py
+++ b/cvat/apps/dataset_manager/bindings.py
@ -13,7 +13,7 @@ import datumaro.components.extractor as datumaro
 from cvat.apps.engine.frame_provider import FrameProvider
 from cvat.apps.engine.models import AttributeType, ShapeType
 from datumaro.util import cast
-from datumaro.util.image import Image
+from datumaro.util.image import ByteImage, Image

 from .annotation import AnnotationManager, TrackManager

@ -457,18 +457,37 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor):

        dm_items = []

+        is_video = task_data.meta['task']['mode'] == 'interpolation'
+        ext = ''
+        if is_video:
+            ext = FrameProvider.VIDEO_FRAME_EXT
        if include_images:
            frame_provider = FrameProvider(task_data.db_task.data)
+            if is_video:
+                # optimization for videos: use numpy arrays instead of bytes
+                # some formats or transforms can require image data
+                def _make_image(i, **kwargs):
+                    loader = lambda _: frame_provider.get_frame(i,
+                        quality=frame_provider.Quality.ORIGINAL,
+                        out_type=frame_provider.Type.NUMPY_ARRAY)[0]
+                    return Image(loader=loader, **kwargs)
+            else:
+                # for images use encoded data to avoid recoding
+                def _make_image(i, **kwargs):
+                    loader = lambda _: frame_provider.get_frame(i,
+                        quality=frame_provider.Quality.ORIGINAL,
+                        out_type=frame_provider.Type.BUFFER)[0].getvalue()
+                    return ByteImage(data=loader, **kwargs)

        for frame_data in task_data.group_by_frame(include_empty=True):
-            loader = None
+            image_args = {
+                'path': frame_data.name + ext,
+                'size': (frame_data.height, frame_data.width),
+            }
            if include_images:
-                loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i,
-                    quality=frame_provider.Quality.ORIGINAL,
-                    out_type=frame_provider.Type.NUMPY_ARRAY)[0]
-            dm_image = Image(path=frame_data.name, loader=loader,
-                size=(frame_data.height, frame_data.width)
-            )
+                dm_image = _make_image(frame_data.idx, **image_args)
+            else:
+                dm_image = Image(**image_args)
            dm_anno = self._read_cvat_anno(frame_data, task_data)
            dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
                annotations=dm_anno, image=dm_image,
--- a/cvat/apps/dataset_manager/formats/cvat.py
+++ b/cvat/apps/dataset_manager/formats/cvat.py
@ -531,6 +531,10 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
            anno_callback(f, task_data)

        if save_images:
+            ext = ''
+            if task_data.meta['task']['mode'] == 'interpolation':
+                ext = FrameProvider.VIDEO_FRAME_EXT
+
            img_dir = osp.join(temp_dir, 'images')
            frame_provider = FrameProvider(task_data.db_task.data)
            frames = frame_provider.get_frames(
@ -538,9 +542,6 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
                frame_provider.Type.BUFFER)
            for frame_id, (frame_data, _) in enumerate(frames):
                frame_name = task_data.frame_info[frame_id]['path']
-                ext = ''
-                if not '.' in osp.basename(frame_name):
-                    ext = '.png'
                img_path = osp.join(img_dir, frame_name + ext)
                os.makedirs(osp.dirname(img_path), exist_ok=True)
                with open(img_path, 'wb') as f:
--- a/cvat/apps/engine/frame_provider.py
+++ b/cvat/apps/engine/frame_provider.py
@ -6,6 +6,7 @@ import math
 from enum import Enum
 from io import BytesIO

+import cv2
 import numpy as np
 from PIL import Image

@ -43,6 +44,9 @@ class RandomAccessIterator:
        self.pos = -1

 class FrameProvider:
+    VIDEO_FRAME_EXT = '.PNG'
+    VIDEO_FRAME_MIME = 'image/png'
+
    class Quality(Enum):
        COMPRESSED = 0
        ORIGINAL = 100
@ -129,13 +133,14 @@ class FrameProvider:

        return chunk_number_

-    @staticmethod
-    def _av_frame_to_png_bytes(av_frame):
-        pil_img = av_frame.to_image()
-        buf = BytesIO()
-        pil_img.save(buf, format='PNG')
-        buf.seek(0)
-        return buf
+    @classmethod
+    def _av_frame_to_png_bytes(cls, av_frame):
+        ext = cls.VIDEO_FRAME_EXT
+        image = av_frame.to_ndarray(format='bgr24')
+        success, result = cv2.imencode(ext, image)
+        if not success:
+            raise Exception("Failed to encode image to '%s' format" % (ext))
+        return BytesIO(result.tobytes())

    def _convert_frame(self, frame, reader_class, out_type):
        if out_type == self.Type.BUFFER:
@ -144,11 +149,11 @@ class FrameProvider:
            return frame.to_image() if reader_class is VideoReader else Image.open(frame)
        elif out_type == self.Type.NUMPY_ARRAY:
            if reader_class is VideoReader:
-                image = np.array(frame.to_image())
+                image = frame.to_ndarray(format='bgr24')
            else:
                image = np.array(Image.open(frame))
-            if len(image.shape) == 3 and image.shape[2] in {3, 4}:
-                image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
+                if len(image.shape) == 3 and image.shape[2] in {3, 4}:
+                    image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
            return image
        else:
            raise Exception('unsupported output type')
@ -171,7 +176,7 @@ class FrameProvider:

        frame = self._convert_frame(frame, loader.reader_class, out_type)
        if loader.reader_class is VideoReader:
-            return (frame, 'image/png')
+            return (frame, self.VIDEO_FRAME_MIME)
        return (frame, mimetypes.guess_type(frame_name))

    def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER):
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@ -244,6 +244,8 @@ class VideoReader(IMediaReader):
        return pos / stream.duration if stream.duration else None

    def _get_av_container(self):
+        if isinstance(self._source_path[0], io.BytesIO):
+            self._source_path[0].seek(0) # required for re-reading
        return av.open(self._source_path[0])

    def get_preview(self):
--- a/cvat/requirements/base.txt
+++ b/cvat/requirements/base.txt
@ -44,4 +44,4 @@ tensorflow==2.2.1 # Optional requirement of Datumaro
 # archives. Don't use as a python module because it has GPL license.
 patool==1.12
 diskcache==5.0.2
-git+https://github.com/openvinotoolkit/datumaro@v0.1.0
+git+https://github.com/openvinotoolkit/datumaro@v0.1.2