Added ability to correctly upload a video with a rotation record in the metadata (#2218)

* Added ability to correctly upload a video with a rotation record in the metadata

* fix sizes of rotated preview

* fix sizes of rotated frame

* Added tests for uploaded video with rotation record in metadata

* Used OpenCV instead of PIL

* Fixed tests

* Update CHANGELOG

* fix

* Moved function

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
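
In short, the change reads the optional 'rotate' tag from the video stream's metadata (PyAV decodes frames without applying it) and compensates the decoded frames, the preview and the reported frame sizes with an OpenCV rotation. A minimal sketch of the idea, assuming a local file path and the rotate_image() helper introduced further down in this diff (not the exact CVAT code path):

```python
# Sketch only, assuming the CVAT environment so the helper is importable.
import av
from cvat.apps.engine.utils import rotate_image  # helper added by this PR

def iter_upright_frames(path):
    container = av.open(path)
    stream = container.streams.video[0]
    # The 'rotate' tag holds the clockwise rotation (in degrees) a player should
    # apply for correct display; PyAV leaves decoded frames unrotated.
    rotation = int(stream.metadata.get('rotate', 0))
    for frame in container.decode(stream):
        image = frame.to_ndarray(format='bgr24')
        if rotation:
            # Rotating counter-clockwise by (360 - rotation) is the same as
            # rotating clockwise by `rotation`, i.e. the displayed orientation.
            image = rotate_image(image, 360 - rotation)
        yield image
    container.close()
```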
Maria Khrustaleva 5 years ago committed by GitHub
parent fd767f95a1
commit bbfa880d1f

@@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [1.2.0] - Unreleased
### Added
- Removed Z-Order flag from task creation process
- Ability to login into CVAT-UI with token from api/v1/auth/login (<https://github.com/openvinotoolkit/cvat/pull/2234>)
- Added layout grids toggling ('ctrl + alt + Enter')
@@ -29,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to upload prepared meta information along with a video when creating a task (<https://github.com/openvinotoolkit/cvat/pull/2217>)
- Optional chaining plugin for cvat-canvas and cvat-ui (<https://github.com/openvinotoolkit/cvat/pull/2249>)
- MOTS png mask format support (<https://github.com/openvinotoolkit/cvat/pull/2198>)
- Ability to correctly upload a video with a rotation record in the metadata (<https://github.com/openvinotoolkit/cvat/pull/2218>)
### Changed

@@ -14,6 +14,7 @@ import av
import numpy as np
from pyunpack import Archive
from PIL import Image, ImageFile
from cvat.apps.engine.utils import rotate_image
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@@ -228,6 +229,16 @@ class VideoReader(IMediaReader):
for image in packet.decode():
frame_num += 1
if self._has_frame(frame_num - 1):
if packet.stream.metadata.get('rotate'):
old_image = image
image = av.VideoFrame().from_ndarray(
rotate_image(
image.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
image.pts = old_image.pts
yield (image, self._source_path[0], image.pts)
def __iter__(self):
@@ -252,7 +263,15 @@ class VideoReader(IMediaReader):
container = self._get_av_container()
stream = container.streams.video[0]
preview = next(container.decode(stream))
return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
else av.VideoFrame().from_ndarray(
rotate_image(
preview.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
).to_image()
)
def get_image_size(self, i):
image = (next(iter(self)))[0]
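
For context, PyAV exposes the rotation tag through the stream metadata but never applies it, so both decoded frames and the preview come out unrotated; that is why the preview path above needs the same compensation as the decoding loop. An illustrative check, with a hypothetical local path:

```python
import av

container = av.open('test_rotated_90_video.mp4')   # hypothetical file
stream = container.streams.video[0]
print(stream.metadata.get('rotate'))   # e.g. '90'; None when the tag is absent
frame = next(container.decode(stream))
print(frame.width, frame.height)       # stored (unrotated) dimensions
container.close()
```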

@@ -6,6 +6,7 @@ import av
from collections import OrderedDict
import hashlib
import os
from cvat.apps.engine.utils import rotate_image
class WorkWithVideo:
def __init__(self, **kwargs):
@@ -24,7 +25,6 @@ class WorkWithVideo:
video_stream.thread_type = 'AUTO'
return video_stream
class AnalyzeVideo(WorkWithVideo):
def check_type_first_frame(self):
container = self._open_video_container(self.source_path, mode='r')
@@ -76,7 +76,17 @@ class PrepareInfo(WorkWithVideo):
@property
def frame_sizes(self):
container = self._open_video_container(self.source_path, 'r')
frame = next(iter(self.key_frames.values()))
if container.streams.video[0].metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)
def check_key_frame(self, container, video_stream, key_frame):
@@ -150,6 +160,14 @@ class PrepareInfo(WorkWithVideo):
if frame_number < start_chunk_frame_number:
continue
elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
if video_stream.metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
yield frame
elif (frame_number - start_chunk_frame_number) % step:
continue
@@ -177,6 +195,14 @@ class UploadedMeta(PrepareInfo):
container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
for packet in container.demux(video_stream):
for frame in packet.decode():
if video_stream.metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)

@@ -294,6 +294,7 @@ def _create_thread(tid, data):
if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
for media_type, media_files in media.items():
if not media_files:
continue

@@ -1548,6 +1548,16 @@ class TaskDataAPITestCase(APITestCase):
video.write(data.read())
cls._image_sizes[filename] = img_sizes
filename = "test_rotated_90_video.mp4"
path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4')
container = av.open(path, 'r')
for frame in container.decode(video=0):
# pyav ignores rotation record in metadata when decoding frames
img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames
break
container.close()
cls._image_sizes[filename] = img_sizes
filename = os.path.join("videos", "test_video_1.mp4")
path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path))
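
A note on the expected sizes registered above: because PyAV ignores the rotation record, the decoded frame reports the stored (unrotated) dimensions, and once the server compensates a 90-degree tag the width and height swap. A small illustrative check, assuming the rotate_image() helper from this PR is importable and a hypothetical 1920x1080 source:

```python
import numpy as np
from cvat.apps.engine.utils import rotate_image

raw = np.zeros((1080, 1920, 3), dtype=np.uint8)   # hypothetical stored frame, 1920x1080
upright = rotate_image(raw, 360 - 90)             # compensate a 'rotate: 90' tag
assert upright.shape[:2] == (1920, 1080)          # width and height swapped
```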
@@ -2003,7 +2013,7 @@ class TaskDataAPITestCase(APITestCase):
os.path.join(settings.SHARE_ROOT, "videos")
)
task_spec = {
"name": "my video with meta info task #13",
"overlap": 0,
"segment_size": 0,
"labels": [
@@ -2022,6 +2032,47 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
task_spec = {
"name": "my cached video task #14",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_zip_chunks": True
}
image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM)
task_spec = {
"name": "my video task #15",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_cache": True,
"use_zip_chunks": True
}
image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)

@@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT
import ast
import cv2 as cv
from collections import namedtuple
import importlib
import sys
@@ -74,3 +75,16 @@ def av_scan_paths(*paths):
res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if res.returncode:
raise ValidationError(res.stdout)
def rotate_image(image, angle):
height, width = image.shape[:2]
image_center = (width/2, height/2)
matrix = cv.getRotationMatrix2D(image_center, angle, 1.)
abs_cos = abs(matrix[0,0])
abs_sin = abs(matrix[0,1])
bound_w = int(height * abs_sin + width * abs_cos)
bound_h = int(height * abs_cos + width * abs_sin)
matrix[0, 2] += bound_w/2 - image_center[0]
matrix[1, 2] += bound_h/2 - image_center[1]
rotated = cv.warpAffine(image, matrix, (bound_w, bound_h))
return rotated
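
The helper rotates around the image center; the abs_cos/abs_sin terms size the output to the rotated bounding box, and the offsets added to matrix[0, 2] and matrix[1, 2] re-center the image on that larger canvas, so no corners are cropped. A small usage sketch, assuming rotate_image() above is in scope:

```python
import numpy as np

img = np.zeros((100, 200, 3), dtype=np.uint8)            # 200x100 test image
assert rotate_image(img, 90).shape[:2] == (200, 100)     # right angle: sides swap
print(rotate_image(img, 45).shape[:2])                   # ~(212, 212): canvas grows
```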
