diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a5cd4ec..4f4fd382 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.2.0] - Unreleased ### Added - - Removed Z-Order flag from task creation process - Ability to login into CVAT-UI with token from api/v1/auth/login () - Added layout grids toggling ('ctrl + alt + Enter') @@ -29,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ability to upload prepared meta information along with a video when creating a task () - Optional chaining plugin for cvat-canvas and cvat-ui () - MOTS png mask format support () +- Ability to correct upload video with a rotation record in the metadata () ### Changed diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 21430838..b58bf98c 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -14,6 +14,7 @@ import av import numpy as np from pyunpack import Archive from PIL import Image, ImageFile +from cvat.apps.engine.utils import rotate_image # fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web # see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file @@ -228,6 +229,16 @@ class VideoReader(IMediaReader): for image in packet.decode(): frame_num += 1 if self._has_frame(frame_num - 1): + if packet.stream.metadata.get('rotate'): + old_image = image + image = av.VideoFrame().from_ndarray( + rotate_image( + image.to_ndarray(format='bgr24'), + 360 - int(container.streams.video[0].metadata.get('rotate')) + ), + format ='bgr24' + ) + image.pts = old_image.pts yield (image, self._source_path[0], image.pts) def __iter__(self): @@ -252,7 +263,15 @@ class VideoReader(IMediaReader): container = self._get_av_container() stream = container.streams.video[0] preview = 
next(container.decode(stream)) - return self._get_preview(preview.to_image()) + return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \ + else av.VideoFrame.from_ndarray( + rotate_image( + preview.to_ndarray(format='bgr24'), + 360 - int(container.streams.video[0].metadata.get('rotate')) + ), + format='bgr24' + ).to_image() + ) def get_image_size(self, i): image = (next(iter(self)))[0] diff --git a/cvat/apps/engine/prepare.py b/cvat/apps/engine/prepare.py index 9465b680..9ee54630 100644 --- a/cvat/apps/engine/prepare.py +++ b/cvat/apps/engine/prepare.py @@ -6,6 +6,7 @@ import av from collections import OrderedDict import hashlib import os +from cvat.apps.engine.utils import rotate_image class WorkWithVideo: def __init__(self, **kwargs): @@ -24,7 +25,6 @@ class WorkWithVideo: video_stream.thread_type = 'AUTO' return video_stream - class AnalyzeVideo(WorkWithVideo): def check_type_first_frame(self): container = self._open_video_container(self.source_path, mode='r') @@ -76,7 +76,17 @@ class PrepareInfo(WorkWithVideo): @property def frame_sizes(self): + container = self._open_video_container(self.source_path, 'r') frame = next(iter(self.key_frames.values())) + if container.streams.video[0].metadata.get('rotate'): + frame = av.VideoFrame.from_ndarray( + rotate_image( + frame.to_ndarray(format='bgr24'), + 360 - int(container.streams.video[0].metadata.get('rotate')) + ), + format='bgr24' + ) + self._close_video_container(container) return (frame.width, frame.height) def check_key_frame(self, container, video_stream, key_frame): @@ -150,6 +160,14 @@ class PrepareInfo(WorkWithVideo): if frame_number < start_chunk_frame_number: continue elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step): + if video_stream.metadata.get('rotate'): + frame = av.VideoFrame.from_ndarray( + rotate_image( + frame.to_ndarray(format='bgr24'), + 360 - int(container.streams.video[0].metadata.get('rotate')) + ), + 
format ='bgr24' + ) yield frame elif (frame_number - start_chunk_frame_number) % step: continue @@ -177,6 +195,14 @@ class UploadedMeta(PrepareInfo): container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream) for packet in container.demux(video_stream): for frame in packet.decode(): + if video_stream.metadata.get('rotate'): + frame = av.VideoFrame().from_ndarray( + rotate_image( + frame.to_ndarray(format='bgr24'), + 360 - int(container.streams.video[0].metadata.get('rotate')) + ), + format ='bgr24' + ) self._close_video_container(container) return (frame.width, frame.height) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index fad3654f..e724d242 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -294,6 +294,7 @@ def _create_thread(tid, data): if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE: for media_type, media_files in media.items(): + if not media_files: continue diff --git a/cvat/apps/engine/tests/assets/test_rotated_90_video.mp4 b/cvat/apps/engine/tests/assets/test_rotated_90_video.mp4 new file mode 100644 index 00000000..31bd0737 Binary files /dev/null and b/cvat/apps/engine/tests/assets/test_rotated_90_video.mp4 differ diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index b7448fff..583f210b 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -1548,6 +1548,16 @@ class TaskDataAPITestCase(APITestCase): video.write(data.read()) cls._image_sizes[filename] = img_sizes + filename = "test_rotated_90_video.mp4" + path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4') + container = av.open(path, 'r') + for frame in container.decode(video=0): + # pyav ignores rotation record in metadata when decoding frames + img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames + break + container.close() + cls._image_sizes[filename] = img_sizes 
+ filename = os.path.join("videos", "test_video_1.mp4") path = os.path.join(settings.SHARE_ROOT, filename) os.makedirs(os.path.dirname(path)) @@ -2003,7 +2013,7 @@ class TaskDataAPITestCase(APITestCase): os.path.join(settings.SHARE_ROOT, "videos") ) task_spec = { - "name": "my video with meta info task #11", + "name": "my video with meta info task #13", "overlap": 0, "segment_size": 0, "labels": [ @@ -2022,6 +2032,47 @@ class TaskDataAPITestCase(APITestCase): self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO, self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE) + task_spec = { + "name": "my cached video task #14", + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + task_data = { + "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'), + "image_quality": 70, + "use_zip_chunks": True + } + + image_sizes = self._image_sizes['test_rotated_90_video.mp4'] + self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, + self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM) + + task_spec = { + "name": "my video task #15", + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + task_data = { + "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'), + "image_quality": 70, + "use_cache": True, + "use_zip_chunks": True + } + + image_sizes = self._image_sizes['test_rotated_90_video.mp4'] + self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, + self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE) + def test_api_v1_tasks_id_data_admin(self): self._test_api_v1_tasks_id_data(self.admin) diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index e1ad9ef8..854393cf 100644 --- a/cvat/apps/engine/utils.py +++ 
b/cvat/apps/engine/utils.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT import ast +import cv2 as cv from collections import namedtuple import importlib import sys @@ -74,3 +75,16 @@ def av_scan_paths(*paths): res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if res.returncode: raise ValidationError(res.stdout) + +def rotate_image(image, angle): + height, width = image.shape[:2] + image_center = (width/2, height/2) + matrix = cv.getRotationMatrix2D(image_center, angle, 1.0) + abs_cos = abs(matrix[0, 0]) + abs_sin = abs(matrix[0, 1]) + bound_w = int(height * abs_sin + width * abs_cos) + bound_h = int(height * abs_cos + width * abs_sin) + matrix[0, 2] += bound_w/2 - image_center[0] + matrix[1, 2] += bound_h/2 - image_center[1] + rotated = cv.warpAffine(image, matrix, (bound_w, bound_h)) + return rotated