Added ability to correctly upload a video with a rotation record in the metadata (#2218)

* Added ability to correctly upload a video with a rotation record in the metadata

* fix sizes of rotated preview

* fix sizes of rotated frame

* Added tests for uploaded video with rotation record in metadata

* Used OpenCV instead of PIL

* Fixed tests

* Update CHANGELOG

* fix

* Moved function

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
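
In short, the change reads the optional 'rotate' tag from the video stream's metadata (PyAV decodes frames without applying it) and compensates the decoded frames, the preview and the reported frame sizes with an OpenCV rotation. A minimal sketch of the idea, assuming a local file path and the rotate_image() helper introduced further down in this diff (not the exact CVAT code path):

```python
# Sketch only, assuming the CVAT environment so the helper is importable.
import av
from cvat.apps.engine.utils import rotate_image  # helper added by this PR

def iter_upright_frames(path):
    container = av.open(path)
    stream = container.streams.video[0]
    # The 'rotate' tag holds the clockwise rotation (in degrees) a player should
    # apply for correct display; PyAV leaves decoded frames unrotated.
    rotation = int(stream.metadata.get('rotate', 0))
    for frame in container.decode(stream):
        image = frame.to_ndarray(format='bgr24')
        if rotation:
            # Rotating counter-clockwise by (360 - rotation) is the same as
            # rotating clockwise by `rotation`, i.e. the displayed orientation.
            image = rotate_image(image, 360 - rotation)
        yield image
    container.close()
```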
Maria Khrustaleva 5 years ago committed by GitHub
parent fd767f95a1
commit bbfa880d1f

@@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [1.2.0] - Unreleased
### Added
- Removed Z-Order flag from task creation process
- Ability to login into CVAT-UI with token from api/v1/auth/login (<https://github.com/openvinotoolkit/cvat/pull/2234>)
- Added layout grids toggling ('ctrl + alt + Enter')
@@ -29,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to upload prepared meta information along with a video when creating a task (<https://github.com/openvinotoolkit/cvat/pull/2217>)
- Optional chaining plugin for cvat-canvas and cvat-ui (<https://github.com/openvinotoolkit/cvat/pull/2249>)
- MOTS png mask format support (<https://github.com/openvinotoolkit/cvat/pull/2198>)
- Ability to correctly upload a video with a rotation record in the metadata (<https://github.com/openvinotoolkit/cvat/pull/2218>)
### Changed

@@ -14,6 +14,7 @@ import av
import numpy as np
from pyunpack import Archive
from PIL import Image, ImageFile
from cvat.apps.engine.utils import rotate_image
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@@ -228,6 +229,16 @@ class VideoReader(IMediaReader):
for image in packet.decode():
frame_num += 1
if self._has_frame(frame_num - 1):
if packet.stream.metadata.get('rotate'):
old_image = image
image = av.VideoFrame().from_ndarray(
rotate_image(
image.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
image.pts = old_image.pts
yield (image, self._source_path[0], image.pts)
def __iter__(self):
@@ -252,7 +263,15 @@ class VideoReader(IMediaReader):
container = self._get_av_container()
stream = container.streams.video[0]
preview = next(container.decode(stream))
return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
else av.VideoFrame().from_ndarray(
rotate_image(
preview.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
).to_image()
)
def get_image_size(self, i):
image = (next(iter(self)))[0]
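
For context, PyAV exposes the rotation tag through the stream metadata but never applies it, so both decoded frames and the preview come out unrotated; that is why the preview path above needs the same compensation as the decoding loop. An illustrative check, with a hypothetical local path:

```python
import av

container = av.open('test_rotated_90_video.mp4')   # hypothetical file
stream = container.streams.video[0]
print(stream.metadata.get('rotate'))   # e.g. '90'; None when the tag is absent
frame = next(container.decode(stream))
print(frame.width, frame.height)       # stored (unrotated) dimensions
container.close()
```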

@@ -6,6 +6,7 @@ import av
from collections import OrderedDict
import hashlib
import os
from cvat.apps.engine.utils import rotate_image
class WorkWithVideo:
def __init__(self, **kwargs):
@@ -24,7 +25,6 @@ class WorkWithVideo:
video_stream.thread_type = 'AUTO'
return video_stream
class AnalyzeVideo(WorkWithVideo):
def check_type_first_frame(self):
container = self._open_video_container(self.source_path, mode='r')
@@ -76,7 +76,17 @@ class PrepareInfo(WorkWithVideo):
@property
def frame_sizes(self):
container = self._open_video_container(self.source_path, 'r')
frame = next(iter(self.key_frames.values()))
if container.streams.video[0].metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)
def check_key_frame(self, container, video_stream, key_frame):
@@ -150,6 +160,14 @@ class PrepareInfo(WorkWithVideo):
if frame_number < start_chunk_frame_number:
continue
elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
if video_stream.metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
yield frame
elif (frame_number - start_chunk_frame_number) % step:
continue
@@ -177,6 +195,14 @@ class UploadedMeta(PrepareInfo):
container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
for packet in container.demux(video_stream):
for frame in packet.decode():
if video_stream.metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)

@@ -294,6 +294,7 @@ def _create_thread(tid, data):
if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
for media_type, media_files in media.items():
if not media_files:
continue

@@ -1548,6 +1548,16 @@ class TaskDataAPITestCase(APITestCase):
video.write(data.read())
cls._image_sizes[filename] = img_sizes
filename = "test_rotated_90_video.mp4"
path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4')
container = av.open(path, 'r')
for frame in container.decode(video=0):
# pyav ignores rotation record in metadata when decoding frames
img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames
break
container.close()
cls._image_sizes[filename] = img_sizes
filename = os.path.join("videos", "test_video_1.mp4")
path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path))
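
A note on the expected sizes registered above: because PyAV ignores the rotation record, the decoded frame reports the stored (unrotated) dimensions, and once the server compensates a 90-degree tag the width and height swap. A small illustrative check, assuming the rotate_image() helper from this PR is importable and a hypothetical 1920x1080 source:

```python
import numpy as np
from cvat.apps.engine.utils import rotate_image

raw = np.zeros((1080, 1920, 3), dtype=np.uint8)   # hypothetical stored frame, 1920x1080
upright = rotate_image(raw, 360 - 90)             # compensate a 'rotate: 90' tag
assert upright.shape[:2] == (1920, 1080)          # width and height swapped
```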
@@ -2003,7 +2013,7 @@ class TaskDataAPITestCase(APITestCase):
os.path.join(settings.SHARE_ROOT, "videos")
)
task_spec = {
"name": "my video with meta info task #13",
"overlap": 0,
"segment_size": 0,
"labels": [
@@ -2022,6 +2032,47 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
task_spec = {
"name": "my cached video task #14",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_zip_chunks": True
}
image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM)
task_spec = {
"name": "my video task #15",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_cache": True,
"use_zip_chunks": True
}
image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)

@@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT
import ast
import cv2 as cv
from collections import namedtuple
import importlib
import sys
@@ -74,3 +75,16 @@ def av_scan_paths(*paths):
res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if res.returncode:
raise ValidationError(res.stdout)
def rotate_image(image, angle):
height, width = image.shape[:2]
image_center = (width/2, height/2)
matrix = cv.getRotationMatrix2D(image_center, angle, 1.)
abs_cos = abs(matrix[0,0])
abs_sin = abs(matrix[0,1])
bound_w = int(height * abs_sin + width * abs_cos)
bound_h = int(height * abs_cos + width * abs_sin)
matrix[0, 2] += bound_w/2 - image_center[0]
matrix[1, 2] += bound_h/2 - image_center[1]
rotated = cv.warpAffine(image, matrix, (bound_w, bound_h))
return rotated
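
The helper rotates around the image center; the abs_cos/abs_sin terms size the output to the rotated bounding box, and the offsets added to matrix[0, 2] and matrix[1, 2] re-center the image on that larger canvas, so no corners are cropped. A small usage sketch, assuming rotate_image() above is in scope:

```python
import numpy as np

img = np.zeros((100, 200, 3), dtype=np.uint8)            # 200x100 test image
assert rotate_image(img, 90).shape[:2] == (200, 100)     # right angle: sides swap
print(rotate_image(img, 45).shape[:2])                   # ~(212, 212): canvas grows
```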
