Manifest (#2763)
* Added support for manifest file
* Added data migration
* Updated tests
* Update CHANGELOG
* Update manifest documentation
* Fix case with 3d data

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>

parent e41c301251
commit 6c38ad0701
@@ -0,0 +1,83 @@
# Generated by Django 3.1.1 on 2021-02-20 08:36

import glob
import os
from re import search

from django.conf import settings
from django.db import migrations

from cvat.apps.engine.models import (DimensionType, StorageChoice,
    StorageMethodChoice)
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager

def migrate_data(apps, schema_editor):
    Data = apps.get_model("engine", "Data")
    query_set = Data.objects.filter(storage_method=StorageMethodChoice.CACHE)
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
                os.remove(os.path.join(upload_dir, 'meta_info.txt'))
            else:
                for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
                    os.remove(path)
            # this is needed in the case of a long data migration
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                continue
            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                media_file = os.path.join(data_dir, db_data.video.path)
                manifest = VideoManifestManager(manifest_path=upload_dir)
                meta_info = manifest.prepare_meta(media_file=media_file)
                manifest.create(meta_info)
                manifest.init_index()
            else:
                manifest = ImageManifestManager(manifest_path=upload_dir)
                sources = []
                if db_data.storage == StorageChoice.LOCAL:
                    for (root, _, files) in os.walk(data_dir):
                        sources.extend([os.path.join(root, f) for f in files])
                    sources.sort()
                # when the share is used, the entire data structure cannot be restored explicitly
                else:
                    sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')]
                if any(list(filter(lambda x: x.dimension == DimensionType.DIM_3D, db_data.tasks.all()))):
                    content = []
                    for source in sources:
                        name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                        content.append({
                            'name': name,
                            'extension': ext
                        })
                else:
                    meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
                    content = meta_info.content

                if db_data.storage == StorageChoice.SHARE:
                    def _get_frame_step(str_):
                        match = search(r"step\s*=\s*([1-9]\d*)", str_)
                        return int(match.group(1)) if match else 1
                    step = _get_frame_step(db_data.frame_filter)
                    start = db_data.start_frame
                    stop = db_data.stop_frame + 1
                    images_range = range(start, stop, step)
                    result_content = []
                    for i in range(stop):
                        item = content.pop(0) if i in images_range else dict()
                        result_content.append(item)
                    content = result_content
                manifest.create(content)
                manifest.init_index()
        except Exception as ex:
            print(str(ex))

class Migration(migrations.Migration):

    dependencies = [
        ('engine', '0037_task_subset'),
    ]

    operations = [
        migrations.RunPython(migrate_data)
    ]
@@ -1,277 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import av
from collections import OrderedDict
import hashlib
import os
from cvat.apps.engine.utils import rotate_image

class WorkWithVideo:
    def __init__(self, **kwargs):
        if not kwargs.get('source_path'):
            raise Exception('No sourse path')
        self.source_path = kwargs.get('source_path')

    @staticmethod
    def _open_video_container(sourse_path, mode, options=None):
        return av.open(sourse_path, mode=mode, options=options)

    @staticmethod
    def _close_video_container(container):
        container.close()

    @staticmethod
    def _get_video_stream(container):
        video_stream = next(stream for stream in container.streams if stream.type == 'video')
        video_stream.thread_type = 'AUTO'
        return video_stream

    @staticmethod
    def _get_frame_size(container):
        video_stream = WorkWithVideo._get_video_stream(container)
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if video_stream.metadata.get('rotate'):
                    frame = av.VideoFrame().from_ndarray(
                        rotate_image(
                            frame.to_ndarray(format='bgr24'),
                            360 - int(container.streams.video[0].metadata.get('rotate')),
                        ),
                        format='bgr24',
                    )
                return frame.width, frame.height

class AnalyzeVideo(WorkWithVideo):
    def check_type_first_frame(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)

        for packet in container.demux(video_stream):
            for frame in packet.decode():
                self._close_video_container(container)
                assert frame.pict_type.name == 'I', 'First frame is not key frame'
                return

    def check_video_timestamps_sequences(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)

        frame_pts = -1
        frame_dts = -1
        for packet in container.demux(video_stream):
            for frame in packet.decode():

                if None not in [frame.pts, frame_pts] and frame.pts <= frame_pts:
                    self._close_video_container(container)
                    raise Exception('Invalid pts sequences')

                if None not in [frame.dts, frame_dts] and frame.dts <= frame_dts:
                    self._close_video_container(container)
                    raise Exception('Invalid dts sequences')

                frame_pts, frame_dts = frame.pts, frame.dts
        self._close_video_container(container)

def md5_hash(frame):
    return hashlib.md5(frame.to_image().tobytes()).hexdigest()

class PrepareInfo(WorkWithVideo):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if not kwargs.get('meta_path'):
            raise Exception('No meta path')

        self.meta_path = kwargs.get('meta_path')
        self.key_frames = {}
        self.frames = 0

        container = self._open_video_container(self.source_path, 'r')
        self.width, self.height = self._get_frame_size(container)
        self._close_video_container(container)

    def get_task_size(self):
        return self.frames

    @property
    def frame_sizes(self):
        return (self.width, self.height)

    def check_key_frame(self, container, video_stream, key_frame):
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if md5_hash(frame) != key_frame[1]['md5'] or frame.pts != key_frame[1]['pts']:
                    self.key_frames.pop(key_frame[0])
                return

    def check_seek_key_frames(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)

        key_frames_copy = self.key_frames.copy()

        for key_frame in key_frames_copy.items():
            container.seek(offset=key_frame[1]['pts'], stream=video_stream)
            self.check_key_frame(container, video_stream, key_frame)

    def check_frames_ratio(self, chunk_size):
        return (len(self.key_frames) and (self.frames // len(self.key_frames)) <= 2 * chunk_size)

    def save_key_frames(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)
        frame_number = 0

        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if frame.key_frame:
                    self.key_frames[frame_number] = {
                        'pts': frame.pts,
                        'md5': md5_hash(frame),
                    }
                frame_number += 1

        self.frames = frame_number
        self._close_video_container(container)

    def save_meta_info(self):
        with open(self.meta_path, 'w') as meta_file:
            for index, frame in self.key_frames.items():
                meta_file.write('{} {}\n'.format(index, frame['pts']))

    def get_nearest_left_key_frame(self, start_chunk_frame_number):
        start_decode_frame_number = 0
        start_decode_timestamp = 0

        with open(self.meta_path, 'r') as file:
            for line in file:
                frame_number, timestamp = line.strip().split(' ')

                if int(frame_number) <= start_chunk_frame_number:
                    start_decode_frame_number = frame_number
                    start_decode_timestamp = timestamp
                else:
                    break

        return int(start_decode_frame_number), int(start_decode_timestamp)

    def decode_needed_frames(self, chunk_number, db_data):
        step = db_data.get_frame_step()
        start_chunk_frame_number = db_data.start_frame + chunk_number * db_data.chunk_size * step
        end_chunk_frame_number = min(start_chunk_frame_number + (db_data.chunk_size - 1) * step + 1, db_data.stop_frame + 1)
        start_decode_frame_number, start_decode_timestamp = self.get_nearest_left_key_frame(start_chunk_frame_number)
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)
        container.seek(offset=start_decode_timestamp, stream=video_stream)

        frame_number = start_decode_frame_number - 1
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                frame_number += 1
                if frame_number < start_chunk_frame_number:
                    continue
                elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
                    if video_stream.metadata.get('rotate'):
                        frame = av.VideoFrame().from_ndarray(
                            rotate_image(
                                frame.to_ndarray(format='bgr24'),
                                360 - int(container.streams.video[0].metadata.get('rotate'))
                            ),
                            format='bgr24'
                        )
                    yield frame
                elif (frame_number - start_chunk_frame_number) % step:
                    continue
                else:
                    self._close_video_container(container)
                    return

        self._close_video_container(container)

class UploadedMeta(PrepareInfo):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        uploaded_meta = kwargs.get('uploaded_meta')
        assert uploaded_meta is not None, 'No uploaded meta path'

        with open(uploaded_meta, 'r') as meta_file:
            lines = meta_file.read().strip().split('\n')
            self.frames = int(lines.pop())

            key_frames = {int(line.split()[0]): int(line.split()[1]) for line in lines}
            self.key_frames = OrderedDict(sorted(key_frames.items(), key=lambda x: x[0]))

    @property
    def frame_sizes(self):
        container = self._open_video_container(self.source_path, 'r')
        video_stream = self._get_video_stream(container)
        container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if video_stream.metadata.get('rotate'):
                    frame = av.VideoFrame().from_ndarray(
                        rotate_image(
                            frame.to_ndarray(format='bgr24'),
                            360 - int(container.streams.video[0].metadata.get('rotate'))
                        ),
                        format='bgr24'
                    )
                self._close_video_container(container)
                return (frame.width, frame.height)

    def save_meta_info(self):
        with open(self.meta_path, 'w') as meta_file:
            for index, pts in self.key_frames.items():
                meta_file.write('{} {}\n'.format(index, pts))

    def check_key_frame(self, container, video_stream, key_frame):
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                assert frame.pts == key_frame[1], "Uploaded meta information does not match the video"
                return

    def check_seek_key_frames(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)

        for key_frame in self.key_frames.items():
            container.seek(offset=key_frame[1], stream=video_stream)
            self.check_key_frame(container, video_stream, key_frame)

        self._close_video_container(container)

    def check_frames_numbers(self):
        container = self._open_video_container(self.source_path, mode='r')
        video_stream = self._get_video_stream(container)
        # not all videos contain information about numbers of frames
        if video_stream.frames:
            self._close_video_container(container)
            assert video_stream.frames == self.frames, "Uploaded meta information does not match the video"
            return
        self._close_video_container(container)

def prepare_meta(media_file, upload_dir=None, meta_dir=None, chunk_size=None):
    paths = {
        'source_path': os.path.join(upload_dir, media_file) if upload_dir else media_file,
        'meta_path': os.path.join(meta_dir, 'meta_info.txt') if meta_dir else os.path.join(upload_dir, 'meta_info.txt'),
    }
    analyzer = AnalyzeVideo(source_path=paths.get('source_path'))
    analyzer.check_type_first_frame()
    analyzer.check_video_timestamps_sequences()

    meta_info = PrepareInfo(source_path=paths.get('source_path'),
                            meta_path=paths.get('meta_path'))
    meta_info.save_key_frames()
    meta_info.check_seek_key_frames()
    meta_info.save_meta_info()
    smooth_decoding = meta_info.check_frames_ratio(chunk_size) if chunk_size else None
    return (meta_info, smooth_decoding)

def prepare_meta_for_upload(func, *args):
    meta_info, smooth_decoding = func(*args)
    with open(meta_info.meta_path, 'a') as meta_file:
        meta_file.write(str(meta_info.get_task_size()))
    return smooth_decoding
@@ -0,0 +1,118 @@
## A simple command line tool to prepare a dataset manifest file

### Steps before use

When used separately from the Computer Vision Annotation Tool (CVAT), the required dependencies must be installed.

#### Ubuntu 20.04

Install dependencies:

```bash
# General
sudo apt-get update && sudo apt-get --no-install-recommends install -y \
    python3-dev python3-pip python3-venv pkg-config
```

```bash
# Library components
sudo apt-get install --no-install-recommends -y \
    libavformat-dev libavcodec-dev libavdevice-dev \
    libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
```

Create an environment and install the necessary python modules:

```bash
python3 -m venv .env
. .env/bin/activate
pip install -U pip
pip install -r requirements.txt
```

### Usage

```bash
usage: python create.py [-h] [--force] [--output-dir .] source

positional arguments:
  source                Source paths

optional arguments:
  -h, --help            show this help message and exit
  --force               Use this flag to prepare the manifest file for video data if by default the video
                        does not meet the requirements and a manifest file is not prepared
  --output-dir OUTPUT_DIR
                        Directory where the manifest file will be saved
```

### Alternative way to use with openvino/cvat_server

```bash
docker run -it --entrypoint python3 -v /path/to/host/data/:/path/inside/container/:rw openvino/cvat_server \
    utils/dataset_manifest/create.py --output-dir /path/to/manifest/directory/ /path/to/data/
```

### Usage examples

Create a dataset manifest in the current directory for a video that contains enough key frames:

```bash
python create.py ~/Documents/video.mp4
```

Create a dataset manifest for a video that does not contain enough key frames:

```bash
python create.py --force --output-dir ~/Documents ~/Documents/video.mp4
```

Create a dataset manifest for a directory of images:

```bash
python create.py --output-dir ~/Documents ~/Documents/images/
```

Create a dataset manifest from a pattern (`*`, `?`, `[]` may be used):

```bash
python create.py --output-dir ~/Documents "/home/${USER}/Documents/**/image*.jpeg"
```

Create a dataset manifest with `openvino/cvat_server`:

```bash
docker run -it --entrypoint python3 -v ~/Documents/data/:${HOME}/manifest/:rw openvino/cvat_server \
    utils/dataset_manifest/create.py --output-dir ~/manifest/ ~/manifest/images/
```

### Examples of generated `manifest.jsonl` files

A manifest file contains some self-explanatory fields and some more specific ones:

- `pts` - the time at which the frame should be shown to the user
- `checksum` - the `md5` hash sum of the specific image/frame

#### For a video

```json
{"version":"1.0"}
{"type":"video"}
{"properties":{"name":"video.mp4","resolution":[1280,720],"length":778}}
{"number":0,"pts":0,"checksum":"17bb40d76887b56fe8213c6fded3d540"}
{"number":135,"pts":486000,"checksum":"9da9b4d42c1206d71bf17a7070a05847"}
{"number":270,"pts":972000,"checksum":"a1c3a61814f9b58b00a795fa18bb6d3e"}
{"number":405,"pts":1458000,"checksum":"18c0803b3cc1aa62ac75b112439d2b62"}
{"number":540,"pts":1944000,"checksum":"4551ecea0f80e95a6c32c32e70cac59e"}
{"number":675,"pts":2430000,"checksum":"0e72faf67e5218c70b506445ac91cdd7"}
```

#### For a dataset with images

```json
{"version":"1.0"}
{"type":"images"}
{"name":"image1","extension":".jpg","width":720,"height":405,"checksum":"548918ec4b56132a5cff1d4acabe9947"}
{"name":"image2","extension":".jpg","width":183,"height":275,"checksum":"4b4eefd03cc6a45c1c068b98477fb639"}
{"name":"image3","extension":".jpg","width":301,"height":167,"checksum":"0e454a6f4a13d56c82890c98be063663"}
```
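
Since the manifest is a JSON Lines file, it can also be inspected without any of the utility code. Below is a minimal reading sketch, not part of this utility, which assumes a `manifest.jsonl` in the current directory and separates the header records from the per-frame/per-image records using only the standard library:

```python
import json

# Collect the header records ({"version": ...}, {"type": ...} and, for a video,
# {"properties": ...}) separately from the per-frame or per-image records.
header, items = {}, []
with open('manifest.jsonl') as manifest_file:  # hypothetical path for this example
    for line in manifest_file:
        if not line.strip():
            continue
        record = json.loads(line)
        if 'number' in record or 'name' in record:
            items.append(record)
        else:
            header.update(record)

print(header.get('type'), len(items))  # e.g. "video 6" or "images 3"
```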
@@ -0,0 +1,4 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
from .core import VideoManifestManager, ImageManifestManager
@@ -0,0 +1,446 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

import av
import json
import os
from abc import ABC, abstractmethod
from collections import OrderedDict
from contextlib import closing
from PIL import Image
from .utils import md5_hash, rotate_image

class VideoStreamReader:
    def __init__(self, source_path):
        self.source_path = source_path
        self._key_frames = OrderedDict()
        self.frames = 0

        with closing(av.open(self.source_path, mode='r')) as container:
            self.width, self.height = self._get_frame_size(container)

    @staticmethod
    def _get_video_stream(container):
        video_stream = next(stream for stream in container.streams if stream.type == 'video')
        video_stream.thread_type = 'AUTO'
        return video_stream

    @staticmethod
    def _get_frame_size(container):
        video_stream = VideoStreamReader._get_video_stream(container)
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if video_stream.metadata.get('rotate'):
                    frame = av.VideoFrame().from_ndarray(
                        rotate_image(
                            frame.to_ndarray(format='bgr24'),
                            360 - int(container.streams.video[0].metadata.get('rotate')),
                        ),
                        format='bgr24',
                    )
                return frame.width, frame.height

    def check_type_first_frame(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)

            for packet in container.demux(video_stream):
                for frame in packet.decode():
                    if not frame.pict_type.name == 'I':
                        raise Exception('First frame is not key frame')
                    return

    def check_video_timestamps_sequences(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)

            frame_pts = -1
            frame_dts = -1
            for packet in container.demux(video_stream):
                for frame in packet.decode():

                    if None not in {frame.pts, frame_pts} and frame.pts <= frame_pts:
                        raise Exception('Invalid pts sequences')

                    if None not in {frame.dts, frame_dts} and frame.dts <= frame_dts:
                        raise Exception('Invalid dts sequences')

                    frame_pts, frame_dts = frame.pts, frame.dts

    def rough_estimate_frames_ratio(self, upper_bound):
        analyzed_frames_number, key_frames_number = 0, 0
        _processing_end = False

        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)
            for packet in container.demux(video_stream):
                for frame in packet.decode():
                    if frame.key_frame:
                        key_frames_number += 1
                    analyzed_frames_number += 1
                    if upper_bound == analyzed_frames_number:
                        _processing_end = True
                        break
                if _processing_end:
                    break
        # In our case there are no videos with a non-key first frame, so at least 1 key frame is guaranteed
        return analyzed_frames_number // key_frames_number

    def validate_frames_ratio(self, chunk_size):
        upper_bound = 3 * chunk_size
        ratio = self.rough_estimate_frames_ratio(upper_bound + 1)
        assert ratio < upper_bound, 'Too few keyframes'

    def get_size(self):
        return self.frames

    @property
    def frame_sizes(self):
        return (self.width, self.height)

    def validate_key_frame(self, container, video_stream, key_frame):
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                if md5_hash(frame) != key_frame[1]['md5'] or frame.pts != key_frame[1]['pts']:
                    self._key_frames.pop(key_frame[0])
                return

    def validate_seek_key_frames(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)

            key_frames_copy = self._key_frames.copy()

            for key_frame in key_frames_copy.items():
                container.seek(offset=key_frame[1]['pts'], stream=video_stream)
                self.validate_key_frame(container, video_stream, key_frame)

    def save_key_frames(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)
            frame_number = 0

            for packet in container.demux(video_stream):
                for frame in packet.decode():
                    if frame.key_frame:
                        self._key_frames[frame_number] = {
                            'pts': frame.pts,
                            'md5': md5_hash(frame),
                        }
                    frame_number += 1
            self.frames = frame_number

    @property
    def key_frames(self):
        return self._key_frames

    def __len__(self):
        return len(self._key_frames)

    # TODO: needs to be changed in the future
    def __iter__(self):
        for idx, key_frame in self._key_frames.items():
            yield (idx, key_frame['pts'], key_frame['md5'])


class DatasetImagesReader:
    def __init__(self, sources, is_sorted=True, use_image_hash=False, *args, **kwargs):
        self._sources = sources if is_sorted else sorted(sources)
        self._content = []
        self._data_dir = kwargs.get('data_dir', None)
        self._use_image_hash = use_image_hash

    def __iter__(self):
        for image in self._sources:
            img = Image.open(image, mode='r')
            img_name = os.path.relpath(image, self._data_dir) if self._data_dir \
                else os.path.basename(image)
            name, extension = os.path.splitext(img_name)
            image_properties = {
                'name': name,
                'extension': extension,
                'width': img.width,
                'height': img.height,
            }
            if self._use_image_hash:
                image_properties['checksum'] = md5_hash(img)
            yield image_properties

    def create(self):
        for item in self:
            self._content.append(item)

    @property
    def content(self):
        return self._content

class _Manifest:
    FILE_NAME = 'manifest.jsonl'
    VERSION = '1.0'

    def __init__(self, path, is_created=False):
        assert path, 'A path to the manifest file was not found'
        self._path = os.path.join(path, self.FILE_NAME) if os.path.isdir(path) else path
        self._is_created = is_created

    @property
    def path(self):
        return self._path

    @property
    def is_created(self):
        return self._is_created

    @is_created.setter
    def is_created(self, value):
        assert isinstance(value, bool)
        self._is_created = value

# Needed for faster iteration over the manifest file; it is generated when working inside CVAT
# and is not generated when a manifest is created manually
class _Index:
    FILE_NAME = 'index.json'

    def __init__(self, path):
        assert path and os.path.isdir(path), 'No index directory path'
        self._path = os.path.join(path, self.FILE_NAME)
        self._index = {}

    @property
    def path(self):
        return self._path

    def dump(self):
        with open(self._path, 'w') as index_file:
            json.dump(self._index, index_file, separators=(',', ':'))

    def load(self):
        with open(self._path, 'r') as index_file:
            self._index = json.load(index_file,
                object_hook=lambda d: {int(k): v for k, v in d.items()})

    def create(self, manifest, skip):
        assert os.path.exists(manifest), 'The manifest file does not exist, the index cannot be created'
        with open(manifest, 'r+') as manifest_file:
            while skip:
                manifest_file.readline()
                skip -= 1
            image_number = 0
            position = manifest_file.tell()
            line = manifest_file.readline()
            while line:
                if line.strip():
                    self._index[image_number] = position
                    image_number += 1
                position = manifest_file.tell()
                line = manifest_file.readline()

    def partial_update(self, manifest, number):
        assert os.path.exists(manifest), 'The manifest file does not exist, the index cannot be updated'
        with open(manifest, 'r+') as manifest_file:
            manifest_file.seek(self._index[number])
            line = manifest_file.readline()
            while line:
                if line.strip():
                    self._index[number] = manifest_file.tell()
                    number += 1
                line = manifest_file.readline()

    def __getitem__(self, number):
        assert 0 <= number < len(self), \
            'An invalid index number: {}\nMax: {}'.format(number, len(self))
        return self._index[number]

    def __len__(self):
        return len(self._index)

class _ManifestManager(ABC):
    BASE_INFORMATION = {
        'version': 1,
        'type': 2,
    }
    def __init__(self, path, *args, **kwargs):
        self._manifest = _Manifest(path)

    def _parse_line(self, line):
        """ Getting a random line from the manifest file """
        with open(self._manifest.path, 'r') as manifest_file:
            if isinstance(line, str):
                assert line in self.BASE_INFORMATION.keys(), \
                    'An attempt to get non-existent information from the manifest'
                for _ in range(self.BASE_INFORMATION[line]):
                    fline = manifest_file.readline()
                return json.loads(fline)[line]
            else:
                assert self._index, 'No prepared index'
                offset = self._index[line]
                manifest_file.seek(offset)
                properties = manifest_file.readline()
                return json.loads(properties)

    def init_index(self):
        self._index = _Index(os.path.dirname(self._manifest.path))
        if os.path.exists(self._index.path):
            self._index.load()
        else:
            self._index.create(self._manifest.path, 3 if self._manifest.TYPE == 'video' else 2)
            self._index.dump()

    @abstractmethod
    def create(self, content, **kwargs):
        pass

    @abstractmethod
    def partial_update(self, number, properties):
        pass

    def __iter__(self):
        with open(self._manifest.path, 'r') as manifest_file:
            manifest_file.seek(self._index[0])
            image_number = 0
            line = manifest_file.readline()
            while line:
                if not line.strip():
                    continue
                yield (image_number, json.loads(line))
                image_number += 1
                line = manifest_file.readline()

    @property
    def manifest(self):
        return self._manifest

    def __len__(self):
        if hasattr(self, '_index'):
            return len(self._index)
        else:
            return None

    def __getitem__(self, item):
        return self._parse_line(item)

    @property
    def index(self):
        return self._index

class VideoManifestManager(_ManifestManager):
    def __init__(self, manifest_path, *args, **kwargs):
        super().__init__(manifest_path)
        setattr(self._manifest, 'TYPE', 'video')
        self.BASE_INFORMATION['properties'] = 3

    def create(self, content, **kwargs):
        """ Creating and saving a manifest file """
        with open(self._manifest.path, 'w') as manifest_file:
            base_info = {
                'version': self._manifest.VERSION,
                'type': self._manifest.TYPE,
                'properties': {
                    'name': os.path.basename(content.source_path),
                    'resolution': content.frame_sizes,
                    'length': content.get_size(),
                },
            }
            for key, value in base_info.items():
                json_item = json.dumps({key: value}, separators=(',', ':'))
                manifest_file.write(f'{json_item}\n')

            for item in content:
                json_item = json.dumps({
                    'number': item[0],
                    'pts': item[1],
                    'checksum': item[2]
                }, separators=(',', ':'))
                manifest_file.write(f"{json_item}\n")
        self._manifest.is_created = True

    def partial_update(self, number, properties):
        pass

    @staticmethod
    def prepare_meta(media_file, upload_dir=None, chunk_size=36, force=False):
        source_path = os.path.join(upload_dir, media_file) if upload_dir else media_file
        meta_info = VideoStreamReader(source_path=source_path)
        meta_info.check_type_first_frame()
        try:
            meta_info.validate_frames_ratio(chunk_size)
        except AssertionError:
            if not force:
                raise
        meta_info.check_video_timestamps_sequences()
        meta_info.save_key_frames()
        meta_info.validate_seek_key_frames()
        return meta_info

# TODO: add generic manifest structure file validation
class ManifestValidator:
    def validate_base_info(self):
        with open(self._manifest.path, 'r') as manifest_file:
            assert self._manifest.VERSION == json.loads(manifest_file.readline())['version']
            assert self._manifest.TYPE == json.loads(manifest_file.readline())['type']

class VideoManifestValidator(VideoManifestManager):
    def __init__(self, **kwargs):
        self.source_path = kwargs.pop('source_path')
        super().__init__(**kwargs)

    def validate_key_frame(self, container, video_stream, key_frame):
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                assert frame.pts == key_frame['pts'], "The uploaded manifest does not match the video"
                return

    def validate_seek_key_frames(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)
            last_key_frame = None

            for _, key_frame in self:
                # check that the key frame sequence is sorted
                if last_key_frame and last_key_frame['number'] >= key_frame['number']:
                    raise AssertionError('Invalid saved key frames sequence in manifest file')
                container.seek(offset=key_frame['pts'], stream=video_stream)
                self.validate_key_frame(container, video_stream, key_frame)
                last_key_frame = key_frame

    def validate_frame_numbers(self):
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = self._get_video_stream(container)
            # not all videos contain information about numbers of frames
            frames = video_stream.frames
            if frames:
                assert frames == self['properties']['length'], "The uploaded manifest does not match the video"
                return

class ImageManifestManager(_ManifestManager):
    def __init__(self, manifest_path):
        super().__init__(manifest_path)
        setattr(self._manifest, 'TYPE', 'images')

    def create(self, content, **kwargs):
        """ Creating and saving a manifest file """
        with open(self._manifest.path, 'w') as manifest_file:
            base_info = {
                'version': self._manifest.VERSION,
                'type': self._manifest.TYPE,
            }
            for key, value in base_info.items():
                json_item = json.dumps({key: value}, separators=(',', ':'))
                manifest_file.write(f'{json_item}\n')

            for item in content:
                json_item = json.dumps({
                    key: value for key, value in item.items()
                }, separators=(',', ':'))
                manifest_file.write(f"{json_item}\n")
        self._manifest.is_created = True

    def partial_update(self, number, properties):
        pass

    @staticmethod
    def prepare_meta(sources, **kwargs):
        meta_info = DatasetImagesReader(sources=sources, **kwargs)
        meta_info.create()
        return meta_info
@@ -0,0 +1,91 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import mimetypes
import os
import sys
from glob import glob

def _define_data_type(media):
    media_type, _ = mimetypes.guess_type(media)
    if media_type:
        return media_type.split('/')[0]

def _is_video(media_file):
    return _define_data_type(media_file) == 'video'

def _is_image(media_file):
    return _define_data_type(media_file) == 'image'

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--force', action='store_true',
        help='Use this flag to prepare the manifest file for video data '
            'if by default the video does not meet the requirements and a manifest file is not prepared')
    parser.add_argument('--output-dir', type=str, help='Directory where the manifest file will be saved',
        default=os.getcwd())
    parser.add_argument('source', type=str, help='Source paths')
    return parser.parse_args()

def main():
    args = get_args()

    manifest_directory = os.path.abspath(args.output_dir)
    os.makedirs(manifest_directory, exist_ok=True)
    source = os.path.abspath(args.source)

    sources = []
    if not os.path.isfile(source): # directory/pattern with images
        data_dir = None
        if os.path.isdir(source):
            data_dir = source
            for root, _, files in os.walk(source):
                sources.extend([os.path.join(root, f) for f in files if _is_image(f)])
        else:
            items = source.lstrip('/').split('/')
            position = 0
            try:
                for item in items:
                    if set(item) & {'*', '?', '[', ']'}:
                        break
                    position += 1
                else:
                    raise Exception('Wrong positional argument')
                assert position != 0, 'Wrong pattern: there must be a common root'
                data_dir = source.split(items[position])[0]
            except Exception as ex:
                sys.exit(str(ex))
            sources = list(filter(_is_image, glob(source, recursive=True)))
        try:
            assert len(sources), 'No images were found'
            manifest = ImageManifestManager(manifest_path=manifest_directory)
            meta_info = manifest.prepare_meta(sources=sources, is_sorted=False,
                use_image_hash=True, data_dir=data_dir)
            manifest.create(meta_info)
        except Exception as ex:
            sys.exit(str(ex))
    else: # video
        try:
            assert _is_video(source), 'You can specify a video path or a directory/pattern with images'
            manifest = VideoManifestManager(manifest_path=manifest_directory)
            try:
                meta_info = manifest.prepare_meta(media_file=source, force=args.force)
            except AssertionError as ex:
                if str(ex) == 'Too few keyframes':
                    msg = 'NOTE: prepared manifest file contains too few key frames for smooth decoding.\n' \
                        'Use --force flag if you still want to prepare a manifest file.'
                    print(msg)
                    sys.exit(2)
                else:
                    raise
            manifest.create(meta_info)
        except Exception as ex:
            sys.exit(str(ex))

    print('The manifest file has been prepared')

if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(base_dir)
    from dataset_manifest.core import VideoManifestManager, ImageManifestManager
    main()
@@ -0,0 +1,3 @@
av==8.0.2 --no-binary=av
opencv-python-headless==4.4.0.42
Pillow==7.2.0
@@ -0,0 +1,24 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import hashlib
import cv2 as cv
from av import VideoFrame

def rotate_image(image, angle):
    height, width = image.shape[:2]
    image_center = (width/2, height/2)
    matrix = cv.getRotationMatrix2D(image_center, angle, 1.)
    abs_cos = abs(matrix[0, 0])
    abs_sin = abs(matrix[0, 1])
    bound_w = int(height * abs_sin + width * abs_cos)
    bound_h = int(height * abs_cos + width * abs_sin)
    matrix[0, 2] += bound_w/2 - image_center[0]
    matrix[1, 2] += bound_h/2 - image_center[1]
    matrix = cv.warpAffine(image, matrix, (bound_w, bound_h))
    return matrix

def md5_hash(frame):
    if isinstance(frame, VideoFrame):
        frame = frame.to_image()
    return hashlib.md5(frame.tobytes()).hexdigest() # nosec
@@ -1,30 +0,0 @@
# Simple command line for prepare meta information for video data

**Usage**

```bash
usage: prepare.py [-h] [-chunk_size CHUNK_SIZE] video_file meta_directory

positional arguments:
  video_file            Path to video file
  meta_directory        Directory where the file with meta information will be saved

optional arguments:
  -h, --help            show this help message and exit
  -chunk_size CHUNK_SIZE
                        Chunk size that will be specified when creating the task with specified video and generated meta information
```

**NOTE**: For smooth video decoding, the `chunk size` must be greater than or equal to the ratio of number of frames
to a number of key frames.
You can understand the approximate `chunk size` by preparing and looking at the file with meta information.

**NOTE**: If ratio of number of frames to number of key frames is small compared to the `chunk size`,
then when creating a task with prepared meta information, you should expect that the waiting time for some chunks
will be longer than the waiting time for other chunks. (At the first iteration, when there is no chunk in the cache)

**Examples**

```bash
python prepare.py ~/Documents/some_video.mp4 ~/Documents
```
@@ -1,37 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import sys
import os

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('video_file',
                        type=str,
                        help='Path to video file')
    parser.add_argument('meta_directory',
                        type=str,
                        help='Directory where the file with meta information will be saved')
    parser.add_argument('-chunk_size',
                        type=int,
                        help='Chunk size that will be specified when creating the task with specified video and generated meta information')

    return parser.parse_args()

def main():
    args = get_args()
    try:
        smooth_decoding = prepare_meta_for_upload(prepare_meta, args.video_file, None, args.meta_directory, args.chunk_size)
        print('Meta information for video has been prepared')

        if smooth_decoding != None and not smooth_decoding:
            print('NOTE: prepared meta information contains too few key frames for smooth decoding.')
    except Exception:
        print('Impossible to prepare meta information')

if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    sys.path.append(base_dir)
    from cvat.apps.engine.prepare import prepare_meta, prepare_meta_for_upload
    main()