Merge branch 'cache' of https://github.com/Marishka17/cvat into Marishka17-cache

main
Nikita Manovich 6 years ago
commit 28225f9dc6

@@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to configure email verification for new users (<https://github.com/opencv/cvat/pull/1929>)
 - Link to django admin page from UI (<https://github.com/opencv/cvat/pull/2068>)
 - Notification message when users use wrong browser (<https://github.com/opencv/cvat/pull/2070>)
+- Ability to work with data on the fly (<https://github.com/opencv/cvat/pull/2007>)

 ### Changed
 - Shape coordinates are rounded to 2 digits in dumped annotations (<https://github.com/opencv/cvat/pull/1970>)

@@ -1,6 +1,6 @@
 {
   "name": "cvat-core",
-  "version": "3.5.0",
+  "version": "3.6.0",
   "lockfileVersion": 1,
   "requires": true,
   "dependencies": {

@@ -1,6 +1,6 @@
 {
   "name": "cvat-core",
-  "version": "3.5.0",
+  "version": "3.6.0",
   "description": "Part of Computer Vision Tool which presents an interface for client-side integration",
   "main": "babel.config.js",
   "scripts": {

@@ -833,6 +833,7 @@
             data_compressed_chunk_type: undefined,
             data_original_chunk_type: undefined,
             use_zip_chunks: undefined,
+            use_cache: undefined,
         };

         for (const property in data) {
@@ -1088,6 +1089,24 @@
                     data.use_zip_chunks = useZipChunks;
                 },
             },
+            /**
+             * @name useCache
+             * @type {boolean}
+             * @memberof module:API.cvat.classes.Task
+             * @instance
+             * @throws {module:API.cvat.exceptions.ArgumentError}
+             */
+            useCache: {
+                get: () => data.use_cache,
+                set: (useCache) => {
+                    if (typeof (useCache) !== 'boolean') {
+                        throw new ArgumentError(
+                            'Value must be a boolean',
+                        );
+                    }
+                    data.use_cache = useCache;
+                },
+            },
             /**
              * After task has been created value can be appended only.
              * @name labels
@@ -1666,6 +1685,7 @@
             remote_files: this.remoteFiles,
             image_quality: this.imageQuality,
             use_zip_chunks: this.useZipChunks,
+            use_cache: this.useCache,
         };

         if (typeof (this.startFrame) !== 'undefined') {

@@ -1,6 +1,6 @@
 {
   "name": "cvat-ui",
-  "version": "1.8.4",
+  "version": "1.9.0",
   "lockfileVersion": 1,
   "requires": true,
   "dependencies": {

@@ -1,6 +1,6 @@
 {
   "name": "cvat-ui",
-  "version": "1.8.4",
+  "version": "1.9.0",
   "description": "CVAT single-page application",
   "main": "src/index.tsx",
   "scripts": {

@@ -386,6 +386,7 @@ ThunkAction<Promise<void>, {}, {}, AnyAction> {
         z_order: data.advanced.zOrder,
         image_quality: 70,
         use_zip_chunks: data.advanced.useZipChunks,
+        use_cache: data.advanced.useCache,
     };

     if (data.advanced.bugTracker) {

@@ -26,6 +26,7 @@ export interface AdvancedConfiguration {
     repository?: string;
     useZipChunks: boolean;
     dataChunkSize?: number;
+    useCache: boolean;
 }

 type Props = FormComponentProps & {

@@ -380,6 +381,24 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
         );
     }

+    private renderCreateTaskMethod(): JSX.Element {
+        const { form } = this.props;
+        return (
+            <Form.Item help='Using cache to store data.'>
+                {form.getFieldDecorator('useCache', {
+                    initialValue: false,
+                    valuePropName: 'checked',
+                })(
+                    <Checkbox>
+                        <Text className='cvat-text-color'>
+                            Use cache
+                        </Text>
+                    </Checkbox>,
+                )}
+            </Form.Item>
+        );
+    }
+
     private renderChunkSize(): JSX.Element {
         const { form } = this.props;

@@ -434,6 +453,12 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
                     </Col>
                 </Row>

+                <Row>
+                    <Col>
+                        {this.renderCreateTaskMethod()}
+                    </Col>
+                </Row>
+
                 <Row type='flex' justify='start'>
                     <Col span={7}>
                         {this.renderImageQuality()}

@@ -42,6 +42,7 @@ const defaultState = {
         zOrder: false,
         lfs: false,
         useZipChunks: true,
+        useCache: true,
     },
     labels: [],
     files: {

@@ -0,0 +1,57 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from diskcache import Cache
+from django.conf import settings
+
+from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, ZipChunkWriter,
+    Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter)
+from cvat.apps.engine.models import DataChoice
+from .prepare import PrepareInfo
+
+import os
+from io import BytesIO
+
+class CacheInteraction:
+    def __init__(self):
+        self._cache = Cache(settings.CACHE_ROOT)
+
+    def __del__(self):
+        self._cache.close()
+
+    def get_buff_mime(self, chunk_number, quality, db_data):
+        chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)
+
+        if not chunk:
+            chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
+            self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
+        return chunk, tag
+
+    def prepare_chunk_buff(self, db_data, quality, chunk_number):
+        from cvat.apps.engine.frame_provider import FrameProvider
+        extractor_classes = {
+            FrameProvider.Quality.COMPRESSED: Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
+            FrameProvider.Quality.ORIGINAL: Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
+        }
+
+        image_quality = 100 if extractor_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
+        mime_type = 'video/mp4' if extractor_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'
+
+        extractor = extractor_classes[quality](image_quality)
+
+        images = []
+        buff = BytesIO()
+        if os.path.exists(db_data.get_meta_path()):
+            source_path = os.path.join(db_data.get_upload_dirname(), db_data.video.path)
+            meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path())
+            for frame in meta.decode_needed_frames(chunk_number, db_data):
+                images.append(frame)
+            extractor.save_as_chunk([(image, source_path, None) for image in images], buff)
+        else:
+            with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file:
+                images = [os.path.join(db_data.get_upload_dirname(), line.strip()) for line in dummy_file]
+            extractor.save_as_chunk([(image, image, None) for image in images], buff)
+
+        buff.seek(0)
+        return buff, mime_type
+
+    def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
+        self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
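
A minimal usage sketch of the diskcache pattern CacheInteraction relies on (the cache location and key values below are made up, not part of the commit): the value is the chunk buffer, the tag slot carries its mime type, and a lookup with tag=True returns (None, None) on a miss.

# Sketch only: illustrates diskcache get/set with a tag, as used above.
from io import BytesIO
from diskcache import Cache

cache = Cache('/tmp/cvat_cache_demo')            # hypothetical location

key = '{}_{}_{}'.format(42, 0, 'compressed')     # db_data.id, chunk_number, quality
chunk, mime = cache.get(key, tag=True)           # cache miss -> (None, None)
if chunk is None:
    chunk, mime = BytesIO(b'zip bytes here'), 'application/zip'
    cache.set(key, chunk, tag=mime)              # store buffer and mime type together

cache.close()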

@@ -11,8 +11,8 @@ from PIL import Image
 from cvat.apps.engine.media_extractors import VideoReader, ZipReader
 from cvat.apps.engine.mime_types import mimetypes
-from cvat.apps.engine.models import DataChoice
+from cvat.apps.engine.models import DataChoice, StorageMethodChoice
+from .cache import CacheInteraction

 class RandomAccessIterator:
     def __init__(self, iterable):

@@ -65,6 +65,19 @@ class FrameProvider:
                 self.reader_class([self.get_chunk_path(chunk_id)]))
             return self.chunk_reader

+    class BuffChunkLoader(ChunkLoader):
+        def __init__(self, reader_class, path_getter, quality, db_data):
+            super().__init__(reader_class, path_getter)
+            self.quality = quality
+            self.db_data = db_data
+
+        def load(self, chunk_id):
+            if self.chunk_id != chunk_id:
+                self.chunk_id = chunk_id
+                self.chunk_reader = RandomAccessIterator(
+                    self.reader_class([self.get_chunk_path(chunk_id, self.quality, self.db_data)[0]]))
+            return self.chunk_reader
+
     def __init__(self, db_data):
         self._db_data = db_data
         self._loaders = {}

@@ -73,12 +86,27 @@ class FrameProvider:
             DataChoice.IMAGESET: ZipReader,
             DataChoice.VIDEO: VideoReader,
         }
-        self._loaders[self.Quality.COMPRESSED] = self.ChunkLoader(
-            reader_class[db_data.compressed_chunk_type],
-            db_data.get_compressed_chunk_path)
-        self._loaders[self.Quality.ORIGINAL] = self.ChunkLoader(
-            reader_class[db_data.original_chunk_type],
-            db_data.get_original_chunk_path)
+
+        if db_data.storage_method == StorageMethodChoice.CACHE:
+            cache = CacheInteraction()
+
+            self._loaders[self.Quality.COMPRESSED] = self.BuffChunkLoader(
+                reader_class[db_data.compressed_chunk_type],
+                cache.get_buff_mime,
+                self.Quality.COMPRESSED,
+                self._db_data)
+            self._loaders[self.Quality.ORIGINAL] = self.BuffChunkLoader(
+                reader_class[db_data.original_chunk_type],
+                cache.get_buff_mime,
+                self.Quality.ORIGINAL,
+                self._db_data)
+        else:
+            self._loaders[self.Quality.COMPRESSED] = self.ChunkLoader(
+                reader_class[db_data.compressed_chunk_type],
+                db_data.get_compressed_chunk_path)
+            self._loaders[self.Quality.ORIGINAL] = self.ChunkLoader(
+                reader_class[db_data.original_chunk_type],
+                db_data.get_original_chunk_path)

     def __len__(self):
         return self._db_data.size

@@ -129,6 +157,8 @@ class FrameProvider:
     def get_chunk(self, chunk_number, quality=Quality.ORIGINAL):
         chunk_number = self._validate_chunk_number(chunk_number)
+        if self._db_data.storage_method == StorageMethodChoice.CACHE:
+            return self._loaders[quality].get_chunk_path(chunk_number, quality, self._db_data)
         return self._loaders[quality].get_chunk_path(chunk_number)

     def get_frame(self, frame_number, quality=Quality.ORIGINAL,
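
A hedged usage sketch of the two return shapes FrameProvider.get_chunk() has after this change (the task id below is hypothetical): a filesystem path for the default storage method, and a (BytesIO, mime_type) pair produced by CacheInteraction when the data uses the cache.

# Sketch only: callers must branch on the storage method, as views.py does below.
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import StorageMethodChoice, Task

db_data = Task.objects.get(pk=1).data            # hypothetical task
provider = FrameProvider(db_data)

if db_data.storage_method == StorageMethodChoice.CACHE:
    buff, mime_type = provider.get_chunk(0, FrameProvider.Quality.COMPRESSED)
else:
    chunk_path = provider.get_chunk(0, FrameProvider.Quality.COMPRESSED)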

@@ -125,22 +125,17 @@ class DirectoryReader(ImageListReader):
 class ArchiveReader(DirectoryReader):
     def __init__(self, source_path, step=1, start=0, stop=None):
-        self._tmp_dir = create_tmp_dir()
         self._archive_source = source_path[0]
-        Archive(self._archive_source).extractall(self._tmp_dir)
+        Archive(self._archive_source).extractall(os.path.dirname(source_path[0]))
         super().__init__(
-            source_path=[self._tmp_dir],
+            source_path=[os.path.dirname(source_path[0])],
             step=step,
             start=start,
             stop=stop,
         )

     def __del__(self):
-        delete_tmp_dir(self._tmp_dir)
-
-    def get_path(self, i):
-        base_dir = os.path.dirname(self._archive_source)
-        return os.path.join(base_dir, os.path.relpath(self._source_path[i], self._tmp_dir))
+        os.remove(self._archive_source)

 class PdfReader(DirectoryReader):
     def __init__(self, source_path, step=1, start=0, stop=None):

@@ -191,7 +186,14 @@ class ZipReader(ImageListReader):
         return io.BytesIO(self._zip_source.read(self._source_path[i]))

     def get_path(self, i):
-        return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i])
+        if self._zip_source.filename:
+            return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i])
+        else: # necessary for mime_type definition
+            return self._source_path[i]
+
+    def extract(self):
+        self._zip_source.extractall(os.path.dirname(self._zip_source.filename))
+        os.remove(self._zip_source.filename)

 class VideoReader(IMediaReader):
     def __init__(self, source_path, step=1, start=0, stop=None):

@@ -303,14 +305,14 @@ class Mpeg4ChunkWriter(IChunkWriter):
         self._output_fps = 25

     @staticmethod
-    def _create_av_container(path, w, h, rate, options):
+    def _create_av_container(path, w, h, rate, options, f='mp4'):
         # x264 requires width and height must be divisible by 2 for yuv420p
         if h % 2:
             h += 1
         if w % 2:
             w += 1

-        container = av.open(path, 'w')
+        container = av.open(path, 'w', format=f)
         video_stream = container.add_stream('libx264', rate=rate)
         video_stream.pix_fmt = "yuv420p"
         video_stream.width = w
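
A minimal PyAV sketch (not part of the commit, and not how the chunk writers themselves are wired) of why the explicit format parameter matters here: when the output target is an in-memory buffer rather than a *.mp4 path, av.open() cannot infer the container format from the name, so it has to be passed explicitly.

# Sketch only: write a tiny one-frame mp4 into a BytesIO buffer.
import io

import av
import numpy as np

buff = io.BytesIO()
container = av.open(buff, 'w', format='mp4')     # format is required for file-like targets
stream = container.add_stream('libx264', rate=25)
stream.width, stream.height = 64, 64
stream.pix_fmt = 'yuv420p'

frame = av.VideoFrame.from_ndarray(np.zeros((64, 64, 3), dtype=np.uint8), format='rgb24')
for packet in stream.encode(frame):
    container.mux(packet)
for packet in stream.encode():                   # flush the encoder
    container.mux(packet)
container.close()

buff.seek(0)                                     # buff now holds a playable mp4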

@@ -0,0 +1,36 @@
+# Generated by Django 2.2.13 on 2020-08-13 05:49
+
+from cvat.apps.engine.media_extractors import _is_archive, _is_zip
+import cvat.apps.engine.models
+from django.conf import settings
+from django.db import migrations, models
+import os
+from pyunpack import Archive
+
+def unzip(apps, schema_editor):
+    Data = apps.get_model("engine", "Data")
+    data_q_set = Data.objects.all()
+    archive_paths = []
+
+    for data_instance in data_q_set:
+        for root, _, files in os.walk(os.path.join(settings.MEDIA_DATA_ROOT, '{}/raw/'.format(data_instance.id))):
+            archive_paths.extend([os.path.join(root, file) for file in files if _is_archive(file) or _is_zip(file)])
+
+    for path in archive_paths:
+        Archive(path).extractall(os.path.dirname(path))
+        os.remove(path)
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('engine', '0028_labelcolor'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='data',
+            name='storage_method',
+            field=models.CharField(choices=[('cache', 'CACHE'), ('file_system', 'FILE_SYSTEM')], default=cvat.apps.engine.models.StorageMethodChoice('file_system'), max_length=15),
+        ),
+        migrations.RunPython(unzip),
+    ]

@@ -43,6 +43,17 @@ class DataChoice(str, Enum):
     def __str__(self):
         return self.value

+class StorageMethodChoice(str, Enum):
+    CACHE = 'cache'
+    FILE_SYSTEM = 'file_system'
+
+    @classmethod
+    def choices(cls):
+        return tuple((x.value, x.name) for x in cls)
+
+    def __str__(self):
+        return self.value
+
 class Data(models.Model):
     chunk_size = models.PositiveIntegerField(null=True)
     size = models.PositiveIntegerField(default=0)

@@ -54,6 +65,7 @@ class Data(models.Model):
         default=DataChoice.IMAGESET)
     original_chunk_type = models.CharField(max_length=32, choices=DataChoice.choices(),
         default=DataChoice.IMAGESET)
+    storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)

     class Meta:
         default_permissions = ()

@@ -102,6 +114,12 @@ class Data(models.Model):
     def get_preview_path(self):
         return os.path.join(self.get_data_dirname(), 'preview.jpeg')

+    def get_meta_path(self):
+        return os.path.join(self.get_upload_dirname(), 'meta_info.txt')
+
+    def get_dummy_chunk_path(self, chunk_number):
+        return os.path.join(self.get_upload_dirname(), 'dummy_{}.txt'.format(chunk_number))
+
 class Video(models.Model):
     data = models.OneToOneField(Data, on_delete=models.CASCADE, related_name="video", null=True)
     path = models.CharField(max_length=1024, default='')

@@ -0,0 +1,155 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import av
+import hashlib
+
+class WorkWithVideo:
+    def __init__(self, **kwargs):
+        if not kwargs.get('source_path'):
+            raise Exception('No sourse path')
+        self.source_path = kwargs.get('source_path')
+
+    def _open_video_container(self, sourse_path, mode, options=None):
+        return av.open(sourse_path, mode=mode, options=options)
+
+    def _close_video_container(self, container):
+        container.close()
+
+    def _get_video_stream(self, container):
+        video_stream = next(stream for stream in container.streams if stream.type == 'video')
+        video_stream.thread_type = 'AUTO'
+        return video_stream
+
+class AnalyzeVideo(WorkWithVideo):
+    def check_type_first_frame(self):
+        container = self._open_video_container(self.source_path, mode='r')
+        video_stream = self._get_video_stream(container)
+
+        for packet in container.demux(video_stream):
+            for frame in packet.decode():
+                self._close_video_container(container)
+                assert frame.pict_type.name == 'I', 'First frame is not key frame'
+                return
+
+    def check_video_timestamps_sequences(self):
+        container = self._open_video_container(self.source_path, mode='r')
+        video_stream = self._get_video_stream(container)
+
+        frame_pts = -1
+        frame_dts = -1
+        for packet in container.demux(video_stream):
+            for frame in packet.decode():
+
+                if None not in [frame.pts, frame_pts] and frame.pts <= frame_pts:
+                    self._close_video_container(container)
+                    raise Exception('Invalid pts sequences')
+
+                if None not in [frame.dts, frame_dts] and frame.dts <= frame_dts:
+                    self._close_video_container(container)
+                    raise Exception('Invalid dts sequences')
+
+                frame_pts, frame_dts = frame.pts, frame.dts
+        self._close_video_container(container)
+
+def md5_hash(frame):
+    return hashlib.md5(frame.to_image().tobytes()).hexdigest()
+
+class PrepareInfo(WorkWithVideo):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        if not kwargs.get('meta_path'):
+            raise Exception('No meta path')
+
+        self.meta_path = kwargs.get('meta_path')
+        self.key_frames = {}
+        self.frames = 0
+
+    def get_task_size(self):
+        return self.frames
+
+    def check_seek_key_frames(self):
+        container = self._open_video_container(self.source_path, mode='r')
+        video_stream = self._get_video_stream(container)
+
+        key_frames_copy = self.key_frames.copy()
+
+        for index, key_frame in key_frames_copy.items():
+            container.seek(offset=key_frame.pts, stream=video_stream)
+            flag = True
+            for packet in container.demux(video_stream):
+                for frame in packet.decode():
+                    if md5_hash(frame) != md5_hash(key_frame) or frame.pts != key_frame.pts:
+                        self.key_frames.pop(index)
+                    flag = False
+                    break
+                if not flag:
+                    break
+
+        #TODO: correct ratio of number of frames to keyframes
+        if len(self.key_frames) == 0:
+            raise Exception('Too few keyframes')
+
+    def save_key_frames(self):
+        container = self._open_video_container(self.source_path, mode='r')
+        video_stream = self._get_video_stream(container)
+        frame_number = 0
+
+        for packet in container.demux(video_stream):
+            for frame in packet.decode():
+                if frame.key_frame:
+                    self.key_frames[frame_number] = frame
+                frame_number += 1
+
+        self.frames = frame_number
+        self._close_video_container(container)
+
+    def save_meta_info(self):
+        with open(self.meta_path, 'w') as meta_file:
+            for index, frame in self.key_frames.items():
+                meta_file.write('{} {}\n'.format(index, frame.pts))
+
+    def get_nearest_left_key_frame(self, start_chunk_frame_number):
+        start_decode_frame_number = 0
+        start_decode_timestamp = 0
+
+        with open(self.meta_path, 'r') as file:
+            for line in file:
+                frame_number, timestamp = line.strip().split(' ')
+
+                if int(frame_number) <= start_chunk_frame_number:
+                    start_decode_frame_number = frame_number
+                    start_decode_timestamp = timestamp
+                else:
+                    break
+
+        return int(start_decode_frame_number), int(start_decode_timestamp)
+
+    def decode_needed_frames(self, chunk_number, db_data):
+        step = db_data.get_frame_step()
+        start_chunk_frame_number = db_data.start_frame + chunk_number * db_data.chunk_size * step
+        end_chunk_frame_number = min(start_chunk_frame_number + (db_data.chunk_size - 1) * step + 1, db_data.stop_frame + 1)
+        start_decode_frame_number, start_decode_timestamp = self.get_nearest_left_key_frame(start_chunk_frame_number)
+
+        container = self._open_video_container(self.source_path, mode='r')
+        video_stream = self._get_video_stream(container)
+        container.seek(offset=start_decode_timestamp, stream=video_stream)
+
+        frame_number = start_decode_frame_number - 1
+        for packet in container.demux(video_stream):
+            for frame in packet.decode():
+                frame_number += 1
+                if frame_number < start_chunk_frame_number:
+                    continue
+                elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
+                    yield frame
+                elif (frame_number - start_chunk_frame_number) % step:
+                    continue
+                else:
+                    self._close_video_container(container)
+                    return
+
+        self._close_video_container(container)
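
A hedged sketch of the flow these classes enable (the paths below are hypothetical): analyze the video once at task creation, persist the key-frame index to meta_info.txt, then later decode only the frames of a requested chunk starting from the nearest key frame to the left.

# Sketch only: mirrors how task.py and cache.py use AnalyzeVideo/PrepareInfo.
from cvat.apps.engine.prepare import AnalyzeVideo, PrepareInfo

source = '/path/to/task/raw/video.mp4'
meta = '/path/to/task/raw/meta_info.txt'

analyzer = AnalyzeVideo(source_path=source)
analyzer.check_type_first_frame()                # the stream must start with a key frame
analyzer.check_video_timestamps_sequences()      # pts/dts must be monotonic

info = PrepareInfo(source_path=source, meta_path=meta)
info.save_key_frames()
info.check_seek_key_frames()
info.save_meta_info()                            # one "<frame_number> <pts>" line per key frame

# Later, on a cache miss (db_data is the task's Data model instance):
# frames = list(info.decode_needed_frames(chunk_number=0, db_data=db_data))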

@@ -170,11 +170,13 @@ class DataSerializer(serializers.ModelSerializer):
     client_files = ClientFileSerializer(many=True, default=[])
     server_files = ServerFileSerializer(many=True, default=[])
     remote_files = RemoteFileSerializer(many=True, default=[])
+    use_cache = serializers.BooleanField(default=False)

     class Meta:
         model = models.Data
         fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
-            'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks')
+            'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
+            'use_cache')

     # pylint: disable=no-self-use
     def validate_frame_filter(self, value):

@@ -202,6 +204,7 @@ class DataSerializer(serializers.ModelSerializer):
         server_files = validated_data.pop('server_files')
         remote_files = validated_data.pop('remote_files')
         validated_data.pop('use_zip_chunks')
+        validated_data.pop('use_cache')
         db_data = models.Data.objects.create(**validated_data)

         data_path = db_data.get_data_dirname()
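
A hedged client-side example of the new serializer field (host, credentials and task id are hypothetical): sending use_cache=True with the data upload request asks the server to store data with the cache method instead of building all chunks up front.

# Sketch only: illustrative REST call against a local CVAT instance.
import requests

session = requests.Session()
session.auth = ('user', 'password')

payload = {
    'image_quality': 70,
    'use_zip_chunks': True,
    'use_cache': True,
    'server_files[0]': 'test_video_1.mp4',       # a file from the server share
}
response = session.post('http://localhost:8080/api/v1/tasks/1/data', data=payload)
response.raise_for_status()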

@@ -14,7 +14,7 @@ from urllib import parse as urlparse
 from urllib import request as urlrequest

 from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES, Mpeg4ChunkWriter, ZipChunkWriter, Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter
-from cvat.apps.engine.models import DataChoice
+from cvat.apps.engine.models import DataChoice, StorageMethodChoice
 from cvat.apps.engine.utils import av_scan_paths

 import django_rq

@@ -24,6 +24,7 @@ from distutils.dir_util import copy_tree
 from . import models
 from .log import slogger
+from .prepare import PrepareInfo, AnalyzeVideo

 ############################# Low Level server API

@@ -243,6 +244,8 @@ def _create_thread(tid, data):
             start=db_data.start_frame,
             stop=data['stop_frame'],
         )
+        if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
+            extractor.extract()

     db_task.mode = task_mode
     db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
     db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET
@@ -276,37 +279,94 @@ def _create_thread(tid, data):
     else:
         db_data.chunk_size = 36

     video_path = ""
     video_size = (0, 0)

-    counter = itertools.count()
-    generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
-    for chunk_idx, chunk_data in generator:
-        chunk_data = list(chunk_data)
-        original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
-        original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)
-
-        compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
-        img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)
-
-        if db_task.mode == 'annotation':
-            db_images.extend([
-                models.Image(
-                    data=db_data,
-                    path=os.path.relpath(data[1], upload_dir),
-                    frame=data[2],
-                    width=size[0],
-                    height=size[1])
-                for data, size in zip(chunk_data, img_sizes)
-            ])
-        else:
-            video_size = img_sizes[0]
-            video_path = chunk_data[0][1]
-
-        db_data.size += len(chunk_data)
-        progress = extractor.get_progress(chunk_data[-1][2])
-        update_progress(progress)
+    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
+        for media_type, media_files in media.items():
+            if media_files:
+                if task_mode == MEDIA_TYPES['video']['mode']:
+                    try:
+                        analyzer = AnalyzeVideo(source_path=os.path.join(upload_dir, media_files[0]))
+                        analyzer.check_type_first_frame()
+                        analyzer.check_video_timestamps_sequences()
+
+                        meta_info = PrepareInfo(source_path=os.path.join(upload_dir, media_files[0]),
+                                                meta_path=os.path.join(upload_dir, 'meta_info.txt'))
+                        meta_info.save_key_frames()
+                        meta_info.check_seek_key_frames()
+                        meta_info.save_meta_info()
+
+                        all_frames = meta_info.get_task_size()
+                        db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
+                        video_path = os.path.join(upload_dir, media_files[0])
+                        frame = meta_info.key_frames.get(next(iter(meta_info.key_frames)))
+                        video_size = (frame.width, frame.height)
+
+                    except Exception:
+                        db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
+
+                else: # images, archive
+                    counter_ = itertools.count()
+                    if isinstance(extractor, MEDIA_TYPES['archive']['extractor']):
+                        media_files = [os.path.relpath(path, upload_dir) for path in extractor._source_path]
+                    elif isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
+                        media_files = extractor._source_path
+
+                    numbers_sequence = range(db_data.start_frame, min(data['stop_frame'] if data['stop_frame'] else len(media_files), len(media_files)), db_data.get_frame_step())
+                    m_paths = []
+                    m_paths = [(path, numb) for numb, path in enumerate(sorted(media_files)) if numb in numbers_sequence]
+
+                    for chunk_number, media_paths in itertools.groupby(m_paths, lambda x: next(counter_) // db_data.chunk_size):
+                        media_paths = list(media_paths)
+                        img_sizes = []
+                        from PIL import Image
+                        with open(db_data.get_dummy_chunk_path(chunk_number), 'w') as dummy_chunk:
+                            for path, _ in media_paths:
+                                dummy_chunk.write(path + '\n')
+                                img_sizes += [Image.open(os.path.join(upload_dir, path)).size]
+
+                        db_data.size += len(media_paths)
+                        db_images.extend([
+                            models.Image(
+                                data=db_data,
+                                path=data[0],
+                                frame=data[1],
+                                width=size[0],
+                                height=size[1])
+                            for data, size in zip(media_paths, img_sizes)
+                        ])
+
+    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
+        counter = itertools.count()
+        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
+        for chunk_idx, chunk_data in generator:
+            chunk_data = list(chunk_data)
+            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
+            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)
+
+            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
+            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)
+
+            if db_task.mode == 'annotation':
+                db_images.extend([
+                    models.Image(
+                        data=db_data,
+                        path=os.path.relpath(data[1], upload_dir),
+                        frame=data[2],
+                        width=size[0],
+                        height=size[1])
+                    for data, size in zip(chunk_data, img_sizes)
+                ])
+            else:
+                video_size = img_sizes[0]
+                video_path = chunk_data[0][1]
+
+            db_data.size += len(chunk_data)
+            progress = extractor.get_progress(chunk_data[-1][2])
+            update_progress(progress)

     if db_task.mode == 'annotation':
         models.Image.objects.bulk_create(db_images)
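
A small worked example (values are illustrative) of the frame arithmetic shared by this code path and PrepareInfo.decode_needed_frames: with start_frame=0, chunk_size=36 and a frame step of 2, chunk 1 covers source frames 72..142.

# Sketch only: the same index math, written out for one chunk.
start_frame, chunk_size, step, chunk_number = 0, 36, 2, 1

start_chunk_frame_number = start_frame + chunk_number * chunk_size * step        # 72
end_chunk_frame_number = start_chunk_frame_number + (chunk_size - 1) * step + 1  # 143 (exclusive)

frames_in_chunk = range(start_chunk_frame_number, end_chunk_frame_number, step)  # 72, 74, ..., 142
assert len(frames_in_chunk) == chunk_size                                        # 36 frames per chunk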

@@ -72,13 +72,14 @@ import av
 import numpy as np

 from django.conf import settings
 from django.contrib.auth.models import Group, User
+from django.http import HttpResponse
 from PIL import Image
 from pycocotools import coco as coco_loader
 from rest_framework import status
 from rest_framework.test import APIClient, APITestCase

 from cvat.apps.engine.models import (AttributeType, Data, Job, Project,
-    Segment, StatusChoice, Task)
+    Segment, StatusChoice, Task, StorageMethodChoice)

 _setUpModule()
@@ -1670,7 +1671,8 @@ class TaskDataAPITestCase(APITestCase):
         stream = container.streams.video[0]
         return [f.to_image() for f in container.decode(stream)]

-    def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_type, expected_original_type, image_sizes):
+    def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_type, expected_original_type, image_sizes,
+            expected_storage_method=StorageMethodChoice.FILE_SYSTEM):
         # create task
         response = self._create_task(user, spec)
         self.assertEqual(response.status_code, status.HTTP_201_CREATED)

@@ -1694,6 +1696,7 @@
         self.assertEqual(expected_compressed_type, task["data_compressed_chunk_type"])
         self.assertEqual(expected_original_type, task["data_original_chunk_type"])
         self.assertEqual(len(image_sizes), task["size"])
+        self.assertEqual(expected_storage_method, Task.objects.get(pk=task_id).data.storage_method)

         # check preview
         response = self._get_preview(task_id, user)

@@ -1706,7 +1709,10 @@
         response = self._get_compressed_chunk(task_id, user, 0)
         self.assertEqual(response.status_code, expected_status_code)
         if expected_status_code == status.HTTP_200_OK:
-            compressed_chunk = io.BytesIO(b"".join(response.streaming_content))
+            if isinstance(response, HttpResponse):
+                compressed_chunk = io.BytesIO(response.content)
+            else:
+                compressed_chunk = io.BytesIO(b"".join(response.streaming_content))
             if task["data_compressed_chunk_type"] == self.ChunkType.IMAGESET:
                 images = self._extract_zip_chunk(compressed_chunk)
             else:

@@ -1721,7 +1727,10 @@
         response = self._get_original_chunk(task_id, user, 0)
         self.assertEqual(response.status_code, expected_status_code)
         if expected_status_code == status.HTTP_200_OK:
-            original_chunk = io.BytesIO(b"".join(response.streaming_content))
+            if isinstance(response, HttpResponse):
+                original_chunk = io.BytesIO(response.getvalue())
+            else:
+                original_chunk = io.BytesIO(b"".join(response.streaming_content))
             if task["data_original_chunk_type"] == self.ChunkType.IMAGESET:
                 images = self._extract_zip_chunk(original_chunk)
             else:
@@ -1909,6 +1918,74 @@
         self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes)

+        task_spec = {
+            "name": "use_cache video task #8",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "server_files[0]": 'test_video_1.mp4',
+            "image_quality": 70,
+            "use_cache": True,
+        }
+
+        image_sizes = self._image_sizes[task_data["server_files[0]"]]
+
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
+            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
+
+        task_spec = {
+            "name": "use_cache images task #9",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "server_files[0]": "test_1.jpg",
+            "server_files[1]": "test_2.jpg",
+            "server_files[2]": "test_3.jpg",
+            "image_quality": 70,
+            "use_cache": True,
+        }
+
+        image_sizes = [
+            self._image_sizes[task_data["server_files[0]"]],
+            self._image_sizes[task_data["server_files[1]"]],
+            self._image_sizes[task_data["server_files[2]"]],
+        ]
+
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE)
+
+        task_spec = {
+            "name": "my zip archive task #10",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "server_files[0]": "test_archive_1.zip",
+            "image_quality": 70,
+            "use_cache": True
+        }
+
+        image_sizes = self._image_sizes[task_data["server_files[0]"]]
+
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE)

     def test_api_v1_tasks_id_data_admin(self):
         self._test_api_v1_tasks_id_data(self.admin)

@@ -34,7 +34,7 @@ import cvat.apps.dataset_manager.views # pylint: disable=unused-import
 from cvat.apps.authentication import auth
 from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer
 from cvat.apps.engine.frame_provider import FrameProvider
-from cvat.apps.engine.models import Job, StatusChoice, Task
+from cvat.apps.engine.models import Job, StatusChoice, Task, StorageMethodChoice
 from cvat.apps.engine.serializers import (
     AboutSerializer, AnnotationFileSerializer, BasicUserSerializer,
     DataMetaSerializer, DataSerializer, ExceptionSerializer,

@@ -374,6 +374,11 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
             db_task.save()
             data = {k:v for k, v in serializer.data.items()}
             data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks']
+            data['use_cache'] = serializer.validated_data['use_cache']
+
+            if data['use_cache']:
+                db_task.data.storage_method = StorageMethodChoice.CACHE
+                db_task.data.save(update_fields=['storage_method'])
+
             # if the value of stop_frame is 0, then inside the function we cannot know
             # the value specified by the user or it's default value from the database
             if 'stop_frame' not in serializer.validated_data:
@@ -398,16 +403,23 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
         try:
             db_task = self.get_object()
+            db_data = db_task.data
             frame_provider = FrameProvider(db_task.data)

             if data_type == 'chunk':
                 data_id = int(data_id)
                 data_quality = FrameProvider.Quality.COMPRESSED \
                     if data_quality == 'compressed' else FrameProvider.Quality.ORIGINAL
-                path = os.path.realpath(frame_provider.get_chunk(data_id, data_quality))

+                #TODO: av.FFmpegError processing
+                if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
+                    buff, mime_type = frame_provider.get_chunk(data_id, data_quality)
+                    return HttpResponse(buff.getvalue(), content_type=mime_type)
+
                 # Follow symbol links if the chunk is a link on a real image otherwise
                 # mimetype detection inside sendfile will work incorrectly.
+                path = os.path.realpath(frame_provider.get_chunk(data_id, data_quality))
                 return sendfile(request, path)
             elif data_type == 'frame':
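
A hedged client-side sketch (URL, task id and credentials are hypothetical): whether the server answers from the cache with an HttpResponse or streams a chunk file via sendfile, the client receives the same bytes; the Content-Type header tells zip chunks from mp4 chunks apart.

# Sketch only: fetch one compressed chunk over the REST API.
import io

import requests

response = requests.get(
    'http://localhost:8080/api/v1/tasks/1/data',
    params={'type': 'chunk', 'number': 0, 'quality': 'compressed'},
    auth=('user', 'password'),
)
response.raise_for_status()

chunk = io.BytesIO(response.content)
is_video_chunk = response.headers.get('Content-Type') == 'video/mp4'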

@@ -50,3 +50,4 @@ av==6.2.0
 # The package is used by pyunpack as a command line tool to support multiple
 # archives. Don't use as a python module because it has GPL license.
 patool==1.12
+diskcache==4.1.0

@@ -324,6 +324,9 @@ os.makedirs(DATA_ROOT, exist_ok=True)
 MEDIA_DATA_ROOT = os.path.join(DATA_ROOT, 'data')
 os.makedirs(MEDIA_DATA_ROOT, exist_ok=True)

+CACHE_ROOT = os.path.join(DATA_ROOT, 'cache')
+os.makedirs(CACHE_ROOT, exist_ok=True)
+
 TASKS_ROOT = os.path.join(DATA_ROOT, 'tasks')
 os.makedirs(TASKS_ROOT, exist_ok=True)

@@ -422,3 +425,17 @@ RESTRICTIONS = {
         'engine.role.admin',
     ),
 }
+
+CACHES = {
+    'default': {
+        'BACKEND': 'diskcache.DjangoCache',
+        'LOCATION': CACHE_ROOT,
+        'TIMEOUT': None,
+        'OPTIONS': {
+            # 'statistics': True,
+            'size_limit': 2 ** 40, # 1 TB
+        }
+    }
+}
+
+USE_CACHE = True
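
A minimal sketch (illustrative only): CacheInteraction opens diskcache directly on CACHE_ROOT, the same directory the CACHES['default'] entry points to, and the size_limit of 2 ** 40 bytes is one tebibyte.

# Sketch only: the chunk cache and the Django cache entry share one directory.
from diskcache import Cache
from django.conf import settings

assert 2 ** 40 == 1024 ** 4                      # 1 TiB

chunk_cache = Cache(settings.CACHE_ROOT)         # same LOCATION as CACHES['default']
chunk_cache.close()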

@@ -22,6 +22,8 @@ os.makedirs(TASKS_ROOT, exist_ok=True)
 MODELS_ROOT = os.path.join(DATA_ROOT, 'models')
 os.makedirs(MODELS_ROOT, exist_ok=True)

+CACHE_ROOT = os.path.join(DATA_ROOT, 'cache')
+os.makedirs(CACHE_ROOT, exist_ok=True)
+
 # To avoid ERROR django.security.SuspiciousFileOperation:
 # The joined path (...) is located outside of the base path component
