Merge branch 'cache' of https://github.com/Marishka17/cvat into Marishka17-cache

Branch: main · Nikita Manovich · commit 28225f9dc6 · 6 years ago

@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to configure email verification for new users (<https://github.com/opencv/cvat/pull/1929>)
- Link to django admin page from UI (<https://github.com/opencv/cvat/pull/2068>)
- Notification message when users use wrong browser (<https://github.com/opencv/cvat/pull/2070>)
- Ability to work with data on the fly (<https://github.com/opencv/cvat/pull/2007>)
### Changed
- Shape coordinates are rounded to 2 digits in dumped annotations (<https://github.com/opencv/cvat/pull/1970>)

@ -1,6 +1,6 @@
{
"name": "cvat-core",
"version": "3.5.0",
"version": "3.6.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

@ -1,6 +1,6 @@
{
"name": "cvat-core",
"version": "3.5.0",
"version": "3.6.0",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js",
"scripts": {

@ -833,6 +833,7 @@
data_compressed_chunk_type: undefined,
data_original_chunk_type: undefined,
use_zip_chunks: undefined,
use_cache: undefined,
};
for (const property in data) {
@ -1088,6 +1089,24 @@
data.use_zip_chunks = useZipChunks;
},
},
/**
* @name useCache
* @type {boolean}
* @memberof module:API.cvat.classes.Task
* @instance
* @throws {module:API.cvat.exceptions.ArgumentError}
*/
useCache: {
get: () => data.use_cache,
set: (useCache) => {
if (typeof (useCache) !== 'boolean') {
throw new ArgumentError(
'Value must be a boolean',
);
}
data.use_cache = useCache;
},
},
/**
* After task has been created value can be appended only.
* @name labels
@ -1666,6 +1685,7 @@
remote_files: this.remoteFiles,
image_quality: this.imageQuality,
use_zip_chunks: this.useZipChunks,
use_cache: this.useCache,
};
if (typeof (this.startFrame) !== 'undefined') {

@ -1,6 +1,6 @@
{
"name": "cvat-ui",
"version": "1.8.4",
"version": "1.9.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

@ -1,6 +1,6 @@
{
"name": "cvat-ui",
"version": "1.8.4",
"version": "1.9.0",
"description": "CVAT single-page application",
"main": "src/index.tsx",
"scripts": {

@ -386,6 +386,7 @@ ThunkAction<Promise<void>, {}, {}, AnyAction> {
z_order: data.advanced.zOrder,
image_quality: 70,
use_zip_chunks: data.advanced.useZipChunks,
use_cache: data.advanced.useCache,
};
if (data.advanced.bugTracker) {

@ -26,6 +26,7 @@ export interface AdvancedConfiguration {
repository?: string;
useZipChunks: boolean;
dataChunkSize?: number;
useCache: boolean;
}
type Props = FormComponentProps & {
@ -380,6 +381,24 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
);
}
private renderCreateTaskMethod(): JSX.Element {
const { form } = this.props;
return (
<Form.Item help='Using cache to store data.'>
{form.getFieldDecorator('useCache', {
initialValue: false,
valuePropName: 'checked',
})(
<Checkbox>
<Text className='cvat-text-color'>
Use cache
</Text>
</Checkbox>,
)}
</Form.Item>
);
}
private renderChunkSize(): JSX.Element {
const { form } = this.props;
@ -434,6 +453,12 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
</Col>
</Row>
<Row>
<Col>
{this.renderCreateTaskMethod()}
</Col>
</Row>
<Row type='flex' justify='start'>
<Col span={7}>
{this.renderImageQuality()}

@ -42,6 +42,7 @@ const defaultState = {
zOrder: false,
lfs: false,
useZipChunks: true,
useCache: true,
},
labels: [],
files: {

@ -0,0 +1,57 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
from diskcache import Cache
from django.conf import settings
from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, ZipChunkWriter,
Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter)
from cvat.apps.engine.models import DataChoice
from .prepare import PrepareInfo
import os
from io import BytesIO
class CacheInteraction:
def __init__(self):
self._cache = Cache(settings.CACHE_ROOT)
def __del__(self):
self._cache.close()
def get_buff_mime(self, chunk_number, quality, db_data):
chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)
if not chunk:
chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
return chunk, tag
def prepare_chunk_buff(self, db_data, quality, chunk_number):
from cvat.apps.engine.frame_provider import FrameProvider
extractor_classes = {
FrameProvider.Quality.COMPRESSED : Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
FrameProvider.Quality.ORIGINAL : Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
}
image_quality = 100 if extractor_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
mime_type = 'video/mp4' if extractor_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'
extractor = extractor_classes[quality](image_quality)
images = []
buff = BytesIO()
if os.path.exists(db_data.get_meta_path()):
source_path = os.path.join(db_data.get_upload_dirname(), db_data.video.path)
meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path())
for frame in meta.decode_needed_frames(chunk_number, db_data):
images.append(frame)
extractor.save_as_chunk([(image, source_path, None) for image in images], buff)
else:
with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file:
images = [os.path.join(db_data.get_upload_dirname(), line.strip()) for line in dummy_file]
extractor.save_as_chunk([(image, image, None) for image in images], buff)
buff.seek(0)
return buff, mime_type
def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
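
The new CacheInteraction class wraps a get-or-prepare pattern around diskcache: a chunk is looked up by a `{data_id}_{chunk_number}_{quality}` key, built on a miss, and stored with its MIME type riding along as the entry's tag. A minimal, self-contained sketch of that pattern (the key layout matches the code above; `make_chunk` is an illustrative stand-in for `prepare_chunk_buff`, not CVAT code):

```python
# Get-or-prepare with diskcache, keeping the MIME type as the entry tag.
from io import BytesIO
from diskcache import Cache

cache = Cache('/tmp/chunk-cache')

def make_chunk(chunk_number):
    # Stand-in for prepare_chunk_buff(): build the payload, report its type.
    return BytesIO(b'chunk %d' % chunk_number), 'application/zip'

def get_buff_mime(data_id, chunk_number, quality):
    key = '{}_{}_{}'.format(data_id, chunk_number, quality)
    payload, mime = cache.get(key, tag=True)   # cache miss -> (None, None)
    if payload is None:
        buff, mime = make_chunk(chunk_number)
        payload = buff.getvalue()
        cache.set(key, payload, tag=mime)      # tag is persisted with the value
    return BytesIO(payload), mime
```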

@ -11,8 +11,8 @@ from PIL import Image
from cvat.apps.engine.media_extractors import VideoReader, ZipReader
from cvat.apps.engine.mime_types import mimetypes
- from cvat.apps.engine.models import DataChoice
from cvat.apps.engine.models import DataChoice, StorageMethodChoice
from .cache import CacheInteraction
class RandomAccessIterator:
def __init__(self, iterable):
@ -65,6 +65,19 @@ class FrameProvider:
self.reader_class([self.get_chunk_path(chunk_id)]))
return self.chunk_reader
class BuffChunkLoader(ChunkLoader):
def __init__(self, reader_class, path_getter, quality, db_data):
super().__init__(reader_class, path_getter)
self.quality = quality
self.db_data = db_data
def load(self, chunk_id):
if self.chunk_id != chunk_id:
self.chunk_id = chunk_id
self.chunk_reader = RandomAccessIterator(
self.reader_class([self.get_chunk_path(chunk_id, self.quality, self.db_data)[0]]))
return self.chunk_reader
def __init__(self, db_data):
self._db_data = db_data
self._loaders = {}
@ -73,12 +86,27 @@ class FrameProvider:
DataChoice.IMAGESET: ZipReader,
DataChoice.VIDEO: VideoReader,
}
- self._loaders[self.Quality.COMPRESSED] = self.ChunkLoader(
- reader_class[db_data.compressed_chunk_type],
- db_data.get_compressed_chunk_path)
- self._loaders[self.Quality.ORIGINAL] = self.ChunkLoader(
- reader_class[db_data.original_chunk_type],
- db_data.get_original_chunk_path)
if db_data.storage_method == StorageMethodChoice.CACHE:
cache = CacheInteraction()
self._loaders[self.Quality.COMPRESSED] = self.BuffChunkLoader(
reader_class[db_data.compressed_chunk_type],
cache.get_buff_mime,
self.Quality.COMPRESSED,
self._db_data)
self._loaders[self.Quality.ORIGINAL] = self.BuffChunkLoader(
reader_class[db_data.original_chunk_type],
cache.get_buff_mime,
self.Quality.ORIGINAL,
self._db_data)
else:
self._loaders[self.Quality.COMPRESSED] = self.ChunkLoader(
reader_class[db_data.compressed_chunk_type],
db_data.get_compressed_chunk_path)
self._loaders[self.Quality.ORIGINAL] = self.ChunkLoader(
reader_class[db_data.original_chunk_type],
db_data.get_original_chunk_path)
def __len__(self):
return self._db_data.size
@ -129,6 +157,8 @@ class FrameProvider:
def get_chunk(self, chunk_number, quality=Quality.ORIGINAL):
chunk_number = self._validate_chunk_number(chunk_number)
if self._db_data.storage_method == StorageMethodChoice.CACHE:
return self._loaders[quality].get_chunk_path(chunk_number, quality, self._db_data)
return self._loaders[quality].get_chunk_path(chunk_number)
def get_frame(self, frame_number, quality=Quality.ORIGINAL,
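
Depending on `storage_method`, `get_chunk()` now returns two different shapes: a filesystem path under FILE_SYSTEM, or the `(BytesIO, mime_type)` pair produced by `CacheInteraction.get_buff_mime` under CACHE. A hedged sketch of calling code that handles both (here `db_data` is assumed to be an existing `engine.models.Data` row):

```python
# Branching on the two return shapes of FrameProvider.get_chunk().
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import StorageMethodChoice

def read_chunk_bytes(db_data, chunk_number):
    provider = FrameProvider(db_data)
    chunk = provider.get_chunk(chunk_number, FrameProvider.Quality.COMPRESSED)
    if db_data.storage_method == StorageMethodChoice.CACHE:
        buff, mime_type = chunk            # built on demand or fetched from diskcache
        return buff.getvalue(), mime_type
    with open(chunk, 'rb') as chunk_file:  # FILE_SYSTEM: a path on disk
        return chunk_file.read(), None
```

views.py (further down in this diff) does the same split: `HttpResponse(buff.getvalue(), content_type=mime_type)` for cached data, `sendfile` for data on disk.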

@ -125,22 +125,17 @@ class DirectoryReader(ImageListReader):
class ArchiveReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None):
- self._tmp_dir = create_tmp_dir()
self._archive_source = source_path[0]
- Archive(self._archive_source).extractall(self._tmp_dir)
Archive(self._archive_source).extractall(os.path.dirname(source_path[0]))
super().__init__(
- source_path=[self._tmp_dir],
source_path=[os.path.dirname(source_path[0])],
step=step,
start=start,
stop=stop,
)
- def __del__(self):
- delete_tmp_dir(self._tmp_dir)
- def get_path(self, i):
- base_dir = os.path.dirname(self._archive_source)
- return os.path.join(base_dir, os.path.relpath(self._source_path[i], self._tmp_dir))
os.remove(self._archive_source)
class PdfReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None):
@ -191,7 +186,14 @@ class ZipReader(ImageListReader):
return io.BytesIO(self._zip_source.read(self._source_path[i]))
def get_path(self, i):
- return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i])
if self._zip_source.filename:
return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i])
else: # necessary for mime_type definition
return self._source_path[i]
def extract(self):
self._zip_source.extractall(os.path.dirname(self._zip_source.filename))
os.remove(self._zip_source.filename)
class VideoReader(IMediaReader):
def __init__(self, source_path, step=1, start=0, stop=None):
@ -303,14 +305,14 @@ class Mpeg4ChunkWriter(IChunkWriter):
self._output_fps = 25
@staticmethod
- def _create_av_container(path, w, h, rate, options):
def _create_av_container(path, w, h, rate, options, f='mp4'):
# x264 requires width and height to be divisible by 2 for yuv420p
if h % 2:
h += 1
if w % 2:
w += 1
- container = av.open(path, 'w')
container = av.open(path, 'w', format=f)
video_stream = container.add_stream('libx264', rate=rate)
video_stream.pix_fmt = "yuv420p"
video_stream.width = w
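
The new `f='mp4'` parameter exists because the chunk writers can now target an in-memory buffer (see `prepare_chunk_buff` above): PyAV can infer the container format from a `*.mp4` path, but not from a nameless file-like object, so the format has to be spelled out. A small sketch, assuming nothing beyond PyAV itself:

```python
# Writing an MP4 container into a BytesIO: with no filename to inspect,
# av.open() needs the format named explicitly.
import av
from io import BytesIO

buff = BytesIO()
container = av.open(buff, 'w', format='mp4')   # would fail without format=
stream = container.add_stream('libx264', rate=25)
stream.width, stream.height = 640, 480
stream.pix_fmt = 'yuv420p'
frame = av.VideoFrame(640, 480, 'yuv420p')     # one blank frame, just to mux something
for packet in stream.encode(frame):
    container.mux(packet)
for packet in stream.encode():                 # flush the encoder
    container.mux(packet)
container.close()
buff.seek(0)                                   # buff now holds a playable MP4
```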

@ -0,0 +1,36 @@
# Generated by Django 2.2.13 on 2020-08-13 05:49
from cvat.apps.engine.media_extractors import _is_archive, _is_zip
import cvat.apps.engine.models
from django.conf import settings
from django.db import migrations, models
import os
from pyunpack import Archive
def unzip(apps, schema_editor):
Data = apps.get_model("engine", "Data")
data_q_set = Data.objects.all()
archive_paths = []
for data_instance in data_q_set:
for root, _, files in os.walk(os.path.join(settings.MEDIA_DATA_ROOT, '{}/raw/'.format(data_instance.id))):
archive_paths.extend([os.path.join(root, file) for file in files if _is_archive(file) or _is_zip(file)])
for path in archive_paths:
Archive(path).extractall(os.path.dirname(path))
os.remove(path)
class Migration(migrations.Migration):
dependencies = [
('engine', '0028_labelcolor'),
]
operations = [
migrations.AddField(
model_name='data',
name='storage_method',
field=models.CharField(choices=[('cache', 'CACHE'), ('file_system', 'FILE_SYSTEM')], default=cvat.apps.engine.models.StorageMethodChoice('file_system'), max_length=15),
),
migrations.RunPython(unzip),
]

@ -43,6 +43,17 @@ class DataChoice(str, Enum):
def __str__(self):
return self.value
class StorageMethodChoice(str, Enum):
CACHE = 'cache'
FILE_SYSTEM = 'file_system'
@classmethod
def choices(cls):
return tuple((x.value, x.name) for x in cls)
def __str__(self):
return self.value
class Data(models.Model):
chunk_size = models.PositiveIntegerField(null=True)
size = models.PositiveIntegerField(default=0)
@ -54,6 +65,7 @@ class Data(models.Model):
default=DataChoice.IMAGESET)
original_chunk_type = models.CharField(max_length=32, choices=DataChoice.choices(),
default=DataChoice.IMAGESET)
storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)
class Meta:
default_permissions = ()
@ -102,6 +114,12 @@ class Data(models.Model):
def get_preview_path(self):
return os.path.join(self.get_data_dirname(), 'preview.jpeg')
def get_meta_path(self):
return os.path.join(self.get_upload_dirname(), 'meta_info.txt')
def get_dummy_chunk_path(self, chunk_number):
return os.path.join(self.get_upload_dirname(), 'dummy_{}.txt'.format(chunk_number))
class Video(models.Model):
data = models.OneToOneField(Data, on_delete=models.CASCADE, related_name="video", null=True)
path = models.CharField(max_length=1024, default='')
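
`StorageMethodChoice` follows the same str-plus-Enum pattern as `DataChoice`, so its members compare equal to the raw strings Django stores in the `CharField`, and `choices()` feeds the field definition directly. For illustration:

```python
# str-subclassing Enum members compare equal to their stored database values.
from cvat.apps.engine.models import StorageMethodChoice

assert StorageMethodChoice.CACHE == 'cache'
assert str(StorageMethodChoice.FILE_SYSTEM) == 'file_system'
assert StorageMethodChoice.choices() == (('cache', 'CACHE'), ('file_system', 'FILE_SYSTEM'))
```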

@ -0,0 +1,155 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
import av
import hashlib
class WorkWithVideo:
def __init__(self, **kwargs):
if not kwargs.get('source_path'):
raise Exception('No source path')
self.source_path = kwargs.get('source_path')
def _open_video_container(self, source_path, mode, options=None):
return av.open(source_path, mode=mode, options=options)
def _close_video_container(self, container):
container.close()
def _get_video_stream(self, container):
video_stream = next(stream for stream in container.streams if stream.type == 'video')
video_stream.thread_type = 'AUTO'
return video_stream
class AnalyzeVideo(WorkWithVideo):
def check_type_first_frame(self):
container = self._open_video_container(self.source_path, mode='r')
video_stream = self._get_video_stream(container)
for packet in container.demux(video_stream):
for frame in packet.decode():
self._close_video_container(container)
assert frame.pict_type.name == 'I', 'First frame is not key frame'
return
def check_video_timestamps_sequences(self):
container = self._open_video_container(self.source_path, mode='r')
video_stream = self._get_video_stream(container)
frame_pts = -1
frame_dts = -1
for packet in container.demux(video_stream):
for frame in packet.decode():
if None not in [frame.pts, frame_pts] and frame.pts <= frame_pts:
self._close_video_container(container)
raise Exception('Invalid pts sequences')
if None not in [frame.dts, frame_dts] and frame.dts <= frame_dts:
self._close_video_container(container)
raise Exception('Invalid dts sequences')
frame_pts, frame_dts = frame.pts, frame.dts
self._close_video_container(container)
def md5_hash(frame):
return hashlib.md5(frame.to_image().tobytes()).hexdigest()
class PrepareInfo(WorkWithVideo):
def __init__(self, **kwargs):
super().__init__(**kwargs)
if not kwargs.get('meta_path'):
raise Exception('No meta path')
self.meta_path = kwargs.get('meta_path')
self.key_frames = {}
self.frames = 0
def get_task_size(self):
return self.frames
def check_seek_key_frames(self):
container = self._open_video_container(self.source_path, mode='r')
video_stream = self._get_video_stream(container)
key_frames_copy = self.key_frames.copy()
for index, key_frame in key_frames_copy.items():
container.seek(offset=key_frame.pts, stream=video_stream)
flag = True
for packet in container.demux(video_stream):
for frame in packet.decode():
if md5_hash(frame) != md5_hash(key_frame) or frame.pts != key_frame.pts:
self.key_frames.pop(index)
flag = False
break
if not flag:
break
#TODO: correct ratio of number of frames to keyframes
if len(self.key_frames) == 0:
raise Exception('Too few keyframes')
def save_key_frames(self):
container = self._open_video_container(self.source_path, mode='r')
video_stream = self._get_video_stream(container)
frame_number = 0
for packet in container.demux(video_stream):
for frame in packet.decode():
if frame.key_frame:
self.key_frames[frame_number] = frame
frame_number += 1
self.frames = frame_number
self._close_video_container(container)
def save_meta_info(self):
with open(self.meta_path, 'w') as meta_file:
for index, frame in self.key_frames.items():
meta_file.write('{} {}\n'.format(index, frame.pts))
def get_nearest_left_key_frame(self, start_chunk_frame_number):
start_decode_frame_number = 0
start_decode_timestamp = 0
with open(self.meta_path, 'r') as file:
for line in file:
frame_number, timestamp = line.strip().split(' ')
if int(frame_number) <= start_chunk_frame_number:
start_decode_frame_number = frame_number
start_decode_timestamp = timestamp
else:
break
return int(start_decode_frame_number), int(start_decode_timestamp)
def decode_needed_frames(self, chunk_number, db_data):
step = db_data.get_frame_step()
start_chunk_frame_number = db_data.start_frame + chunk_number * db_data.chunk_size * step
end_chunk_frame_number = min(start_chunk_frame_number + (db_data.chunk_size - 1) * step + 1, db_data.stop_frame + 1)
start_decode_frame_number, start_decode_timestamp = self.get_nearest_left_key_frame(start_chunk_frame_number)
container = self._open_video_container(self.source_path, mode='r')
video_stream = self._get_video_stream(container)
container.seek(offset=start_decode_timestamp, stream=video_stream)
frame_number = start_decode_frame_number - 1
for packet in container.demux(video_stream):
for frame in packet.decode():
frame_number += 1
if frame_number < start_chunk_frame_number:
continue
elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
yield frame
elif (frame_number - start_chunk_frame_number) % step:
continue
else:
self._close_video_container(container)
return
self._close_video_container(container)
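
`meta_info.txt` is just a list of `<frame_number> <pts>` pairs for the surviving keyframes, which is what lets `decode_needed_frames` seek to the nearest preceding keyframe and decode forward instead of walking the whole file. A sketch of the full prepare-then-decode flow on a standalone video; `DummyData` is a hypothetical stand-in for the `engine.models.Data` row the real code receives:

```python
# Index keyframes once, then decode only the frames chunk 0 needs.
# DummyData mimics the handful of Data fields decode_needed_frames() touches.
from types import SimpleNamespace
from cvat.apps.engine.prepare import PrepareInfo

class DummyData(SimpleNamespace):
    def get_frame_step(self):
        return 1

db_data = DummyData(start_frame=0, stop_frame=99, chunk_size=36)

meta = PrepareInfo(source_path='video.mp4', meta_path='meta_info.txt')
meta.save_key_frames()        # remember every keyframe and the total frame count
meta.check_seek_key_frames()  # drop keyframes that cannot be seeked back to exactly
meta.save_meta_info()         # persist "<frame_number> <pts>" lines

frames = list(meta.decode_needed_frames(0, db_data))
assert len(frames) <= db_data.chunk_size
```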

@ -170,11 +170,13 @@ class DataSerializer(serializers.ModelSerializer):
client_files = ClientFileSerializer(many=True, default=[])
server_files = ServerFileSerializer(many=True, default=[])
remote_files = RemoteFileSerializer(many=True, default=[])
use_cache = serializers.BooleanField(default=False)
class Meta:
model = models.Data
fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
- 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks')
'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
'use_cache')
# pylint: disable=no-self-use
def validate_frame_filter(self, value):
@ -202,6 +204,7 @@ class DataSerializer(serializers.ModelSerializer):
server_files = validated_data.pop('server_files')
remote_files = validated_data.pop('remote_files')
validated_data.pop('use_zip_chunks')
validated_data.pop('use_cache')
db_data = models.Data.objects.create(**validated_data)
data_path = db_data.get_data_dirname()
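
From a client's point of view, `use_cache` is just one more boolean in the data upload request: the serializer validates it, `create()` pops it before the `Data` row is built, and the view (below) switches `storage_method` to CACHE when it is true. A hedged sketch against the v1 REST API used elsewhere in this diff (host, credentials, and file names are placeholders):

```python
# Creating a task whose data is served from the cache. Placeholders only.
import requests

session = requests.Session()
session.auth = ('user', 'password')
host = 'http://localhost:8080'

task = session.post(host + '/api/v1/tasks', json={
    'name': 'cached task',
    'labels': [{'name': 'car'}],
}).json()

session.post(host + '/api/v1/tasks/{}/data'.format(task['id']), data={
    'image_quality': 70,
    'use_cache': True,                 # -> Data.storage_method becomes 'cache'
    'server_files[0]': 'test_video_1.mp4',
})
```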

@ -14,7 +14,7 @@ from urllib import parse as urlparse
from urllib import request as urlrequest
from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES, Mpeg4ChunkWriter, ZipChunkWriter, Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter
- from cvat.apps.engine.models import DataChoice
from cvat.apps.engine.models import DataChoice, StorageMethodChoice
from cvat.apps.engine.utils import av_scan_paths
import django_rq
@ -24,6 +24,7 @@ from distutils.dir_util import copy_tree
from . import models
from .log import slogger
from .prepare import PrepareInfo, AnalyzeVideo
############################# Low Level server API
@ -243,6 +244,8 @@ def _create_thread(tid, data):
start=db_data.start_frame,
stop=data['stop_frame'],
)
if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
extractor.extract()
db_task.mode = task_mode
db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET
@ -276,37 +279,94 @@ def _create_thread(tid, data):
else:
db_data.chunk_size = 36
video_path = ""
video_size = (0, 0)
- counter = itertools.count()
- generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
- for chunk_idx, chunk_data in generator:
- chunk_data = list(chunk_data)
- original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
- original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)
- compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
- img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)
- if db_task.mode == 'annotation':
- db_images.extend([
- models.Image(
- data=db_data,
- path=os.path.relpath(data[1], upload_dir),
- frame=data[2],
- width=size[0],
- height=size[1])
- for data, size in zip(chunk_data, img_sizes)
- ])
- else:
- video_size = img_sizes[0]
- video_path = chunk_data[0][1]
if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
for media_type, media_files in media.items():
if media_files:
if task_mode == MEDIA_TYPES['video']['mode']:
try:
analyzer = AnalyzeVideo(source_path=os.path.join(upload_dir, media_files[0]))
analyzer.check_type_first_frame()
analyzer.check_video_timestamps_sequences()
meta_info = PrepareInfo(source_path=os.path.join(upload_dir, media_files[0]),
meta_path=os.path.join(upload_dir, 'meta_info.txt'))
meta_info.save_key_frames()
meta_info.check_seek_key_frames()
meta_info.save_meta_info()
all_frames = meta_info.get_task_size()
db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
video_path = os.path.join(upload_dir, media_files[0])
frame = meta_info.key_frames.get(next(iter(meta_info.key_frames)))
video_size = (frame.width, frame.height)
except Exception:
db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
else: # images, archive
counter_ = itertools.count()
if isinstance(extractor, MEDIA_TYPES['archive']['extractor']):
media_files = [os.path.relpath(path, upload_dir) for path in extractor._source_path]
elif isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
media_files = extractor._source_path
numbers_sequence = range(db_data.start_frame, min(data['stop_frame'] if data['stop_frame'] else len(media_files), len(media_files)), db_data.get_frame_step())
m_paths = [(path, numb) for numb, path in enumerate(sorted(media_files)) if numb in numbers_sequence]
for chunk_number, media_paths in itertools.groupby(m_paths, lambda x: next(counter_) // db_data.chunk_size):
media_paths = list(media_paths)
img_sizes = []
from PIL import Image
with open(db_data.get_dummy_chunk_path(chunk_number), 'w') as dummy_chunk:
for path, _ in media_paths:
dummy_chunk.write(path+'\n')
img_sizes += [Image.open(os.path.join(upload_dir, path)).size]
db_data.size += len(media_paths)
db_images.extend([
models.Image(
data=db_data,
path=data[0],
frame=data[1],
width=size[0],
height=size[1])
for data, size in zip(media_paths, img_sizes)
])
if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
counter = itertools.count()
generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
for chunk_idx, chunk_data in generator:
chunk_data = list(chunk_data)
original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)
compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)
if db_task.mode == 'annotation':
db_images.extend([
models.Image(
data=db_data,
path=os.path.relpath(data[1], upload_dir),
frame=data[2],
width=size[0],
height=size[1])
for data, size in zip(chunk_data, img_sizes)
])
else:
video_size = img_sizes[0]
video_path = chunk_data[0][1]
db_data.size += len(chunk_data)
progress = extractor.get_progress(chunk_data[-1][2])
update_progress(progress)
- db_data.size += len(chunk_data)
- progress = extractor.get_progress(chunk_data[-1][2])
- update_progress(progress)
if db_task.mode == 'annotation':
models.Image.objects.bulk_create(db_images)
@ -324,4 +384,4 @@ def _create_thread(tid, data):
preview.save(db_data.get_preview_path())
slogger.glob.info("Founded frames {} for Data #{}".format(db_data.size, db_data.id))
- _save_task_to_db(db_task)
_save_task_to_db(db_task)

@ -72,13 +72,14 @@ import av
import numpy as np
from django.conf import settings
from django.contrib.auth.models import Group, User
from django.http import HttpResponse
from PIL import Image
from pycocotools import coco as coco_loader
from rest_framework import status
from rest_framework.test import APIClient, APITestCase
from cvat.apps.engine.models import (AttributeType, Data, Job, Project,
- Segment, StatusChoice, Task)
Segment, StatusChoice, Task, StorageMethodChoice)
_setUpModule()
@ -1670,7 +1671,8 @@ class TaskDataAPITestCase(APITestCase):
stream = container.streams.video[0]
return [f.to_image() for f in container.decode(stream)]
- def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_type, expected_original_type, image_sizes):
def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_type, expected_original_type, image_sizes,
expected_storage_method=StorageMethodChoice.FILE_SYSTEM):
# create task
response = self._create_task(user, spec)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
@ -1694,6 +1696,7 @@ class TaskDataAPITestCase(APITestCase):
self.assertEqual(expected_compressed_type, task["data_compressed_chunk_type"])
self.assertEqual(expected_original_type, task["data_original_chunk_type"])
self.assertEqual(len(image_sizes), task["size"])
self.assertEqual(expected_storage_method, Task.objects.get(pk=task_id).data.storage_method)
# check preview
response = self._get_preview(task_id, user)
@ -1706,7 +1709,10 @@ class TaskDataAPITestCase(APITestCase):
response = self._get_compressed_chunk(task_id, user, 0)
self.assertEqual(response.status_code, expected_status_code)
if expected_status_code == status.HTTP_200_OK:
- compressed_chunk = io.BytesIO(b"".join(response.streaming_content))
if isinstance(response, HttpResponse):
compressed_chunk = io.BytesIO(response.content)
else:
compressed_chunk = io.BytesIO(b"".join(response.streaming_content))
if task["data_compressed_chunk_type"] == self.ChunkType.IMAGESET:
images = self._extract_zip_chunk(compressed_chunk)
else:
@ -1721,7 +1727,10 @@ class TaskDataAPITestCase(APITestCase):
response = self._get_original_chunk(task_id, user, 0)
self.assertEqual(response.status_code, expected_status_code)
if expected_status_code == status.HTTP_200_OK:
- original_chunk = io.BytesIO(b"".join(response.streaming_content))
if isinstance(response, HttpResponse):
original_chunk = io.BytesIO(response.getvalue())
else:
original_chunk = io.BytesIO(b"".join(response.streaming_content))
if task["data_original_chunk_type"] == self.ChunkType.IMAGESET:
images = self._extract_zip_chunk(original_chunk)
else:
@ -1909,6 +1918,74 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes)
task_spec = {
"name": "use_cache video task #8",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"server_files[0]": 'test_video_1.mp4',
"image_quality": 70,
"use_cache": True,
}
image_sizes = self._image_sizes[task_data["server_files[0]"]]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
task_spec = {
"name": "use_cache images task #9",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_3.jpg",
"image_quality": 70,
"use_cache": True,
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE)
task_spec = {
"name": "my zip archive task #10",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}
task_data = {
"server_files[0]": "test_archive_1.zip",
"image_quality": 70,
"use_cache": True
}
image_sizes = self._image_sizes[task_data["server_files[0]"]]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE)
def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)

@ -34,7 +34,7 @@ import cvat.apps.dataset_manager.views # pylint: disable=unused-import
from cvat.apps.authentication import auth
from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer
from cvat.apps.engine.frame_provider import FrameProvider
- from cvat.apps.engine.models import Job, StatusChoice, Task
from cvat.apps.engine.models import Job, StatusChoice, Task, StorageMethodChoice
from cvat.apps.engine.serializers import (
AboutSerializer, AnnotationFileSerializer, BasicUserSerializer,
DataMetaSerializer, DataSerializer, ExceptionSerializer,
@ -374,6 +374,11 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
db_task.save()
data = {k:v for k, v in serializer.data.items()}
data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks']
data['use_cache'] = serializer.validated_data['use_cache']
if data['use_cache']:
db_task.data.storage_method = StorageMethodChoice.CACHE
db_task.data.save(update_fields=['storage_method'])
# if the value of stop_frame is 0, then inside the function we cannot know
# the value specified by the user or it's default value from the database
if 'stop_frame' not in serializer.validated_data:
@ -398,16 +403,23 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
try:
db_task = self.get_object()
db_data = db_task.data
frame_provider = FrameProvider(db_task.data)
if data_type == 'chunk':
data_id = int(data_id)
data_quality = FrameProvider.Quality.COMPRESSED \
if data_quality == 'compressed' else FrameProvider.Quality.ORIGINAL
- path = os.path.realpath(frame_provider.get_chunk(data_id, data_quality))
#TODO: av.FFmpegError processing
if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
buff, mime_type = frame_provider.get_chunk(data_id, data_quality)
return HttpResponse(buff.getvalue(), content_type=mime_type)
# Follow symbol links if the chunk is a link on a real image otherwise
# mimetype detection inside sendfile will work incorrectly.
path = os.path.realpath(frame_provider.get_chunk(data_id, data_quality))
return sendfile(request, path)
elif data_type == 'frame':

@ -50,3 +50,4 @@ av==6.2.0
# The package is used by pyunpack as a command line tool to support multiple
# archives. Don't use as a python module because it has GPL license.
patool==1.12
diskcache==4.1.0

@ -324,6 +324,9 @@ os.makedirs(DATA_ROOT, exist_ok=True)
MEDIA_DATA_ROOT = os.path.join(DATA_ROOT, 'data')
os.makedirs(MEDIA_DATA_ROOT, exist_ok=True)
CACHE_ROOT = os.path.join(DATA_ROOT, 'cache')
os.makedirs(CACHE_ROOT, exist_ok=True)
TASKS_ROOT = os.path.join(DATA_ROOT, 'tasks')
os.makedirs(TASKS_ROOT, exist_ok=True)
@ -422,3 +425,17 @@ RESTRICTIONS = {
'engine.role.admin',
),
}
CACHES = {
'default' : {
'BACKEND' : 'diskcache.DjangoCache',
'LOCATION' : CACHE_ROOT,
'TIMEOUT' : None,
'OPTIONS' : {
#'statistics' :True,
'size_limit' : 2 ** 40, # 1 TiB
}
}
}
USE_CACHE = True
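
`size_limit` is measured in bytes, so `2 ** 40` caps the on-disk cache at 1 TiB; diskcache evicts old entries once the limit is reached, and `TIMEOUT: None` keeps entries alive until then. `USE_CACHE` is the global switch that task.py and views.py consult before taking the cache path. Because `diskcache.DjangoCache` is also registered as Django's default backend, the regular cache API becomes disk-backed too, something like:

```python
# With diskcache.DjangoCache as the default backend, ordinary Django cache
# calls persist under CACHE_ROOT until size_limit forces eviction.
from django.core.cache import cache

cache.set('greeting', 'hello')            # TIMEOUT=None -> no expiry
assert cache.get('greeting') == 'hello'
```

`CacheInteraction` itself talks to `diskcache.Cache(settings.CACHE_ROOT)` directly, but both uses share the same directory.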

@ -22,6 +22,8 @@ os.makedirs(TASKS_ROOT, exist_ok=True)
MODELS_ROOT = os.path.join(DATA_ROOT, 'models')
os.makedirs(MODELS_ROOT, exist_ok=True)
CACHE_ROOT = os.path.join(DATA_ROOT, 'cache')
os.makedirs(CACHE_ROOT, exist_ok=True)
# To avoid ERROR django.security.SuspiciousFileOperation:
# The joined path (...) is located outside of the base path component
