diff --git a/cvat-core/src/session.js b/cvat-core/src/session.js index 5d5bd7b2..ccaff82c 100644 --- a/cvat-core/src/session.js +++ b/cvat-core/src/session.js @@ -814,6 +814,7 @@ data_compressed_chunk_type: undefined, data_original_chunk_type: undefined, use_zip_chunks: undefined, + use_cache: undefined, }; for (const property in data) { @@ -1069,6 +1070,24 @@ data.use_zip_chunks = useZipChunks; }, }, + /** + * @name useCache + * @type {boolean} + * @memberof module:API.cvat.classes.Task + * @instance + * @throws {module:API.cvat.exceptions.ArgumentError} + */ + useCache: { + get: () => data.use_cache, + set: (useCache) => { + if (typeof (useCache) !== 'boolean') { + throw new ArgumentError( + 'Value must be a boolean', + ); + } + data.use_cache = useCache; + }, + }, /** * After task has been created value can be appended only. * @name labels @@ -1639,6 +1658,7 @@ remote_files: this.remoteFiles, image_quality: this.imageQuality, use_zip_chunks: this.useZipChunks, + use_cache: this.useCache, }; if (typeof (this.startFrame) !== 'undefined') { diff --git a/cvat-ui/src/actions/tasks-actions.ts b/cvat-ui/src/actions/tasks-actions.ts index fd1bfbc4..9fc4c1e8 100644 --- a/cvat-ui/src/actions/tasks-actions.ts +++ b/cvat-ui/src/actions/tasks-actions.ts @@ -390,6 +390,7 @@ ThunkAction, {}, {}, AnyAction> { z_order: data.advanced.zOrder, image_quality: 70, use_zip_chunks: data.advanced.useZipChunks, + use_cache: data.advanced.useCache, }; if (data.advanced.bugTracker) { diff --git a/cvat-ui/src/components/create-task-page/advanced-configuration-form.tsx b/cvat-ui/src/components/create-task-page/advanced-configuration-form.tsx index b038408b..89a760e0 100644 --- a/cvat-ui/src/components/create-task-page/advanced-configuration-form.tsx +++ b/cvat-ui/src/components/create-task-page/advanced-configuration-form.tsx @@ -26,6 +26,7 @@ export interface AdvancedConfiguration { repository?: string; useZipChunks: boolean; dataChunkSize?: number; + useCache: boolean; } type Props = FormComponentProps & { @@ -380,6 +381,24 @@ class AdvancedConfigurationForm extends React.PureComponent { ); } + private renderCreateTaskMethod(): JSX.Element { + const { form } = this.props; + return ( + + {form.getFieldDecorator('useCache', { + initialValue: true, + valuePropName: 'checked', + })( + + + Use cache + + , + )} + + ); + } + private renderChunkSize(): JSX.Element { const { form } = this.props; @@ -433,6 +452,12 @@ class AdvancedConfigurationForm extends React.PureComponent { + + + {this.renderCreateTaskMethod()} + + + {this.renderImageQuality()} diff --git a/cvat-ui/src/components/create-task-page/create-task-content.tsx b/cvat-ui/src/components/create-task-page/create-task-content.tsx index 2bd32807..229015ef 100644 --- a/cvat-ui/src/components/create-task-page/create-task-content.tsx +++ b/cvat-ui/src/components/create-task-page/create-task-content.tsx @@ -39,6 +39,7 @@ const defaultState = { zOrder: false, lfs: false, useZipChunks: true, + useCache: true, }, labels: [], files: { diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index c2bc020f..0099b349 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -125,22 +125,21 @@ class DirectoryReader(ImageListReader): class ArchiveReader(DirectoryReader): def __init__(self, source_path, step=1, start=0, stop=None): - self._tmp_dir = create_tmp_dir() self._archive_source = source_path[0] - Archive(self._archive_source).extractall(self._tmp_dir) + Archive(self._archive_source).extractall(os.path.dirname(source_path[0])) super().__init__( - source_path=[self._tmp_dir], + source_path=[os.path.dirname(source_path[0])], step=step, start=start, stop=stop, ) def __del__(self): - delete_tmp_dir(self._tmp_dir) + os.remove(self._archive_source) def get_path(self, i): base_dir = os.path.dirname(self._archive_source) - return os.path.join(base_dir, os.path.relpath(self._source_path[i], self._tmp_dir)) + return os.path.join(base_dir, os.path.relpath(self._source_path[i], base_dir)) class PdfReader(DirectoryReader): def __init__(self, source_path, step=1, start=0, stop=None): @@ -193,6 +192,10 @@ class ZipReader(ImageListReader): def get_path(self, i): return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i]) + def extract(self): + self._zip_source.extractall(os.path.dirname(self._zip_source.filename)) + os.remove(self._zip_source.filename) + class VideoReader(IMediaReader): def __init__(self, source_path, step=1, start=0, stop=None): super().__init__( @@ -312,7 +315,7 @@ class ZipCompressedChunkWriter(IChunkWriter): def save_as_chunk_to_buff(self, images, format_='jpeg'): buff = io.BytesIO() - with zipfile.ZipFile(buff, 'w') as zip_file: + with zipfile.ZipFile(buff, 'x') as zip_file: for idx, image in enumerate(images): (_, _, image_buf) = self._compress_image(image, self._image_quality) arcname = '{:06d}.{}'.format(idx, format_) @@ -452,6 +455,52 @@ class Mpeg4CompressedChunkWriter(Mpeg4ChunkWriter): output_container.close() return [(input_w, input_h)] + def save_as_chunk_to_buff(self, frames, format_): + if not frames: + raise Exception('no images to save') + + buff = io.BytesIO() + input_w = frames[0].width + input_h = frames[0].height + + downscale_factor = 1 + while input_h / downscale_factor >= 1080: + downscale_factor *= 2 + + output_h = input_h // downscale_factor + output_w = input_w // downscale_factor + + + output_container, output_v_stream = self._create_av_container( + path=buff, + w=output_w, + h=output_h, + rate=self._output_fps, + options={ + 'profile': 'baseline', + 'coder': '0', + 'crf': str(self._image_quality), + 'wpredp': '0', + 'flags': '-loop' + }, + f=format_, + ) + + for frame in frames: + # let libav set the correct pts and time_base + frame.pts = None + frame.time_base = None + + for packet in output_v_stream.encode(frame): + output_container.mux(packet) + + # Flush streams + for packet in output_v_stream.encode(): + output_container.mux(packet) + output_container.close() + buff.seek(0) + return buff + def _is_archive(path): mime = mimetypes.guess_type(path) mime_type = mime[0] diff --git a/cvat/apps/engine/migrations/0028_data_storage_method.py b/cvat/apps/engine/migrations/0028_data_storage_method.py new file mode 100644 index 00000000..6490f5bf --- /dev/null +++ b/cvat/apps/engine/migrations/0028_data_storage_method.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.13 on 2020-08-13 05:49 + +import cvat.apps.engine.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0027_auto_20200719_1552'), + ] + + operations = [ + migrations.AddField( + model_name='data', + name='storage_method', + field=models.CharField(choices=[('cache', 'CACHE'), ('file_system', 'FILE_SYSTEM')], default=cvat.apps.engine.models.StorageMethodChoice('file_system'), max_length=15), + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index d2f8e26e..1abec5a8 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -43,6 +43,17 @@ class DataChoice(str, Enum): def __str__(self): return self.value +class StorageMethodChoice(str, Enum): + CACHE = 'cache' + FILE_SYSTEM = 'file_system' + + @classmethod + def choices(cls): + return tuple((x.value, x.name) for x in cls) + + def __str__(self): + return self.value + class Data(models.Model): chunk_size = models.PositiveIntegerField(null=True) size = models.PositiveIntegerField(default=0) @@ -54,6 +65,7 @@ class Data(models.Model): default=DataChoice.IMAGESET) original_chunk_type = models.CharField(max_length=32, choices=DataChoice.choices(), default=DataChoice.IMAGESET) + storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM) class Meta: default_permissions = () diff --git a/cvat/apps/engine/prepare.py b/cvat/apps/engine/prepare.py index 01d1c367..69d74b5a 100644 --- a/cvat/apps/engine/prepare.py +++ b/cvat/apps/engine/prepare.py @@ -4,7 +4,7 @@ import hashlib class WorkWithVideo: def __init__(self, **kwargs): if not kwargs.get('source_path'): - raise Exeption('No sourse path') + raise Exception('No sourse path') self.source_path = kwargs.get('source_path') def _open_video_container(self, sourse_path, mode, options=None): diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 90e24714..e789ae94 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -167,11 +167,13 @@ class DataSerializer(serializers.ModelSerializer): client_files = ClientFileSerializer(many=True, default=[]) server_files = ServerFileSerializer(many=True, default=[]) remote_files = RemoteFileSerializer(many=True, default=[]) + use_cache = serializers.BooleanField(default=False) class Meta: model = models.Data fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter', - 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks') + 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks', + 'use_cache') # pylint: disable=no-self-use def validate_frame_filter(self, value): @@ -199,6 +201,7 @@ class DataSerializer(serializers.ModelSerializer): server_files = validated_data.pop('server_files') remote_files = validated_data.pop('remote_files') validated_data.pop('use_zip_chunks') + validated_data.pop('use_cache') db_data = models.Data.objects.create(**validated_data) data_path = db_data.get_data_dirname() diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b468d4ef..fc2f3759 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -14,7 +14,7 @@ from urllib import parse as urlparse from urllib import request as urlrequest from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES, Mpeg4ChunkWriter, ZipChunkWriter, Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter -from cvat.apps.engine.models import DataChoice +from cvat.apps.engine.models import DataChoice, StorageMethodChoice from cvat.apps.engine.utils import av_scan_paths import django_rq @@ -245,6 +245,8 @@ def _create_thread(tid, data): start=db_data.start_frame, stop=data['stop_frame'], ) + if extractor.__class__ == MEDIA_TYPES['zip']['extractor']: + extractor.extract() db_task.mode = task_mode db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET @@ -278,25 +280,22 @@ def _create_thread(tid, data): else: db_data.chunk_size = 36 - #it's better to add the field to the Task model - video_suitable_on_the_fly_processing = True video_path = "" video_size = (0, 0) - if settings.USE_CACHE: + if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE: for media_type, media_files in media.items(): if media_files: if task_mode == MEDIA_TYPES['video']['mode']: try: - analizer = AnalyzeVideo(source_path=os.path.join(upload_dir, media_files[0])) - analizer.check_type_first_frame() - analizer.check_video_timestamps_sequences() + analyzer = AnalyzeVideo(source_path=os.path.join(upload_dir, media_files[0])) + analyzer.check_type_first_frame() + analyzer.check_video_timestamps_sequences() meta_info = PrepareInfo(source_path=os.path.join(upload_dir, media_files[0]), meta_path=os.path.join(upload_dir, 'meta_info.txt')) meta_info.save_key_frames() - #meta_info.test_seek() meta_info.check_seek_key_frames() meta_info.save_meta_info() @@ -306,14 +305,17 @@ def _create_thread(tid, data): video_size = (frame.width, frame.height) except AssertionError as ex: - video_suitable_on_the_fly_processing = False + db_data.storage_method = StorageMethodChoice.FILE_SYSTEM except Exception as ex: - video_suitable_on_the_fly_processing = False + db_data.storage_method = StorageMethodChoice.FILE_SYSTEM - else:#images, TODO:archive + else:#images,archive with Cache(settings.CACHE_ROOT) as cache: counter_ = itertools.count() + if extractor.__class__ in [MEDIA_TYPES['archive']['extractor'], MEDIA_TYPES['zip']['extractor']]: + media_files = [os.path.join(upload_dir, f) for f in extractor._source_path] + for chunk_number, media_paths in itertools.groupby(media_files, lambda x: next(counter_) // db_data.chunk_size): media_paths = list(media_paths) cache.set('{}_{}'.format(tid, chunk_number), [os.path.join(upload_dir, file_name) for file_name in media_paths], tag='dummy') @@ -321,20 +323,20 @@ def _create_thread(tid, data): img_sizes = [] from PIL import Image for media_path in media_paths: - img_sizes += [Image.open(os.path.join(upload_dir, media_path)).size] + img_sizes += [Image.open(media_path).size] db_data.size += len(media_paths) db_images.extend([ models.Image( data=db_data, - path=data[1], + path=os.path.basename(data[1]), frame=data[0], width=size[0], height=size[1]) - for data, size in zip(enumerate(media_paths), img_sizes) + for data, size in zip(enumerate(media_paths, start=len(db_images)), img_sizes) ]) - if db_task.mode == 'interpolation' and not video_suitable_on_the_fly_processing or not settings.USE_CACHE: + if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE: counter = itertools.count() generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size) for chunk_idx, chunk_data in generator: diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 621f85e8..f06f6d4b 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -37,7 +37,7 @@ from cvat.apps.authentication import auth from cvat.apps.authentication.decorators import login_required from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider -from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task, DataChoice +from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task, DataChoice, StorageMethodChoice from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, BasicUserSerializer, DataMetaSerializer, DataSerializer, ExceptionSerializer, @@ -419,6 +419,11 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet): db_task.save() data = {k:v for k, v in serializer.data.items()} data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks'] + data['use_cache'] = serializer.validated_data['use_cache'] + if data['use_cache']: + db_task.data.storage_method = StorageMethodChoice.CACHE + db_task.data.save(update_fields=['storage_method']) + # if the value of stop_frame is 0, then inside the function we cannot know # the value specified by the user or it's default value from the database if 'stop_frame' not in serializer.validated_data: @@ -455,7 +460,7 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet): path = os.path.realpath(frame_provider.get_chunk(data_id, data_quality)) #TODO: av.FFmpegError processing - if settings.USE_CACHE: + if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE: with Cache(settings.CACHE_ROOT) as cache: buff = None chunk, tag = cache.get('{}_{}_{}'.format(db_task.id, data_id, quality), tag=True)