diff --git a/CHANGELOG.md b/CHANGELOG.md
index c8d4d3b0..3b75dda9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support source and target storages (server part) ()
 - Tests for import/export annotation, dataset, backup from/to cloud storage ()
 - Added Python SDK package (`cvat-sdk`) ()
+- Previews for jobs ()
 
 ### Changed
 - Bumped nuclio version to 1.8.14 ()
diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 8b0aebb8..ce81389b 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -111,7 +111,7 @@ class IMediaReader(ABC):
         pass
 
     @abstractmethod
-    def get_preview(self):
+    def get_preview(self, frame):
         pass
 
     @abstractmethod
@@ -198,11 +198,11 @@ class ImageListReader(IMediaReader):
     def get_progress(self, pos):
         return (pos - self._start + 1) / (self._stop - self._start)
 
-    def get_preview(self):
+    def get_preview(self, frame):
         if self._dimension == DimensionType.DIM_3D:
             fp = open(os.path.join(os.path.dirname(__file__), 'assets/3d_preview.jpeg'), "rb")
         else:
-            fp = open(self._source_path[0], "rb")
+            fp = open(self._source_path[frame], "rb")
         return self._get_preview(fp)
 
     def get_image_size(self, i):
@@ -338,12 +338,13 @@ class ZipReader(ImageListReader):
     def __del__(self):
         self._zip_source.close()
 
-    def get_preview(self):
+    def get_preview(self, frame):
         if self._dimension == DimensionType.DIM_3D:
             # TODO
             fp = open(os.path.join(os.path.dirname(__file__), 'assets/3d_preview.jpeg'), "rb")
             return self._get_preview(fp)
-        io_image = io.BytesIO(self._zip_source.read(self._source_path[0]))
+
+        io_image = io.BytesIO(self._zip_source.read(self._source_path[frame]))
         return self._get_preview(io_image)
 
     def get_image_size(self, i):
@@ -453,19 +454,23 @@ class VideoReader(IMediaReader):
             duration = duration_sec * tb_denominator
         return duration
 
-    def get_preview(self):
+    def get_preview(self, frame):
         container = self._get_av_container()
         stream = container.streams.video[0]
-        preview = next(container.decode(stream))
-        return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
-            else av.VideoFrame().from_ndarray(
-                rotate_image(
-                    preview.to_ndarray(format='bgr24'),
-                    360 - int(container.streams.video[0].metadata.get('rotate'))
-                ),
-                format ='bgr24'
-            ).to_image()
-        )
+        # Seek to the frame's timestamp, expressed in the stream's time base
+        needed_time = int((frame / stream.guessed_rate) / stream.time_base)
+        container.seek(offset=needed_time, stream=stream)
+        for packet in container.demux(stream):
+            for av_frame in packet.decode():
+                return self._get_preview(av_frame.to_image() if not stream.metadata.get('rotate') \
+                    else av.VideoFrame.from_ndarray(
+                        rotate_image(
+                            av_frame.to_ndarray(format='bgr24'),
+                            360 - int(container.streams.video[0].metadata.get('rotate'))
+                        ),
+                        format='bgr24'
+                    ).to_image()
+                )
 
     def get_image_size(self, i):
         image = (next(iter(self)))[0]
diff --git a/cvat/apps/engine/migrations/0056_jobs_previews.py b/cvat/apps/engine/migrations/0056_jobs_previews.py
new file mode 100644
index 00000000..b8722018
--- /dev/null
+++ b/cvat/apps/engine/migrations/0056_jobs_previews.py
@@ -0,0 +1,64 @@
+# Generated by Django 3.2.14 on 2022-07-18 06:37
+
+import os
+import shutil
+from django.db import migrations
+from django.conf import settings
+from cvat.apps.engine.log import get_logger
+
+MIGRATION_NAME = os.path.splitext(os.path.basename(__file__))[0]
+MIGRATION_LOG = os.path.join(settings.MIGRATIONS_LOGS_ROOT, f"{MIGRATION_NAME}.log")
+
+def _get_query_set(apps):
+    Job = apps.get_model("engine", "Job")
+    query_set = Job.objects.all()
+    return query_set
+
+def _get_job_preview_path(jid):
+    return os.path.join(settings.JOBS_ROOT, str(jid), "preview.jpeg")
+
+def _get_data_preview_path(did):
+    return os.path.join(settings.MEDIA_DATA_ROOT, str(did), "preview.jpeg")
+
+def create_previews(apps, schema_editor):
+    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
+    query_set = _get_query_set(apps)
+    logger.info(f'Migration started: {query_set.count()} job previews to create.')
+    for db_job in query_set:
+        try:
+            jid = db_job.id
+            did = db_job.segment.task.data.id
+            task_preview = _get_data_preview_path(did)
+            job_preview = _get_job_preview_path(jid)
+            if os.path.isfile(task_preview) and not os.path.isfile(job_preview):
+                shutil.copy(task_preview, job_preview)
+        except Exception as e:
+            logger.error(f'Cannot create preview for job {db_job.id}')
+            logger.error(str(e))
+
+def delete_previews(apps, schema_editor):
+    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
+    query_set = _get_query_set(apps)
+    logger.info(f'Reverse migration started: {query_set.count()} job previews to delete.')
+    for db_job in query_set:
+        try:
+            jid = db_job.id
+            job_preview = _get_job_preview_path(jid)
+            if os.path.isfile(job_preview):
+                os.remove(job_preview)
+        except Exception as e:
+            logger.error(f'Cannot delete preview for job {db_job.id}')
+            logger.error(str(e))
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('engine', '0055_jobs_directories'),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            code=create_previews,
+            reverse_code=delete_previews
+        )
+    ]
diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py
index fe5c5088..7607437a 100644
--- a/cvat/apps/engine/models.py
+++ b/cvat/apps/engine/models.py
@@ -463,6 +463,8 @@ class Job(models.Model):
             })
             db_commit.save()
 
+    def get_preview_path(self):
+        return os.path.join(self.get_dirname(), "preview.jpeg")
 
     class Meta:
         default_permissions = ()
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 5267010c..269323f1 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -75,27 +75,31 @@ def _copy_data_from_source(server_files, upload_dir, server_dir=None):
             os.makedirs(target_dir)
         shutil.copyfile(source_path, target_path)
 
-def _save_task_to_db(db_task):
-    job = rq.get_current_job()
-    job.meta['status'] = 'Task is being saved in database'
-    job.save_meta()
-
+def _get_task_segment_data(db_task, data_size):
     segment_size = db_task.segment_size
     segment_step = segment_size
-    if segment_size == 0 or segment_size > db_task.data.size:
-        segment_size = db_task.data.size
-        db_task.segment_size = segment_size
+    if segment_size == 0 or segment_size > data_size:
+        segment_size = data_size
 
         # Segment step must be more than segment_size + overlap in single-segment tasks
         # Otherwise a task contains an extra segment
         segment_step = sys.maxsize
 
-    default_overlap = 5 if db_task.mode == 'interpolation' else 0
-    if db_task.overlap is None:
-        db_task.overlap = default_overlap
-    db_task.overlap = min(db_task.overlap, segment_size // 2)
+    default_overlap = 5 if db_task.mode == 'interpolation' else 0
+    overlap = db_task.overlap if db_task.overlap is not None else default_overlap
+    overlap = min(overlap, segment_size // 2)
+
+    segment_step -= overlap
+    return segment_step, segment_size, overlap
 
-    segment_step -= db_task.overlap
+def _save_task_to_db(db_task, extractor):
+    job = rq.get_current_job()
+    job.meta['status'] = 'Task is being saved in database'
+    job.save_meta()
+
+    segment_step, segment_size, overlap = _get_task_segment_data(db_task, db_task.data.size)
+    db_task.segment_size = segment_size
+    db_task.overlap = overlap
 
     for start_frame in range(0, db_task.data.size, segment_step):
         stop_frame = min(start_frame + segment_size - 1, db_task.data.size - 1)
@@ -112,12 +116,13 @@
         db_job = models.Job(segment=db_segment)
         db_job.save()
 
-        # create job directory
         job_path = db_job.get_dirname()
         if os.path.isdir(job_path):
             shutil.rmtree(job_path)
         os.makedirs(job_path)
 
+        preview = extractor.get_preview(frame=start_frame)
+        preview.save(db_job.get_preview_path())
 
     db_task.data.save()
     db_task.save()
@@ -328,8 +333,12 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
             }
             cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
             sorted_media = sort(media['image'], data['sorting_method'])
-            first_sorted_media_image = sorted_media[0]
-            cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
+
+            data_size = len(sorted_media)
+            segment_step, *_ = _get_task_segment_data(db_task, data_size)
+            for start_frame in range(0, data_size, segment_step):
+                first_sorted_media_image = sorted_media[start_frame]
+                cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
 
             # prepare task manifest file from cloud storage manifest file
             # NOTE we should create manifest before defining chunk_size
@@ -665,8 +674,8 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
         db_data.stop_frame = min(db_data.stop_frame, \
             db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())
 
-    preview = extractor.get_preview()
-    preview.save(db_data.get_preview_path())
+    task_preview = extractor.get_preview(frame=0)
+    task_preview.save(db_data.get_preview_path())
 
     slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
-    _save_task_to_db(db_task)
+    _save_task_to_db(db_task, extractor)
diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py
index 14fe6189..d4e4abcc 100644
--- a/cvat/apps/engine/views.py
+++ b/cvat/apps/engine/views.py
@@ -597,7 +597,7 @@ class DataChunkGetter:
 
         self.dimension = task_dim
 
-    def __call__(self, request, start, stop, db_data):
+    def __call__(self, request, start, stop, db_data, db_object):
         if not db_data:
             raise NotFound(detail='Cannot find requested data')
 
@@ -630,7 +630,7 @@
                 return HttpResponse(buf.getvalue(), content_type=mime)
 
             elif self.type == 'preview':
-                return sendfile(request, frame_provider.get_preview())
+                return sendfile(request, db_object.get_preview_path())
 
             elif self.type == 'context_image':
                 if not (start <= self.number <= stop):
@@ -934,7 +934,7 @@ class TaskViewSet(UploadMixin, AnnotationMixin, viewsets.ModelViewSet, Serialize
             self._object.dimension)
 
         return data_getter(request, self._object.data.start_frame,
-            self._object.data.stop_frame, self._object.data)
+            self._object.data.stop_frame, self._object.data, self._object.data)
 
     @extend_schema(methods=['PATCH'],
         operation_id='tasks_partial_update_data_file',
@@ -1492,7 +1492,7 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin,
             db_job.segment.task.dimension)
 
         return data_getter(request, db_job.segment.start_frame,
-            db_job.segment.stop_frame, db_job.segment.task.data)
+            db_job.segment.stop_frame, db_job.segment.task.data, db_job)
 
     @extend_schema(summary='Method provides a meta information about media files which are related with the job',
         responses={
@@ -1988,7 +1988,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
                 with NamedTemporaryFile() as temp_image:
                     storage.download_file(preview_path, temp_image.name)
                     reader = ImageListReader([temp_image.name])
-                    preview = reader.get_preview()
+                    preview = reader.get_preview(frame=0)
                     preview.save(db_storage.get_preview_path())
             content_type = mimetypes.guess_type(db_storage.get_preview_path())[0]
             return HttpResponse(open(db_storage.get_preview_path(), 'rb').read(), content_type)
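
Note (not part of the patch): `VideoReader.get_preview` now seeks to the requested frame instead of always decoding the first one. Below is a minimal standalone sketch of that PyAV seek technique, assuming PyAV and Pillow are installed; `extract_frame`, the file name, and the frame index are invented for illustration.

```python
# Illustrative sketch only: extract_frame, "video.mp4" and frame 42 are
# invented names, not part of the patch. Requires PyAV and Pillow.
import av

def extract_frame(path, frame_number):
    """Return the given frame of the video as a PIL image (approximately)."""
    with av.open(path) as container:
        stream = container.streams.video[0]
        # frame index -> seconds -> offset in the stream's time base
        seconds = frame_number / stream.guessed_rate
        offset = int(seconds / stream.time_base)
        # seek() jumps to the nearest keyframe at or before the offset, so
        # the first decoded frame may be slightly earlier than requested.
        container.seek(offset, stream=stream)
        for packet in container.demux(stream):
            for frame in packet.decode():
                return frame.to_image()
    return None

if __name__ == "__main__":
    extract_frame("video.mp4", 42).save("preview.jpeg")
```

The same approximation applies in the patch: for a preview image, returning the keyframe-aligned frame is acceptable, and much cheaper than decoding forward to the exact index.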
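Note (not part of the patch): the cloud-storage hunk in `task.py` downloads only the frames that previews are cut from, one per segment start, instead of the single first image. Here is a self-contained sketch of the segment arithmetic in `_get_task_segment_data`; the sample sizes in the example call are invented.

```python
# Re-implementation of the segment arithmetic for illustration; the numbers
# in the example call are invented, not taken from the patch.
import sys

def segment_data(data_size, segment_size, overlap=None, mode='interpolation'):
    segment_step = segment_size
    if segment_size == 0 or segment_size > data_size:
        segment_size = data_size
        segment_step = sys.maxsize  # single segment: step past the end
    default_overlap = 5 if mode == 'interpolation' else 0
    overlap = overlap if overlap is not None else default_overlap
    overlap = min(overlap, segment_size // 2)
    return segment_step - overlap, segment_size, overlap

step, size, _ = segment_data(data_size=25, segment_size=10, overlap=3)
segments = [(s, min(s + size - 1, 24)) for s in range(0, 25, step)]
print(segments)                  # [(0, 9), (7, 16), (14, 23), (21, 24)]
print([s for s, _ in segments])  # frames downloaded for previews: [0, 7, 14, 21]
```

With 25 frames, segment size 10, and overlap 3, the step is 7, so only the four segment start frames are fetched before chunk preparation rather than the entire bucket.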