From 47860c9d227dbb59603a527237691ee0b2d057b7 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 10 Jan 2023 19:45:42 +0200 Subject: [PATCH] Remove preview downloading when creating a task with cloud storage data (#5499) This PR removes the downloading of previews from cloud storage (CS) when a task is created (this was skipped in PR #5478). In addition, I had to change the test that checks that the file name is present in the error message when a specified file is not found in the bucket, because previews are no longer downloaded at the task creation stage. --- cvat/apps/engine/task.py | 12 ++------ tests/python/rest_api/test_tasks.py | 47 ++++++++++++++++++++++++----- tests/python/shared/utils/s3.py | 7 +++++ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b5c41f9e..401e8274 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -318,6 +318,9 @@ def _create_task_manifest_based_on_cloud_storage_manifest( content = list(map(_add_prefix, raw_content)) else: sequence, content = cloud_storage_manifest.get_subset(sorted_media) + if not content: + raise ValidationError('There is no intersection of the files specified' + 'in the request with the contents of the bucket') sorted_content = (i[1] for i in sorted(zip(sequence, content))) manifest.create(sorted_content) @@ -354,8 +357,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False): ) if is_data_in_cloud: - cloud_storage_instance = db_storage_to_storage_instance(db_data.cloud_storage) - manifest = ImageManifestManager(db_data.get_manifest_path()) cloud_storage_manifest = ImageManifestManager( os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file), @@ -392,13 +393,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False): elif is_data_in_cloud: sorted_media = sort(media['image'], data['sorting_method']) - # download previews from cloud storage - data_size = len(sorted_media) - 
segment_step, *_ = _get_task_segment_data(db_task, data_size) - for preview_frame in range(0, data_size, segment_step): - preview = sorted_media[preview_frame] - cloud_storage_instance.download_file(preview, os.path.join(upload_dir, preview)) - # Define task manifest content based on cloud storage manifest content and uploaded files _create_task_manifest_based_on_cloud_storage_manifest( sorted_media, cloud_storage_manifest_prefix, diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index 10e80719..06c61964 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -425,7 +425,8 @@ class TestPostTaskData: sleep(1) raise Exception("Cannot create task") - def _test_create_task(self, username, spec, data, content_type, **kwargs): + @staticmethod + def _test_create_task(username, spec, data, content_type, **kwargs): with make_api_client(username) as api_client: (task, response) = api_client.tasks_api.create(spec, **kwargs) assert response.status == HTTPStatus.CREATED @@ -435,7 +436,7 @@ class TestPostTaskData: ) assert response.status == HTTPStatus.ACCEPTED - status = self._wait_until_task_is_created(api_client.tasks_api, task.id) + status = TestPostTaskData._wait_until_task_is_created(api_client.tasks_api, task.id) assert status.state.value == "Finished" return task.id @@ -798,16 +799,22 @@ class TestPostTaskData: status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec) assert "No media data found" in status.message + +@pytest.mark.usefixtures("restore_db_per_function") +@pytest.mark.usefixtures("restore_cvat_data") +class TestWorkWithTask: + _USERNAME = "admin1" + @pytest.mark.with_external_services @pytest.mark.parametrize( "cloud_storage_id, manifest, org", [(1, "manifest.jsonl", "")], # public bucket ) - def test_cannot_create_task_with_mythical_cloud_storage_data( - self, cloud_storage_id, manifest, org + def test_work_with_task_containing_non_stable_cloud_storage_files( + 
self, cloud_storage_id, manifest, org, cloud_storages, request ): - mythical_file = "mythical.jpg" - cloud_storage_content = [mythical_file, manifest] + image_name = "image_case_65_1.png" + cloud_storage_content = [image_name, manifest] task_spec = { "name": f"Task with mythical file from cloud storage {cloud_storage_id}", @@ -821,8 +828,32 @@ class TestPostTaskData: "server_files": cloud_storage_content, } - status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec, org=org) - assert mythical_file in status.message + task_id = TestPostTaskData._test_create_task( + self._USERNAME, task_spec, data_spec, content_type="application/json", org=org + ) + + # save image from the "public" bucket and remove it temporary + + s3_client = s3.make_client() + bucket_name = cloud_storages[cloud_storage_id]["resource"] + + image = s3_client.download_fileobj(bucket_name, image_name) + s3_client.remove_file(bucket_name, image_name) + request.addfinalizer( + partial(s3_client.create_file, bucket=bucket_name, filename=image_name, data=image) + ) + + with make_api_client(self._USERNAME) as api_client: + try: + api_client.tasks_api.retrieve_data( + task_id, number=0, quality="original", type="frame" + ) + raise AssertionError("Frame should not exist") + except AssertionError: + raise + except Exception as ex: + assert ex.status == HTTPStatus.NOT_FOUND + assert image_name in ex.body @pytest.mark.usefixtures("restore_db_per_class") diff --git a/tests/python/shared/utils/s3.py b/tests/python/shared/utils/s3.py index ee259081..b27f6a17 100644 --- a/tests/python/shared/utils/s3.py +++ b/tests/python/shared/utils/s3.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +from io import BytesIO + import boto3 from botocore.exceptions import ClientError @@ -40,6 +42,11 @@ class S3Client: else: raise + def download_fileobj(self, bucket: str, key: str) -> bytes: + with BytesIO() as data: + self.client.download_fileobj(Bucket=bucket, Key=key, Fileobj=data) + return data.getvalue() + 
def make_client() -> S3Client: return S3Client(