Remove preview downloading when creating a task with cloud storage data (#5499)

This PR removes the downloading of previews from cloud storage during task
creation (this was skipped in PR #5478). In addition, I had to change the test
that checks that the file name appears in the message when the specified file
is not found in the bucket, because the preview is no longer downloaded at the
task-creation stage.
main
Maria Khrustaleva 3 years ago committed by GitHub
parent f3843aa74f
commit 47860c9d22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -318,6 +318,9 @@ def _create_task_manifest_based_on_cloud_storage_manifest(
content = list(map(_add_prefix, raw_content))
else:
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
if not content:
raise ValidationError('There is no intersection of the files specified'
'in the request with the contents of the bucket')
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
@ -354,8 +357,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
)
if is_data_in_cloud:
cloud_storage_instance = db_storage_to_storage_instance(db_data.cloud_storage)
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
@ -392,13 +393,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
elif is_data_in_cloud:
sorted_media = sort(media['image'], data['sorting_method'])
# download previews from cloud storage
data_size = len(sorted_media)
segment_step, *_ = _get_task_segment_data(db_task, data_size)
for preview_frame in range(0, data_size, segment_step):
preview = sorted_media[preview_frame]
cloud_storage_instance.download_file(preview, os.path.join(upload_dir, preview))
# Define task manifest content based on cloud storage manifest content and uploaded files
_create_task_manifest_based_on_cloud_storage_manifest(
sorted_media, cloud_storage_manifest_prefix,

@ -425,7 +425,8 @@ class TestPostTaskData:
sleep(1)
raise Exception("Cannot create task")
def _test_create_task(self, username, spec, data, content_type, **kwargs):
@staticmethod
def _test_create_task(username, spec, data, content_type, **kwargs):
with make_api_client(username) as api_client:
(task, response) = api_client.tasks_api.create(spec, **kwargs)
assert response.status == HTTPStatus.CREATED
@ -435,7 +436,7 @@ class TestPostTaskData:
)
assert response.status == HTTPStatus.ACCEPTED
status = self._wait_until_task_is_created(api_client.tasks_api, task.id)
status = TestPostTaskData._wait_until_task_is_created(api_client.tasks_api, task.id)
assert status.state.value == "Finished"
return task.id
@ -798,16 +799,22 @@ class TestPostTaskData:
status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec)
assert "No media data found" in status.message
@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data")
class TestWorkWithTask:
_USERNAME = "admin1"
@pytest.mark.with_external_services
@pytest.mark.parametrize(
"cloud_storage_id, manifest, org",
[(1, "manifest.jsonl", "")], # public bucket
)
def test_cannot_create_task_with_mythical_cloud_storage_data(
self, cloud_storage_id, manifest, org
def test_work_with_task_containing_non_stable_cloud_storage_files(
self, cloud_storage_id, manifest, org, cloud_storages, request
):
mythical_file = "mythical.jpg"
cloud_storage_content = [mythical_file, manifest]
image_name = "image_case_65_1.png"
cloud_storage_content = [image_name, manifest]
task_spec = {
"name": f"Task with mythical file from cloud storage {cloud_storage_id}",
@ -821,8 +828,32 @@ class TestPostTaskData:
"server_files": cloud_storage_content,
}
status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec, org=org)
assert mythical_file in status.message
task_id = TestPostTaskData._test_create_task(
self._USERNAME, task_spec, data_spec, content_type="application/json", org=org
)
# save the image from the "public" bucket and remove it temporarily
s3_client = s3.make_client()
bucket_name = cloud_storages[cloud_storage_id]["resource"]
image = s3_client.download_fileobj(bucket_name, image_name)
s3_client.remove_file(bucket_name, image_name)
request.addfinalizer(
partial(s3_client.create_file, bucket=bucket_name, filename=image_name, data=image)
)
with make_api_client(self._USERNAME) as api_client:
try:
api_client.tasks_api.retrieve_data(
task_id, number=0, quality="original", type="frame"
)
raise AssertionError("Frame should not exist")
except AssertionError:
raise
except Exception as ex:
assert ex.status == HTTPStatus.NOT_FOUND
assert image_name in ex.body
@pytest.mark.usefixtures("restore_db_per_class")

@ -2,6 +2,8 @@
#
# SPDX-License-Identifier: MIT
from io import BytesIO
import boto3
from botocore.exceptions import ClientError
@ -40,6 +42,11 @@ class S3Client:
else:
raise
def download_fileobj(self, bucket: str, key: str) -> bytes:
    """Return the content of the object stored under ``key`` in ``bucket``.

    The object is streamed into an in-memory buffer through the wrapped
    client's ``download_fileobj`` call, and the buffered bytes are returned.
    Any exception raised by the client propagates to the caller.
    """
    buffer = BytesIO()
    try:
        self.client.download_fileobj(Bucket=bucket, Key=key, Fileobj=buffer)
        # Read the payload before the buffer is closed in ``finally``.
        return buffer.getvalue()
    finally:
        buffer.close()
def make_client() -> S3Client:
return S3Client(

Loading…
Cancel
Save