diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d99f096..5132de4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Request Status Code 500 "StopIteration" when exporting dataset - Generated OpenAPI schema for several endpoints - Annotation window might have top offset if try to move a locked object +- Image search in cloud storage () ### Security - TDB diff --git a/cvat-sdk/.openapi-generator/VERSION b/cvat-sdk/.openapi-generator/VERSION index 89648de3..66672d4e 100644 --- a/cvat-sdk/.openapi-generator/VERSION +++ b/cvat-sdk/.openapi-generator/VERSION @@ -1 +1 @@ -6.0.1-SNAPSHOT \ No newline at end of file +6.1.0-SNAPSHOT \ No newline at end of file diff --git a/cvat-sdk/cvat_sdk/api/cloud_storages_api.py b/cvat-sdk/cvat_sdk/api/cloud_storages_api.py index 21f6cb56..25027c63 100644 --- a/cvat-sdk/cvat_sdk/api/cloud_storages_api.py +++ b/cvat-sdk/cvat_sdk/api/cloud_storages_api.py @@ -385,9 +385,7 @@ class CloudStoragesApi(object): ) self.cloudstorages_retrieve_content_endpoint = _Endpoint( settings={ - "response_schema": ( - {str: (bool, date, datetime, dict, float, int, list, str, none_type)}, - ), + "response_schema": ([str],), "auth": ["SignatureAuthentication", "basicAuth", "cookieAuth", "tokenAuth"], "endpoint_path": "/api/cloudstorages/{id}/content", "operation_id": "cloudstorages_retrieve_content", @@ -1050,9 +1048,7 @@ class CloudStoragesApi(object): _request_auths: typing.Optional[typing.List] = None, _async_call: bool = False, **kwargs, - ) -> typing.Tuple[ - typing.Optional[typing.Dict[str, typing.Union[typing.Any, none_type]]], urllib3.HTTPResponse - ]: + ) -> typing.Tuple[typing.Optional[typing.List[str]], urllib3.HTTPResponse]: """Method returns a manifest content # noqa: E501 This method makes a synchronous HTTP request by default. To make an @@ -1102,7 +1098,7 @@ class CloudStoragesApi(object): _async_call (bool): execute request asynchronously Returns: - ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, HTTPResponse) + ([str], HTTPResponse) If the method is called asynchronously, returns the request thread. """ diff --git a/cvat-sdk/docs/CloudStoragesApi.md b/cvat-sdk/docs/CloudStoragesApi.md index 233641ba..cf5f3caa 100644 --- a/cvat-sdk/docs/CloudStoragesApi.md +++ b/cvat-sdk/docs/CloudStoragesApi.md @@ -734,7 +734,7 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) # **cloudstorages_retrieve_content** -> {str: (bool, date, datetime, dict, float, int, list, str, none_type)} cloudstorages_retrieve_content(id) +> [str] cloudstorages_retrieve_content(id) Method returns a manifest content @@ -826,7 +826,7 @@ Name | Type | Description | Notes ### Return type -**{str: (bool, date, datetime, dict, float, int, list, str, none_type)}** +**[str]** ### Authorization diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 269323f1..9f11db2e 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -348,8 +348,20 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False): os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file), db_data.cloud_storage.get_storage_dirname() ) + cloud_storage_manifest_prefix = os.path.dirname(manifest_file) cloud_storage_manifest.set_index() - sequence, content = cloud_storage_manifest.get_subset(sorted_media) + if cloud_storage_manifest_prefix: + sorted_media_without_manifest_prefix = [ + os.path.relpath(i, cloud_storage_manifest_prefix) for i in sorted_media + ] + sequence, raw_content = cloud_storage_manifest.get_subset(sorted_media_without_manifest_prefix) + def _add_prefix(properties): + file_name = properties['name'] + properties['name'] = os.path.join(cloud_storage_manifest_prefix, file_name) + return properties + content = list(map(_add_prefix, raw_content)) + else: + sequence, content = cloud_storage_manifest.get_subset(sorted_media) sorted_content = (i[1] for i in sorted(zip(sequence, content))) manifest.create(sorted_content) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index d4e4abcc..f516d862 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -30,6 +30,7 @@ from drf_spectacular.utils import ( OpenApiParameter, OpenApiResponse, PolymorphicProxySerializer, extend_schema_view, extend_schema ) +from drf_spectacular.plumbing import build_array_type, build_basic_type from rest_framework import mixins, serializers, status, viewsets from rest_framework.decorators import action @@ -1895,7 +1896,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet): location=OpenApiParameter.QUERY, type=OpenApiTypes.STR), ], responses={ - '200': OpenApiResponse(response=OpenApiTypes.OBJECT, description='A manifest content'), + '200': OpenApiResponse(response=build_array_type(build_basic_type(OpenApiTypes.STR)), description='A manifest content'), }) @action(detail=True, methods=['GET'], url_path='content') def content(self, request, pk): @@ -1906,6 +1907,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet): if not db_storage.manifests.count(): raise Exception('There is no manifest file') manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename) + manifest_prefix = os.path.dirname(manifest_path) file_status = storage.get_file_status(manifest_path) if file_status == CloudStorageStatus.NOT_FOUND: raise FileNotFoundError(errno.ENOENT, @@ -1921,7 +1923,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet): manifest = ImageManifestManager(full_manifest_path, db_storage.get_storage_dirname()) # need to update index manifest.set_index() - manifest_files = manifest.data + manifest_files = [os.path.join(manifest_prefix, f) for f in manifest.data] return Response(data=manifest_files, content_type="text/plain") except CloudStorageModel.DoesNotExist: @@ -1958,6 +1960,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet): raise Exception('Cannot get the cloud storage preview. There is no manifest file') preview_path = None for manifest_model in db_storage.manifests.all(): + manifest_prefix = os.path.dirname(manifest_model.filename) full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_model.filename) if not os.path.exists(full_manifest_path) or \ datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_model.filename): @@ -1971,7 +1974,8 @@ class CloudStorageViewSet(viewsets.ModelViewSet): if not len(manifest): continue preview_info = manifest[0] - preview_path = ''.join([preview_info['name'], preview_info['extension']]) + preview_filename = ''.join([preview_info['name'], preview_info['extension']]) + preview_path = os.path.join(manifest_prefix, preview_filename) break if not preview_path: msg = 'Cloud storage {} does not contain any images'.format(pk) diff --git a/tests/rest_api/assets/cloudstorages.json b/tests/rest_api/assets/cloudstorages.json index ad27a81e..4cda853e 100644 --- a/tests/rest_api/assets/cloudstorages.json +++ b/tests/rest_api/assets/cloudstorages.json @@ -32,7 +32,7 @@ "display_name": "Bucket 2", "id": 2, "manifests": [ - "manifest.jsonl" + "sub/manifest.jsonl" ], "organization": 2, "owner": { @@ -45,7 +45,7 @@ "provider_type": "AWS_S3_BUCKET", "resource": "private", "specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000", - "updated_date": "2022-03-17T07:23:59.309000Z" + "updated_date": "2022-07-13T12:46:45.587000Z" }, { "created_date": "2022-03-17T07:22:49.519000Z", diff --git a/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 b/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 index 79c5b0f9..57363779 100644 Binary files a/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 and b/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 differ diff --git a/tests/rest_api/assets/cvat_db/data.json b/tests/rest_api/assets/cvat_db/data.json index b60036ea..635db39f 100644 --- a/tests/rest_api/assets/cvat_db/data.json +++ b/tests/rest_api/assets/cvat_db/data.json @@ -4657,7 +4657,7 @@ "model": "engine.manifest", "pk": 2, "fields": { - "filename": "manifest.jsonl", + "filename": "sub/manifest.jsonl", "cloud_storage": 2 } }, @@ -4699,7 +4699,7 @@ "business2" ], "created_date": "2022-03-17T07:23:59.305Z", - "updated_date": "2022-03-17T07:23:59.309Z", + "updated_date": "2022-07-13T12:46:45.587Z", "credentials": "minio_access_key minio_secret_key", "credentials_type": "KEY_SECRET_KEY_PAIR", "specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000", diff --git a/tests/rest_api/assets/users.json b/tests/rest_api/assets/users.json index 5abb7899..b87fe91a 100644 --- a/tests/rest_api/assets/users.json +++ b/tests/rest_api/assets/users.json @@ -310,7 +310,7 @@ "is_active": true, "is_staff": true, "is_superuser": true, - "last_login": "2022-06-29T12:55:15.511000Z", + "last_login": "2022-07-13T12:46:07.059000Z", "last_name": "First", "url": "http://localhost:8080/api/users/1", "username": "admin1" diff --git a/tests/rest_api/docker-compose.minio.yml b/tests/rest_api/docker-compose.minio.yml index 19b04603..572c9482 100644 --- a/tests/rest_api/docker-compose.minio.yml +++ b/tests/rest_api/docker-compose.minio.yml @@ -48,10 +48,16 @@ services: $${MC_PATH} mb $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET}; for BUCKET in $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET}; do - $${MC_PATH} cp --recursive /storage/ $${BUCKET}; + if [ $${BUCKET} == $${MINIO_ALIAS}/$${PRIVATE_BUCKET} ] + then + FULL_PATH=$${BUCKET}/'sub' + else + FULL_PATH=$${BUCKET} + fi + $${MC_PATH} cp --recursive /storage/ $${FULL_PATH}; for i in 1 2; do - $${MC_PATH} cp /storage/manifest.jsonl $${BUCKET}/manifest_$${i}.jsonl; + $${MC_PATH} cp /storage/manifest.jsonl $${FULL_PATH}/manifest_$${i}.jsonl; done; done; $${MC_PATH} policy set public $${MINIO_ALIAS}/$${PUBLIC_BUCKET}; diff --git a/tests/rest_api/test_cloud_storages.py b/tests/rest_api/test_cloud_storages.py index 147959ab..2fb17553 100644 --- a/tests/rest_api/test_cloud_storages.py +++ b/tests/rest_api/test_cloud_storages.py @@ -17,7 +17,8 @@ class TestGetCloudStorage: response_data = response_data.get('results', response_data) assert response.status_code == HTTPStatus.OK - assert DeepDiff(data, response_data, ignore_order=True) == {} + assert DeepDiff(data, response_data, ignore_order=True, + exclude_paths="root['updated_date']") == {} def _test_cannot_see(self, user, storage_id, **kwargs): response = get_method(user, f'cloudstorages/{storage_id}', **kwargs) @@ -132,6 +133,14 @@ class TestPatchCloudStorage: 'manifest_2.jsonl', ], } + _PRIVATE_BUCKET_SPEC = { + 'display_name': 'New display name', + 'description': 'New description', + 'manifests': [ + 'sub/manifest_1.jsonl', + 'sub/manifest_2.jsonl', + ], + } _EXCLUDE_PATHS = [ f"root['{extra_field}']" for extra_field in { # unchanged fields @@ -145,7 +154,7 @@ class TestPatchCloudStorage: response_data = response_data.get('results', response_data) assert response.status_code == HTTPStatus.OK - assert DeepDiff(self._SPEC, response_data, ignore_order=True, + assert DeepDiff(spec, response_data, ignore_order=True, exclude_paths=self._EXCLUDE_PATHS) == {} assert response.status_code == HTTPStatus.OK @@ -186,6 +195,6 @@ class TestPatchCloudStorage: next((u for u in find_users(role=role, org=org_id) if u['id'] != cloud_storage['owner']['id']))['username'] if is_allow: - self._test_can_update(username, storage_id, self._SPEC, org_id=org_id) + self._test_can_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id) else: - self._test_cannot_update(username, storage_id, self._SPEC, org_id=org_id) + self._test_cannot_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id) diff --git a/tests/rest_api/test_tasks.py b/tests/rest_api/test_tasks.py index dd69b50c..c9ae5b15 100644 --- a/tests/rest_api/test_tasks.py +++ b/tests/rest_api/test_tasks.py @@ -35,6 +35,12 @@ def generate_image_files(count): return images +def get_cloud_storage_content(username, cloud_storage_id, manifest): + with make_api_client(username) as api_client: + (_, response) = api_client.cloud_storages_api.cloudstorages_retrieve_content(cloud_storage_id, manifest_path=manifest) + data = json.loads(response.data) + return data + @pytest.mark.usefixtures('dontchangedb') class TestGetTasks: @@ -272,7 +278,10 @@ class TestGetTaskDataset: self._test_export_project('admin1', task['id'], format='CVAT for images 1.1') @pytest.mark.usefixtures("changedb") +@pytest.mark.usefixtures("restore_cvat_data") class TestPostTaskData: + _USERNAME = 'admin1' + @staticmethod def _wait_until_task_is_created(api: TasksApi, task_id: int) -> RqStatus: for _ in range(100): @@ -282,14 +291,14 @@ class TestPostTaskData: sleep(1) raise Exception('Cannot create task') - def _test_create_task(self, username, spec, data, files): + def _test_create_task(self, username, spec, data, content_type, **kwargs): with make_api_client(username) as api_client: - (task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec)) + (task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec), **kwargs) assert response.status == HTTPStatus.CREATED - task_data = DataRequest(**data, client_files=list(files.values())) + task_data = DataRequest(**data) (_, response) = api_client.tasks_api.create_data(task.id, task_data, - _content_type="multipart/form-data") + _content_type=content_type, **kwargs) assert response.status == HTTPStatus.ACCEPTED status = self._wait_until_task_is_created(api_client.tasks_api, task.id) @@ -298,9 +307,8 @@ class TestPostTaskData: return task.id def test_can_create_task_with_defined_start_and_stop_frames(self): - username = 'admin1' task_spec = { - 'name': f'test {username} to create a task with defined start and stop frames', + 'name': f'test {self._USERNAME} to create a task with defined start and stop frames', "labels": [{ "name": "car", "color": "#ff00ff", @@ -319,15 +327,38 @@ class TestPostTaskData: task_data = { 'image_quality': 75, 'start_frame': 2, - 'stop_frame': 5 - } - task_files = { - f'client_files[{i}]': image for i, image in enumerate(generate_image_files(7)) + 'stop_frame': 5, + 'client_files': generate_image_files(7), } - task_id = self._test_create_task(username, task_spec, task_data, task_files) + task_id = self._test_create_task(self._USERNAME, task_spec, task_data, content_type="multipart/form-data") # check task size - with make_api_client(username) as api_client: + with make_api_client(self._USERNAME) as api_client: (task, _) = api_client.tasks_api.retrieve(task_id) assert task.size == 4 + + @pytest.mark.parametrize('cloud_storage_id, manifest, org', [ + (1, 'manifest.jsonl', ''), # public bucket + (2, 'sub/manifest.jsonl', 'org2'), # private bucket + ]) + def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, org): + cloud_storage_content = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest) + cloud_storage_content.append(manifest) + + task_spec = { + "name": f"Task with files from cloud storage {cloud_storage_id}", + "labels": [{ + "name": "car", + }], + } + + data_spec = { + 'image_quality': 75, + 'use_cache': True, + 'storage': 'cloud_storage', + 'cloud_storage_id': cloud_storage_id, + 'server_files': cloud_storage_content, + } + + _ = self._test_create_task(self._USERNAME, task_spec, data_spec, content_type="application/json", org=org)