diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f916b1b..f945964b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Progressbars in CLI for file uploading and downloading () - `utils/cli` changed to `cvat-cli` package () - Support custom file name for backup () +- Support source and target storages (server part) () +- Tests for import/export annotation, dataset, backup from/to cloud storage () ### Changed - Bumped nuclio version to 1.8.14 () diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 42f164ff..542198cf 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -278,7 +278,7 @@ class AnnotationMixin: if serializer.is_valid(raise_exception=True): return Response(serializer.data) - def import_annotations(self, request, pk, db_obj, import_func, rq_func): + def import_annotations(self, request, pk, db_obj, import_func, rq_func, rq_id): use_default_location = request.query_params.get('use_default_location', True) use_settings = strtobool(str(use_default_location)) obj = db_obj if use_settings else request.query_params @@ -291,9 +291,6 @@ class AnnotationMixin: if location_conf['location'] == Location.CLOUD_STORAGE: format_name = request.query_params.get('format') file_name = request.query_params.get('filename') - rq_id = "{}@/api/{}/{}/annotations/upload".format( - self._object.__class__.__name__.lower(), request.user, pk - ) return import_func( request=request, diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 2e95c357..6791e368 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -625,9 +625,8 @@ class ProjectReadSerializer(serializers.ModelSerializer): class Meta: model = models.Project fields = ('url', 'id', 'name', 'labels', 'tasks', 'owner', 'assignee', - 'bug_tracker', 'task_subsets', # 'owner_id', 'assignee_id', - 'created_date', 'updated_date', 
'status', 'dimension', 'organization', - 'target_storage', 'source_storage', + 'bug_tracker', 'task_subsets', 'created_date', 'updated_date', 'status', + 'dimension', 'organization', 'target_storage', 'source_storage', ) read_only_fields = ('created_date', 'updated_date', 'status', 'owner', 'assignee', 'task_subsets', 'dimension', 'organization', 'tasks', diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index e840dead..df0589f1 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -314,7 +314,7 @@ class ProjectViewSet(viewsets.ModelViewSet, UploadMixin, AnnotationMixin, Serial parameters=[ OpenApiParameter('format', description='Desired output format name\n' 'You can get the list of supported formats at:\n/server/annotation/formats', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), OpenApiParameter('filename', description='Desired output file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), OpenApiParameter('action', description='Used to start downloading process after annotation file had been created', @@ -367,7 +367,8 @@ class ProjectViewSet(viewsets.ModelViewSet, UploadMixin, AnnotationMixin, Serial pk=pk, db_obj=self._object, import_func=_import_project_dataset, - rq_func=dm.project.import_dataset_as_project + rq_func=dm.project.import_dataset_as_project, + rq_id=f"/api/project/{pk}/dataset_import", ) else: action = request.query_params.get("action", "").lower() @@ -986,6 +987,7 @@ class TaskViewSet(UploadMixin, AnnotationMixin, viewsets.ModelViewSet, Serialize db_obj=self._object, import_func=_import_annotations, rq_func=dm.task.import_task_annotations, + rq_id = "{}@/api/tasks/{}/annotations/upload".format(request.user, pk) ) elif request.method == 'PUT': format_name = request.query_params.get('format') @@ -1303,6 +1305,7 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, 
db_obj=self._object.segment.task, import_func=_import_annotations, rq_func=dm.task.import_job_annotations, + rq_id = "{}@/api/jobs/{}/annotations/upload".format(request.user, pk) ) elif request.method == 'PUT': diff --git a/tests/rest_api/assets/cloudstorages.json b/tests/rest_api/assets/cloudstorages.json index 9ade4829..ad27a81e 100644 --- a/tests/rest_api/assets/cloudstorages.json +++ b/tests/rest_api/assets/cloudstorages.json @@ -1,8 +1,30 @@ { - "count": 2, + "count": 3, "next": null, "previous": null, "results": [ + { + "created_date": "2022-06-29T12:56:18.257000Z", + "credentials_type": "KEY_SECRET_KEY_PAIR", + "description": "Bucket for importing and exporting annotations and backups", + "display_name": "Import/Export bucket", + "id": 3, + "manifests": [ + "manifest.jsonl" + ], + "organization": 2, + "owner": { + "first_name": "Admin", + "id": 1, + "last_name": "First", + "url": "http://localhost:8080/api/users/1", + "username": "admin1" + }, + "provider_type": "AWS_S3_BUCKET", + "resource": "importexportbucket", + "specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000", + "updated_date": "2022-06-29T12:56:18.264000Z" + }, { "created_date": "2022-03-17T07:23:59.305000Z", "credentials_type": "KEY_SECRET_KEY_PAIR", diff --git a/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 b/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 index 2dd5de83..79c5b0f9 100644 Binary files a/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 and b/tests/rest_api/assets/cvat_db/cvat_data.tar.bz2 differ diff --git a/tests/rest_api/assets/cvat_db/data.json b/tests/rest_api/assets/cvat_db/data.json index d6a70063..9aaba081 100644 --- a/tests/rest_api/assets/cvat_db/data.json +++ b/tests/rest_api/assets/cvat_db/data.json @@ -1413,7 +1413,7 @@ "pk": 1, "fields": { "password": "pbkdf2_sha256$260000$DevmxlmLwciP1P6sZs2Qag$U9DFtjTWx96Sk95qY6UXVcvpdQEP2LcoFBftk5D2RKY=", - "last_login": "2022-06-22T09:20:25.189Z", + "last_login": "2022-06-29T12:55:15.511Z", "is_superuser": true, 
"username": "admin1", "first_name": "Admin", @@ -3710,11 +3710,11 @@ "assignee": 3, "bug_tracker": "", "created_date": "2021-12-14T19:52:37.278Z", - "updated_date": "2022-03-28T13:04:54.669Z", + "updated_date": "2022-06-30T08:56:45.601Z", "status": "annotation", "organization": 2, - "source_storage": null, - "target_storage": null + "source_storage": 3, + "target_storage": 1 } }, { @@ -3898,7 +3898,7 @@ "assignee": 19, "bug_tracker": "", "created_date": "2022-03-05T10:32:19.149Z", - "updated_date": "2022-03-05T10:32:35.568Z", + "updated_date": "2022-06-30T08:56:45.594Z", "overlap": 0, "segment_size": 11, "status": "annotation", @@ -3906,8 +3906,8 @@ "dimension": "2d", "subset": "Train", "organization": 2, - "source_storage": null, - "target_storage": null + "source_storage": 4, + "target_storage": 2 } }, { @@ -6158,6 +6158,14 @@ "cloud_storage": 2 } }, +{ + "model": "engine.manifest", + "pk": 3, + "fields": { + "filename": "manifest.jsonl", + "cloud_storage": 3 + } +}, { "model": "engine.cloudstorage", "pk": 1, @@ -6191,5 +6199,54 @@ "description": "", "organization": 2 } +}, +{ + "model": "engine.cloudstorage", + "pk": 3, + "fields": { + "provider_type": "AWS_S3_BUCKET", + "resource": "importexportbucket", + "display_name": "Import/Export bucket", + "owner": 1, + "created_date": "2022-06-29T12:56:18.257Z", + "updated_date": "2022-06-29T12:56:18.264Z", + "credentials": "minio_access_key minio_secret_key", + "credentials_type": "KEY_SECRET_KEY_PAIR", + "specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000", + "description": "Bucket for importing and exporting annotations and backups", + "organization": 2 + } +}, +{ + "model": "engine.storage", + "pk": 1, + "fields": { + "location": "cloud_storage", + "cloud_storage_id": 2 + } +}, +{ + "model": "engine.storage", + "pk": 2, + "fields": { + "location": "cloud_storage", + "cloud_storage_id": 2 + } +}, +{ + "model": "engine.storage", + "pk": 3, + "fields": { + "location": "cloud_storage", + "cloud_storage_id": 2 
+ } +}, +{ + "model": "engine.storage", + "pk": 4, + "fields": { + "location": "cloud_storage", + "cloud_storage_id": 2 + } } ] diff --git a/tests/rest_api/assets/projects.json b/tests/rest_api/assets/projects.json index 6d5a2cbc..80fbd03f 100644 --- a/tests/rest_api/assets/projects.json +++ b/tests/rest_api/assets/projects.json @@ -107,16 +107,24 @@ "url": "http://localhost:8080/api/users/10", "username": "business1" }, - "source_storage": null, + "source_storage": { + "cloud_storage_id": 2, + "id": 3, + "location": "cloud_storage" + }, "status": "annotation", - "target_storage": null, + "target_storage": { + "cloud_storage_id": 2, + "id": 1, + "location": "cloud_storage" + }, "task_subsets": [ "Train" ], "tasks": [ 11 ], - "updated_date": "2022-03-28T13:04:54.669000Z", + "updated_date": "2022-06-30T08:56:45.601000Z", "url": "http://localhost:8080/api/projects/2" }, { diff --git a/tests/rest_api/assets/tasks.json b/tests/rest_api/assets/tasks.json index a3d21bea..998b6307 100644 --- a/tests/rest_api/assets/tasks.json +++ b/tests/rest_api/assets/tasks.json @@ -167,11 +167,19 @@ } ], "size": 11, - "source_storage": null, + "source_storage": { + "cloud_storage_id": 2, + "id": 4, + "location": "cloud_storage" + }, "status": "annotation", "subset": "Train", - "target_storage": null, - "updated_date": "2022-03-05T10:32:35.568000Z", + "target_storage": { + "cloud_storage_id": 2, + "id": 2, + "location": "cloud_storage" + }, + "updated_date": "2022-06-30T08:56:45.594000Z", "url": "http://localhost:8080/api/tasks/11" }, { diff --git a/tests/rest_api/assets/users.json b/tests/rest_api/assets/users.json index d14cf1b0..5abb7899 100644 --- a/tests/rest_api/assets/users.json +++ b/tests/rest_api/assets/users.json @@ -310,7 +310,7 @@ "is_active": true, "is_staff": true, "is_superuser": true, - "last_login": "2022-06-22T09:20:25.189000Z", + "last_login": "2022-06-29T12:55:15.511000Z", "last_name": "First", "url": "http://localhost:8080/api/users/1", "username": "admin1" diff 
--git a/tests/rest_api/docker-compose.minio.yml b/tests/rest_api/docker-compose.minio.yml index ccc7d7d2..19b04603 100644 --- a/tests/rest_api/docker-compose.minio.yml +++ b/tests/rest_api/docker-compose.minio.yml @@ -37,6 +37,7 @@ services: PRIVATE_BUCKET: "private" PUBLIC_BUCKET: "public" TEST_BUCKET: "test" + IMPORT_EXPORT_BUCKET: "importexportbucket" volumes: - ./tests/cypress/integration/actions_tasks/assets/case_65_manifest/:/storage networks: @@ -44,8 +45,8 @@ services: entrypoint: > /bin/sh -c " $${MC_PATH} config host add --quiet --api s3v4 $${MINIO_ALIAS} $${MINIO_HOST} $${MINIO_ACCESS_KEY} $${MINIO_SECRET_KEY}; - $${MC_PATH} mb $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET}; - for BUCKET in $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET}; + $${MC_PATH} mb $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET}; + for BUCKET in $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET}; do $${MC_PATH} cp --recursive /storage/ $${BUCKET}; for i in 1 2; diff --git a/tests/rest_api/requirements.txt b/tests/rest_api/requirements.txt index 34d3f348..148f9427 100644 --- a/tests/rest_api/requirements.txt +++ b/tests/rest_api/requirements.txt @@ -1,4 +1,5 @@ pytest==6.2.5 requests==2.26.0 deepdiff==5.6.0 +boto3==1.17.61 Pillow==9.0.1 diff --git a/tests/rest_api/test_resource_import_export.py b/tests/rest_api/test_resource_import_export.py new file mode 100644 index 00000000..d87cfc58 --- /dev/null +++ b/tests/rest_api/test_resource_import_export.py @@ -0,0 +1,244 @@ +import pytest +import boto3 +import functools +import json + +from botocore.exceptions import ClientError +from http import HTTPStatus + +from .utils.config import ( + get_method, post_method, MINIO_KEY, MINIO_SECRET_KEY, 
MINIO_ENDPOINT_URL, +) + +FILENAME_TEMPLATE = 'cvat/{}/{}.zip' +FORMAT = 'COCO 1.0' + +def _use_custom_settings(obj, resource, cloud_storage_id): + return { + 'filename': FILENAME_TEMPLATE.format(obj, resource), + 'use_default_location': False, + 'location': 'cloud_storage', + 'cloud_storage_id': cloud_storage_id, + 'format': FORMAT, + } + +def _use_default_settings(obj, resource): + return { + 'filename': FILENAME_TEMPLATE.format(obj, resource), + 'use_default_location': True, + 'format': FORMAT, + } + +def define_client(): + s3 = boto3.resource( + 's3', + aws_access_key_id=MINIO_KEY, + aws_secret_access_key=MINIO_SECRET_KEY, + endpoint_url= MINIO_ENDPOINT_URL, + ) + return s3.meta.client + +def assert_file_does_not_exist(client, bucket, filename): + try: + client.head_object(Bucket=bucket, Key=filename) + raise AssertionError(f'File {filename} on bucket {bucket} already exists') + except ClientError: + pass + +def assert_file_exists(client, bucket, filename): + try: + client.head_object(Bucket=bucket, Key=filename) + except ClientError: + raise AssertionError(f"File {filename} on bucket {bucket} doesn't exist") + +def assert_file_status(func): + @functools.wraps(func) + def wrapper(user, storage_conf, *args, **kwargs): + filename = kwargs['filename'] + bucket = storage_conf['resource'] + # get storage client + client = define_client() + # check that file doesn't exist on the bucket + assert_file_does_not_exist(client, bucket, filename) + func(user, storage_conf, *args, **kwargs) + # check that file exists on the bucket + assert_file_exists(client, bucket, filename) + return wrapper + +def remove_asset(bucket, filename): + client = define_client() + client.delete_object(Bucket=bucket, Key=filename) + +@assert_file_status +def _save_resource_to_cloud_storage(user, storage_conf, obj_id, obj, resource, **kwargs): + response = get_method(user, f'{obj}/{obj_id}/{resource}', **kwargs) + status = response.status_code + + while status != HTTPStatus.OK: + assert status in 
(HTTPStatus.CREATED, HTTPStatus.ACCEPTED) + response = get_method(user, f'{obj}/{obj_id}/{resource}', action='download', **kwargs) + status = response.status_code + +def _idempotent_saving_resource_to_cloud_storage(*args, **kwargs): + _save_resource_to_cloud_storage(*args, **kwargs) + remove_asset(args[1]['resource'], kwargs['filename']) + +@pytest.mark.usefixtures('dontchangedb') +class TestSaveResource: + _USERNAME = 'admin1' + _ORG = 2 + + @pytest.mark.parametrize('cloud_storage_id', [3]) + @pytest.mark.parametrize('obj_id, obj, resource', [ + (2, 'projects', 'annotations'), + (2, 'projects', 'dataset'), + (2, 'projects', 'backup'), + (11, 'tasks', 'annotations'), + (11, 'tasks', 'dataset'), + (11, 'tasks', 'backup'), + (16, 'jobs', 'annotations'), + (16, 'jobs', 'dataset'), + ]) + def test_save_resource_to_cloud_storage_with_specific_location( + self, cloud_storage_id, obj_id, obj, resource, cloud_storages + ): + cloud_storage = cloud_storages[cloud_storage_id] + kwargs = _use_custom_settings(obj, resource, cloud_storage_id) + if resource == 'backup': + kwargs.pop('format') + + _idempotent_saving_resource_to_cloud_storage(self._USERNAME, cloud_storage, + obj_id, obj, resource, org_id=self._ORG, **kwargs) + + @pytest.mark.parametrize('obj_id, obj, resource', [ + (2, 'projects', 'annotations'), + (2, 'projects', 'dataset'), + (2, 'projects', 'backup'), + (11, 'tasks', 'annotations'), + (11, 'tasks', 'dataset'), + (11, 'tasks', 'backup'), + (16, 'jobs', 'annotations'), + (16, 'jobs', 'dataset'), + ]) + def test_save_resource_to_cloud_storage_with_default_location( + self, obj_id, obj, resource, projects, tasks, jobs, cloud_storages, + ): + objects = { + 'projects': projects, + 'tasks': tasks, + 'jobs': jobs, + } + if obj in ('projects', 'tasks'): + cloud_storage_id = objects[obj][obj_id]['target_storage']['cloud_storage_id'] + else: + task_id = jobs[obj_id]['task_id'] + cloud_storage_id = tasks[task_id]['target_storage']['cloud_storage_id'] + cloud_storage = 
cloud_storages[cloud_storage_id] + + kwargs = _use_default_settings(obj, resource) + + if resource == 'backup': + kwargs.pop('format') + + _idempotent_saving_resource_to_cloud_storage(self._USERNAME, cloud_storage, + obj_id, obj, resource, org_id=self._ORG, **kwargs) + +def _import_annotations_from_cloud_storage(user, obj_id, obj, **kwargs): + url = f'{obj}/{obj_id}/annotations' + response = post_method(user, url, data=None, **kwargs) + status = response.status_code + + while status != HTTPStatus.CREATED: + assert status == HTTPStatus.ACCEPTED + response = post_method(user, url, data=None, **kwargs) + status = response.status_code + +def _import_backup_from_cloud_storage(user, obj_id, obj, **kwargs): + url = f'{obj}/backup' + response = post_method(user, url, data=None, **kwargs) + status = response.status_code + + while status != HTTPStatus.CREATED: + assert status == HTTPStatus.ACCEPTED + data = json.loads(response.content.decode('utf8')) + response = post_method(user, url, data=data, **kwargs) + status = response.status_code + +def _import_dataset_from_cloud_storage(user, obj_id, obj, **kwargs): + url = f'{obj}/{obj_id}/dataset' + response = post_method(user, url, data=None, **kwargs) + status = response.status_code + + while status != HTTPStatus.CREATED: + assert status == HTTPStatus.ACCEPTED + response = get_method(user, url, action='import_status') + status = response.status_code + +@pytest.mark.usefixtures('changedb') +@pytest.mark.usefixtures('restore_cvat_data') +class TestImportResource: + _USERNAME = 'admin1' + _ORG = 2 + + @pytest.mark.parametrize('cloud_storage_id', [3]) + @pytest.mark.parametrize('obj_id, obj, resource', [ + (2, 'projects', 'dataset'), + (2, 'projects', 'backup'), + (11, 'tasks', 'annotations'), + (11, 'tasks', 'backup'), + (16, 'jobs', 'annotations'), + ]) + def test_import_resource_from_cloud_storage_with_specific_location( + self, cloud_storage_id, obj_id, obj, resource, cloud_storages + ): + cloud_storage = 
cloud_storages[cloud_storage_id] + kwargs = _use_custom_settings(obj, resource, cloud_storage_id) + export_kwargs = _use_custom_settings(obj, resource, cloud_storage_id) + + if resource == 'backup': + kwargs.pop('format') + kwargs.pop('use_default_location') + export_kwargs.pop('format') + + # export current resource to cloud storage + _save_resource_to_cloud_storage(self._USERNAME, cloud_storage, obj_id, obj, resource, org_id=self._ORG, **export_kwargs) + + import_resource = { + 'annotations': _import_annotations_from_cloud_storage, + 'dataset': _import_dataset_from_cloud_storage, + 'backup': _import_backup_from_cloud_storage, + } + import_resource[resource](self._USERNAME, obj_id, obj, org_id=self._ORG, **kwargs) + remove_asset(cloud_storage['resource'], kwargs['filename']) + + @pytest.mark.parametrize('obj_id, obj, resource', [ + (2, 'projects', 'dataset'), + (11, 'tasks', 'annotations'), + (16, 'jobs', 'annotations'), + ]) + def test_import_resource_from_cloud_storage_with_default_location( + self, obj_id, obj, resource, projects, tasks, jobs, cloud_storages, + ): + objects = { + 'projects': projects, + 'tasks': tasks, + 'jobs': jobs, + } + if obj in ('projects', 'tasks'): + cloud_storage_id = objects[obj][obj_id]['source_storage']['cloud_storage_id'] + else: + task_id = jobs[obj_id]['task_id'] + cloud_storage_id = tasks[task_id]['source_storage']['cloud_storage_id'] + cloud_storage = cloud_storages[cloud_storage_id] + kwargs = _use_default_settings(obj, resource) + + # export current resource to cloud storage + _save_resource_to_cloud_storage(self._USERNAME, cloud_storage, obj_id, obj, resource, org_id=self._ORG, **kwargs) + + import_resource = { + 'annotations': _import_annotations_from_cloud_storage, + 'dataset': _import_dataset_from_cloud_storage, + 'backup': _import_backup_from_cloud_storage, + } + import_resource[resource](self._USERNAME, obj_id, obj, org_id=self._ORG, **kwargs) + remove_asset(cloud_storage['resource'], kwargs['filename']) diff --git 
a/tests/rest_api/utils/config.py b/tests/rest_api/utils/config.py index a114a7d6..80a98c91 100644 --- a/tests/rest_api/utils/config.py +++ b/tests/rest_api/utils/config.py @@ -12,6 +12,11 @@ USER_PASS = '!Q@W#E$R' # nosec BASE_URL = 'http://localhost:8080/' API_URL = BASE_URL + 'api/' +# MinIO settings +MINIO_KEY = 'minio_access_key' +MINIO_SECRET_KEY = 'minio_secret_key' # nosec +MINIO_ENDPOINT_URL = 'http://localhost:9000' + def _to_query_params(**kwargs): return '&'.join([f'{k}={v}' for k,v in kwargs.items()])