Images should be searched relative to the manifest file on cloud storage (#8)

main
Maria Khrustaleva 4 years ago committed by GitHub
parent 4cd6b05c02
commit 164c2c34df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Request Status Code 500 "StopIteration" when exporting dataset
- Generated OpenAPI schema for several endpoints
- Annotation window might have top offset if try to move a locked object
- Image search in cloud storage (<https://github.com/cvat-ai/cvat/pull/8>)
### Security
- TBD

@ -1 +1 @@
6.0.1-SNAPSHOT
6.1.0-SNAPSHOT

@ -385,9 +385,7 @@ class CloudStoragesApi(object):
)
self.cloudstorages_retrieve_content_endpoint = _Endpoint(
settings={
"response_schema": (
{str: (bool, date, datetime, dict, float, int, list, str, none_type)},
),
"response_schema": ([str],),
"auth": ["SignatureAuthentication", "basicAuth", "cookieAuth", "tokenAuth"],
"endpoint_path": "/api/cloudstorages/{id}/content",
"operation_id": "cloudstorages_retrieve_content",
@ -1050,9 +1048,7 @@ class CloudStoragesApi(object):
_request_auths: typing.Optional[typing.List] = None,
_async_call: bool = False,
**kwargs,
) -> typing.Tuple[
typing.Optional[typing.Dict[str, typing.Union[typing.Any, none_type]]], urllib3.HTTPResponse
]:
) -> typing.Tuple[typing.Optional[typing.List[str]], urllib3.HTTPResponse]:
"""Method returns a manifest content # noqa: E501
This method makes a synchronous HTTP request by default. To make an
@ -1102,7 +1098,7 @@ class CloudStoragesApi(object):
_async_call (bool): execute request asynchronously
Returns:
({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, HTTPResponse)
([str], HTTPResponse)
If the method is called asynchronously, returns the request
thread.
"""

@ -734,7 +734,7 @@ Name | Type | Description | Notes
[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)
# **cloudstorages_retrieve_content**
> {str: (bool, date, datetime, dict, float, int, list, str, none_type)} cloudstorages_retrieve_content(id)
> [str] cloudstorages_retrieve_content(id)
Method returns a manifest content
@ -826,7 +826,7 @@ Name | Type | Description | Notes
### Return type
**{str: (bool, date, datetime, dict, float, int, list, str, none_type)}**
**[str]**
### Authorization

@ -348,8 +348,20 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest_prefix = os.path.dirname(manifest_file)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
if cloud_storage_manifest_prefix:
sorted_media_without_manifest_prefix = [
os.path.relpath(i, cloud_storage_manifest_prefix) for i in sorted_media
]
sequence, raw_content = cloud_storage_manifest.get_subset(sorted_media_without_manifest_prefix)
def _add_prefix(properties):
    # Re-attach the manifest's directory to the entry name. The dict is
    # updated in place and handed back so the helper can be used with map().
    properties['name'] = os.path.join(cloud_storage_manifest_prefix, properties['name'])
    return properties
content = list(map(_add_prefix, raw_content))
else:
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)

@ -30,6 +30,7 @@ from drf_spectacular.utils import (
OpenApiParameter, OpenApiResponse, PolymorphicProxySerializer,
extend_schema_view, extend_schema
)
from drf_spectacular.plumbing import build_array_type, build_basic_type
from rest_framework import mixins, serializers, status, viewsets
from rest_framework.decorators import action
@ -1895,7 +1896,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
location=OpenApiParameter.QUERY, type=OpenApiTypes.STR),
],
responses={
'200': OpenApiResponse(response=OpenApiTypes.OBJECT, description='A manifest content'),
'200': OpenApiResponse(response=build_array_type(build_basic_type(OpenApiTypes.STR)), description='A manifest content'),
})
@action(detail=True, methods=['GET'], url_path='content')
def content(self, request, pk):
@ -1906,6 +1907,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
if not db_storage.manifests.count():
raise Exception('There is no manifest file')
manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename)
manifest_prefix = os.path.dirname(manifest_path)
file_status = storage.get_file_status(manifest_path)
if file_status == CloudStorageStatus.NOT_FOUND:
raise FileNotFoundError(errno.ENOENT,
@ -1921,7 +1923,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
manifest = ImageManifestManager(full_manifest_path, db_storage.get_storage_dirname())
# need to update index
manifest.set_index()
manifest_files = manifest.data
manifest_files = [os.path.join(manifest_prefix, f) for f in manifest.data]
return Response(data=manifest_files, content_type="text/plain")
except CloudStorageModel.DoesNotExist:
@ -1958,6 +1960,7 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
raise Exception('Cannot get the cloud storage preview. There is no manifest file')
preview_path = None
for manifest_model in db_storage.manifests.all():
manifest_prefix = os.path.dirname(manifest_model.filename)
full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_model.filename)
if not os.path.exists(full_manifest_path) or \
datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_model.filename):
@ -1971,7 +1974,8 @@ class CloudStorageViewSet(viewsets.ModelViewSet):
if not len(manifest):
continue
preview_info = manifest[0]
preview_path = ''.join([preview_info['name'], preview_info['extension']])
preview_filename = ''.join([preview_info['name'], preview_info['extension']])
preview_path = os.path.join(manifest_prefix, preview_filename)
break
if not preview_path:
msg = 'Cloud storage {} does not contain any images'.format(pk)

@ -32,7 +32,7 @@
"display_name": "Bucket 2",
"id": 2,
"manifests": [
"manifest.jsonl"
"sub/manifest.jsonl"
],
"organization": 2,
"owner": {
@ -45,7 +45,7 @@
"provider_type": "AWS_S3_BUCKET",
"resource": "private",
"specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000",
"updated_date": "2022-03-17T07:23:59.309000Z"
"updated_date": "2022-07-13T12:46:45.587000Z"
},
{
"created_date": "2022-03-17T07:22:49.519000Z",

@ -4657,7 +4657,7 @@
"model": "engine.manifest",
"pk": 2,
"fields": {
"filename": "manifest.jsonl",
"filename": "sub/manifest.jsonl",
"cloud_storage": 2
}
},
@ -4699,7 +4699,7 @@
"business2"
],
"created_date": "2022-03-17T07:23:59.305Z",
"updated_date": "2022-03-17T07:23:59.309Z",
"updated_date": "2022-07-13T12:46:45.587Z",
"credentials": "minio_access_key minio_secret_key",
"credentials_type": "KEY_SECRET_KEY_PAIR",
"specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000",

@ -310,7 +310,7 @@
"is_active": true,
"is_staff": true,
"is_superuser": true,
"last_login": "2022-06-29T12:55:15.511000Z",
"last_login": "2022-07-13T12:46:07.059000Z",
"last_name": "First",
"url": "http://localhost:8080/api/users/1",
"username": "admin1"

@ -48,10 +48,16 @@ services:
$${MC_PATH} mb $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET};
for BUCKET in $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET};
do
$${MC_PATH} cp --recursive /storage/ $${BUCKET};
if [ $${BUCKET} == $${MINIO_ALIAS}/$${PRIVATE_BUCKET} ]
then
FULL_PATH=$${BUCKET}/'sub'
else
FULL_PATH=$${BUCKET}
fi
$${MC_PATH} cp --recursive /storage/ $${FULL_PATH};
for i in 1 2;
do
$${MC_PATH} cp /storage/manifest.jsonl $${BUCKET}/manifest_$${i}.jsonl;
$${MC_PATH} cp /storage/manifest.jsonl $${FULL_PATH}/manifest_$${i}.jsonl;
done;
done;
$${MC_PATH} policy set public $${MINIO_ALIAS}/$${PUBLIC_BUCKET};

@ -17,7 +17,8 @@ class TestGetCloudStorage:
response_data = response_data.get('results', response_data)
assert response.status_code == HTTPStatus.OK
assert DeepDiff(data, response_data, ignore_order=True) == {}
assert DeepDiff(data, response_data, ignore_order=True,
exclude_paths="root['updated_date']") == {}
def _test_cannot_see(self, user, storage_id, **kwargs):
response = get_method(user, f'cloudstorages/{storage_id}', **kwargs)
@ -132,6 +133,14 @@ class TestPatchCloudStorage:
'manifest_2.jsonl',
],
}
_PRIVATE_BUCKET_SPEC = {
'display_name': 'New display name',
'description': 'New description',
'manifests': [
'sub/manifest_1.jsonl',
'sub/manifest_2.jsonl',
],
}
_EXCLUDE_PATHS = [
f"root['{extra_field}']" for extra_field in {
# unchanged fields
@ -145,7 +154,7 @@ class TestPatchCloudStorage:
response_data = response_data.get('results', response_data)
assert response.status_code == HTTPStatus.OK
assert DeepDiff(self._SPEC, response_data, ignore_order=True,
assert DeepDiff(spec, response_data, ignore_order=True,
exclude_paths=self._EXCLUDE_PATHS) == {}
assert response.status_code == HTTPStatus.OK
@ -186,6 +195,6 @@ class TestPatchCloudStorage:
next((u for u in find_users(role=role, org=org_id) if u['id'] != cloud_storage['owner']['id']))['username']
if is_allow:
self._test_can_update(username, storage_id, self._SPEC, org_id=org_id)
self._test_can_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id)
else:
self._test_cannot_update(username, storage_id, self._SPEC, org_id=org_id)
self._test_cannot_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id)

@ -35,6 +35,12 @@ def generate_image_files(count):
return images
def get_cloud_storage_content(username, cloud_storage_id, manifest):
    """Return the JSON-decoded body of the cloud storage content endpoint.

    The request is made as ``username`` for ``cloud_storage_id``, with
    ``manifest`` passed as the ``manifest_path`` argument.
    """
    with make_api_client(username) as client:
        storages_api = client.cloud_storages_api
        _, http_response = storages_api.cloudstorages_retrieve_content(
            cloud_storage_id, manifest_path=manifest
        )
        return json.loads(http_response.data)
@pytest.mark.usefixtures('dontchangedb')
class TestGetTasks:
@ -272,7 +278,10 @@ class TestGetTaskDataset:
self._test_export_project('admin1', task['id'], format='CVAT for images 1.1')
@pytest.mark.usefixtures("changedb")
@pytest.mark.usefixtures("restore_cvat_data")
class TestPostTaskData:
_USERNAME = 'admin1'
@staticmethod
def _wait_until_task_is_created(api: TasksApi, task_id: int) -> RqStatus:
for _ in range(100):
@ -282,14 +291,14 @@ class TestPostTaskData:
sleep(1)
raise Exception('Cannot create task')
def _test_create_task(self, username, spec, data, files):
def _test_create_task(self, username, spec, data, content_type, **kwargs):
with make_api_client(username) as api_client:
(task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec))
(task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec), **kwargs)
assert response.status == HTTPStatus.CREATED
task_data = DataRequest(**data, client_files=list(files.values()))
task_data = DataRequest(**data)
(_, response) = api_client.tasks_api.create_data(task.id, task_data,
_content_type="multipart/form-data")
_content_type=content_type, **kwargs)
assert response.status == HTTPStatus.ACCEPTED
status = self._wait_until_task_is_created(api_client.tasks_api, task.id)
@ -298,9 +307,8 @@ class TestPostTaskData:
return task.id
def test_can_create_task_with_defined_start_and_stop_frames(self):
username = 'admin1'
task_spec = {
'name': f'test {username} to create a task with defined start and stop frames',
'name': f'test {self._USERNAME} to create a task with defined start and stop frames',
"labels": [{
"name": "car",
"color": "#ff00ff",
@ -319,15 +327,38 @@ class TestPostTaskData:
task_data = {
'image_quality': 75,
'start_frame': 2,
'stop_frame': 5
}
task_files = {
f'client_files[{i}]': image for i, image in enumerate(generate_image_files(7))
'stop_frame': 5,
'client_files': generate_image_files(7),
}
task_id = self._test_create_task(username, task_spec, task_data, task_files)
task_id = self._test_create_task(self._USERNAME, task_spec, task_data, content_type="multipart/form-data")
# check task size
with make_api_client(username) as api_client:
with make_api_client(self._USERNAME) as api_client:
(task, _) = api_client.tasks_api.retrieve(task_id)
assert task.size == 4
@pytest.mark.parametrize('cloud_storage_id, manifest, org', [
    (1, 'manifest.jsonl', ''), # public bucket
    (2, 'sub/manifest.jsonl', 'org2'), # private bucket
])
def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, org):
    """Create a task from files listed by a cloud storage manifest.

    The server file list is whatever the content endpoint reports for
    ``manifest``, followed by the manifest path itself.
    """
    server_files = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest)
    # The manifest is submitted together with the files it describes.
    server_files.append(manifest)

    car_label = {
        "name": "car",
    }
    task_spec = {
        "name": f"Task with files from cloud storage {cloud_storage_id}",
        "labels": [car_label],
    }

    data_spec = {
        'image_quality': 75,
        'use_cache': True,
        'storage': 'cloud_storage',
        'cloud_storage_id': cloud_storage_id,
        'server_files': server_files,
    }

    # The returned task id is not needed; the helper performs the checks.
    self._test_create_task(
        self._USERNAME,
        task_spec,
        data_spec,
        content_type="application/json",
        org=org,
    )

Loading…
Cancel
Save