Hotfix missing manifest file (#116)

main
Maria Khrustaleva 4 years ago committed by GitHub
parent 5e2eda7601
commit 182c39aa8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Annotation window might have top offset if try to move a locked object - Annotation window might have top offset if try to move a locked object
- Image search in cloud storage (<https://github.com/cvat-ai/cvat/pull/8>) - Image search in cloud storage (<https://github.com/cvat-ai/cvat/pull/8>)
- Reset password functionality (<https://github.com/cvat-ai/cvat/pull/52>) - Reset password functionality (<https://github.com/cvat-ai/cvat/pull/52>)
- Creating task with cloud storage data (<https://github.com/cvat-ai/cvat/pull/116>)
### Security ### Security
- TDB - TDB

@ -1,5 +1,6 @@
# Copyright (C) 2018-2022 Intel Corporation # Copyright (C) 2018-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
@ -18,9 +19,11 @@ import requests
import ipaddress import ipaddress
import dns.resolver import dns.resolver
import django_rq import django_rq
import pytz
from django.conf import settings from django.conf import settings
from django.db import transaction from django.db import transaction
from datetime import datetime
from cvat.apps.engine import models from cvat.apps.engine import models
from cvat.apps.engine.log import slogger from cvat.apps.engine.log import slogger
@ -30,7 +33,7 @@ from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator from utils.dataset_manifest.core import VideoManifestValidator
from utils.dataset_manifest.utils import detect_related_images from utils.dataset_manifest.utils import detect_related_images
from .cloud_provider import get_cloud_storage_instance, Credentials from .cloud_provider import db_storage_to_storage_instance
############################# Low Level server API ############################# Low Level server API
@ -207,13 +210,21 @@ def _validate_data(counter, manifest_files=None):
return counter, task_modes[0] return counter, task_modes[0]
def _validate_manifest(manifests, root_dir): def _validate_manifest(manifests, root_dir, is_in_cloud, db_cloud_storage):
if manifests: if manifests:
if len(manifests) != 1: if len(manifests) != 1:
raise Exception('Only one manifest file can be attached with data') raise Exception('Only one manifest file can be attached with data')
manifest_file = manifests[0]
full_manifest_path = os.path.join(root_dir, manifests[0]) full_manifest_path = os.path.join(root_dir, manifests[0])
if is_in_cloud:
cloud_storage_instance = db_storage_to_storage_instance(db_cloud_storage)
# check that cloud storage manifest file exists and is up to date
if not os.path.exists(full_manifest_path) or \
datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) \
< cloud_storage_instance.get_file_last_modified(manifest_file):
cloud_storage_instance.download_file(manifest_file, full_manifest_path)
if is_manifest(full_manifest_path): if is_manifest(full_manifest_path):
return manifests[0] return manifest_file
raise Exception('Invalid manifest was uploaded') raise Exception('Invalid manifest was uploaded')
return None return None
@ -293,6 +304,7 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
db_data = db_task.data db_data = db_task.data
upload_dir = db_data.get_upload_dirname() upload_dir = db_data.get_upload_dirname()
is_data_in_cloud = db_data.storage == models.StorageChoice.CLOUD_STORAGE
if data['remote_files'] and not isDatasetImport: if data['remote_files'] and not isDatasetImport:
data['remote_files'] = _download_data(data['remote_files'], upload_dir) data['remote_files'] = _download_data(data['remote_files'], upload_dir)
@ -310,28 +322,18 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
manifest_root = None manifest_root = None
if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}: if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
manifest_root = upload_dir manifest_root = upload_dir
elif db_data.storage == models.StorageChoice.CLOUD_STORAGE: elif is_data_in_cloud:
manifest_root = db_data.cloud_storage.get_storage_dirname() manifest_root = db_data.cloud_storage.get_storage_dirname()
manifest_file = _validate_manifest(manifest_files, manifest_root) manifest_file = _validate_manifest(
manifest_files, manifest_root,
is_data_in_cloud, db_data.cloud_storage if is_data_in_cloud else None
)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE): if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected") raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE: if data['server_files'] and is_data_in_cloud:
if not manifest_file: raise Exception('A manifest file not found') cloud_storage_instance = db_storage_to_storage_instance(db_data.cloud_storage)
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})
details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
sorted_media = sort(media['image'], data['sorting_method']) sorted_media = sort(media['image'], data['sorting_method'])
data_size = len(sorted_media) data_size = len(sorted_media)
@ -516,7 +518,7 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
# calculate chunk size if it isn't specified # calculate chunk size if it isn't specified
if db_data.chunk_size is None: if db_data.chunk_size is None:
if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter): if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
if not db_data.storage == models.StorageChoice.CLOUD_STORAGE: if not is_data_in_cloud:
w, h = extractor.get_image_size(0) w, h = extractor.get_image_size(0)
else: else:
img_properties = manifest[0] img_properties = manifest[0]

@ -320,12 +320,15 @@ class TestPostTaskData:
(task, _) = api_client.tasks_api.retrieve(task_id) (task, _) = api_client.tasks_api.retrieve(task_id)
assert task.size == 4 assert task.size == 4
@pytest.mark.parametrize('cloud_storage_id, manifest, org', [ @pytest.mark.parametrize('cloud_storage_id, manifest, use_bucket_content, org', [
(1, 'manifest.jsonl', ''), # public bucket (1, 'manifest.jsonl', False, ''), # public bucket
(2, 'sub/manifest.jsonl', 'org2'), # private bucket (2, 'sub/manifest.jsonl', True, 'org2'), # private bucket
]) ])
def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, org): def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, use_bucket_content, org):
cloud_storage_content = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest) if use_bucket_content:
cloud_storage_content = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest)
else:
cloud_storage_content = ['image_case_65_1.png', 'image_case_65_2.png']
cloud_storage_content.append(manifest) cloud_storage_content.append(manifest)
task_spec = { task_spec = {

Loading…
Cancel
Save