diff --git a/CHANGELOG.md b/CHANGELOG.md index bd0f9580..5b936471 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add "tag" return type for automatic annotation in Nuclio () - Dataset importing to a project () - User is able to customize information that text labels show () +- Support for uploading manifest with any name () ### Changed - TDB diff --git a/cvat-ui/src/components/file-manager/file-manager.tsx b/cvat-ui/src/components/file-manager/file-manager.tsx index f413b626..7f3be516 100644 --- a/cvat-ui/src/components/file-manager/file-manager.tsx +++ b/cvat-ui/src/components/file-manager/file-manager.tsx @@ -90,14 +90,13 @@ export class FileManager extends React.PureComponent { }; } - private loadData = (key: string): Promise => - new Promise((resolve, reject): void => { - const { onLoadData } = this.props; + private loadData = (key: string): Promise => new Promise((resolve, reject): void => { + const { onLoadData } = this.props; - const success = (): void => resolve(); - const failure = (): void => reject(); - onLoadData(key, success, failure); - }); + const success = (): void => resolve(); + const failure = (): void => reject(); + onLoadData(key, success, failure); + }); public reset(): void { const { active } = this.state; @@ -161,8 +160,8 @@ export class FileManager extends React.PureComponent { private renderShareSelector(): JSX.Element { function renderTreeNodes(data: TreeNodeNormal[]): JSX.Element[] { // sort alphabetically - data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number => - a.key.toLocaleString().localeCompare(b.key.toLocaleString())); + data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number => ( + a.key.toLocaleString().localeCompare(b.key.toLocaleString()))); return data.map((item: TreeNodeNormal) => { if (item.children) { return ( @@ -205,8 +204,8 @@ export class FileManager extends React.PureComponent { halfChecked: ReactText[]; }, ): void => { - const keys = (checkedKeys as ReactText[]).map((text: ReactText): string => - text.toLocaleString()); + const keys = (checkedKeys as ReactText[]).map((text: ReactText): string => ( + text.toLocaleString())); this.setState({ files: { ...files, @@ -267,7 +266,7 @@ export class FileManager extends React.PureComponent { !item.endsWith('manifest.jsonl'))} + selectedFiles={files.cloudStorage.filter((item) => !item.endsWith('.jsonl'))} onSelectCloudStorage={(_cloudStorage: CloudStorage | null) => { this.setState({ cloudStorage: _cloudStorage }); }} diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 220ee2e1..d8e25ef7 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -24,7 +24,7 @@ from cvat.apps.engine.log import slogger from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort) from cvat.apps.engine.utils import av_scan_paths -from utils.dataset_manifest import ImageManifestManager, VideoManifestManager +from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest from utils.dataset_manifest.core import VideoManifestValidator from utils.dataset_manifest.utils import detect_related_images from .cloud_provider import get_cloud_storage_instance, Credentials @@ -113,7 +113,7 @@ def _save_task_to_db(db_task): db_task.data.save() db_task.save() -def _count_files(data, manifest_file=None): +def _count_files(data, manifest_files=None): share_root = settings.SHARE_ROOT server_files = [] @@ -143,8 +143,8 @@ def _count_files(data, manifest_file=None): mime = get_mime(full_path) if mime in counter: counter[mime].append(rel_path) - elif 'manifest.jsonl' == os.path.basename(rel_path): - manifest_file.append(rel_path) + elif rel_path.endswith('.jsonl'): + manifest_files.append(rel_path) else: slogger.glob.warn("Skip '{}' file (its mime type doesn't " "correspond to supported MIME file type)".format(full_path)) @@ -163,7 +163,7 @@ def _count_files(data, manifest_file=None): return counter -def _validate_data(counter, manifest_file=None): +def _validate_data(counter, manifest_files=None): unique_entries = 0 multiple_entries = 0 for media_type, media_config in MEDIA_TYPES.items(): @@ -173,7 +173,7 @@ def _validate_data(counter, manifest_file=None): else: multiple_entries += len(counter[media_type]) - if manifest_file and media_type not in ('video', 'image'): + if manifest_files and media_type not in ('video', 'image'): raise Exception('File with meta information can only be uploaded with video/images ') if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1: @@ -193,6 +193,16 @@ def _validate_data(counter, manifest_file=None): return counter, task_modes[0] +def _validate_manifest(manifests, root_dir): + if manifests: + if len(manifests) != 1: + raise Exception('Only one manifest file can be attached with data') + full_manifest_path = os.path.join(root_dir, manifests[0]) + if is_manifest(full_manifest_path): + return manifests[0] + raise Exception('Invalid manifest was uploaded') + return None + def _download_data(urls, upload_dir): job = rq.get_current_job() local_files = {} @@ -233,48 +243,57 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False): if data['remote_files'] and not isDatasetImport: data['remote_files'] = _download_data(data['remote_files'], upload_dir) - manifest_file = [] - media = _count_files(data, manifest_file) - media, task_mode = _validate_data(media, manifest_file) - if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE): - raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected") + manifest_files = [] + media = _count_files(data, manifest_files) + media, task_mode = _validate_data(media, manifest_files) if data['server_files']: if db_data.storage == models.StorageChoice.LOCAL: _copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path')) elif db_data.storage == models.StorageChoice.SHARE: upload_dir = settings.SHARE_ROOT - else: # cloud storage - if not manifest_file: raise Exception('A manifest file not found') - db_cloud_storage = db_data.cloud_storage - credentials = Credentials() - credentials.convert_from_db({ - 'type': db_cloud_storage.credentials_type, - 'value': db_cloud_storage.credentials, - }) - details = { - 'resource': db_cloud_storage.resource, - 'credentials': credentials, - 'specific_attributes': db_cloud_storage.get_specific_attributes() - } - cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) - sorted_media = sort(media['image'], data['sorting_method']) - first_sorted_media_image = sorted_media[0] - cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) - - # prepare task manifest file from cloud storage manifest file - # NOTE we should create manifest before defining chunk_size - # FIXME in the future when will be implemented archive support - manifest = ImageManifestManager(db_data.get_manifest_path()) - cloud_storage_manifest = ImageManifestManager( - os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]), - db_data.cloud_storage.get_storage_dirname() - ) - cloud_storage_manifest.set_index() - sequence, content = cloud_storage_manifest.get_subset(sorted_media) - sorted_content = (i[1] for i in sorted(zip(sequence, content))) - manifest.create(sorted_content) + manifest_root = None + if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}: + manifest_root = upload_dir + elif db_data.storage == models.StorageChoice.CLOUD_STORAGE: + manifest_root = db_data.cloud_storage.get_storage_dirname() + + manifest_file = _validate_manifest(manifest_files, manifest_root) + if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE): + raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected") + + if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE: + if not manifest_file: raise Exception('A manifest file not found') + db_cloud_storage = db_data.cloud_storage + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_cloud_storage.credentials_type, + 'value': db_cloud_storage.credentials, + }) + + details = { + 'resource': db_cloud_storage.resource, + 'credentials': credentials, + 'specific_attributes': db_cloud_storage.get_specific_attributes() + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) + sorted_media = sort(media['image'], data['sorting_method']) + first_sorted_media_image = sorted_media[0] + cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) + + # prepare task manifest file from cloud storage manifest file + # NOTE we should create manifest before defining chunk_size + # FIXME in the future when will be implemented archive support + manifest = ImageManifestManager(db_data.get_manifest_path()) + cloud_storage_manifest = ImageManifestManager( + os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file), + db_data.cloud_storage.get_storage_dirname() + ) + cloud_storage_manifest.set_index() + sequence, content = cloud_storage_manifest.get_subset(sorted_media) + sorted_content = (i[1] for i in sorted(zip(sequence, content))) + manifest.create(sorted_content) av_scan_paths(upload_dir) @@ -432,12 +451,12 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False): if not media_files: continue - # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl') + # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl') if manifest_file and not os.path.exists(db_data.get_manifest_path()): - shutil.copyfile(os.path.join(upload_dir, manifest_file[0]), + shutil.copyfile(os.path.join(upload_dir, manifest_file), db_data.get_manifest_path()) if upload_dir != settings.SHARE_ROOT: - os.remove(os.path.join(upload_dir, manifest_file[0])) + os.remove(os.path.join(upload_dir, manifest_file)) if task_mode == MEDIA_TYPES['video']['mode']: try: diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 424a0051..574ae0ca 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1359,7 +1359,7 @@ class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewS storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) if not db_storage.manifests.count(): raise Exception('There is no manifest file') - manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') + manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename) file_status = storage.get_file_status(manifest_path) if file_status == Status.NOT_FOUND: raise FileNotFoundError(errno.ENOENT, diff --git a/utils/dataset_manifest/__init__.py b/utils/dataset_manifest/__init__.py index f6547acf..5192a14e 100644 --- a/utils/dataset_manifest/__init__.py +++ b/utils/dataset_manifest/__init__.py @@ -1,4 +1,4 @@ # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT -from .core import VideoManifestManager, ImageManifestManager \ No newline at end of file +from .core import VideoManifestManager, ImageManifestManager, is_manifest \ No newline at end of file diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index dcf71633..a6a3ac1d 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -2,15 +2,18 @@ # # SPDX-License-Identifier: MIT +from enum import Enum import av import json import os -from abc import ABC, abstractmethod, abstractproperty + +from abc import ABC, abstractmethod, abstractproperty, abstractstaticmethod from contextlib import closing from tempfile import NamedTemporaryFile - from PIL import Image -from .utils import md5_hash, rotate_image, sort, SortingMethod +from json.decoder import JSONDecodeError + +from .utils import SortingMethod, md5_hash, rotate_image, sort class VideoStreamReader: def __init__(self, source_path, chunk_size, force): @@ -238,8 +241,19 @@ class Dataset3DImagesReader(DatasetImagesReader): yield dict() class _Manifest: + class SupportedVersion(str, Enum): + V1 = '1.0' + V1_1 = '1.1' + + @classmethod + def choices(cls): + return (x.value for x in cls) + + def __str__(self): + return self.value + FILE_NAME = 'manifest.jsonl' - VERSION = '1.1' + VERSION = SupportedVersion.V1_1 def __init__(self, path, upload_dir=None): assert path, 'A path to manifest file not found' @@ -509,13 +523,6 @@ class VideoManifestManager(_ManifestManager): def get_subset(self, subset_names): raise NotImplementedError() -#TODO: add generic manifest structure file validation -class ManifestValidator: - def validate_base_info(self): - with open(self._manifest.path, 'r') as manifest_file: - assert self._manifest.VERSION != json.loads(manifest_file.readline())['version'] - assert self._manifest.TYPE != json.loads(manifest_file.readline())['type'] - class VideoManifestValidator(VideoManifestManager): def __init__(self, source_path, manifest_path): self._source_path = source_path @@ -607,12 +614,113 @@ class ImageManifestManager(_ManifestManager): image_name = f"{image['name']}{image['extension']}" if image_name in subset_names: index_list.append(subset_names.index(image_name)) - subset.append({ + properties = { 'name': f"{image['name']}", 'extension': f"{image['extension']}", 'width': image['width'], 'height': image['height'], - 'meta': image['meta'], - 'checksum': f"{image['checksum']}" - }) + } + for optional_field in {'meta', 'checksum'}: + value = image.get(optional_field) + if value: + properties[optional_field] = value + subset.append(properties) return index_list, subset + + +class _BaseManifestValidator(ABC): + def __init__(self, full_manifest_path): + self._manifest = _Manifest(full_manifest_path) + + def validate(self): + try: + # we cannot use index in general because manifest may be e.g. in share point with ro mode + with open(self._manifest.path, 'r') as manifest: + for validator in self.validators: + line = json.loads(manifest.readline().strip()) + validator(line) + return True + except (ValueError, KeyError, JSONDecodeError): + return False + + @staticmethod + def _validate_version(_dict): + if not _dict['version'] in _Manifest.SupportedVersion.choices(): + raise ValueError('Incorrect version field') + + def _validate_type(self, _dict): + if not _dict['type'] == self.TYPE: + raise ValueError('Incorrect type field') + + @abstractproperty + def validators(self): + pass + + @abstractstaticmethod + def _validate_first_item(_dict): + pass + +class _VideoManifestStructureValidator(_BaseManifestValidator): + TYPE = 'video' + + @property + def validators(self): + return ( + self._validate_version, + self._validate_type, + self._validate_properties, + self._validate_first_item, + ) + + @staticmethod + def _validate_properties(_dict): + properties = _dict['properties'] + if not isinstance(properties['name'], str): + raise ValueError('Incorrect name field') + if not isinstance(properties['resolution'], list): + raise ValueError('Incorrect resolution field') + if not isinstance(properties['length'], int) or properties['length'] == 0: + raise ValueError('Incorrect length field') + + @staticmethod + def _validate_first_item(_dict): + if not isinstance(_dict['number'], int): + raise ValueError('Incorrect number field') + if not isinstance(_dict['pts'], int): + raise ValueError('Incorrect pts field') + +class _DatasetManifestStructureValidator(_BaseManifestValidator): + TYPE = 'images' + + @property + def validators(self): + return ( + self._validate_version, + self._validate_type, + self._validate_first_item, + ) + + @staticmethod + def _validate_first_item(_dict): + if not isinstance(_dict['name'], str): + raise ValueError('Incorrect name field') + if not isinstance(_dict['extension'], str): + raise ValueError('Incorrect extension field') + # width and height are required for 2d data + # FIXME for 3d when manual preparation of the manifest will be implemented + if not isinstance(_dict['width'], int): + raise ValueError('Incorrect width field') + if not isinstance(_dict['height'], int): + raise ValueError('Incorrect height field') + +def is_manifest(full_manifest_path): + return _is_video_manifest(full_manifest_path) or \ + _is_dataset_manifest(full_manifest_path) + +def _is_video_manifest(full_manifest_path): + validator = _VideoManifestStructureValidator(full_manifest_path) + return validator.validate() + +def _is_dataset_manifest(full_manifest_path): + validator = _DatasetManifestStructureValidator(full_manifest_path) + return validator.validate() \ No newline at end of file