|
|
|
|
@@ -22,7 +22,7 @@ from django.db import transaction
|
|
|
|
|
from cvat.apps.engine import models
|
|
|
|
|
from cvat.apps.engine.log import slogger
|
|
|
|
|
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
|
|
|
|
|
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime)
|
|
|
|
|
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
|
|
|
|
|
from cvat.apps.engine.utils import av_scan_paths
|
|
|
|
|
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
|
|
|
|
|
from utils.dataset_manifest.core import VideoManifestValidator
|
|
|
|
|
@@ -123,15 +123,18 @@ def _count_files(data, manifest_file=None):
|
|
|
|
|
raise ValueError("Bad file path: " + path)
|
|
|
|
|
server_files.append(path)
|
|
|
|
|
|
|
|
|
|
server_files.sort(reverse=True)
|
|
|
|
|
sorted_server_files = sorted(server_files, reverse=True)
|
|
|
|
|
# The idea of the code is trivial. After sort we will have files in the
|
|
|
|
|
# following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c'
|
|
|
|
|
# Let's keep all items which aren't substrings of the previous item. In
|
|
|
|
|
# the example above only 2.txt and 1.txt files will be in the final list.
|
|
|
|
|
# Also need to correctly handle 'a/b/c0', 'a/b/c' case.
|
|
|
|
|
data['server_files'] = [v[1] for v in zip([""] + server_files, server_files)
|
|
|
|
|
without_extra_dirs = [v[1] for v in zip([""] + sorted_server_files, sorted_server_files)
|
|
|
|
|
if not os.path.dirname(v[0]).startswith(v[1])]
|
|
|
|
|
|
|
|
|
|
# we need to keep the original sequence of files
|
|
|
|
|
data['server_files'] = [f for f in server_files if f in without_extra_dirs]
|
|
|
|
|
|
|
|
|
|
def count_files(file_mapping, counter):
|
|
|
|
|
for rel_path, full_path in file_mapping.items():
|
|
|
|
|
mime = get_mime(full_path)
|
|
|
|
|
@@ -141,7 +144,7 @@ def _count_files(data, manifest_file=None):
|
|
|
|
|
manifest_file.append(rel_path)
|
|
|
|
|
else:
|
|
|
|
|
slogger.glob.warn("Skip '{}' file (its mime type doesn't "
|
|
|
|
|
"correspond to a video or an image file)".format(full_path))
|
|
|
|
|
"correspond to supported MIME file type)".format(full_path))
|
|
|
|
|
|
|
|
|
|
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
|
|
|
|
|
|
|
|
|
|
@@ -213,6 +216,7 @@ def _download_data(urls, upload_dir):
|
|
|
|
|
def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
|
|
|
|
|
return lambda frame_id: start_frame + frame_id * frame_step
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@transaction.atomic
|
|
|
|
|
def _create_thread(tid, data, isImport=False):
|
|
|
|
|
slogger.glob.info("create task #{}".format(tid))
|
|
|
|
|
@@ -222,15 +226,13 @@ def _create_thread(tid, data, isImport=False):
|
|
|
|
|
upload_dir = db_data.get_upload_dirname()
|
|
|
|
|
|
|
|
|
|
if data['remote_files']:
|
|
|
|
|
if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
|
|
|
|
|
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
|
|
|
|
|
|
|
|
|
|
manifest_file = []
|
|
|
|
|
media = _count_files(data, manifest_file)
|
|
|
|
|
media, task_mode = _validate_data(media, manifest_file)
|
|
|
|
|
if manifest_file:
|
|
|
|
|
assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
|
|
|
|
|
"File with meta information can be uploaded if 'Use cache' option is also selected"
|
|
|
|
|
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
|
|
|
|
|
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
|
|
|
|
|
|
|
|
|
|
if data['server_files']:
|
|
|
|
|
if db_data.storage == models.StorageChoice.LOCAL:
|
|
|
|
|
@@ -252,19 +254,22 @@ def _create_thread(tid, data, isImport=False):
|
|
|
|
|
'specific_attributes': db_cloud_storage.get_specific_attributes()
|
|
|
|
|
}
|
|
|
|
|
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
|
|
|
|
|
first_sorted_media_image = sorted(media['image'])[0]
|
|
|
|
|
sorted_media = sort(media['image'], data['sorting_method'])
|
|
|
|
|
first_sorted_media_image = sorted_media[0]
|
|
|
|
|
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
|
|
|
|
|
|
|
|
|
|
# prepare task manifest file from cloud storage manifest file
|
|
|
|
|
# NOTE we should create manifest before defining chunk_size
|
|
|
|
|
# FIXME in the future when will be implemented archive support
|
|
|
|
|
manifest = ImageManifestManager(db_data.get_manifest_path())
|
|
|
|
|
cloud_storage_manifest = ImageManifestManager(
|
|
|
|
|
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
|
|
|
|
|
db_data.cloud_storage.get_storage_dirname()
|
|
|
|
|
)
|
|
|
|
|
cloud_storage_manifest.set_index()
|
|
|
|
|
media_files = sorted(media['image'])
|
|
|
|
|
content = cloud_storage_manifest.get_subset(media_files)
|
|
|
|
|
manifest.create(content)
|
|
|
|
|
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
|
|
|
|
|
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
|
|
|
|
|
manifest.create(sorted_content)
|
|
|
|
|
|
|
|
|
|
av_scan_paths(upload_dir)
|
|
|
|
|
|
|
|
|
|
@@ -292,24 +297,48 @@ def _create_thread(tid, data, isImport=False):
|
|
|
|
|
if media_files:
|
|
|
|
|
if extractor is not None:
|
|
|
|
|
raise Exception('Combined data types are not supported')
|
|
|
|
|
source_paths=[os.path.join(upload_dir, f) for f in media_files]
|
|
|
|
|
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
|
|
|
|
|
source_paths.append(db_data.get_upload_dirname())
|
|
|
|
|
upload_dir = db_data.get_upload_dirname()
|
|
|
|
|
db_data.storage = models.StorageChoice.LOCAL
|
|
|
|
|
if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
|
|
|
|
|
manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
|
|
|
|
|
db_data.start_frame = 0
|
|
|
|
|
data['stop_frame'] = None
|
|
|
|
|
db_data.frame_filter = ''
|
|
|
|
|
if isImport and media_type != 'video' and db_data.storage_method == models.StorageMethodChoice.CACHE:
|
|
|
|
|
# we should sort media_files according to the manifest content sequence
|
|
|
|
|
manifest = ImageManifestManager(db_data.get_manifest_path())
|
|
|
|
|
manifest.set_index()
|
|
|
|
|
sorted_media_files = []
|
|
|
|
|
for idx in range(len(media_files)):
|
|
|
|
|
properties = manifest[manifest_index(idx)]
|
|
|
|
|
image_name = properties.get('name', None)
|
|
|
|
|
image_extension = properties.get('extension', None)
|
|
|
|
|
|
|
|
|
|
full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None
|
|
|
|
|
if full_image_path and full_image_path in media_files:
|
|
|
|
|
sorted_media_files.append(full_image_path)
|
|
|
|
|
media_files = sorted_media_files.copy()
|
|
|
|
|
del sorted_media_files
|
|
|
|
|
data['sorting_method'] = models.SortingMethod.PREDEFINED
|
|
|
|
|
source_paths=[os.path.join(upload_dir, f) for f in media_files]
|
|
|
|
|
if manifest_file and not isImport and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
|
|
|
|
|
raise Exception("It isn't supported to upload manifest file and use random sorting")
|
|
|
|
|
if isImport and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
|
|
|
|
|
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
|
|
|
|
|
raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")
|
|
|
|
|
|
|
|
|
|
extractor = MEDIA_TYPES[media_type]['extractor'](
|
|
|
|
|
source_path=source_paths,
|
|
|
|
|
step=db_data.get_frame_step(),
|
|
|
|
|
start=db_data.start_frame,
|
|
|
|
|
stop=data['stop_frame'],
|
|
|
|
|
)
|
|
|
|
|
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
|
|
|
|
|
source_paths.append(db_data.get_upload_dirname())
|
|
|
|
|
upload_dir = db_data.get_upload_dirname()
|
|
|
|
|
db_data.storage = models.StorageChoice.LOCAL
|
|
|
|
|
|
|
|
|
|
details = {
|
|
|
|
|
'source_path': source_paths,
|
|
|
|
|
'step': db_data.get_frame_step(),
|
|
|
|
|
'start': db_data.start_frame,
|
|
|
|
|
'stop': data['stop_frame'],
|
|
|
|
|
}
|
|
|
|
|
if media_type != 'video':
|
|
|
|
|
details['sorting_method'] = data['sorting_method']
|
|
|
|
|
extractor = MEDIA_TYPES[media_type]['extractor'](**details)
|
|
|
|
|
|
|
|
|
|
validate_dimension = ValidateDimension()
|
|
|
|
|
if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
|
|
|
|
|
@@ -474,8 +503,12 @@ def _create_thread(tid, data, isImport=False):
|
|
|
|
|
chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
|
|
|
|
|
img_sizes = []
|
|
|
|
|
|
|
|
|
|
for _, frame_id in chunk_paths:
|
|
|
|
|
for chunk_path, frame_id in chunk_paths:
|
|
|
|
|
properties = manifest[manifest_index(frame_id)]
|
|
|
|
|
|
|
|
|
|
# check mapping
|
|
|
|
|
if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
|
|
|
|
|
raise Exception('Incorrect file mapping to manifest content')
|
|
|
|
|
if db_task.dimension == models.DimensionType.DIM_2D:
|
|
|
|
|
resolution = (properties['width'], properties['height'])
|
|
|
|
|
else:
|
|
|
|
|
|