@@ -24,7 +24,7 @@ from cvat.apps.engine.log import slogger
 from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
     ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
 from cvat.apps.engine.utils import av_scan_paths
-from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
+from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
 from utils.dataset_manifest.core import VideoManifestValidator
 from utils.dataset_manifest.utils import detect_related_images
 from .cloud_provider import get_cloud_storage_instance, Credentials
@@ -113,7 +113,7 @@ def _save_task_to_db(db_task):
     db_task.data.save()
     db_task.save()

-def _count_files(data, manifest_file=None):
+def _count_files(data, manifest_files=None):
     share_root = settings.SHARE_ROOT
     server_files = []
@@ -143,8 +143,8 @@ def _count_files(data, manifest_file=None):
             mime = get_mime(full_path)
             if mime in counter:
                 counter[mime].append(rel_path)
-            elif 'manifest.jsonl' == os.path.basename(rel_path):
-                manifest_file.append(rel_path)
+            elif rel_path.endswith('.jsonl'):
+                manifest_files.append(rel_path)
             else:
                 slogger.glob.warn("Skip '{}' file (its mime type doesn't "
                     "correspond to supported MIME file type)".format(full_path))
@@ -163,7 +163,7 @@ def _count_files(data, manifest_file=None):
     return counter

-def _validate_data(counter, manifest_file=None):
+def _validate_data(counter, manifest_files=None):
     unique_entries = 0
     multiple_entries = 0
     for media_type, media_config in MEDIA_TYPES.items():
@@ -173,7 +173,7 @@ def _validate_data(counter, manifest_file=None):
             else:
                 multiple_entries += len(counter[media_type])

-            if manifest_file and media_type not in ('video', 'image'):
+            if manifest_files and media_type not in ('video', 'image'):
                 raise Exception('File with meta information can only be uploaded with video/images')

     if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
@@ -193,6 +193,16 @@ def _validate_data(counter, manifest_file=None):
     return counter, task_modes[0]

+def _validate_manifest(manifests, root_dir):
+    if manifests:
+        if len(manifests) != 1:
+            raise Exception('Only one manifest file can be attached with data')
+        full_manifest_path = os.path.join(root_dir, manifests[0])
+        if is_manifest(full_manifest_path):
+            return manifests[0]
+        raise Exception('Invalid manifest was uploaded')
+    return None
+
 def _download_data(urls, upload_dir):
     job = rq.get_current_job()
     local_files = {}
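For reference, a quick sketch of how the new _validate_manifest helper behaves with made-up arguments (the share root path below is hypothetical): it returns the single relative manifest path when exactly one file passes the is_manifest check, returns None when no manifest was collected, and raises when several candidates are supplied or validation fails.

    _validate_manifest(['annotations/manifest.jsonl'], '/home/django/share')  # -> 'annotations/manifest.jsonl' if valid
    _validate_manifest([], '/home/django/share')                              # -> None
    _validate_manifest(['a.jsonl', 'b.jsonl'], '/home/django/share')          # raises Exception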
@@ -233,48 +243,57 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
     if data['remote_files'] and not isDatasetImport:
         data['remote_files'] = _download_data(data['remote_files'], upload_dir)

-    manifest_file = []
-    media = _count_files(data, manifest_file)
-    media, task_mode = _validate_data(media, manifest_file)
-    if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
-        raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
+    manifest_files = []
+    media = _count_files(data, manifest_files)
+    media, task_mode = _validate_data(media, manifest_files)

     if data['server_files']:
         if db_data.storage == models.StorageChoice.LOCAL:
             _copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
         elif db_data.storage == models.StorageChoice.SHARE:
             upload_dir = settings.SHARE_ROOT
-        else: # cloud storage
-            if not manifest_file: raise Exception('A manifest file not found')
-            db_cloud_storage = db_data.cloud_storage
-            credentials = Credentials()
-            credentials.convert_from_db({
-                'type': db_cloud_storage.credentials_type,
-                'value': db_cloud_storage.credentials,
-            })
-            details = {
-                'resource': db_cloud_storage.resource,
-                'credentials': credentials,
-                'specific_attributes': db_cloud_storage.get_specific_attributes()
-            }
-            cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
-
-            sorted_media = sort(media['image'], data['sorting_method'])
-            first_sorted_media_image = sorted_media[0]
-            cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
-
-            # prepare task manifest file from cloud storage manifest file
-            # NOTE we should create manifest before defining chunk_size
-            # FIXME in the future when will be implemented archive support
-            manifest = ImageManifestManager(db_data.get_manifest_path())
-            cloud_storage_manifest = ImageManifestManager(
-                os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
-                db_data.cloud_storage.get_storage_dirname()
-            )
-            cloud_storage_manifest.set_index()
-            sequence, content = cloud_storage_manifest.get_subset(sorted_media)
-            sorted_content = (i[1] for i in sorted(zip(sequence, content)))
-            manifest.create(sorted_content)
+
+    manifest_root = None
+    if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
+        manifest_root = upload_dir
+    elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
+        manifest_root = db_data.cloud_storage.get_storage_dirname()
+    manifest_file = _validate_manifest(manifest_files, manifest_root)
+    if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
+        raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
+
+    if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
+        if not manifest_file: raise Exception('A manifest file not found')
+        db_cloud_storage = db_data.cloud_storage
+        credentials = Credentials()
+        credentials.convert_from_db({
+            'type': db_cloud_storage.credentials_type,
+            'value': db_cloud_storage.credentials,
+        })
+        details = {
+            'resource': db_cloud_storage.resource,
+            'credentials': credentials,
+            'specific_attributes': db_cloud_storage.get_specific_attributes()
+        }
+        cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
+
+        sorted_media = sort(media['image'], data['sorting_method'])
+        first_sorted_media_image = sorted_media[0]
+        cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
+
+        # prepare task manifest file from cloud storage manifest file
+        # NOTE we should create manifest before defining chunk_size
+        # FIXME in the future when will be implemented archive support
+        manifest = ImageManifestManager(db_data.get_manifest_path())
+        cloud_storage_manifest = ImageManifestManager(
+            os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
+            db_data.cloud_storage.get_storage_dirname()
+        )
+        cloud_storage_manifest.set_index()
+        sequence, content = cloud_storage_manifest.get_subset(sorted_media)
+        sorted_content = (i[1] for i in sorted(zip(sequence, content)))
+        manifest.create(sorted_content)

     av_scan_paths(upload_dir)
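A side note on the cloud-storage block above: get_subset returns the selected manifest entries together with a sequence of ordering keys (presumably their positions in the cloud storage manifest), and the sorted_content generator re-emits the entries ordered by those keys before manifest.create is called. A toy example with dummy values, not real manifest entries:

    sequence = [2, 0, 1]                    # ordering keys returned for the requested images
    content = ['c.jpg', 'a.jpg', 'b.jpg']   # the corresponding manifest entries
    sorted_content = (i[1] for i in sorted(zip(sequence, content)))
    list(sorted_content)                    # ['a.jpg', 'b.jpg', 'c.jpg']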
@@ -432,12 +451,12 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
         if not media_files:
             continue

-        # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
+        # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl')
         if manifest_file and not os.path.exists(db_data.get_manifest_path()):
-            shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
+            shutil.copyfile(os.path.join(upload_dir, manifest_file),
                 db_data.get_manifest_path())
             if upload_dir != settings.SHARE_ROOT:
-                os.remove(os.path.join(upload_dir, manifest_file[0]))
+                os.remove(os.path.join(upload_dir, manifest_file))

         if task_mode == MEDIA_TYPES['video']['mode']:
             try: