Manifest fixes (#3146)

* Add logger, reverse func
* Fix image filtering
* Fix upload video manifest

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
main
Maria Khrustaleva 5 years ago committed by GitHub
parent 73b85a9465
commit e7cca0ec64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Changing a label on canvas does not work when 'Show object details' enabled (<https://github.com/openvinotoolkit/cvat/pull/3084>) - Changing a label on canvas does not work when 'Show object details' enabled (<https://github.com/openvinotoolkit/cvat/pull/3084>)
- Make sure frame unzip web worker correctly terminates after unzipping all images in a requested chunk (<https://github.com/openvinotoolkit/cvat/pull/3096>) - Make sure frame unzip web worker correctly terminates after unzipping all images in a requested chunk (<https://github.com/openvinotoolkit/cvat/pull/3096>)
- Reset password link was unavailable before login (<https://github.com/openvinotoolkit/cvat/pull/3140>) - Reset password link was unavailable before login (<https://github.com/openvinotoolkit/cvat/pull/3140>)
- Manifest: migration (<https://github.com/openvinotoolkit/cvat/pull/3146>)
### Security ### Security

@ -1,7 +1,10 @@
# Generated by Django 3.1.1 on 2021-02-20 08:36 # Generated by Django 3.1.1 on 2021-02-20 08:36
import glob import glob
import itertools
import logging
import os import os
import sys
from re import search from re import search
from django.conf import settings from django.conf import settings
@ -9,40 +12,109 @@ from django.db import migrations
from cvat.apps.engine.models import (DimensionType, StorageChoice, from cvat.apps.engine.models import (DimensionType, StorageChoice,
StorageMethodChoice) StorageMethodChoice)
from cvat.apps.engine.media_extractors import get_mime
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
def get_logger():
    """Return the logger used by this data migration.

    The logger is named after this module's file name (without extension),
    is set to ``INFO`` level, writes formatted records to
    ``<settings.MIGRATIONS_LOGS_ROOT>/<migration>.log`` and echoes
    every record to both ``sys.stdout`` and ``sys.stderr``.

    Handlers are attached only on the first call. ``logging.getLogger``
    returns the same object for the same name, so attaching handlers on every
    call would duplicate each record (and open another handle on the log
    file) whenever more than one migration entry point runs in one process.

    Returns:
        logging.Logger: the configured logger.
    """
    migration = os.path.basename(__file__).split(".")[0]
    logger = logging.getLogger(name=migration)
    logger.setLevel(logging.INFO)

    # Already configured by an earlier call: do not stack handlers again.
    if logger.handlers:
        return logger

    file_handler = logging.FileHandler(
        os.path.join(settings.MIGRATIONS_LOGS_ROOT, f"{migration}.log"))
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(file_handler)

    # NOTE(review): both console streams receive every record, as in the
    # original code -- confirm that the double console output is intended.
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.addHandler(logging.StreamHandler(sys.stderr))
    return logger
def _get_query_set(apps):
    """Select the ``Data`` objects that use the ``CACHE`` storage method.

    Args:
        apps: object exposing ``get_model``; the ``engine.Data`` model is
            looked up through it.

    Returns:
        The queryset produced by filtering ``Data.objects`` on
        ``storage_method=StorageMethodChoice.CACHE``.
    """
    data_model = apps.get_model("engine", "Data")
    return data_model.objects.filter(storage_method=StorageMethodChoice.CACHE)
def migrate2meta(apps, shema_editor):
    """Reverse migration: replace manifest files with the legacy meta files.

    For every data object returned by ``_get_query_set`` this removes
    ``manifest.jsonl`` and ``index.json`` from the object's ``raw`` upload
    directory and then recreates the legacy files:

    * video data -- ``meta_info.txt`` with one ``"<idx> <pts>"`` line per
      item of ``VideoManifestManager.prepare_meta``;
    * image data -- ``dummy_<n>.txt`` files, each listing the image paths of
      one run of ``db_data.chunk_size`` consecutive frames.

    Legacy files that already exist are kept as they are. A failure on one
    data object is logged with its traceback and does not stop the
    processing of the remaining objects.

    Args:
        apps: first argument supplied by ``migrations.RunPython``; forwarded
            to ``_get_query_set``.
        shema_editor: second argument supplied by ``migrations.RunPython``;
            not used here.
    """
    logger = get_logger()
    # (file name, message logged after the file has been removed)
    manifest_files = (
        ('manifest.jsonl', 'A manifest file has been deleted'),
        ('index.json', 'A manifest index file has been deleted'),
    )
    chunk_name_format = "dummy_{}.txt"

    for db_data in _get_query_set(apps):
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))

            for file_name, deleted_message in manifest_files:
                manifest_file = os.path.join(upload_dir, file_name)
                if os.path.exists(manifest_file):
                    os.remove(manifest_file)
                    logger.info(deleted_message)

            if hasattr(db_data, 'video'):
                meta_path = os.path.join(upload_dir, "meta_info.txt")
                if os.path.exists(meta_path):
                    logger.info('A meta_info.txt already exists')
                    continue
                # Local uploads live in the upload dir, everything else on the share.
                data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
                media_file = os.path.join(data_dir, db_data.video.path)
                logger.info('Preparing of the video meta has begun')
                meta = VideoManifestManager(manifest_path=upload_dir) \
                    .prepare_meta(media_file=media_file, force=True)
                with open(meta_path, "w") as meta_file:
                    for idx, pts, _ in meta:
                        meta_file.write(f"{idx} {pts}\n")
            else:
                sources = [db_image.path for db_image in db_data.images.all().order_by('frame')]
                chunk_size = db_data.chunk_size
                logger.info('Preparing of the dummy chunks has begun')
                # One dummy file per run of chunk_size consecutive frames.
                for idx, start in enumerate(range(0, len(sources), chunk_size)):
                    chunk_name = chunk_name_format.format(idx)
                    chunk_path = os.path.join(upload_dir, chunk_name)
                    if os.path.exists(chunk_path):
                        logger.info(chunk_name + " already exists")
                        continue
                    with open(chunk_path, "w") as dummy_chunk:
                        dummy_chunk.writelines(
                            f"{img_path}\n" for img_path in sources[start:start + chunk_size])
            logger.info('Succesfull migration for the data({})'.format(db_data.id))
        except Exception as ex:
            # Best-effort boundary: keep going with the next data object, but
            # record the traceback so the failure can be diagnosed afterwards.
            logger.exception(str(ex))
def migrate2manifest(apps, shema_editor):
logger = get_logger()
logger.info('The data migration has been started for creating manifest`s files')
query_set = _get_query_set(apps)
logger.info('Need to update {} data objects'.format(len(query_set)))
for db_data in query_set: for db_data in query_set:
try: try:
upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id) upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')): if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
os.remove(os.path.join(upload_dir, 'meta_info.txt')) os.remove(os.path.join(upload_dir, 'meta_info.txt'))
logger.info('{}/meta_info.txt has been deleted'.format(upload_dir))
else: else:
for path in glob.glob(f'{upload_dir}/dummy_*.txt'): for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
os.remove(path) os.remove(path)
logger.info(f"{path} has been deleted")
# it's necessary for case with long data migration # it's necessary for case with long data migration
if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')): if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
logger.info('Manifest file already exists')
continue continue
data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
if hasattr(db_data, 'video'): if hasattr(db_data, 'video'):
media_file = os.path.join(data_dir, db_data.video.path) media_file = os.path.join(data_dir, db_data.video.path)
manifest = VideoManifestManager(manifest_path=upload_dir) manifest = VideoManifestManager(manifest_path=upload_dir)
meta_info = manifest.prepare_meta(media_file=media_file) logger.info('Preparing of the video meta information has begun')
meta_info = manifest.prepare_meta(media_file=media_file, force=True)
logger.info('Manifest creating has begun')
manifest.create(meta_info) manifest.create(meta_info)
logger.info('Index creating has begun')
manifest.init_index() manifest.init_index()
else: else:
manifest = ImageManifestManager(manifest_path=upload_dir) manifest = ImageManifestManager(manifest_path=upload_dir)
sources = [] sources = []
if db_data.storage == StorageChoice.LOCAL: if db_data.storage == StorageChoice.LOCAL:
for (root, _, files) in os.walk(data_dir): for (root, _, files) in os.walk(data_dir):
sources.extend([os.path.join(root, f) for f in files]) sources.extend([os.path.join(root, f) for f in files if get_mime(f) == 'image'])
sources.sort() sources.sort()
# using share, this means that we can not explicitly restore the entire data structure # using share, this means that we can not explicitly restore the entire data structure
else: else:
sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')] sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')]
if any(list(filter(lambda x: x.dimension==DimensionType.DIM_3D, db_data.tasks.all()))): if any(list(filter(lambda x: x.dimension==DimensionType.DIM_3D, db_data.tasks.all()))):
logger.info('Preparing of images 3d meta information has begun')
content = [] content = []
for source in sources: for source in sources:
name, ext = os.path.splitext(os.path.relpath(source, upload_dir)) name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
@ -51,6 +123,7 @@ def migrate_data(apps, shema_editor):
'extension': ext 'extension': ext
}) })
else: else:
logger.info('Preparing of 2d images meta information has begun')
meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir) meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
content = meta_info.content content = meta_info.content
@ -58,6 +131,7 @@ def migrate_data(apps, shema_editor):
def _get_frame_step(str_): def _get_frame_step(str_):
match = search("step\s*=\s*([1-9]\d*)", str_) match = search("step\s*=\s*([1-9]\d*)", str_)
return int(match.group(1)) if match else 1 return int(match.group(1)) if match else 1
logger.info('Data is located on the share, metadata update has been started')
step = _get_frame_step(db_data.frame_filter) step = _get_frame_step(db_data.frame_filter)
start = db_data.start_frame start = db_data.start_frame
stop = db_data.stop_frame + 1 stop = db_data.stop_frame + 1
@ -67,10 +141,13 @@ def migrate_data(apps, shema_editor):
item = content.pop(0) if i in images_range else dict() item = content.pop(0) if i in images_range else dict()
result_content.append(item) result_content.append(item)
content = result_content content = result_content
logger.info('Manifest creating has begun')
manifest.create(content) manifest.create(content)
logger.info('Index creating has begun')
manifest.init_index() manifest.init_index()
logger.info('Succesfull migration for the data({})'.format(db_data.id))
except Exception as ex: except Exception as ex:
print(str(ex)) logger.error(str(ex))
class Migration(migrations.Migration): class Migration(migrations.Migration):
@ -79,5 +156,8 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
migrations.RunPython(migrate_data) migrations.RunPython(
code=migrate2manifest,
reverse_code=migrate2meta
)
] ]

@ -325,7 +325,7 @@ class _ManifestManager(ABC):
return self._index return self._index
class VideoManifestManager(_ManifestManager): class VideoManifestManager(_ManifestManager):
def __init__(self, manifest_path, *args, **kwargs): def __init__(self, manifest_path):
super().__init__(manifest_path) super().__init__(manifest_path)
setattr(self._manifest, 'TYPE', 'video') setattr(self._manifest, 'TYPE', 'video')
self.BASE_INFORMATION['properties'] = 3 self.BASE_INFORMATION['properties'] = 3
@ -381,9 +381,15 @@ class ManifestValidator:
assert self._manifest.TYPE != json.loads(manifest_file.readline())['type'] assert self._manifest.TYPE != json.loads(manifest_file.readline())['type']
class VideoManifestValidator(VideoManifestManager): class VideoManifestValidator(VideoManifestManager):
def __init__(self, source_path, manifest_path):
    """Store the source path and initialise the parent manifest manager.

    Args:
        source_path: value kept on the instance as ``self.source_path``.
        manifest_path: forwarded to the parent class initialiser.
    """
    # The source path is recorded first, then the parent class is initialised.
    self.source_path = source_path
    super().__init__(manifest_path=manifest_path)
@staticmethod
def _get_video_stream(container):
    """Return the first stream of ``container`` whose type is ``'video'``.

    The selected stream gets its ``thread_type`` attribute set to ``'AUTO'``
    before it is returned.

    Args:
        container: object exposing an iterable ``streams`` attribute whose
            items have a ``type`` attribute.

    Returns:
        The first item of ``container.streams`` with ``type == 'video'``.

    Raises:
        StopIteration: if ``container.streams`` has no video stream.
    """
    video_only = (candidate for candidate in container.streams if candidate.type == 'video')
    # next() is called without a default, so a container with no video
    # stream surfaces as StopIteration.
    selected = next(video_only)
    selected.thread_type = 'AUTO'
    return selected
def validate_key_frame(self, container, video_stream, key_frame): def validate_key_frame(self, container, video_stream, key_frame):
for packet in container.demux(video_stream): for packet in container.demux(video_stream):

Loading…
Cancel
Save