Ability to create custom extractors (#434)

* ability to add custom extractors
* added configurable mimetypes
* added a note to changelog
Andrey Zhavoronkov 7 years ago committed by Nikita Manovich
parent d15de7bc29
commit b48d59b5ec

@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Remote data source (list of URLs to create an annotation task)
- Auto annotation using Faster R-CNN with Inception v2 (utils/open_model_zoo)
- Auto annotation using Pixel Link mobilenet v2 - text detection (utils/open_model_zoo)
- Ability to create custom extractors for unsupported media types
### Changed
- Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)

@@ -0,0 +1,216 @@
import os
import tempfile
import shutil
import numpy as np
from ffmpy import FFmpeg
from pyunpack import Archive
from PIL import Image
import mimetypes
_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
MEDIA_MIMETYPES_FILES = [
os.path.join(_SCRIPT_DIR, "media.mimetypes"),
]
mimetypes.init(files=MEDIA_MIMETYPES_FILES)
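# media.mimetypes is a standard mime.types-style table that can be extended to
# recognize additional media; e.g. a line such as "video/x-msvideo avi" would
# map *.avi to a video MIME type (illustrative entry - the file's actual
# contents are not shown in this diff).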
def get_mime(name):
for type_name, type_def in MEDIA_TYPES.items():
if type_def['has_mime_type'](name):
return type_name
return 'unknown'
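# Usage sketch (with the default MEDIA_TYPES defined below); file names here
# are illustrative only:
#   get_mime('clip.mp4')  -> 'video'
#   get_mime('notes.txt') -> 'unknown'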
class MediaExtractor:
def __init__(self, source_path, dest_path, image_quality, step, start, stop):
self._source_path = source_path
self._dest_path = dest_path
self._image_quality = image_quality
self._step = step
self._start = start
self._stop = stop
def get_source_name(self):
return self._source_path
# Note: step, start and stop have no effect for this extractor
class ImageListExtractor(MediaExtractor):
def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
if not source_path:
raise Exception('No image found')
super().__init__(
source_path=sorted(source_path),
dest_path=dest_path,
image_quality=image_quality,
step=1,
start=0,
stop=0,
)
def __iter__(self):
return iter(self._source_path)
def __getitem__(self, k):
return self._source_path[k]
def __len__(self):
return len(self._source_path)
def save_image(self, k, dest_path):
image = Image.open(self[k])
# Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion
if image.mode == "I":
# Image mode is 32bit integer pixels.
# Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit
im_data = np.array(image)
im_data = im_data * (2**8 / im_data.max())
image = Image.fromarray(im_data.astype(np.int32))
image = image.convert('RGB')
image.save(dest_path, quality=self._image_quality, optimize=True)
height = image.height
width = image.width
image.close()
return width, height
# Note: step, start and stop have no effect for this extractor
class DirectoryExtractor(ImageListExtractor):
def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
image_paths = []
for source in source_path:
for root, _, files in os.walk(source):
paths = [os.path.join(root, f) for f in files]
paths = filter(lambda x: get_mime(x) == 'image', paths)
image_paths.extend(paths)
super().__init__(
source_path=sorted(image_paths),
dest_path=dest_path,
image_quality=image_quality,
step=1,
start=0,
stop=0,
)
# Note: step, start and stop have no effect for this extractor
class ArchiveExtractor(DirectoryExtractor):
def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
Archive(source_path[0]).extractall(dest_path)
super().__init__(
source_path=[dest_path],
dest_path=dest_path,
image_quality=image_quality,
step=1,
start=0,
stop=0,
)
class VideoExtractor(MediaExtractor):
def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
from cvat.apps.engine.log import slogger
_dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
super().__init__(
source_path=source_path[0],
dest_path=_dest_path,
image_quality=image_quality,
step=step,
start=start,
stop=stop,
)
# translate the inverted quality range [1, 95] to ffmpeg's -q:v range [2, 31]
translated_quality = 96 - self._image_quality
translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
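# e.g. image_quality == 95 -> 96 - 95 = 1  -> q:v 2  (best)
#      image_quality == 1  -> 96 - 1  = 95 -> q:v 31 (worst)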
self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
target_path = os.path.join(self._tmp_output, '%d.jpg')
output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
filters = ''
if self._stop > 0:
filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')'
elif self._start > 0:
filters = 'gte(n,' + str(self._start) + ')'
if self._step > 1:
filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))'
if filters:
output_opts += " -vf select=\"'" + filters + "'\""
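# e.g. start=10, stop=100, step=5 (illustrative values) appends:
#   -vf select="'between(n,10,100)*not(mod(n-10,5))'"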
ff = FFmpeg(
inputs = {self._source_path: None},
outputs = {target_path: output_opts})
slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
ff.run()
def _getframepath(self, k):
return "{0}/{1}.jpg".format(self._tmp_output, k)
def __iter__(self):
i = 0
while os.path.exists(self._getframepath(i)):
yield self._getframepath(i)
i += 1
def __del__(self):
if self._tmp_output:
shutil.rmtree(self._tmp_output)
def __getitem__(self, k):
return self._getframepath(k)
def __len__(self):
return len(os.listdir(self._tmp_output))
def save_image(self, k, dest_path):
shutil.copyfile(self[k], dest_path)
def _is_archive(path):
mime = mimetypes.guess_type(path)
mime_type = mime[0]
encoding = mime[1]
supportedArchives = ['application/zip', 'application/x-rar-compressed',
'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
'gzip', 'bzip2']
return mime_type in supportedArchives or encoding in supportedArchives
def _is_video(path):
mime = mimetypes.guess_type(path)
return mime[0] is not None and mime[0].startswith('video')
def _is_image(path):
mime = mimetypes.guess_type(path)
return mime[0] is not None and mime[0].startswith('image')
def _is_dir(path):
return os.path.isdir(path)
# 'has_mime_type': function that receives one argument - a path to a file -
#   and returns True if the file has the specified media type.
# 'extractor': class that extracts images from the specified media.
# 'mode': 'annotation' or 'interpolation' - the mode of the task to be created.
# 'unique': True or False - describes how the type can be combined with others.
#   True - only one item of this type is allowed, with no other media
#   False - media of this type can be combined with other types that have unique == False
# (see the registration sketch after the dictionary below)
MEDIA_TYPES = {
'image': {
'has_mime_type': _is_image,
'extractor': ImageListExtractor,
'mode': 'annotation',
'unique': False,
},
'video': {
'has_mime_type': _is_video,
'extractor': VideoExtractor,
'mode': 'interpolation',
'unique': True,
},
'archive': {
'has_mime_type': _is_archive,
'extractor': ArchiveExtractor,
'mode': 'annotation',
'unique': True,
},
'directory': {
'has_mime_type': _is_dir,
'extractor': DirectoryExtractor,
'mode': 'annotation',
'unique': False,
},
}
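As a minimal sketch of the extension point this commit introduces, a new media type could be registered as follows; the 'numpy' type, _is_numpy, and NumpyExtractor below are hypothetical and not part of the commit:

def _is_numpy(path):
    # .npy has no standard MIME type, so match on the extension (assumption)
    return path.endswith('.npy')

class NumpyExtractor(ImageListExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        image_paths = []
        for i, source in enumerate(sorted(source_path)):
            # convert each saved array (assumed to hold 8-bit image data)
            # into a PNG that the base class can serve
            image = Image.fromarray(np.load(source).astype(np.uint8))
            png_path = os.path.join(dest_path, '{}.png'.format(i))
            image.save(png_path)
            image_paths.append(png_path)
        super().__init__(image_paths, dest_path, image_quality)

MEDIA_TYPES['numpy'] = {
    'has_mime_type': _is_numpy,   # recognizes *.npy files
    'extractor': NumpyExtractor,  # turns them into task frames
    'mode': 'annotation',         # create an annotation task
    'unique': False,              # may be mixed with other non-unique types
}

Because the counter in _validate_data (below) is built from MEDIA_TYPES keys, a type registered this way participates in validation without further changes.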

@@ -5,7 +5,7 @@ from io import StringIO
from PIL import Image
from django.db import migrations
from django.conf import settings
from cvat.apps.engine.task import _get_mime
from cvat.apps.engine.media_extractors import get_mime
def parse_attribute(value):
match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value)
@@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
video = ""
for root, _, files in os.walk(_get_upload_dirname(db_task)):
fullnames = map(lambda f: os.path.join(root, f), files)
videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames))
videos = list(filter(lambda x: get_mime(x) == 'video', fullnames))
if len(videos):
video = videos[0]
break
@@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
filenames = []
for root, _, files in os.walk(_get_upload_dirname(db_task)):
fullnames = map(lambda f: os.path.join(root, f), files)
images = filter(lambda x: _get_mime(x) == 'image', fullnames)
images = filter(lambda x: get_mime(x) == 'image', fullnames)
filenames.extend(images)
filenames.sort()

@@ -7,9 +7,6 @@ import os
import sys
import rq
import shutil
import subprocess
import tempfile
import numpy as np
from PIL import Image
from traceback import print_exception
from ast import literal_eval
@@ -17,16 +14,11 @@ from urllib import error as urlerror
from urllib import parse as urlparse
from urllib import request as urlrequest
import mimetypes
_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes")
mimetypes.init(files=[_MEDIA_MIMETYPES_FILE])
from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES
import django_rq
from django.conf import settings
from django.db import transaction
from ffmpy import FFmpeg
from pyunpack import Archive
from distutils.dir_util import copy_tree
from . import models
@@ -51,49 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback):
############################# Internal implementation for server API
class _FrameExtractor:
def __init__(self, source_path, compress_quality, step=1, start=0, stop=0):
# translate inversed range 1:95 to 2:32
translated_quality = 96 - compress_quality
translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
self.source = source_path
self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
target_path = os.path.join(self.output, '%d.jpg')
output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
filters = ''
if stop > 0:
filters = 'between(n,' + str(start) + ',' + str(stop) + ')'
elif start > 0:
filters = 'gte(n,' + str(start) + ')'
if step > 1:
filters += ('*' if filters else '') + 'not(mod(n-' + str(start) + ',' + str(step) + '))'
if filters:
filters = "select=\"'" + filters + "'\""
if filters:
output_opts += ' -vf ' + filters
ff = FFmpeg(
inputs = {source_path: None},
outputs = {target_path: output_opts})
slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
ff.run()
def getframepath(self, k):
return "{0}/{1}.jpg".format(self.output, k)
def __del__(self):
if self.output:
shutil.rmtree(self.output)
def __getitem__(self, k):
return self.getframepath(k)
def __iter__(self):
i = 0
while os.path.exists(self.getframepath(i)):
yield self[i]
i += 1
def make_image_meta_cache(db_task):
with open(db_task.get_image_meta_cache_path(), 'w') as meta_file:
cache = {
@@ -111,7 +60,7 @@ def make_image_meta_cache(db_task):
filenames = []
for root, _, files in os.walk(db_task.get_upload_dirname()):
fullnames = map(lambda f: os.path.join(root, f), files)
images = filter(lambda x: _get_mime(x) == 'image', fullnames)
images = filter(lambda x: get_mime(x) == 'image', fullnames)
filenames.extend(images)
filenames.sort()
@@ -135,31 +84,6 @@ def get_image_meta_cache(db_task):
with open(db_task.get_image_meta_cache_path()) as meta_cache_file:
return literal_eval(meta_cache_file.read())
def _get_mime(name):
mime = mimetypes.guess_type(name)
mime_type = mime[0]
encoding = mime[1]
# zip, rar, tar, tar.gz, tar.bz2, 7z, cpio
supportedArchives = ['application/zip', 'application/x-rar-compressed',
'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
'gzip', 'bzip2']
if mime_type is not None:
if mime_type.startswith('video'):
return 'video'
elif mime_type in supportedArchives or encoding in supportedArchives:
return 'archive'
elif mime_type.startswith('image'):
return 'image'
else:
return 'unknown'
else:
if os.path.isdir(name):
return 'directory'
else:
return 'unknown'
def _copy_data_from_share(server_files, upload_dir):
job = rq.get_current_job()
job.meta['status'] = 'Data are being copied from share..'
@@ -176,74 +100,6 @@ def _copy_data_from_share(server_files, upload_dir):
os.makedirs(target_dir)
shutil.copyfile(source_path, target_path)
def _unpack_archive(archive, upload_dir):
job = rq.get_current_job()
job.meta['status'] = 'Archive is being unpacked..'
job.save_meta()
Archive(archive).extractall(upload_dir)
os.remove(archive)
def _copy_video_to_task(video, db_task, step):
job = rq.get_current_job()
job.meta['status'] = 'Video is being extracted..'
job.save_meta()
extractor = _FrameExtractor(video, db_task.image_quality,
step, db_task.start_frame, db_task.stop_frame)
for frame, image_orig_path in enumerate(extractor):
image_dest_path = db_task.get_frame_path(frame)
db_task.size += 1
dirname = os.path.dirname(image_dest_path)
if not os.path.exists(dirname):
os.makedirs(dirname)
shutil.copyfile(image_orig_path, image_dest_path)
if db_task.stop_frame == 0:
db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * step
image = Image.open(db_task.get_frame_path(0))
models.Video.objects.create(task=db_task, path=video,
width=image.width, height=image.height)
image.close()
def _copy_images_to_task(upload_dir, db_task):
image_paths = []
for root, _, files in os.walk(upload_dir):
paths = map(lambda f: os.path.join(root, f), files)
paths = filter(lambda x: _get_mime(x) == 'image', paths)
image_paths.extend(paths)
image_paths.sort()
db_images = []
if len(image_paths):
job = rq.get_current_job()
for frame, image_orig_path in enumerate(image_paths):
progress = frame * 100 // len(image_paths)
job.meta['status'] = 'Images are being compressed.. {}%'.format(progress)
job.save_meta()
image_dest_path = db_task.get_frame_path(frame)
db_task.size += 1
dirname = os.path.dirname(image_dest_path)
if not os.path.exists(dirname):
os.makedirs(dirname)
image = Image.open(image_orig_path)
# Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion
if image.mode == "I":
# Image mode is 32bit integer pixels.
# Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit
im_data = np.array(image)
im_data = im_data * (2**8 / im_data.max())
image = Image.fromarray(im_data.astype(np.int32))
image = image.convert('RGB')
image.save(image_dest_path, quality=db_task.image_quality, optimize=True)
db_images.append(models.Image(task=db_task, path=image_orig_path,
frame=frame, width=image.width, height=image.height))
image.close()
models.Image.objects.bulk_create(db_images)
else:
raise ValueError("Image files were not found")
def _save_task_to_db(db_task):
job = rq.get_current_job()
job.meta['status'] = 'Task is being saved in database'
@@ -296,7 +152,7 @@ def _validate_data(data):
if '..' in path.split(os.path.sep):
raise ValueError("Don't use '..' inside file paths")
full_path = os.path.abspath(os.path.join(share_root, path))
if 'directory' == _get_mime(full_path):
if 'directory' == get_mime(full_path):
server_files['dirs'].append(path)
else:
server_files['files'].append(path)
@@ -308,43 +164,42 @@ def _validate_data(data):
if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]]
def count_files(file_mapping, counter):
archive = None
video = None
for rel_path, full_path in file_mapping.items():
mime = _get_mime(full_path)
counter[mime] += 1
if mime == "archive":
archive = rel_path
elif mime == "video":
video = rel_path
return video, archive
mime = get_mime(full_path)
counter[mime].append(rel_path)
counter = {"image": 0, "video": 0, "archive": 0, "directory": 0}
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
client_video, client_archive = count_files(
count_files(
file_mapping={ f:f for f in data['remote_files'] or data['client_files']},
counter=counter,
)
server_video, server_archive = count_files(
count_files(
file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']},
counter=counter,
)
num_videos = counter["video"]
num_archives = counter["archive"]
num_images = counter["image"] + counter["directory"]
if (num_videos > 1 or num_archives > 1 or
(num_videos == 1 and num_archives + num_images > 0) or
(num_archives == 1 and num_videos + num_images > 0) or
(num_images > 0 and num_archives + num_videos > 0)):
unique_entries = 0
multiple_entries = 0
for media_type, media_config in MEDIA_TYPES.items():
if counter[media_type]:
if media_config['unique']:
unique_entries += len(counter[media_type])
else:
multiple_entries += len(counter[media_type])
if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']])
multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']])
count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()])
raise ValueError('Only one {} or many {} can be used simultaneously, \
but {} found.'.format(unique_types, multiply_types, count))
raise ValueError("Only one archive, one video or many images can be \
dowloaded simultaneously. {} image(s), {} dir(s), {} video(s), {} \
archive(s) found".format(counter['image'], counter['directory'],
counter['video'], counter['archive']))
if unique_entries == 0 and multiple_entries == 0:
raise ValueError('No media data found')
return client_video or server_video, client_archive or server_archive
return counter
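# Validation sketch: with the new counter, {'image': ['a.png'], 'directory': ['d']}
# (two non-unique types) is accepted, while {'video': ['v.mp4'], 'image': ['a.png']}
# is rejected because 'video' is a unique type. File names here are illustrative.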
def _download_data(urls, upload_dir):
job = rq.get_current_job()
@@ -382,25 +237,72 @@ def _create_thread(tid, data):
raise NotImplementedError("Adding more data is not implemented")
upload_dir = db_task.get_upload_dirname()
if data['remote_files']:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
video, archive = _validate_data(data)
media = _validate_data(data)
if data['server_files']:
_copy_data_from_share(data['server_files'], upload_dir)
if archive:
archive = os.path.join(upload_dir, archive)
_unpack_archive(archive, upload_dir)
job = rq.get_current_job()
job.meta['status'] = 'Media files are being extracted...'
job.save_meta()
if video:
db_task.mode = "interpolation"
video = os.path.join(upload_dir, video)
_copy_video_to_task(video, db_task, db_task.get_frame_step())
db_images = []
extractors = []
length = 0
for media_type, media_files in media.items():
if not media_files:
continue
extractor = MEDIA_TYPES[media_type]['extractor'](
source_path=[os.path.join(upload_dir, f) for f in media_files],
dest_path=upload_dir,
image_quality=db_task.image_quality,
step=db_task.get_frame_step(),
start=db_task.start_frame,
stop=db_task.stop_frame,
)
length += len(extractor)
db_task.mode = MEDIA_TYPES[media_type]['mode']
extractors.append(extractor)
for extractor in extractors:
for frame, image_orig_path in enumerate(extractor):
image_dest_path = db_task.get_frame_path(db_task.size)
dirname = os.path.dirname(image_dest_path)
if not os.path.exists(dirname):
os.makedirs(dirname)
if db_task.mode == 'interpolation':
extractor.save_image(frame, image_dest_path)
else:
width, height = extractor.save_image(frame, image_dest_path)
db_images.append(models.Image(
task=db_task,
path=image_orig_path,
frame=db_task.size,
width=width, height=height))
db_task.size += 1
progress = frame * 100 // length
job.meta['status'] = 'Images are being compressed... {}%'.format(progress)
job.save_meta()
if db_task.mode == 'interpolation':
image = Image.open(db_task.get_frame_path(0))
models.Video.objects.create(
task=db_task,
path=extractors[0].get_source_name(),
width=image.width, height=image.height)
image.close()
if db_task.stop_frame == 0:
db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step()
else:
db_task.mode = "annotation"
_copy_images_to_task(upload_dir, db_task)
models.Image.objects.bulk_create(db_images)
slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid))
_save_task_to_db(db_task)
