Ability to create custom extractors (#434)

* ability to add custom extractors
* added configurable mimetypes
* added a note to changelog
main
Andrey Zhavoronkov 7 years ago committed by Nikita Manovich
parent d15de7bc29
commit b48d59b5ec

CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Remote data source (list of URLs to create an annotation task)
- Auto annotation using Faster R-CNN with Inception v2 (utils/open_model_zoo)
- Auto annotation using Pixel Link mobilenet v2 - text detection (utils/open_model_zoo)
+- Ability to create custom extractors for unsupported media types
### Changed
- Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)

cvat/apps/engine/media_extractors.py
@@ -0,0 +1,216 @@
import os
import tempfile
import shutil
import numpy as np
from ffmpy import FFmpeg
from pyunpack import Archive
from PIL import Image

import mimetypes

_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
MEDIA_MIMETYPES_FILES = [
    os.path.join(_SCRIPT_DIR, "media.mimetypes"),
]

mimetypes.init(files=MEDIA_MIMETYPES_FILES)
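# mimetypes.init() above merges the bundled media.mimetypes file into the
# standard type map, which is what makes the recognized types configurable.
# The file uses the usual mime.types format that the mimetypes module parses:
# a MIME type followed by whitespace-separated extensions. Illustrative
# entries (not necessarily the exact contents shipped with this commit):
#   video/x-msvideo         avi
#   image/x-portable-anymap pnm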
def get_mime(name):
    for type_name, type_def in MEDIA_TYPES.items():
        if type_def['has_mime_type'](name):
            return type_name

    return 'unknown'
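# Illustrative behaviour of get_mime(), assuming standard MIME data is loaded
# (hypothetical paths, not from the commit):
#   get_mime('clip.mp4')   -> 'video'
#   get_mime('frame.png')  -> 'image'
#   get_mime('data.zip')   -> 'archive'
#   get_mime('/some/dir')  -> 'directory' (only if the directory exists on disk)
#   get_mime('notes.txt')  -> 'unknown'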
class MediaExtractor:
    def __init__(self, source_path, dest_path, image_quality, step, start, stop):
        self._source_path = source_path
        self._dest_path = dest_path
        self._image_quality = image_quality
        self._step = step
        self._start = start
        self._stop = stop

    def get_source_name(self):
        return self._source_path
# Note: step, start, and stop have no effect for this extractor
class ImageListExtractor(MediaExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        if not source_path:
            raise Exception('No image found')
        super().__init__(
            source_path=sorted(source_path),
            dest_path=dest_path,
            image_quality=image_quality,
            step=1,
            start=0,
            stop=0,
        )

    def __iter__(self):
        return iter(self._source_path)

    def __getitem__(self, k):
        return self._source_path[k]

    def __len__(self):
        return len(self._source_path)

    def save_image(self, k, dest_path):
        image = Image.open(self[k])
        # Ensure image data fits into 8 bits per pixel before RGB conversion,
        # as PIL clips values on conversion
        if image.mode == "I":
            # Image mode is 32-bit integer pixels.
            # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8 bits
            im_data = np.array(image)
            im_data = im_data * (2**8 / im_data.max())
            image = Image.fromarray(im_data.astype(np.int32))
        image = image.convert('RGB')
        image.save(dest_path, quality=self._image_quality, optimize=True)
        height = image.height
        width = image.width
        image.close()
        return width, height
# Note: step, start, and stop have no effect for this extractor
class DirectoryExtractor(ImageListExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        image_paths = []
        for source in source_path:
            for root, _, files in os.walk(source):
                paths = [os.path.join(root, f) for f in files]
                paths = filter(lambda x: get_mime(x) == 'image', paths)
                image_paths.extend(paths)
        super().__init__(
            source_path=sorted(image_paths),
            dest_path=dest_path,
            image_quality=image_quality,
            step=1,
            start=0,
            stop=0,
        )
# Note: step, start, and stop have no effect for this extractor
class ArchiveExtractor(DirectoryExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        Archive(source_path[0]).extractall(dest_path)
        super().__init__(
            source_path=[dest_path],
            dest_path=dest_path,
            image_quality=image_quality,
            step=1,
            start=0,
            stop=0,
        )
class VideoExtractor(MediaExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        # Local import, presumably to avoid a circular dependency at module load time
        from cvat.apps.engine.log import slogger
        _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
        super().__init__(
            source_path=source_path[0],
            dest_path=_dest_path,
            image_quality=image_quality,
            step=step,
            start=start,
            stop=stop,
        )
        # Translate the inverted quality range 1:95 into ffmpeg's -q:v range 2:31
        translated_quality = 96 - self._image_quality
        translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
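        # Spot-checking the mapping (illustrative values, not from the source):
        #   image_quality=95 -> 96-95=1  -> round(((1-1)*29)/94 + 2)  = 2  (best)
        #   image_quality=50 -> 96-50=46 -> round(((46-1)*29)/94 + 2) = 16
        #   image_quality=1  -> 96-1=95  -> round(((95-1)*29)/94 + 2) = 31 (worst)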
        self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
        target_path = os.path.join(self._tmp_output, '%d.jpg')
        output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
        filters = ''
        if self._stop > 0:
            filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')'
        elif self._start > 0:
            filters = 'gte(n,' + str(self._start) + ')'
        if self._step > 1:
            filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))'
        if filters:
            output_opts += " -vf select=\"'" + filters + "'\""
        ff = FFmpeg(
            inputs = {self._source_path: None},
            outputs = {target_path: output_opts})
        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
        ff.run()

    def _getframepath(self, k):
        return "{0}/{1}.jpg".format(self._tmp_output, k)

    def __iter__(self):
        i = 0
        while os.path.exists(self._getframepath(i)):
            yield self._getframepath(i)
            i += 1

    def __del__(self):
        if self._tmp_output:
            shutil.rmtree(self._tmp_output)

    def __getitem__(self, k):
        return self._getframepath(k)

    def __len__(self):
        return len(os.listdir(self._tmp_output))

    def save_image(self, k, dest_path):
        shutil.copyfile(self[k], dest_path)
def _is_archive(path):
    mime = mimetypes.guess_type(path)
    mime_type = mime[0]
    encoding = mime[1]
    # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio
    supportedArchives = ['application/zip', 'application/x-rar-compressed',
        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
        'gzip', 'bzip2']
    return mime_type in supportedArchives or encoding in supportedArchives

def _is_video(path):
    mime = mimetypes.guess_type(path)
    return mime[0] is not None and mime[0].startswith('video')

def _is_image(path):
    mime = mimetypes.guess_type(path)
    return mime[0] is not None and mime[0].startswith('image')

def _is_dir(path):
    return os.path.isdir(path)
# 'has_mime_type': function that receives one argument - the path to a file -
#     and returns True if the file has the specified media type.
# 'extractor': class that extracts images from the specified media.
# 'mode': 'annotation' or 'interpolation' - mode of the task to be created.
# 'unique': True or False - describes how the type can be combined with others.
#     True - only one item of this type and no other media is allowed.
#     False - this media type can be combined with others that have unique == False.
MEDIA_TYPES = {
    'image': {
        'has_mime_type': _is_image,
        'extractor': ImageListExtractor,
        'mode': 'annotation',
        'unique': False,
    },
    'video': {
        'has_mime_type': _is_video,
        'extractor': VideoExtractor,
        'mode': 'interpolation',
        'unique': True,
    },
    'archive': {
        'has_mime_type': _is_archive,
        'extractor': ArchiveExtractor,
        'mode': 'annotation',
        'unique': True,
    },
    'directory': {
        'has_mime_type': _is_dir,
        'extractor': DirectoryExtractor,
        'mode': 'annotation',
        'unique': False,
    },
}
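This registry is the extension point the commit title refers to: supporting a new
media type only takes a detector function, an extractor class, and one more
MEDIA_TYPES entry. A minimal sketch of a hypothetical 'pdf' type (the pdf2image
dependency, _is_pdf, and PdfExtractor below are illustrative and not part of this
commit):

import os
import mimetypes
from cvat.apps.engine.media_extractors import MEDIA_TYPES, ImageListExtractor

def _is_pdf(path):
    return mimetypes.guess_type(path)[0] == 'application/pdf'

class PdfExtractor(ImageListExtractor):
    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
        # Render each page to a file, then reuse the inherited image-list machinery
        from pdf2image import convert_from_path  # hypothetical extra dependency
        image_paths = []
        for i, page in enumerate(convert_from_path(source_path[0])):
            page_path = os.path.join(dest_path, 'page_{}.png'.format(i))
            page.save(page_path)
            image_paths.append(page_path)
        super().__init__(image_paths, dest_path, image_quality)

MEDIA_TYPES['pdf'] = {
    'has_mime_type': _is_pdf,   # True for files this extractor should handle
    'extractor': PdfExtractor,  # turns the media into a sequence of frames
    'mode': 'annotation',       # task mode to create
    'unique': True,             # must not be mixed with other media types
}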

@@ -5,7 +5,7 @@ from io import StringIO
from PIL import Image
from django.db import migrations
from django.conf import settings
-from cvat.apps.engine.task import _get_mime
+from cvat.apps.engine.media_extractors import get_mime

def parse_attribute(value):
    match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value)
@@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
    video = ""
    for root, _, files in os.walk(_get_upload_dirname(db_task)):
        fullnames = map(lambda f: os.path.join(root, f), files)
-       videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames))
+       videos = list(filter(lambda x: get_mime(x) == 'video', fullnames))
        if len(videos):
            video = videos[0]
            break
@@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
    filenames = []
    for root, _, files in os.walk(_get_upload_dirname(db_task)):
        fullnames = map(lambda f: os.path.join(root, f), files)
-       images = filter(lambda x: _get_mime(x) == 'image', fullnames)
+       images = filter(lambda x: get_mime(x) == 'image', fullnames)
        filenames.extend(images)
    filenames.sort()

cvat/apps/engine/task.py
@@ -7,9 +7,6 @@ import os
import sys
import rq
import shutil
-import subprocess
-import tempfile
-import numpy as np
from PIL import Image
from traceback import print_exception
from ast import literal_eval
@@ -17,16 +14,11 @@ from urllib import error as urlerror
from urllib import parse as urlparse
from urllib import request as urlrequest

-import mimetypes
-
-_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
-_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes")
-mimetypes.init(files=[_MEDIA_MIMETYPES_FILE])
+from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES

import django_rq
from django.conf import settings
from django.db import transaction
-from ffmpy import FFmpeg
-from pyunpack import Archive
from distutils.dir_util import copy_tree

from . import models
@@ -51,49 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback):

############################# Internal implementation for server API

-class _FrameExtractor:
-    def __init__(self, source_path, compress_quality, step=1, start=0, stop=0):
-        # translate inversed range 1:95 to 2:32
-        translated_quality = 96 - compress_quality
-        translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
-        self.source = source_path
-        self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
-        target_path = os.path.join(self.output, '%d.jpg')
-        output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
-        filters = ''
-        if stop > 0:
-            filters = 'between(n,' + str(start) + ',' + str(stop) + ')'
-        elif start > 0:
-            filters = 'gte(n,' + str(start) + ')'
-        if step > 1:
-            filters += ('*' if filters else '') + 'not(mod(n-' + str(start) + ',' + str(step) + '))'
-        if filters:
-            filters = "select=\"'" + filters + "'\""
-        if filters:
-            output_opts += ' -vf ' + filters
-        ff = FFmpeg(
-            inputs = {source_path: None},
-            outputs = {target_path: output_opts})
-        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
-        ff.run()
-
-    def getframepath(self, k):
-        return "{0}/{1}.jpg".format(self.output, k)
-
-    def __del__(self):
-        if self.output:
-            shutil.rmtree(self.output)
-
-    def __getitem__(self, k):
-        return self.getframepath(k)
-
-    def __iter__(self):
-        i = 0
-        while os.path.exists(self.getframepath(i)):
-            yield self[i]
-            i += 1
-
def make_image_meta_cache(db_task):
    with open(db_task.get_image_meta_cache_path(), 'w') as meta_file:
        cache = {
@@ -111,7 +60,7 @@ def make_image_meta_cache(db_task):
        filenames = []
        for root, _, files in os.walk(db_task.get_upload_dirname()):
            fullnames = map(lambda f: os.path.join(root, f), files)
-           images = filter(lambda x: _get_mime(x) == 'image', fullnames)
+           images = filter(lambda x: get_mime(x) == 'image', fullnames)
            filenames.extend(images)
        filenames.sort()
@@ -135,31 +84,6 @@ def get_image_meta_cache(db_task):
        with open(db_task.get_image_meta_cache_path()) as meta_cache_file:
            return literal_eval(meta_cache_file.read())

-def _get_mime(name):
-    mime = mimetypes.guess_type(name)
-    mime_type = mime[0]
-    encoding = mime[1]
-    # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio
-    supportedArchives = ['application/zip', 'application/x-rar-compressed',
-        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
-        'gzip', 'bzip2']
-    if mime_type is not None:
-        if mime_type.startswith('video'):
-            return 'video'
-        elif mime_type in supportedArchives or encoding in supportedArchives:
-            return 'archive'
-        elif mime_type.startswith('image'):
-            return 'image'
-        else:
-            return 'unknown'
-    else:
-        if os.path.isdir(name):
-            return 'directory'
-        else:
-            return 'unknown'
-
def _copy_data_from_share(server_files, upload_dir):
    job = rq.get_current_job()
    job.meta['status'] = 'Data are being copied from share..'
@@ -176,74 +100,6 @@ def _copy_data_from_share(server_files, upload_dir):
            os.makedirs(target_dir)
        shutil.copyfile(source_path, target_path)

-def _unpack_archive(archive, upload_dir):
-    job = rq.get_current_job()
-    job.meta['status'] = 'Archive is being unpacked..'
-    job.save_meta()
-
-    Archive(archive).extractall(upload_dir)
-    os.remove(archive)
-
-def _copy_video_to_task(video, db_task, step):
-    job = rq.get_current_job()
-    job.meta['status'] = 'Video is being extracted..'
-    job.save_meta()
-
-    extractor = _FrameExtractor(video, db_task.image_quality,
-        step, db_task.start_frame, db_task.stop_frame)
-    for frame, image_orig_path in enumerate(extractor):
-        image_dest_path = db_task.get_frame_path(frame)
-        db_task.size += 1
-        dirname = os.path.dirname(image_dest_path)
-        if not os.path.exists(dirname):
-            os.makedirs(dirname)
-        shutil.copyfile(image_orig_path, image_dest_path)
-    if db_task.stop_frame == 0:
-        db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * step
-
-    image = Image.open(db_task.get_frame_path(0))
-    models.Video.objects.create(task=db_task, path=video,
-        width=image.width, height=image.height)
-    image.close()
-
-def _copy_images_to_task(upload_dir, db_task):
-    image_paths = []
-    for root, _, files in os.walk(upload_dir):
-        paths = map(lambda f: os.path.join(root, f), files)
-        paths = filter(lambda x: _get_mime(x) == 'image', paths)
-        image_paths.extend(paths)
-    image_paths.sort()
-
-    db_images = []
-    if len(image_paths):
-        job = rq.get_current_job()
-        for frame, image_orig_path in enumerate(image_paths):
-            progress = frame * 100 // len(image_paths)
-            job.meta['status'] = 'Images are being compressed.. {}%'.format(progress)
-            job.save_meta()
-            image_dest_path = db_task.get_frame_path(frame)
-            db_task.size += 1
-            dirname = os.path.dirname(image_dest_path)
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-            image = Image.open(image_orig_path)
-            # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion
-            if image.mode == "I":
-                # Image mode is 32bit integer pixels.
-                # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit
-                im_data = np.array(image)
-                im_data = im_data * (2**8 / im_data.max())
-                image = Image.fromarray(im_data.astype(np.int32))
-            image = image.convert('RGB')
-            image.save(image_dest_path, quality=db_task.image_quality, optimize=True)
-            db_images.append(models.Image(task=db_task, path=image_orig_path,
-                frame=frame, width=image.width, height=image.height))
-            image.close()
-        models.Image.objects.bulk_create(db_images)
-    else:
-        raise ValueError("Image files were not found")
-
def _save_task_to_db(db_task):
    job = rq.get_current_job()
    job.meta['status'] = 'Task is being saved in database'
@@ -296,7 +152,7 @@ def _validate_data(data):
            if '..' in path.split(os.path.sep):
                raise ValueError("Don't use '..' inside file paths")
            full_path = os.path.abspath(os.path.join(share_root, path))
-           if 'directory' == _get_mime(full_path):
+           if 'directory' == get_mime(full_path):
                server_files['dirs'].append(path)
            else:
                server_files['files'].append(path)
@@ -308,43 +164,42 @@ def _validate_data(data):
        if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]]

    def count_files(file_mapping, counter):
-       archive = None
-       video = None
        for rel_path, full_path in file_mapping.items():
-           mime = _get_mime(full_path)
-           counter[mime] += 1
-           if mime == "archive":
-               archive = rel_path
-           elif mime == "video":
-               video = rel_path
-       return video, archive
+           mime = get_mime(full_path)
+           counter[mime].append(rel_path)

-   counter = {"image": 0, "video": 0, "archive": 0, "directory": 0}
+   counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }

-   client_video, client_archive = count_files(
+   count_files(
        file_mapping={ f:f for f in data['remote_files'] or data['client_files']},
        counter=counter,
    )

-   server_video, server_archive = count_files(
+   count_files(
        file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']},
        counter=counter,
    )

-   num_videos = counter["video"]
-   num_archives = counter["archive"]
-   num_images = counter["image"] + counter["directory"]
-   if (num_videos > 1 or num_archives > 1 or
-       (num_videos == 1 and num_archives + num_images > 0) or
-       (num_archives == 1 and num_videos + num_images > 0) or
-       (num_images > 0 and num_archives + num_videos > 0)):
-       raise ValueError("Only one archive, one video or many images can be \
-           dowloaded simultaneously. {} image(s), {} dir(s), {} video(s), {} \
-           archive(s) found".format(counter['image'], counter['directory'],
-           counter['video'], counter['archive']))
+   unique_entries = 0
+   multiple_entries = 0
+   for media_type, media_config in MEDIA_TYPES.items():
+       if counter[media_type]:
+           if media_config['unique']:
+               unique_entries += len(counter[media_type])
+           else:
+               multiple_entries += len(counter[media_type])
+
+   if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
+       unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']])
+       multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']])
+       count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()])
+       raise ValueError('Only one {} or many {} can be used simultaneously, \
+           but {} found.'.format(unique_types, multiply_types, count))
+
+   if unique_entries == 0 and multiple_entries == 0:
+       raise ValueError('No media data found')

-   return client_video or server_video, client_archive or server_archive
+   return counter

def _download_data(urls, upload_dir):
    job = rq.get_current_job()
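Concretely, counter now maps every registered media type to the list of matching
files, and the 'unique' flags drive validation. Two hypothetical inputs:

# server_files = ['a.mp4']
#   counter == {'image': [], 'video': ['a.mp4'], 'archive': [], 'directory': []}
#   -> unique_entries == 1, multiple_entries == 0: accepted.
# server_files = ['a.mp4', 'b.png']
#   counter == {'image': ['b.png'], 'video': ['a.mp4'], 'archive': [], 'directory': []}
#   -> unique_entries == 1 and multiple_entries > 0: ValueError is raised.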
@@ -382,25 +237,72 @@ def _create_thread(tid, data):
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_task.get_upload_dirname()
    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

-   video, archive = _validate_data(data)
+   media = _validate_data(data)

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

-   if archive:
-       archive = os.path.join(upload_dir, archive)
-       _unpack_archive(archive, upload_dir)
+   job = rq.get_current_job()
+   job.meta['status'] = 'Media files are being extracted...'
+   job.save_meta()

-   if video:
-       db_task.mode = "interpolation"
-       video = os.path.join(upload_dir, video)
-       _copy_video_to_task(video, db_task, db_task.get_frame_step())
+   db_images = []
+   extractors = []
+   length = 0
+   for media_type, media_files in media.items():
+       if not media_files:
+           continue
+       extractor = MEDIA_TYPES[media_type]['extractor'](
+           source_path=[os.path.join(upload_dir, f) for f in media_files],
+           dest_path=upload_dir,
+           image_quality=db_task.image_quality,
+           step=db_task.get_frame_step(),
+           start=db_task.start_frame,
+           stop=db_task.stop_frame,
+       )
+       length += len(extractor)
+       db_task.mode = MEDIA_TYPES[media_type]['mode']
+       extractors.append(extractor)
+
+   for extractor in extractors:
+       for frame, image_orig_path in enumerate(extractor):
+           image_dest_path = db_task.get_frame_path(db_task.size)
+           dirname = os.path.dirname(image_dest_path)
+           if not os.path.exists(dirname):
+               os.makedirs(dirname)
+           if db_task.mode == 'interpolation':
+               extractor.save_image(frame, image_dest_path)
+           else:
+               width, height = extractor.save_image(frame, image_dest_path)
+               db_images.append(models.Image(
+                   task=db_task,
+                   path=image_orig_path,
+                   frame=db_task.size,
+                   width=width, height=height))
+           db_task.size += 1
+           progress = frame * 100 // length
+           job.meta['status'] = 'Images are being compressed... {}%'.format(progress)
+           job.save_meta()
+
+   if db_task.mode == 'interpolation':
+       image = Image.open(db_task.get_frame_path(0))
+       models.Video.objects.create(
+           task=db_task,
+           path=extractors[0].get_source_name(),
+           width=image.width, height=image.height)
+       image.close()
+       if db_task.stop_frame == 0:
+           db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step()
    else:
-       db_task.mode = "annotation"
-       _copy_images_to_task(upload_dir, db_task)
+       models.Image.objects.bulk_create(db_images)

    slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid))

    _save_task_to_db(db_task)
