Raw image copying in dataset export (#2229)

* Raw image copying in dataset export

* use byteimage

* use opencv, switch frame data type for videos

* Fix image reading

* update dm (Datumaro) dependency
main
Maxim Zhiltsov 5 years ago committed by GitHub
parent 572b5f2610
commit 84b8a85a20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,7 +13,7 @@ import datumaro.components.extractor as datumaro
from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType from cvat.apps.engine.models import AttributeType, ShapeType
from datumaro.util import cast from datumaro.util import cast
from datumaro.util.image import Image from datumaro.util.image import ByteImage, Image
from .annotation import AnnotationManager, TrackManager from .annotation import AnnotationManager, TrackManager
@ -457,18 +457,37 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor):
dm_items = [] dm_items = []
is_video = task_data.meta['task']['mode'] == 'interpolation'
ext = ''
if is_video:
ext = FrameProvider.VIDEO_FRAME_EXT
if include_images: if include_images:
frame_provider = FrameProvider(task_data.db_task.data) frame_provider = FrameProvider(task_data.db_task.data)
if is_video:
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
return Image(loader=loader, **kwargs)
else:
# for images use encoded data to avoid recoding
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.BUFFER)[0].getvalue()
return ByteImage(data=loader, **kwargs)
for frame_data in task_data.group_by_frame(include_empty=True): for frame_data in task_data.group_by_frame(include_empty=True):
loader = None image_args = {
'path': frame_data.name + ext,
'size': (frame_data.height, frame_data.width),
}
if include_images: if include_images:
loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i, dm_image = _make_image(frame_data.idx, **image_args)
quality=frame_provider.Quality.ORIGINAL, else:
out_type=frame_provider.Type.NUMPY_ARRAY)[0] dm_image = Image(**image_args)
dm_image = Image(path=frame_data.name, loader=loader,
size=(frame_data.height, frame_data.width)
)
dm_anno = self._read_cvat_anno(frame_data, task_data) dm_anno = self._read_cvat_anno(frame_data, task_data)
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image, annotations=dm_anno, image=dm_image,

@ -531,6 +531,10 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
anno_callback(f, task_data) anno_callback(f, task_data)
if save_images: if save_images:
ext = ''
if task_data.meta['task']['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT
img_dir = osp.join(temp_dir, 'images') img_dir = osp.join(temp_dir, 'images')
frame_provider = FrameProvider(task_data.db_task.data) frame_provider = FrameProvider(task_data.db_task.data)
frames = frame_provider.get_frames( frames = frame_provider.get_frames(
@ -538,9 +542,6 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
frame_provider.Type.BUFFER) frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in enumerate(frames): for frame_id, (frame_data, _) in enumerate(frames):
frame_name = task_data.frame_info[frame_id]['path'] frame_name = task_data.frame_info[frame_id]['path']
ext = ''
if not '.' in osp.basename(frame_name):
ext = '.png'
img_path = osp.join(img_dir, frame_name + ext) img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True) os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f: with open(img_path, 'wb') as f:

@ -6,6 +6,7 @@ import math
from enum import Enum from enum import Enum
from io import BytesIO from io import BytesIO
import cv2
import numpy as np import numpy as np
from PIL import Image from PIL import Image
@ -43,6 +44,9 @@ class RandomAccessIterator:
self.pos = -1 self.pos = -1
class FrameProvider: class FrameProvider:
VIDEO_FRAME_EXT = '.PNG'
VIDEO_FRAME_MIME = 'image/png'
class Quality(Enum): class Quality(Enum):
COMPRESSED = 0 COMPRESSED = 0
ORIGINAL = 100 ORIGINAL = 100
@ -129,13 +133,14 @@ class FrameProvider:
return chunk_number_ return chunk_number_
@staticmethod @classmethod
def _av_frame_to_png_bytes(av_frame): def _av_frame_to_png_bytes(cls, av_frame):
pil_img = av_frame.to_image() ext = cls.VIDEO_FRAME_EXT
buf = BytesIO() image = av_frame.to_ndarray(format='bgr24')
pil_img.save(buf, format='PNG') success, result = cv2.imencode(ext, image)
buf.seek(0) if not success:
return buf raise Exception("Failed to encode image to '%s' format" % (ext))
return BytesIO(result.tobytes())
def _convert_frame(self, frame, reader_class, out_type): def _convert_frame(self, frame, reader_class, out_type):
if out_type == self.Type.BUFFER: if out_type == self.Type.BUFFER:
@ -144,11 +149,11 @@ class FrameProvider:
return frame.to_image() if reader_class is VideoReader else Image.open(frame) return frame.to_image() if reader_class is VideoReader else Image.open(frame)
elif out_type == self.Type.NUMPY_ARRAY: elif out_type == self.Type.NUMPY_ARRAY:
if reader_class is VideoReader: if reader_class is VideoReader:
image = np.array(frame.to_image()) image = frame.to_ndarray(format='bgr24')
else: else:
image = np.array(Image.open(frame)) image = np.array(Image.open(frame))
if len(image.shape) == 3 and image.shape[2] in {3, 4}: if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
return image return image
else: else:
raise Exception('unsupported output type') raise Exception('unsupported output type')
@ -171,7 +176,7 @@ class FrameProvider:
frame = self._convert_frame(frame, loader.reader_class, out_type) frame = self._convert_frame(frame, loader.reader_class, out_type)
if loader.reader_class is VideoReader: if loader.reader_class is VideoReader:
return (frame, 'image/png') return (frame, self.VIDEO_FRAME_MIME)
return (frame, mimetypes.guess_type(frame_name)) return (frame, mimetypes.guess_type(frame_name))
def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER): def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER):

@ -244,6 +244,8 @@ class VideoReader(IMediaReader):
return pos / stream.duration if stream.duration else None return pos / stream.duration if stream.duration else None
def _get_av_container(self): def _get_av_container(self):
if isinstance(self._source_path[0], io.BytesIO):
self._source_path[0].seek(0) # required for re-reading
return av.open(self._source_path[0]) return av.open(self._source_path[0])
def get_preview(self): def get_preview(self):

@ -44,4 +44,4 @@ tensorflow==2.2.1 # Optional requirement of Datumaro
# archives. Don't use as a python module because it has GPL license. # archives. Don't use as a python module because it has GPL license.
patool==1.12 patool==1.12
diskcache==5.0.2 diskcache==5.0.2
git+https://github.com/openvinotoolkit/datumaro@v0.1.0 git+https://github.com/openvinotoolkit/datumaro@v0.1.2
Loading…
Cancel
Save