Raw image copying in dataset export (#2229)

* Raw image copying in dataset export

* use byteimage

* use opencv, switch frame data type for videos

* Fix image reading

* update dm dependency
main
Maxim Zhiltsov 5 years ago committed by GitHub
parent 572b5f2610
commit 84b8a85a20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,7 +13,7 @@ import datumaro.components.extractor as datumaro
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType
from datumaro.util import cast
from datumaro.util.image import Image
from datumaro.util.image import ByteImage, Image
from .annotation import AnnotationManager, TrackManager
@ -457,18 +457,37 @@ class CvatTaskDataExtractor(datumaro.SourceExtractor):
dm_items = []
is_video = task_data.meta['task']['mode'] == 'interpolation'
ext = ''
if is_video:
ext = FrameProvider.VIDEO_FRAME_EXT
if include_images:
frame_provider = FrameProvider(task_data.db_task.data)
if is_video:
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
return Image(loader=loader, **kwargs)
else:
# for images use encoded data to avoid recoding
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.BUFFER)[0].getvalue()
return ByteImage(data=loader, **kwargs)
for frame_data in task_data.group_by_frame(include_empty=True):
loader = None
image_args = {
'path': frame_data.name + ext,
'size': (frame_data.height, frame_data.width),
}
if include_images:
loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
dm_image = Image(path=frame_data.name, loader=loader,
size=(frame_data.height, frame_data.width)
)
dm_image = _make_image(frame_data.idx, **image_args)
else:
dm_image = Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, task_data)
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image,

@ -531,6 +531,10 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
anno_callback(f, task_data)
if save_images:
ext = ''
if task_data.meta['task']['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT
img_dir = osp.join(temp_dir, 'images')
frame_provider = FrameProvider(task_data.db_task.data)
frames = frame_provider.get_frames(
@ -538,9 +542,6 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in enumerate(frames):
frame_name = task_data.frame_info[frame_id]['path']
ext = ''
if not '.' in osp.basename(frame_name):
ext = '.png'
img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f:

@ -6,6 +6,7 @@ import math
from enum import Enum
from io import BytesIO
import cv2
import numpy as np
from PIL import Image
@ -43,6 +44,9 @@ class RandomAccessIterator:
self.pos = -1
class FrameProvider:
VIDEO_FRAME_EXT = '.PNG'
VIDEO_FRAME_MIME = 'image/png'
class Quality(Enum):
COMPRESSED = 0
ORIGINAL = 100
@ -129,13 +133,14 @@ class FrameProvider:
return chunk_number_
@staticmethod
def _av_frame_to_png_bytes(av_frame):
pil_img = av_frame.to_image()
buf = BytesIO()
pil_img.save(buf, format='PNG')
buf.seek(0)
return buf
# Encode a decoded video frame into an in-memory image buffer.
# The output format is selected by the class attribute VIDEO_FRAME_EXT.
# Returns a BytesIO holding the encoded bytes; raises Exception when the
# encoder reports failure.
@classmethod
def _av_frame_to_png_bytes(cls, av_frame):
ext = cls.VIDEO_FRAME_EXT
# request BGR channel order, which is what the OpenCV encoder works with
image = av_frame.to_ndarray(format='bgr24')
# imencode returns a (success flag, encoded buffer) pair
success, result = cv2.imencode(ext, image)
if not success:
raise Exception("Failed to encode image to '%s' format" % (ext))
return BytesIO(result.tobytes())
def _convert_frame(self, frame, reader_class, out_type):
if out_type == self.Type.BUFFER:
@ -144,11 +149,11 @@ class FrameProvider:
return frame.to_image() if reader_class is VideoReader else Image.open(frame)
elif out_type == self.Type.NUMPY_ARRAY:
if reader_class is VideoReader:
image = np.array(frame.to_image())
image = frame.to_ndarray(format='bgr24')
else:
image = np.array(Image.open(frame))
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
return image
else:
raise Exception('unsupported output type')
@ -171,7 +176,7 @@ class FrameProvider:
frame = self._convert_frame(frame, loader.reader_class, out_type)
if loader.reader_class is VideoReader:
return (frame, 'image/png')
return (frame, self.VIDEO_FRAME_MIME)
return (frame, mimetypes.guess_type(frame_name))
def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER):

@ -244,6 +244,8 @@ class VideoReader(IMediaReader):
return pos / stream.duration if stream.duration else None
# Open the first entry of self._source_path with av.open and return the result.
def _get_av_container(self):
# an in-memory source is rewound first so it is read from the beginning
if isinstance(self._source_path[0], io.BytesIO):
self._source_path[0].seek(0) # required for re-reading
return av.open(self._source_path[0])
def get_preview(self):

@ -44,4 +44,4 @@ tensorflow==2.2.1 # Optional requirement of Datumaro
# archives. Don't use as a python module because it has GPL license.
patool==1.12
diskcache==5.0.2
git+https://github.com/openvinotoolkit/datumaro@v0.1.0
git+https://github.com/openvinotoolkit/datumaro@v0.1.2
Loading…
Cancel
Save