From 0f5cf2064dd357e70debcf25534ef9676d60dd73 Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Thu, 12 Dec 2019 17:26:41 +0300 Subject: [PATCH] [Datumaro] Added tf detection api tfrecord import and export (#894) * Added tf detection api tfrecord import and export * Added export button in dashboard * Add tf to requirements * Extend test * Add tf dependency * Require images in tfrecord export * Add video task case handling * Maintain image order in CVAT export * Fix Task image id-path conversions * Update tfrecord tests * Extend image utilities * Update tfrecord format * Fix image loading bug * Add some logs * Add 'copy' option to project import command * Reduce default cache size --- cvat/apps/dataset_manager/bindings.py | 4 +- .../extractors/cvat_rest_api_task_images.py | 2 +- cvat/apps/dataset_manager/task.py | 29 ++- cvat/apps/engine/models.py | 7 +- cvat/apps/engine/tests/test_model.py | 25 +++ datumaro/datumaro/cli/project/__init__.py | 19 +- .../components/converters/__init__.py | 6 + .../components/converters/tfrecord.py | 146 +++++++++++++ .../components/extractors/__init__.py | 7 + .../components/extractors/tfrecord.py | 206 ++++++++++++++++++ .../datumaro/components/formats/tfrecord.py | 13 ++ .../datumaro/components/importers/__init__.py | 7 + .../datumaro/components/importers/tfrecord.py | 35 +++ datumaro/datumaro/components/project.py | 11 +- datumaro/datumaro/util/image.py | 63 +++++- datumaro/datumaro/util/image_cache.py | 2 +- datumaro/datumaro/util/tf_util.py | 38 ++++ datumaro/requirements.txt | 1 + datumaro/setup.py | 1 + datumaro/tests/test_image.py | 39 ++-- datumaro/tests/test_tfrecord_format.py | 151 +++++++++++++ 21 files changed, 768 insertions(+), 44 deletions(-) create mode 100644 cvat/apps/engine/tests/test_model.py create mode 100644 datumaro/datumaro/components/converters/tfrecord.py create mode 100644 datumaro/datumaro/components/extractors/tfrecord.py create mode 100644 datumaro/datumaro/components/formats/tfrecord.py create mode 100644 datumaro/datumaro/components/importers/tfrecord.py create mode 100644 datumaro/datumaro/util/tf_util.py create mode 100644 datumaro/tests/test_tfrecord_format.py diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index b6949470..cc758fd0 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -28,7 +28,7 @@ class CvatImagesDirExtractor(datumaro.Extractor): id=item_id, image=lazy_image(path)) items.append((item.id, item)) - items = sorted(items, key=lambda e: e[0]) + items = sorted(items, key=lambda e: int(e[0])) items = OrderedDict(items) self._items = items @@ -74,7 +74,7 @@ class CvatTaskExtractor(datumaro.Extractor): id=cvat_anno.frame, annotations=dm_anno) dm_annotations.append((dm_item.id, dm_item)) - dm_annotations = sorted(dm_annotations, key=lambda e: e[0]) + dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0])) self._items = OrderedDict(dm_annotations) self._subsets = None diff --git a/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py index 9b6065da..28baafad 100644 --- a/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py +++ b/cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py @@ -102,7 +102,7 @@ class cvat_rest_api_task_images(datumaro.Extractor): id=item_id, image=self._make_image_loader(item_id)) items.append((item.id, item)) - items = 
sorted(items, key=lambda e: e[0]) + items = sorted(items, key=lambda e: int(e[0])) items = OrderedDict(items) self._items = items diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 4b346d06..8103dab4 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -242,13 +242,23 @@ class TaskProject: images_meta = { 'images': items, } - for db_image in self._db_task.image_set.all(): - frame_info = { - 'id': db_image.frame, - 'width': db_image.width, - 'height': db_image.height, - } - items.append(frame_info) + db_video = getattr(self._db_task, 'video', None) + if db_video is not None: + for i in range(self._db_task.size): + frame_info = { + 'id': str(i), + 'width': db_video.width, + 'height': db_video.height, + } + items.append(frame_info) + else: + for db_image in self._db_task.image_set.all(): + frame_info = { + 'id': db_image.frame, + 'width': db_image.width, + 'height': db_image.height, + } + items.append(frame_info) with open(osp.join(save_dir, 'config.json'), 'w') as config_file: json.dump(config, config_file) @@ -385,6 +395,11 @@ EXPORT_FORMATS = [ 'tag': 'yolo', 'is_default': False, }, + { + 'name': 'TF Detection API TFrecord', + 'tag': 'tf_detection_api', + 'is_default': False, + }, ] def get_export_formats(): diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 11710f60..8b6f63d8 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -90,12 +90,7 @@ class Task(models.Model): def get_image_frame(image_path): assert image_path.endswith('.jpg') index = os.path.splitext(os.path.basename(image_path))[0] - - path = os.path.dirname(image_path) - d2 = os.path.basename(path) - d1 = os.path.basename(os.path.dirname(path)) - - return int(d1) * 10000 + int(d2) * 100 + int(index) + return int(index) def get_frame_step(self): match = re.search("step\s*=\s*([1-9]\d*)", self.frame_filter) diff --git a/cvat/apps/engine/tests/test_model.py b/cvat/apps/engine/tests/test_model.py new file mode 100644 index 00000000..34454c0b --- /dev/null +++ b/cvat/apps/engine/tests/test_model.py @@ -0,0 +1,25 @@ +# Copyright (C) 2018 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp + +from django.test import TestCase +from cvat.apps.engine.models import Task + + +class TaskModelTest(TestCase): + def test_frame_id_path_conversions(self): + task_id = 1 + task = Task(task_id) + + for i in [10 ** p for p in range(6)]: + src_path_expected = osp.join( + str(i // 10000), str(i // 100), '%s.jpg' % i) + src_path = task.get_frame_path(i) + + dst_frame = task.get_image_frame(src_path) + + self.assertTrue(src_path.endswith(src_path_expected), + '%s vs. 
%s' % (src_path, src_path_expected)) + self.assertEqual(i, dst_frame) diff --git a/datumaro/datumaro/cli/project/__init__.py b/datumaro/datumaro/cli/project/__init__.py index 1f4db978..c8fec0de 100644 --- a/datumaro/datumaro/cli/project/__init__.py +++ b/datumaro/datumaro/cli/project/__init__.py @@ -48,7 +48,7 @@ def build_import_parser(parser): import datumaro.components.importers as importers_module importers_list = [name for name, cls in importers_module.items] - parser.add_argument('source_path', + parser.add_argument('-s', '--source', required=True, help="Path to import a project from") parser.add_argument('-f', '--format', required=True, help="Source project format (options: %s)" % (', '.join(importers_list))) @@ -60,6 +60,8 @@ def build_import_parser(parser): help="Name of the new project (default: same as project dir)") parser.add_argument('--overwrite', action='store_true', help="Overwrite existing files in the save directory") + parser.add_argument('--copy', action='store_true', + help="Make a deep copy instead of saving source links") return parser def import_command(args): @@ -74,14 +76,19 @@ def import_command(args): project_name = osp.basename(project_dir) log.info("Importing project from '%s' as '%s'" % \ - (args.source_path, args.format)) + (args.source, args.format)) - source_path = osp.abspath(args.source_path) - project = Project.import_from(source_path, args.format) + source = osp.abspath(args.source) + project = Project.import_from(source, args.format) project.config.project_name = project_name project.config.project_dir = project_dir - project = project.make_dataset() - project.save(merge=True, save_images=False) + + dataset = project.make_dataset() + if args.copy: + log.info("Cloning data...") + dataset.save(merge=True, save_images=True) + else: + project.save() log.info("Project has been created at '%s'" % (project_dir)) diff --git a/datumaro/datumaro/components/converters/__init__.py b/datumaro/datumaro/components/converters/__init__.py index 5d03ceb6..26c37109 100644 --- a/datumaro/datumaro/components/converters/__init__.py +++ b/datumaro/datumaro/components/converters/__init__.py @@ -25,6 +25,10 @@ from datumaro.components.converters.voc import ( from datumaro.components.converters.yolo import YoloConverter +from datumaro.components.converters.tfrecord import ( + DetectionApiConverter, +) + items = [ ('datumaro', DatumaroConverter), @@ -44,4 +48,6 @@ items = [ ('voc_layout', VocLayoutConverter), ('yolo', YoloConverter), + + ('tf_detection_api', DetectionApiConverter), ] diff --git a/datumaro/datumaro/components/converters/tfrecord.py b/datumaro/datumaro/components/converters/tfrecord.py new file mode 100644 index 00000000..bc28e74f --- /dev/null +++ b/datumaro/datumaro/components/converters/tfrecord.py @@ -0,0 +1,146 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import codecs +from collections import OrderedDict +import os +import os.path as osp +import string + +from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME +from datumaro.components.formats.tfrecord import DetectionApiPath +from datumaro.util.image import encode_image +from datumaro.util.tf_util import import_tf as _import_tf + + +# we need it to filter out non-ASCII characters, otherwise training will crash +_printable = set(string.printable) +def _make_printable(s): + return ''.join(filter(lambda x: x in _printable, s)) + +def _make_tf_example(item, get_label_id, get_label, save_images=False): + tf = _import_tf() + + def int64_feature(value): 
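+        # A note on the helpers below: each wraps a plain Python value in the
+        # typed tf.train.Feature container that the TFRecord schema expects.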
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + def int64_list_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + def bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + def bytes_list_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + + def float_list_feature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + + features = { + 'image/source_id': bytes_feature(str(item.id).encode('utf-8')), + 'image/filename': bytes_feature( + ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')), + } + + if not item.has_image: + raise Exception( + "Failed to export dataset item '%s': item has no image" % item.id) + height, width, _ = item.image.shape + + features.update({ + 'image/height': int64_feature(height), + 'image/width': int64_feature(width), + }) + + if save_images and item.has_image: + fmt = DetectionApiPath.IMAGE_FORMAT + buffer = encode_image(item.image, DetectionApiPath.IMAGE_EXT) + + features.update({ + 'image/encoded': bytes_feature(buffer), + 'image/format': bytes_feature(fmt.encode('utf-8')), + }) + + xmins = [] # List of normalized left x coordinates in bounding box (1 per box) + xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box) + ymins = [] # List of normalized top y coordinates in bounding box (1 per box) + ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box) + classes_text = [] # List of string class name of bounding box (1 per box) + classes = [] # List of integer class id of bounding box (1 per box) + + boxes = [ann for ann in item.annotations if ann.type is AnnotationType.bbox] + for box in boxes: + box_label = _make_printable(get_label(box.label)) + + xmins.append(box.points[0] / width) + xmaxs.append(box.points[2] / width) + ymins.append(box.points[1] / height) + ymaxs.append(box.points[3] / height) + classes_text.append(box_label.encode('utf-8')) + classes.append(get_label_id(box.label)) + + if boxes: + features.update({ + 'image/object/bbox/xmin': float_list_feature(xmins), + 'image/object/bbox/xmax': float_list_feature(xmaxs), + 'image/object/bbox/ymin': float_list_feature(ymins), + 'image/object/bbox/ymax': float_list_feature(ymaxs), + 'image/object/class/text': bytes_list_feature(classes_text), + 'image/object/class/label': int64_list_feature(classes), + }) + + tf_example = tf.train.Example( + features=tf.train.Features(feature=features)) + + return tf_example + +class DetectionApiConverter: + def __init__(self, save_images=True): + self.save_images = save_images + + def __call__(self, extractor, save_dir): + tf = _import_tf() + + os.makedirs(save_dir, exist_ok=True) + + subsets = extractor.subsets() + if len(subsets) == 0: + subsets = [ None ] + + for subset_name in subsets: + if subset_name: + subset = extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = extractor + + label_categories = subset.categories()[AnnotationType.label] + get_label = lambda label_id: label_categories.items[label_id].name \ + if label_id is not None else '' + label_ids = OrderedDict((label.name, 1 + idx) + for idx, label in enumerate(label_categories.items)) + map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0) + + labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE) + with codecs.open(labelmap_path, 'w', encoding='utf8') as f: + for label, idx in label_ids.items(): + f.write( + 'item 
{\n' + + ('\tid: %s\n' % (idx)) + + ("\tname: '%s'\n" % (label)) + + '}\n\n' + ) + + anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name)) + with tf.io.TFRecordWriter(anno_path) as writer: + for item in subset: + tf_example = _make_tf_example( + item, + get_label=get_label, + get_label_id=map_label_id, + save_images=self.save_images, + ) + writer.write(tf_example.SerializeToString()) diff --git a/datumaro/datumaro/components/extractors/__init__.py b/datumaro/datumaro/components/extractors/__init__.py index 3df18a6f..9820a27d 100644 --- a/datumaro/datumaro/components/extractors/__init__.py +++ b/datumaro/datumaro/components/extractors/__init__.py @@ -30,6 +30,11 @@ from datumaro.components.extractors.yolo import ( YoloExtractor, ) +from datumaro.components.extractors.tfrecord import ( + DetectionApiExtractor, +) + + items = [ ('datumaro', DatumaroExtractor), @@ -52,4 +57,6 @@ items = [ ('voc_comp_9_10', VocComp_9_10_Extractor), ('yolo', YoloExtractor), + + ('tf_detection_api', DetectionApiExtractor), ] \ No newline at end of file diff --git a/datumaro/datumaro/components/extractors/tfrecord.py b/datumaro/datumaro/components/extractors/tfrecord.py new file mode 100644 index 00000000..46b78b63 --- /dev/null +++ b/datumaro/datumaro/components/extractors/tfrecord.py @@ -0,0 +1,206 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import numpy as np +import os.path as osp +import re + +from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME, \ + LabelCategories, BboxObject, DatasetItem, Extractor +from datumaro.components.formats.tfrecord import DetectionApiPath +from datumaro.util.image import lazy_image, decode_image +from datumaro.util.tf_util import import_tf as _import_tf + + +def clamp(value, _min, _max): + return max(min(_max, value), _min) + +class DetectionApiExtractor(Extractor): + class Subset(Extractor): + def __init__(self, name, parent): + super().__init__() + self._name = name + self._parent = parent + self.items = OrderedDict() + + def __iter__(self): + for item in self.items.values(): + yield item + + def __len__(self): + return len(self.items) + + def categories(self): + return self._parent.categories() + + def __init__(self, path, images_dir=None): + super().__init__() + + root_dir = osp.dirname(osp.abspath(path)) + if osp.basename(root_dir) == DetectionApiPath.ANNOTATIONS_DIR: + root_dir = osp.dirname(root_dir) + images_dir = osp.join(root_dir, DetectionApiPath.IMAGES_DIR) + if not osp.isdir(images_dir): + images_dir = None + self._images_dir = images_dir + + self._subsets = {} + + subset_name = osp.splitext(osp.basename(path))[0] + if subset_name == DEFAULT_SUBSET_NAME: + subset_name = None + subset = DetectionApiExtractor.Subset(subset_name, self) + items, labels = self._parse_tfrecord_file(path, subset_name, images_dir) + subset.items = items + self._subsets[subset_name] = subset + + label_categories = LabelCategories() + labels = sorted(labels.items(), key=lambda item: item[1]) + for label, _ in labels: + label_categories.add(label) + self._categories = { + AnnotationType.label: label_categories + } + + @classmethod + def _parse_labelmap(cls, text): + id_pattern = r'(?:id\s*:\s*(?P\d+))' + name_pattern = r'(?:name\s*:\s*[\'\"](?P.*?)[\'\"])' + entry_pattern = r'(\{(?:[\s\n]*(?:%(id)s|%(name)s)[\s\n]*){2}\})+' % \ + {'id': id_pattern, 'name': name_pattern} + matches = re.finditer(entry_pattern, text) + + labelmap = {} + for match in matches: + label_id = match.group('id') + 
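+            # Either group can be absent in a malformed entry; match.group()
+            # then returns None, which the check below filters out.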
label_name = match.group('name') + if label_id is not None and label_name is not None: + labelmap[label_name] = int(label_id) + + return labelmap + + @classmethod + def _parse_tfrecord_file(cls, filepath, subset_name, images_dir): + tf = _import_tf() + + dataset = tf.data.TFRecordDataset(filepath) + features = { + 'image/filename': tf.io.FixedLenFeature([], tf.string), + 'image/source_id': tf.io.FixedLenFeature([], tf.string), + 'image/height': tf.io.FixedLenFeature([], tf.int64), + 'image/width': tf.io.FixedLenFeature([], tf.int64), + 'image/encoded': tf.io.FixedLenFeature([], tf.string), + 'image/format': tf.io.FixedLenFeature([], tf.string), + # Object boxes and classes. + 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), + 'image/object/class/label': tf.io.VarLenFeature(tf.int64), + 'image/object/class/text': tf.io.VarLenFeature(tf.string), + 'image/object/mask': tf.io.VarLenFeature(tf.string), + } + + dataset_labels = OrderedDict() + labelmap_path = osp.join(osp.dirname(filepath), + DetectionApiPath.LABELMAP_FILE) + if osp.exists(labelmap_path): + with open(labelmap_path, 'r', encoding='utf-8') as f: + labelmap_text = f.read() + dataset_labels.update({ label: id - 1 + for label, id in cls._parse_labelmap(labelmap_text).items() + }) + + dataset_items = OrderedDict() + + for record in dataset: + parsed_record = tf.io.parse_single_example(record, features) + frame_id = parsed_record['image/source_id'].numpy().decode('utf-8') + frame_filename = \ + parsed_record['image/filename'].numpy().decode('utf-8') + frame_height = tf.cast( + parsed_record['image/height'], tf.int64).numpy().item() + frame_width = tf.cast( + parsed_record['image/width'], tf.int64).numpy().item() + frame_image = parsed_record['image/encoded'].numpy() + frame_format = parsed_record['image/format'].numpy().decode('utf-8') + xmins = tf.sparse.to_dense( + parsed_record['image/object/bbox/xmin']).numpy() + ymins = tf.sparse.to_dense( + parsed_record['image/object/bbox/ymin']).numpy() + xmaxs = tf.sparse.to_dense( + parsed_record['image/object/bbox/xmax']).numpy() + ymaxs = tf.sparse.to_dense( + parsed_record['image/object/bbox/ymax']).numpy() + label_ids = tf.sparse.to_dense( + parsed_record['image/object/class/label']).numpy() + labels = tf.sparse.to_dense( + parsed_record['image/object/class/text'], + default_value=b'').numpy() + + for label, label_id in zip(labels, label_ids): + label = label.decode('utf-8') + if not label: + continue + if label_id <= 0: + continue + if label in dataset_labels: + continue + dataset_labels[label] = label_id - 1 + + item_id = frame_id + if not item_id: + item_id = osp.splitext(frame_filename)[0] + + annotations = [] + for index, shape in enumerate( + np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]): + label = shape[0].decode('utf-8') + x = clamp(shape[1] * frame_width, 0, frame_width) + y = clamp(shape[2] * frame_height, 0, frame_height) + w = clamp(shape[3] * frame_width, 0, frame_width) - x + h = clamp(shape[4] * frame_height, 0, frame_height) - y + annotations.append(BboxObject(x, y, w, h, + label=dataset_labels.get(label, None), id=index + )) + + image = None + if image is None and frame_image and frame_format: + image = lazy_image(frame_image, loader=decode_image) + if image is None and frame_filename and images_dir: + image_path = osp.join(images_dir, frame_filename) + if 
osp.exists(image_path): + image = lazy_image(image_path) + + dataset_items[item_id] = DatasetItem(id=item_id, subset=subset_name, + image=image, annotations=annotations) + + return dataset_items, dataset_labels + + def categories(self): + return self._categories + + def __iter__(self): + for subset in self._subsets.values(): + for item in subset: + yield item + + def __len__(self): + length = 0 + for subset in self._subsets.values(): + length += len(subset) + return length + + def subsets(self): + return list(self._subsets) + + def get_subset(self, name): + return self._subsets[name] + + def get(self, item_id, subset=None, path=None): + if path is not None: + return None + return self.get_subset(subset).items.get(item_id, None) \ No newline at end of file diff --git a/datumaro/datumaro/components/formats/tfrecord.py b/datumaro/datumaro/components/formats/tfrecord.py new file mode 100644 index 00000000..9e31212e --- /dev/null +++ b/datumaro/datumaro/components/formats/tfrecord.py @@ -0,0 +1,13 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +class DetectionApiPath: + IMAGES_DIR = 'images' + ANNOTATIONS_DIR = 'annotations' + + IMAGE_EXT = '.jpg' + IMAGE_FORMAT = 'jpeg' + + LABELMAP_FILE = 'label_map.pbtxt' \ No newline at end of file diff --git a/datumaro/datumaro/components/importers/__init__.py b/datumaro/datumaro/components/importers/__init__.py index 4383b467..5d2923b8 100644 --- a/datumaro/datumaro/components/importers/__init__.py +++ b/datumaro/datumaro/components/importers/__init__.py @@ -14,6 +14,11 @@ from datumaro.components.importers.voc import ( VocResultsImporter, ) +from datumaro.components.importers.tfrecord import ( + DetectionApiImporter, +) + + items = [ ('datumaro', DatumaroImporter), @@ -21,4 +26,6 @@ items = [ ('voc', VocImporter), ('voc_results', VocResultsImporter), + + ('tf_detection_api', DetectionApiImporter), ] \ No newline at end of file diff --git a/datumaro/datumaro/components/importers/tfrecord.py b/datumaro/datumaro/components/importers/tfrecord.py new file mode 100644 index 00000000..c42c2e17 --- /dev/null +++ b/datumaro/datumaro/components/importers/tfrecord.py @@ -0,0 +1,35 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import os.path as osp + + +class DetectionApiImporter: + EXTRACTOR_NAME = 'tf_detection_api' + + def __call__(self, path): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = glob(osp.join(path, '*.tfrecord')) + + for subset_path in subset_paths: + if not osp.isfile(subset_path): + continue + + subset_name = osp.splitext(osp.basename(subset_path))[0] + + project.add_source(subset_name, { + 'url': subset_path, + 'format': self.EXTRACTOR_NAME, + }) + + if len(project.config.sources) == 0: + raise Exception( + "Failed to find 'tf_detection_api' dataset at '%s'" % path) + + return project + diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index e51c2baa..8bd0ef64 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -351,6 +351,7 @@ class ProjectDataset(Extractor): # merge items subsets = defaultdict(lambda: Subset(self)) for source_name, source in self._sources.items(): + log.info("Loading '%s' source contents..." 
% source_name) for item in source: if dataset_filter and not dataset_filter(item): continue @@ -360,7 +361,7 @@ class ProjectDataset(Extractor): image = None if existing_item.has_image: # TODO: think of image comparison - image = lambda: existing_item.image + image = self._lazy_image(existing_item) path = existing_item.path if item.path != path: @@ -386,6 +387,7 @@ class ProjectDataset(Extractor): # override with our items, fallback to existing images if own_source is not None: + log.info("Loading own dataset...") for item in own_source: if dataset_filter and not dataset_filter(item): continue @@ -396,7 +398,7 @@ class ProjectDataset(Extractor): image = None if existing_item.has_image: # TODO: think of image comparison - image = lambda: existing_item.image + image = self._lazy_image(existing_item) item = DatasetItemWrapper(item=item, path=None, annotations=item.annotations, image=image) @@ -410,6 +412,11 @@ class ProjectDataset(Extractor): self._length = None + @staticmethod + def _lazy_image(item): + # NOTE: avoid https://docs.python.org/3/faq/programming.html#why-do-lambdas-defined-in-a-loop-with-different-values-all-return-the-same-result + return lambda: item.image + @staticmethod def _merge_anno(a, b): from itertools import chain diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index e55cce73..784a1218 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -5,6 +5,7 @@ # pylint: disable=unused-import +from io import BytesIO import numpy as np from enum import Enum @@ -42,20 +43,76 @@ def load_image(path): assert image.shape[2] in [1, 3, 4] return image -def save_image(path, image): +def save_image(path, image, params=None): if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: import cv2 - cv2.imwrite(path, image) + cv2.imwrite(path, image, params=params) elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: from PIL import Image + + if not params: + params = {} + + image = image.astype(np.uint8) + if len(image.shape) == 3 and image.shape[2] in [3, 4]: + image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB + image = Image.fromarray(image) + image.save(path, **params) + else: + raise NotImplementedError() + +def encode_image(image, ext, params=None): + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + + if not ext.startswith('.'): + ext = '.' 
+ ext + + if ext.upper() == '.JPG': + params = [ int(cv2.IMWRITE_JPEG_QUALITY), 75 ] + + success, result = cv2.imencode(ext, image, params=params) + if not success: + raise Exception("Failed to encode image to '%s' format" % (ext)) + return result.tobytes() + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + + if ext.startswith('.'): + ext = ext[1:] + + if not params: + params = {} + image = image.astype(np.uint8) if len(image.shape) == 3 and image.shape[2] in [3, 4]: image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB image = Image.fromarray(image) - image.save(path) + with BytesIO() as buffer: + image.save(buffer, format=ext, **params) + return buffer.getvalue() else: raise NotImplementedError() +def decode_image(image_bytes): + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + image = np.frombuffer(image_bytes, dtype=np.uint8) + image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED) + image = image.astype(np.float32) + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + image = Image.open(BytesIO(image_bytes)) + image = np.asarray(image, dtype=np.float32) + if len(image.shape) == 3 and image.shape[2] in [3, 4]: + image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR + else: + raise NotImplementedError() + + assert len(image.shape) == 3 + assert image.shape[2] in [1, 3, 4] + return image + class lazy_image: def __init__(self, path, loader=load_image, cache=None): diff --git a/datumaro/datumaro/util/image_cache.py b/datumaro/datumaro/util/image_cache.py index 1d5a5d6b..fd1ad0d7 100644 --- a/datumaro/datumaro/util/image_cache.py +++ b/datumaro/datumaro/util/image_cache.py @@ -3,7 +3,7 @@ from collections import OrderedDict _instance = None -DEFAULT_CAPACITY = 1000 +DEFAULT_CAPACITY = 2 class ImageCache: @staticmethod diff --git a/datumaro/datumaro/util/tf_util.py b/datumaro/datumaro/util/tf_util.py new file mode 100644 index 00000000..0d939a95 --- /dev/null +++ b/datumaro/datumaro/util/tf_util.py @@ -0,0 +1,38 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +def import_tf(): + import sys + + tf = sys.modules.get('tensorflow', None) + if tf is not None: + return tf + + # Reduce output noise, https://stackoverflow.com/questions/38073432/how-to-suppress-verbose-tensorflow-logging + import os + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + + import tensorflow as tf + + try: + tf.get_logger().setLevel('WARNING') + except AttributeError: + pass + try: + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.WARN) + except AttributeError: + pass + + # Enable eager execution in early versions to unlock dataset operations + try: + tf.compat.v1.enable_eager_execution() + except AttributeError: + pass + try: + tf.enable_eager_execution() + except AttributeError: + pass + + return tf \ No newline at end of file diff --git a/datumaro/requirements.txt b/datumaro/requirements.txt index f06040c0..f50ab0af 100644 --- a/datumaro/requirements.txt +++ b/datumaro/requirements.txt @@ -7,3 +7,4 @@ Pillow>=6.1.0 pycocotools>=2.0.0 PyYAML>=5.1.1 tensorboardX>=1.8 +tensorflow>=1.12.0 \ No newline at end of file diff --git a/datumaro/setup.py b/datumaro/setup.py index a250412a..1bdf5ff1 100644 --- a/datumaro/setup.py +++ b/datumaro/setup.py @@ -57,6 +57,7 @@ setuptools.setup( 'PyYAML', 'pycocotools', 'tensorboardX', + 'tensorflow', ], entry_points={ 'console_scripts': [ diff --git a/datumaro/tests/test_image.py b/datumaro/tests/test_image.py index f277bd0f..143d6c4e 100644 --- a/datumaro/tests/test_image.py +++ b/datumaro/tests/test_image.py @@ -15,25 
+15,32 @@ class ImageTest(TestCase): def tearDown(self): image_module._IMAGE_BACKEND = self.default_backend - def _test_can_save_and_load(self, src_image, path, - save_backend=None, load_backend=None): - if save_backend: - image_module._IMAGE_BACKEND = save_backend - image_module.save_image(path, src_image) - - if load_backend: - image_module._IMAGE_BACKEND = load_backend - dst_image = image_module.load_image(path) - - self.assertTrue(np.all(src_image == dst_image), 'save: %s, load: %s' % \ - (save_backend, load_backend)) - def test_save_and_load_backends(self): backends = image_module._IMAGE_BACKENDS for save_backend, load_backend in product(backends, backends): with TestDir() as test_dir: src_image = np.random.randint(0, 255 + 1, (2, 4, 3)) - image_path = osp.join(test_dir.path, 'img.png') + path = osp.join(test_dir.path, 'img.png') # lossless + + image_module._IMAGE_BACKEND = save_backend + image_module.save_image(path, src_image) + + image_module._IMAGE_BACKEND = load_backend + dst_image = image_module.load_image(path) + + self.assertTrue(np.all(src_image == dst_image), + 'save: %s, load: %s' % (save_backend, load_backend)) + + def test_encode_and_decode_backends(self): + backends = image_module._IMAGE_BACKENDS + for save_backend, load_backend in product(backends, backends): + src_image = np.random.randint(0, 255 + 1, (2, 4, 3)) + + image_module._IMAGE_BACKEND = save_backend + buffer = image_module.encode_image(src_image, '.png') # lossless + + image_module._IMAGE_BACKEND = load_backend + dst_image = image_module.decode_image(buffer) - self._test_can_save_and_load(src_image, image_path, - save_backend, load_backend) \ No newline at end of file + self.assertTrue(np.all(src_image == dst_image), + 'save: %s, load: %s' % (save_backend, load_backend)) \ No newline at end of file diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py new file mode 100644 index 00000000..8511dc14 --- /dev/null +++ b/datumaro/tests/test_tfrecord_format.py @@ -0,0 +1,151 @@ +import numpy as np + +from unittest import TestCase + +from datumaro.components.project import Project +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, BboxObject, LabelCategories +) +from datumaro.components.extractors.tfrecord import ( + DetectionApiExtractor, +) +from datumaro.components.converters.tfrecord import ( + DetectionApiConverter, +) +from datumaro.util import find +from datumaro.util.test_utils import TestDir + + +class TfrecordConverterTest(TestCase): + def _test_can_save_and_load(self, source_dataset, converter, test_dir, + importer_params=None): + converter(source_dataset, test_dir.path) + + if not importer_params: + importer_params = {} + project = Project.import_from(test_dir.path, 'tf_detection_api', + **importer_params) + parsed_dataset = project.make_dataset() + + self.assertListEqual( + sorted(source_dataset.subsets()), + sorted(parsed_dataset.subsets()), + ) + + self.assertEqual(len(source_dataset), len(parsed_dataset)) + + for item_a in source_dataset: + item_b = find(parsed_dataset, lambda x: x.id == item_a.id) + self.assertFalse(item_b is None) + self.assertEqual(len(item_a.annotations), len(item_b.annotations)) + for ann_a in item_a.annotations: + ann_b = find(item_b.annotations, lambda x: \ + x.id == ann_a.id and \ + x.type == ann_a.type and x.group == ann_a.group) + self.assertEqual(ann_a, ann_b, 'id: ' + str(ann_a.id)) + + def test_can_save_bboxes(self): + class TestExtractor(Extractor): + def __iter__(self): + items = [ + 
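+                    # Three subsets ('train', 'val', 'test') are used so the
+                    # round-trip covers one .tfrecord file per subset.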
DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + BboxObject(0, 4, 4, 8, label=2, id=0), + BboxObject(0, 4, 4, 4, label=3, id=1), + BboxObject(2, 4, 4, 4, id=2), + ] + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + BboxObject(1, 2, 4, 2, label=3, id=0), + ] + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + ), + ] + + for item in items: + yield item + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + with TestDir() as test_dir: + self._test_can_save_and_load( + TestExtractor(), DetectionApiConverter(save_images=True), + test_dir) + + def test_can_save_dataset_with_no_subsets(self): + class TestExtractor(Extractor): + def __iter__(self): + items = [ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + BboxObject(2, 1, 4, 4, label=2, id=0), + BboxObject(4, 2, 8, 4, label=3, id=1), + ] + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)) * 2, + annotations=[ + BboxObject(4, 4, 4, 4, label=3, id=0), + ] + ), + + DatasetItem(id=3, + image=np.ones((8, 4, 3)) * 3, + ), + ] + + for item in items: + yield item + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + with TestDir() as test_dir: + self._test_can_save_and_load( + TestExtractor(), DetectionApiConverter(), test_dir) + + def test_labelmap_parsing(self): + text = """ + { + id: 4 + name: 'qw1' + } + { + id: 5 name: 'qw2' + } + + { + name: 'qw3' + id: 6 + } + {name:'qw4' id:7} + """ + expected = { + 'qw1': 4, + 'qw2': 5, + 'qw3': 6, + 'qw4': 7, + } + parsed = DetectionApiExtractor._parse_labelmap(text) + + self.assertEqual(expected, parsed)
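--
For reference, a minimal round-trip sketch of the new format, mirroring the
usage in datumaro/tests/test_tfrecord_format.py above (class names and
signatures are taken from this patch; the 'out' directory and the example
label are placeholders):

    import numpy as np

    from datumaro.components.project import Project
    from datumaro.components.extractor import (Extractor, DatasetItem,
        AnnotationType, BboxObject, LabelCategories)
    from datumaro.components.converters.tfrecord import DetectionApiConverter

    class ExampleExtractor(Extractor):
        def __iter__(self):
            # One 16x16 image with a single box for label 0
            yield DatasetItem(id=1, subset='train',
                image=np.ones((16, 16, 3)),
                annotations=[BboxObject(0, 4, 4, 8, label=0, id=0)])

        def categories(self):
            label_cat = LabelCategories()
            label_cat.add('cat')
            return { AnnotationType.label: label_cat }

    # Export: writes train.tfrecord and label_map.pbtxt into 'out'
    DetectionApiConverter(save_images=True)(ExampleExtractor(), 'out')

    # Import: every *.tfrecord in the directory becomes a subset
    project = Project.import_from('out', 'tf_detection_api')
    dataset = project.make_dataset()

Since save_images=True embeds the encoded frames in the records
('image/encoded'), the importer can rebuild the items without a separate
images directory.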