tfrecord format support (#644)

* added TFRecord loader/dumper
* added a comment
* removed an unused import
* used the latest version of tensorflow (1.12.3) that supports CUDA 9.0;
updated the cuDNN library;
tensorflow is now installed by default and replaced by tensorflow-gpu
when CUDA support is enabled
* Updated changelog
main
Andrey Zhavoronkov 7 years ago committed by Nikita Manovich
parent 6cac464f46
commit 935d380d36
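For context: TFRecord is TensorFlow's binary container format, where each record is a serialized tf.train.Example protobuf. A minimal round-trip sketch of the APIs the new format module relies on (the file name toy.tfrecord and the single int64 feature are illustrative only, not part of this change):

import tensorflow as tf

tf.enable_eager_execution()  # TF 1.x; the loader below also depends on eager mode

# write a single Example holding one int64 feature
example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[720])),
}))
with tf.io.TFRecordWriter('toy.tfrecord') as writer:
    writer.write(example.SerializeToString())

# read it back
for record in tf.data.TFRecordDataset(['toy.tfrecord']):
    parsed = tf.io.parse_single_example(record, {
        'image/height': tf.io.FixedLenFeature([], tf.int64),
    })
    print(parsed['image/height'].numpy())  # 720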

@@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to create custom extractors for unsupported media types
- Added a PDF extractor
- Added a command line model manager tester
- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO, MS COCO, png mask)
- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO, MS COCO, png mask, TFRecord)
### Changed
- Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)

@@ -58,15 +58,12 @@ COPY components /tmp/components
# OpenVINO toolkit support
ARG OPENVINO_TOOLKIT
ENV OPENVINO_TOOLKIT=${OPENVINO_TOOLKIT}
ENV REID_MODEL_DIR=${HOME}/reid
RUN if [ "$OPENVINO_TOOLKIT" = "yes" ]; then \
/tmp/components/openvino/install.sh; \
fi
# CUDA support
ARG CUDA_SUPPORT
ENV CUDA_SUPPORT=${CUDA_SUPPORT}
RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \
/tmp/components/cuda/install.sh; \
/tmp/components/openvino/install.sh && \
mkdir ${REID_MODEL_DIR} && \
wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml -O reid/reid.xml && \
wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin -O reid/reid.bin; \
fi
# Tensorflow annotation support
@@ -120,12 +117,11 @@ RUN apt-get update && \
echo export "GIT_SSH_COMMAND=\"ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o ProxyCommand='nc -X 5 -x ${socks_proxy} %h %p'\"" >> ${HOME}/.bashrc; \
fi
# Download model for re-identification app
ENV REID_MODEL_DIR=${HOME}/reid
RUN if [ "$OPENVINO_TOOLKIT" = "yes" ]; then \
mkdir ${HOME}/reid && \
wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml -O reid/reid.xml && \
wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin -O reid/reid.bin; \
# CUDA support
ARG CUDA_SUPPORT
ENV CUDA_SUPPORT=${CUDA_SUPPORT}
RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \
/tmp/components/cuda/install.sh; \
fi
# TODO: CHANGE URL

@@ -39,6 +39,7 @@ Format selection is possible after clicking on the Upload annotation / Dump anno
| [YOLO](https://pjreddie.com/darknet/yolo/) | X | X |
| [MS COCO Object Detection](http://cocodataset.org/#format-data) | X | X |
| PNG mask | X | |
| [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) | X | X |
## Links
- [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)

@@ -16,7 +16,7 @@ echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ub
CUDA_VERSION=9.0.176
NCCL_VERSION=2.1.15
CUDNN_VERSION=7.0.5.15
CUDNN_VERSION=7.6.2.24
CUDA_PKG_VERSION="9-0=${CUDA_VERSION}-1"
echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc
@@ -32,3 +32,6 @@ apt-get update && apt-get install -y --no-install-recommends --allow-unauthentic
ln -s cuda-9.0 /usr/local/cuda && \
rm -rf /var/lib/apt/lists/* \
/etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list
pip3 uninstall -y tensorflow
pip3 install --no-cache-dir tensorflow-gpu==1.12.3
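A quick way to verify the swap took effect inside a CUDA-enabled image (a hedged check; assumes it runs under the image's python3, and is not part of this change):

import tensorflow as tf

print(tf.__version__)              # expected: 1.12.3
print(tf.test.is_gpu_available())  # expected: True when built with CUDA_SUPPORT=yes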

@@ -12,9 +12,4 @@ tar -xzf model.tar.gz && rm model.tar.gz && \
mv faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28 ${HOME}/rcnn && cd ${HOME} && \
mv rcnn/frozen_inference_graph.pb rcnn/inference_graph.pb
if [[ "$CUDA_SUPPORT" = "yes" ]]
then
pip3 install --no-cache-dir tensorflow-gpu==1.7.0
else
pip3 install --no-cache-dir tensorflow==1.7.0
fi
# tensorflow is installed globally

@@ -11,4 +11,5 @@ BUILTIN_FORMATS = (
    os.path.join(path_prefix, 'yolo.py'),
    os.path.join(path_prefix, 'coco.py'),
    os.path.join(path_prefix, 'mask.py'),
    os.path.join(path_prefix, 'tfrecord.py'),
)
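Each path registered here is expected to point at a module defining a format_spec dict plus the dumper/loader handler functions it names; the new tfrecord.py below follows that contract.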

@@ -0,0 +1,182 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
format_spec = {
    "name": "TFRecord",
    "dumpers": [
        {
            "display_name": "{name} {format} {version}",
            "format": "ZIP",
            "version": "1.0",
            "handler": "dump"
        },
    ],
    "loaders": [
        {
            "display_name": "{name} {format} {version}",
            "format": "ZIP",
            "version": "1.0",
            "handler": "load"
        },
    ],
}
def dump(file_object, annotations):
    import tensorflow as tf
    import os
    import string
    from zipfile import ZipFile
    import codecs
    from tempfile import TemporaryDirectory
    from collections import OrderedDict

    # we need it to filter out non-ASCII characters, otherwise
    # training will crash
    printable = set(string.printable)

    def int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def int64_list_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

    def bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def bytes_list_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

    def float_list_feature(value):
        return tf.train.Feature(float_list=tf.train.FloatList(value=value))

    # Defining the main conversion function
    def create_tf_example(img_id, img_size, image_name, boxes, label_ids):
        # Process one image's data per run
        height = img_size[0]
        width = img_size[1]

        xmins = []        # List of normalized left x coordinates in bounding box (1 per box)
        xmaxs = []        # List of normalized right x coordinates in bounding box (1 per box)
        ymins = []        # List of normalized top y coordinates in bounding box (1 per box)
        ymaxs = []        # List of normalized bottom y coordinates in bounding box (1 per box)
        classes_text = [] # List of string class names of bounding box (1 per box)
        classes = []      # List of integer class ids of bounding box (1 per box)

        # Loop over the boxes and fill the above fields
        for box in boxes:
            # filter out non-ASCII characters
            box_name = ''.join(filter(lambda x: x in printable, box.label))

            xmins.append(box.points[0] / width)
            xmaxs.append(box.points[2] / width)
            ymins.append(box.points[1] / height)
            ymaxs.append(box.points[3] / height)
            classes_text.append(box_name.encode('utf8'))
            classes.append(label_ids[box.label])

        tf_example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': int64_feature(height),
            'image/width': int64_feature(width),
            'image/filename': bytes_feature(image_name.encode('utf8')),
            'image/source_id': int64_feature(img_id),
            'image/object/bbox/xmin': float_list_feature(xmins),
            'image/object/bbox/xmax': float_list_feature(xmaxs),
            'image/object/bbox/ymin': float_list_feature(ymins),
            'image/object/bbox/ymax': float_list_feature(ymaxs),
            'image/object/class/text': bytes_list_feature(classes_text),
            'image/object/class/label': int64_list_feature(classes),
        }))
        return tf_example

    # Create the label map file
    label_ids = OrderedDict((label[1]["name"], idx) for idx, label in enumerate(annotations.meta["task"]["labels"]))
    with TemporaryDirectory() as out_dir:
        labelmap_file = 'label_map.pbtxt'
        with codecs.open(os.path.join(out_dir, labelmap_file), 'w', encoding='utf8') as f:
            for label, idx in label_ids.items():
                f.write(u'item {\n')
                f.write(u'\tid: {}\n'.format(idx))
                f.write(u"\tname: '{}'\n".format(label))
                f.write(u'}\n\n')

        annotation_file = '{}.tfrecord'.format(annotations.meta['task']['name'])
        with tf.io.TFRecordWriter(os.path.join(out_dir, annotation_file)) as writer:
            for frame_annotation in annotations.group_by_frame():
                boxes = [shape for shape in frame_annotation.labeled_shapes if shape.type == 'rectangle']
                if not boxes:
                    continue
                tf_example = create_tf_example(
                    img_id=frame_annotation.frame,
                    img_size=(frame_annotation.height, frame_annotation.width),
                    image_name=frame_annotation.name,
                    boxes=boxes,
                    label_ids=label_ids,
                )
                writer.write(tf_example.SerializeToString())

        with ZipFile(file_object, 'w') as output_zip:
            output_zip.write(filename=os.path.join(out_dir, labelmap_file), arcname=labelmap_file)
            output_zip.write(filename=os.path.join(out_dir, annotation_file), arcname=annotation_file)
def load(file_object, annotations):
    from pyunpack import Archive
    from tempfile import TemporaryDirectory
    import os
    import tensorflow as tf
    from glob import glob
    import numpy as np
    tf.enable_eager_execution()

    def parse_tfrecord_file(filenames):
        def clamp(value, _min, _max):
            return max(min(_max, value), _min)

        dataset = tf.data.TFRecordDataset(filenames)
        image_feature_description = {
            'image/filename': tf.io.FixedLenFeature([], tf.string),
            'image/source_id': tf.io.FixedLenFeature([], tf.int64),
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            # Object boxes and classes.
            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
            'image/object/class/label': tf.io.VarLenFeature(tf.int64),
            'image/object/class/text': tf.io.VarLenFeature(tf.string),
        }

        for record in dataset:
            parsed_record = tf.io.parse_single_example(record, image_feature_description)
            frame_number = tf.cast(parsed_record['image/source_id'], tf.int64).numpy().item()
            frame_height = tf.cast(parsed_record['image/height'], tf.int64).numpy().item()
            frame_width = tf.cast(parsed_record['image/width'], tf.int64).numpy().item()
            xmins = tf.sparse.to_dense(parsed_record['image/object/bbox/xmin']).numpy()
            ymins = tf.sparse.to_dense(parsed_record['image/object/bbox/ymin']).numpy()
            xmaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/xmax']).numpy()
            ymaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/ymax']).numpy()
            labels = tf.sparse.to_dense(parsed_record['image/object/class/text'], default_value='').numpy()

            for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]:
                annotations.add_shape(annotations.LabeledShape(
                    type='rectangle',
                    frame=frame_number,
                    label=shape[0].decode("utf-8"),
                    points=[
                        clamp(shape[1] * frame_width, 0, frame_width),
                        clamp(shape[2] * frame_height, 0, frame_height),
                        clamp(shape[3] * frame_width, 0, frame_width),
                        clamp(shape[4] * frame_height, 0, frame_height),
                    ],
                    occluded=False,
                    attributes=[],
                ))

    archive_file = getattr(file_object, 'name')
    with TemporaryDirectory() as tmp_dir:
        Archive(archive_file).extractall(tmp_dir)
        filenames = glob(os.path.join(tmp_dir, '*.tfrecord'))
        parse_tfrecord_file(filenames)
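For reference, dump() produces a ZIP with two entries: label_map.pbtxt and <task name>.tfrecord. Each label map entry written by the loop above takes this shape ('car' and id 0 are a hypothetical label name and its index, shown for illustration):

item {
	id: 0
	name: 'car'
}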

@@ -38,3 +38,4 @@ pascal_voc_writer==0.1.4
cython==0.29.13
matplotlib==3.0.3
scikit-image>=0.14.0
tensorflow==1.12.3
