From 935d380d36cb1074eddc3b0018c856a3997804de Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com>
Date: Thu, 15 Aug 2019 18:02:35 +0300
Subject: [PATCH] tfrecord format support (#644)

* added tfrecord loader/dumper

* add comment

* remove unused import

* use the latest version of tensorflow (1.12.3), which supports CUDA 9.0;
  update the cuDNN library; install tensorflow by default and replace it with
  tensorflow-gpu when CUDA support is enabled

* Updated changelog
---
 CHANGELOG.md                        |   2 +-
 Dockerfile                          |  24 ++--
 README.md                           |   1 +
 components/cuda/install.sh          |   5 +-
 components/tf_annotation/install.sh |   7 +-
 cvat/apps/annotation/settings.py    |   1 +
 cvat/apps/annotation/tfrecord.py    | 182 ++++++++++++++++++++++++++++
 cvat/requirements/base.txt          |   1 +
 8 files changed, 201 insertions(+), 22 deletions(-)
 create mode 100644 cvat/apps/annotation/tfrecord.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f578b59..10e6a6ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to create a custom extractors for unsupported media types
 - Added in PDF extractor
 - Added in a command line model manager tester
-- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO, MS COCO, png mask)
+- Ability to dump/load annotations in several formats from UI (CVAT, Pascal VOC, YOLO, MS COCO, png mask, TFRecord)
 
 ### Changed
 - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)
diff --git a/Dockerfile b/Dockerfile
index 8837fdd0..6b5d416b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -58,15 +58,12 @@ COPY components /tmp/components
 # OpenVINO toolkit support
 ARG OPENVINO_TOOLKIT
 ENV OPENVINO_TOOLKIT=${OPENVINO_TOOLKIT}
+ENV REID_MODEL_DIR=${HOME}/reid
 RUN if [ "$OPENVINO_TOOLKIT" = "yes" ]; then \
-        /tmp/components/openvino/install.sh; \
-    fi
-
-# CUDA support
-ARG CUDA_SUPPORT
-ENV CUDA_SUPPORT=${CUDA_SUPPORT}
-RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \
-        /tmp/components/cuda/install.sh; \
+        /tmp/components/openvino/install.sh && \
+        mkdir ${REID_MODEL_DIR} && \
+        wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml -O reid/reid.xml && \
+        wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin -O reid/reid.bin; \
     fi
 
 # Tensorflow annotation support
@@ -120,12 +117,11 @@ RUN apt-get update && \
         echo export "GIT_SSH_COMMAND=\"ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o ProxyCommand='nc -X 5 -x ${socks_proxy} %h %p'\"" >> ${HOME}/.bashrc; \
     fi
 
-# Download model for re-identification app
-ENV REID_MODEL_DIR=${HOME}/reid
-RUN if [ "$OPENVINO_TOOLKIT" = "yes" ]; then \
-    mkdir ${HOME}/reid && \
-    wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml -O reid/reid.xml && \
-    wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin -O reid/reid.bin; \
+# CUDA support
+ARG CUDA_SUPPORT
+ENV CUDA_SUPPORT=${CUDA_SUPPORT}
+RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \
+    /tmp/components/cuda/install.sh; \
 fi
 
 # TODO: CHANGE URL
diff --git a/README.md b/README.md
index bf5d6a6b..2bca82a5 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ Format selection is possible after clicking on the Upload annotation / Dump anno
 | [YOLO](https://pjreddie.com/darknet/yolo/)                      | X    | X    |
 | [MS COCO Object Detection](http://cocodataset.org/#format-data) | X    | X    |
 | PNG mask                                                        | X    |      |
+| [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) | X | X |
 
 ## Links
 - [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat)
diff --git a/components/cuda/install.sh b/components/cuda/install.sh
index dd689f77..2cda99fc 100755
--- a/components/cuda/install.sh
+++ b/components/cuda/install.sh
@@ -16,7 +16,7 @@ echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ub
 CUDA_VERSION=9.0.176
 NCCL_VERSION=2.1.15
-CUDNN_VERSION=7.0.5.15
+CUDNN_VERSION=7.6.2.24
 CUDA_PKG_VERSION="9-0=${CUDA_VERSION}-1"
 echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc
 echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc
@@ -32,3 +32,6 @@ apt-get update && apt-get install -y --no-install-recommends --allow-unauthentic
     ln -s cuda-9.0 /usr/local/cuda && \
     rm -rf /var/lib/apt/lists/* \
         /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list
+
+pip3 uninstall -y tensorflow
+pip3 install --no-cache-dir tensorflow-gpu==1.12.3
diff --git a/components/tf_annotation/install.sh b/components/tf_annotation/install.sh
index bdd1c6e2..fc5ed6b6 100755
--- a/components/tf_annotation/install.sh
+++ b/components/tf_annotation/install.sh
@@ -12,9 +12,4 @@ tar -xzf model.tar.gz && rm model.tar.gz && \
 mv faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28 ${HOME}/rcnn && cd ${HOME} && \
 mv rcnn/frozen_inference_graph.pb rcnn/inference_graph.pb
 
-if [[ "$CUDA_SUPPORT" = "yes" ]]
-then
-    pip3 install --no-cache-dir tensorflow-gpu==1.7.0
-else
-    pip3 install --no-cache-dir tensorflow==1.7.0
-fi
+# tensorflow is installed globally
diff --git a/cvat/apps/annotation/settings.py b/cvat/apps/annotation/settings.py
index f9b28d4d..0ac2a38c 100644
--- a/cvat/apps/annotation/settings.py
+++ b/cvat/apps/annotation/settings.py
@@ -11,4 +11,5 @@ BUILTIN_FORMATS = (
     os.path.join(path_prefix, 'yolo.py'),
     os.path.join(path_prefix, 'coco.py'),
     os.path.join(path_prefix, 'mask.py'),
+    os.path.join(path_prefix, 'tfrecord.py'),
 )
diff --git a/cvat/apps/annotation/tfrecord.py b/cvat/apps/annotation/tfrecord.py
new file mode 100644
index 00000000..a911625e
--- /dev/null
+++ b/cvat/apps/annotation/tfrecord.py
@@ -0,0 +1,182 @@
+# Copyright (C) 2019 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+format_spec = {
+    "name": "TFRecord",
+    "dumpers": [
+        {
+            "display_name": "{name} {format} {version}",
+            "format": "ZIP",
+            "version": "1.0",
+            "handler": "dump"
+        },
+    ],
+    "loaders": [
+        {
+            "display_name": "{name} {format} {version}",
+            "format": "ZIP",
+            "version": "1.0",
+            "handler": "load"
+        },
+    ],
+}
+
+def dump(file_object, annotations):
+    import tensorflow as tf
+    import os
+    import string
+    from zipfile import ZipFile
+    import codecs
+    from tempfile import TemporaryDirectory
+    from collections import OrderedDict
+
+    # we need it to filter out non-ASCII characters, otherwise
+    # training will crash
+    printable = set(string.printable)
+
+    def int64_feature(value):
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+    def int64_list_feature(value):
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+    def bytes_feature(value):
+        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+    def bytes_list_feature(value):
+        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
+
+    def float_list_feature(value):
+        return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+    # Define the main conversion function
+    def create_tf_example(img_id, img_size, image_name, boxes, label_ids):
+        # Process one image's data per call
+        height = img_size[0]
+        width = img_size[1]
+
+        xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
+        xmaxs = [] # List of normalized right x coordinates in bounding box
+                   # (1 per box)
+        ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
+        ymaxs = [] # List of normalized bottom y coordinates in bounding box
+                   # (1 per box)
+        classes_text = [] # List of string class name of bounding box (1 per box)
+        classes = [] # List of integer class id of bounding box (1 per box)
+
+        # Loop over the boxes and fill the above fields
+        for box in boxes:
+            # filter out non-ASCII characters
+            box_name = ''.join(filter(lambda x: x in printable, box.label))
+
+            xmins.append(box.points[0] / width)
+            xmaxs.append(box.points[2] / width)
+            ymins.append(box.points[1] / height)
+            ymaxs.append(box.points[3] / height)
+            classes_text.append(box_name.encode('utf8'))
+            classes.append(label_ids[box.label])
+
+        tf_example = tf.train.Example(features=tf.train.Features(feature={
+            'image/height': int64_feature(height),
+            'image/width': int64_feature(width),
+            'image/filename': bytes_feature(image_name.encode('utf8')),
+            'image/source_id': int64_feature(img_id),
+            'image/object/bbox/xmin': float_list_feature(xmins),
+            'image/object/bbox/xmax': float_list_feature(xmaxs),
+            'image/object/bbox/ymin': float_list_feature(ymins),
+            'image/object/bbox/ymax': float_list_feature(ymaxs),
+            'image/object/class/text': bytes_list_feature(classes_text),
+            'image/object/class/label': int64_list_feature(classes),
+        }))
+        return tf_example
+
+    # Create the label map file
+    label_ids = OrderedDict((label[1]["name"], idx) for idx, label in enumerate(annotations.meta["task"]["labels"]))
+    with TemporaryDirectory() as out_dir:
+        labelmap_file = 'label_map.pbtxt'
+        with codecs.open(os.path.join(out_dir, labelmap_file), 'w', encoding='utf8') as f:
+            for label, idx in label_ids.items():
+                f.write(u'item {\n')
+                f.write(u'\tid: {}\n'.format(idx))
+                f.write(u"\tname: '{}'\n".format(label))
+                f.write(u'}\n\n')
+
+        annotation_file = '{}.tfrecord'.format(annotations.meta['task']['name'])
+        with tf.io.TFRecordWriter(os.path.join(out_dir, annotation_file)) as writer:
+            for frame_annotation in annotations.group_by_frame():
+                boxes = [shape for shape in frame_annotation.labeled_shapes if shape.type == 'rectangle']
+                if not boxes:
+                    continue
+                tf_example = create_tf_example(
+                    img_id=frame_annotation.frame,
+                    img_size=(frame_annotation.height, frame_annotation.width),
+                    image_name=frame_annotation.name,
+                    boxes=boxes,
+                    label_ids=label_ids,
+                )
+                writer.write(tf_example.SerializeToString())
+
+        with ZipFile(file_object, 'w') as output_zip:
+            output_zip.write(filename=os.path.join(out_dir, labelmap_file), arcname=labelmap_file)
+            output_zip.write(filename=os.path.join(out_dir, annotation_file), arcname=annotation_file)
+
+def load(file_object, annotations):
+    from pyunpack import Archive
+    from tempfile import TemporaryDirectory
+    import os
+    import tensorflow as tf
+    from glob import glob
+    import numpy as np
+
+    tf.enable_eager_execution()
+
+    def parse_tfrecord_file(filenames):
+        def clamp(value, _min, _max):
+            return max(min(_max, value), _min)
+
+        dataset = tf.data.TFRecordDataset(filenames)
+        image_feature_description = {
+            'image/filename': tf.io.FixedLenFeature([], tf.string),
+            'image/source_id': tf.io.FixedLenFeature([], tf.int64),
+            'image/height': tf.io.FixedLenFeature([], tf.int64),
+            'image/width': tf.io.FixedLenFeature([], tf.int64),
+            # Object boxes and classes.
+            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
+            'image/object/class/label': tf.io.VarLenFeature(tf.int64),
+            'image/object/class/text': tf.io.VarLenFeature(tf.string),
+        }
+
+        for record in dataset:
+            parsed_record = tf.io.parse_single_example(record, image_feature_description)
+            frame_number = tf.cast(parsed_record['image/source_id'], tf.int64).numpy().item()
+            frame_height = tf.cast(parsed_record['image/height'], tf.int64).numpy().item()
+            frame_width = tf.cast(parsed_record['image/width'], tf.int64).numpy().item()
+            xmins = tf.sparse.to_dense(parsed_record['image/object/bbox/xmin']).numpy()
+            ymins = tf.sparse.to_dense(parsed_record['image/object/bbox/ymin']).numpy()
+            xmaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/xmax']).numpy()
+            ymaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/ymax']).numpy()
+            labels = tf.sparse.to_dense(parsed_record['image/object/class/text'], default_value='').numpy()
+            for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]:
+                annotations.add_shape(annotations.LabeledShape(
+                    type='rectangle',
+                    frame=frame_number,
+                    label=shape[0].decode("utf-8"),
+                    points=[
+                        clamp(shape[1] * frame_width, 0, frame_width),
+                        clamp(shape[2] * frame_height, 0, frame_height),
+                        clamp(shape[3] * frame_width, 0, frame_width),
+                        clamp(shape[4] * frame_height, 0, frame_height),
+                    ],
+                    occluded=False,
+                    attributes=[],
+                ))
+
+    archive_file = getattr(file_object, 'name')
+    with TemporaryDirectory() as tmp_dir:
+        Archive(archive_file).extractall(tmp_dir)
+        filenames = glob(os.path.join(tmp_dir, '*.tfrecord'))
+        parse_tfrecord_file(filenames)
diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt
index 04a201ee..a3f22e82 100644
--- a/cvat/requirements/base.txt
+++ b/cvat/requirements/base.txt
@@ -38,3 +38,4 @@ pascal_voc_writer==0.1.4
 cython==0.29.13
 matplotlib==3.0.3
 scikit-image>=0.14.0
+tensorflow==1.12.3
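
A quick way to sanity-check a dumped archive: the minimal sketch below (not part
of the patch) extracts the zip and prints each record's filename and class
labels. The 'task_annotations.zip' path is a hypothetical example; the feature
keys and the tensorflow 1.12 eager-mode calls simply mirror the loader above.

# Hedged sketch: inspect a TFRecord archive produced by the dumper above.
# Assumes tensorflow==1.12.3 as pinned in this patch; 'task_annotations.zip'
# is a hypothetical archive name.
import os
import zipfile
from glob import glob
from tempfile import TemporaryDirectory

import tensorflow as tf

tf.enable_eager_execution()

# A subset of the feature keys that the dumper writes and the loader parses.
features = {
    'image/filename': tf.io.FixedLenFeature([], tf.string),
    'image/source_id': tf.io.FixedLenFeature([], tf.int64),
    'image/height': tf.io.FixedLenFeature([], tf.int64),
    'image/width': tf.io.FixedLenFeature([], tf.int64),
    'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
}

with TemporaryDirectory() as tmp_dir:
    # The archive also contains label_map.pbtxt; only the .tfrecord is read here.
    with zipfile.ZipFile('task_annotations.zip') as archive:
        archive.extractall(tmp_dir)
    dataset = tf.data.TFRecordDataset(glob(os.path.join(tmp_dir, '*.tfrecord')))
    for record in dataset:
        parsed = tf.io.parse_single_example(record, features)
        labels = tf.sparse.to_dense(parsed['image/object/class/text'], default_value='')
        print(parsed['image/filename'].numpy().decode('utf-8'),
              [l.decode('utf-8') for l in labels.numpy()])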