From a0f083d274652cbb5e73006b60f874969c17ee58 Mon Sep 17 00:00:00 2001
From: Ben Hoff
Date: Mon, 28 Oct 2019 07:33:17 -0400
Subject: [PATCH] added in yolo auto annotation script (#794)

---
 CHANGELOG.md                             |   1 +
 utils/open_model_zoo/yolov3/README.md    |  22 ++++
 utils/open_model_zoo/yolov3/interp.py    | 160 +++++++++++++++++++++++
 utils/open_model_zoo/yolov3/mapping.json |  84 ++++++++++++
 4 files changed, 267 insertions(+)
 create mode 100644 utils/open_model_zoo/yolov3/README.md
 create mode 100644 utils/open_model_zoo/yolov3/interp.py
 create mode 100644 utils/open_model_zoo/yolov3/mapping.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 242babe3..04923a63 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   https://github.com/opencv/cvat/issues/750).
 - Changed REST API: removed PUT and added DELETE methods for /api/v1/users/ID.
 - Added Mask-RCNN Auto Annotation Script
+- Added Yolo Auto Annotation Script
 
 ### Changed
 -
diff --git a/utils/open_model_zoo/yolov3/README.md b/utils/open_model_zoo/yolov3/README.md
new file mode 100644
index 00000000..28cbac5e
--- /dev/null
+++ b/utils/open_model_zoo/yolov3/README.md
@@ -0,0 +1,22 @@
+# Object Detection YOLO V3 Python Demo, Async API Performance Showcase
+
+See [these instructions][1] for converting the YOLO weights to the OpenVINO format.
+
+As of OpenVINO 2019 R3, only TensorFlow 1.13 and NetworkX 2.3 are supported.
+These can be explicitly installed using the following command.
+
+```bash
+$ pip3 install tensorflow==1.13 networkx==2.3
+```
+
+
+Additionally, at the time of writing, the model optimizer required an explicit input shape.
+
+```bash
+$ python3 mo_tf.py \
+    --input_model /path/to/yolo_v3.pb \
+    --tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/yolo_v3.json \
+    --input_shape [1,416,416,3]
+```
+
+[1]: https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow.html
diff --git a/utils/open_model_zoo/yolov3/interp.py b/utils/open_model_zoo/yolov3/interp.py
new file mode 100644
index 00000000..f9349287
--- /dev/null
+++ b/utils/open_model_zoo/yolov3/interp.py
@@ -0,0 +1,160 @@
+from math import exp
+
+
+class Parser:
+    IOU_THRESHOLD = 0.4
+    PROB_THRESHOLD = 0.5
+
+    def __init__(self):
+        self.objects = []
+
+    def scale_bbox(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
+        xmin = int((x - w / 2) * w_scale)
+        ymin = int((y - h / 2) * h_scale)
+        xmax = int(xmin + w * w_scale)
+        ymax = int(ymin + h * h_scale)
+
+        return dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, class_id=class_id, confidence=confidence)
+
+    def entry_index(self, side, coord, classes, location, entry):
+        side_power_2 = side ** 2
+        n = location // side_power_2
+        loc = location % side_power_2
+        return int(side_power_2 * (n * (coord + classes + 1) + entry) + loc)
+
+    def intersection_over_union(self, box_1, box_2):
+        width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
+        height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
+        if width_of_overlap_area < 0 or height_of_overlap_area < 0:
+            area_of_overlap = 0
+        else:
+            area_of_overlap = width_of_overlap_area * height_of_overlap_area
+        box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
+        box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
+        area_of_union = box_1_area + box_2_area - area_of_overlap
+        if area_of_union == 0:
+            return 0
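+        # Otherwise report the usual IoU ratio of overlap area to union area;
+        # sort_objects() below uses it to suppress overlapping duplicate detections.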
+        return area_of_overlap / area_of_union
+
+
+    def sort_objects(self):
+        self.objects = sorted(self.objects, key=lambda obj: obj['confidence'], reverse=True)
+
+        for i in range(len(self.objects)):
+            if self.objects[i]['confidence'] == 0:
+                continue
+            for j in range(i + 1, len(self.objects)):
+                if self.intersection_over_union(self.objects[i], self.objects[j]) > self.IOU_THRESHOLD:
+                    self.objects[j]['confidence'] = 0
+
+    def parse_yolo_region(self, blob: 'np.ndarray', original_shape: list, params: dict) -> list:
+
+        # YOLO magic numbers
+        # See: https://github.com/opencv/open_model_zoo/blob/acf297c73db8cb3f68791ae1fad4a7cc4a6039e5/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L61
+        num = 3
+        coords = 4
+        classes = 80
+        # -----------------
+
+        _, _, out_blob_h, out_blob_w = blob.shape
+        assert out_blob_w == out_blob_h, "Invalid size of output blob. It should be in NCHW layout and height should " \
+                                         "be equal to width. Current height = {}, current width = {}" \
+                                         "".format(out_blob_h, out_blob_w)
+
+        # ------ Extracting layer parameters --
+        orig_im_h, orig_im_w = original_shape
+        predictions = blob.flatten()
+        side_square = params['side'] * params['side']
+
+        # ------ Parsing YOLO Region output --
+        for i in range(side_square):
+            row = i // params['side']
+            col = i % params['side']
+            for n in range(num):
+                # ----- entry index calcs ------
+                obj_index = self.entry_index(params['side'], coords, classes, n * side_square + i, coords)
+                scale = predictions[obj_index]
+                if scale < self.PROB_THRESHOLD:
+                    continue
+                box_index = self.entry_index(params['side'], coords, classes, n * side_square + i, 0)
+
+                # The network produces location predictions in absolute coordinates of the feature map.
+                # Scale them to relative coordinates.
+                x = (col + predictions[box_index + 0 * side_square]) / params['side']
+                y = (row + predictions[box_index + 1 * side_square]) / params['side']
+                # The argument to exp() can be a very large number, so guard against overflow here.
+                try:
+                    w_exp = exp(predictions[box_index + 2 * side_square])
+                    h_exp = exp(predictions[box_index + 3 * side_square])
+                except OverflowError:
+                    continue
+
+                w = w_exp * params['anchors'][2 * n] / 416
+                h = h_exp * params['anchors'][2 * n + 1] / 416
+                for j in range(classes):
+                    class_index = self.entry_index(params['side'], coords, classes, n * side_square + i,
+                                                   coords + 1 + j)
+                    confidence = scale * predictions[class_index]
+                    if confidence < self.PROB_THRESHOLD:
+                        continue
+
+                    self.objects.append(self.scale_bbox(x=x,
+                                                        y=y,
+                                                        h=h,
+                                                        w=w,
+                                                        class_id=j,
+                                                        confidence=confidence,
+                                                        h_scale=orig_im_h,
+                                                        w_scale=orig_im_w))
+
+
+# `detections` and `results` are provided by the CVAT auto annotation runner.
+for detection in detections:
+    frame_number = detection['frame_id']
+    height = detection['frame_height']
+    width = detection['frame_width']
+    detection = detection['detections']
+
+    # parse_yolo_region unpacks this as (orig_im_h, orig_im_w), so pass (height, width).
+    original_shape = (height, width)
+
+    resized_width = width / 416
+    resized_height = height / 416
+
+    resized_shape = (resized_width, resized_height)
+
+    # https://github.com/opencv/open_model_zoo/blob/master/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L72
+    anchors = [10,13,16,30,33,23,30,61,62,45,59,119,116,90,156,198,373,326]
+    conv_6 = {'side': 13, 'mask': [6,7,8]}
+    conv_14 = {'side': 26, 'mask': [3,4,5]}
+    conv_22 = {'side': 52, 'mask': [0,1,2]}
+
+    yolo_params = {'detector/yolo-v3/Conv_6/BiasAdd/YoloRegion': conv_6,
+                   'detector/yolo-v3/Conv_14/BiasAdd/YoloRegion': conv_14,
+                   'detector/yolo-v3/Conv_22/BiasAdd/YoloRegion': conv_22}
+
+    for conv_net in yolo_params.values():
+        mask = conv_net['mask']
+        masked_anchors = []
+        for idx in mask:
+            masked_anchors += [anchors[idx * 2], anchors[idx * 2 + 1]]
+
+        conv_net['anchors'] = masked_anchors
+
+    parser = Parser()
+
+    for name, blob in detection.items():
+        parser.parse_yolo_region(blob, original_shape, yolo_params[name])
+
+    parser.sort_objects()
+
+    objects = []
+
+    for obj in parser.objects:
+        if obj['confidence'] >= parser.PROB_THRESHOLD:
+            label = obj['class_id']
+            xmin = obj['xmin']
+            xmax = obj['xmax']
+            ymin = obj['ymin']
+            ymax = obj['ymax']
+
+            results.add_box(xmax, ymax, xmin, ymin, label, frame_number)
diff --git a/utils/open_model_zoo/yolov3/mapping.json b/utils/open_model_zoo/yolov3/mapping.json
new file mode 100644
index 00000000..15e7420b
--- /dev/null
+++ b/utils/open_model_zoo/yolov3/mapping.json
@@ -0,0 +1,84 @@
+{
+  "label_map": {
+    "1": "person",
+    "2": "bicycle",
+    "3": "car",
+    "4": "motorbike",
+    "5": "aeroplane",
+    "6": "bus",
+    "7": "train",
+    "8": "truck",
+    "9": "boat",
+    "10": "traffic light",
+    "11": "fire hydrant",
+    "12": "stop sign",
+    "13": "parking meter",
+    "14": "bench",
+    "15": "bird",
+    "16": "cat",
+    "17": "dog",
+    "18": "horse",
+    "19": "sheep",
+    "20": "cow",
+    "21": "elephant",
+    "22": "bear",
+    "23": "zebra",
+    "24": "giraffe",
+    "25": "backpack",
+    "26": "umbrella",
+    "27": "handbag",
+    "28": "tie",
+    "29": "suitcase",
+    "30": "frisbee",
+    "31": "skis",
+    "32": "snowboard",
+    "33": "sports ball",
+    "34": "kite",
+    "35": "baseball bat",
+    "36": "baseball glove",
+    "37": "skateboard",
+    "38": "surfboard",
+    "39": "tennis racket",
+    "40": "bottle",
+    "41": "wine glass",
+    "42": "cup",
+    "43": "fork",
+    "44": "knife",
+    "45": "spoon",
+    "46": "bowl",
+    "47": "banana",
+    "48": "apple",
+    "49": "sandwich",
+    "50": "orange",
"51": "broccoli", + "52": "carrot", + "53": "hot dog", + "54": "pizza", + "55": "donut", + "56": "cake", + "57": "chair", + "58": "sofa", + "59": "pottedplant", + "60": "bed", + "61": "diningtable", + "62": "toilet", + "63": "tvmonitor", + "64": "laptop", + "65": "mouse", + "66": "remote", + "67": "keyboard", + "68": "cell phone", + "69": "microwave", + "70": "oven", + "71": "toaster", + "72": "sink", + "73": "refrigerator", + "74": "book", + "75": "clock", + "76": "vase", + "77": "scissors", + "78": "teddy bear", + "79": "hair drier", + "80": "toothbrush" + } +}