YoloV7 serverless detector feature for auto annotation (#5552)
### Motivation and context Integration of YOLOv7 as a serverless Nuclio function that can be used for auto-labeling. YOLOv7 is the SOTA at the time of this PR, therefore it makes sense to support it in CVAT. The integration into CVAT is quite simple: the function uses the COCO-pretrained YOLOv7 model (https://github.com/WongKinYiu/yolov7) and is built on the Ultralytics YOLOv5 Docker image (https://hub.docker.com/r/ultralytics/yolov5). Related issue: #5548 ### How has this been tested? Automatic annotation was run using YOLOv7 on a custom dataset. The serverless function was deployed using ``` nuctl deploy --project-name cvat \ --path serverless/onnx/WongKinYiu/yolov7/nuclio \ --volume `pwd`/serverless/common:/opt/nuclio/common \ --platform local ``` Then the function was tested using the 'Automatic annotation' action, and the auto-generated labels were inspected to check that the box coordinates match the objects. ### Use custom model: 1. Export your model with NMS for an image resolution of 640x640 (preferable). 2. Copy your custom model yolov7-custom.onnx to /serverless/common 3. Modify the function.yaml file according to your labels. 4. Modify model_handler.py as follows: ``` self.model_path = "yolov7-custom.onnx" ``` Co-authored-by: Nikita Manovich <nikita@cvat.ai> Co-authored-by: yasakova-anastasia <yasakova_anastasiya@mail.ru> main
parent
bcaca0369f
commit
f3843aa74f
@ -0,0 +1,135 @@
|
|||||||
|
metadata:
|
||||||
|
name: onnx-yolov7
|
||||||
|
namespace: cvat
|
||||||
|
annotations:
|
||||||
|
name: YOLO v7
|
||||||
|
type: detector
|
||||||
|
framework: onnx
|
||||||
|
spec: |
|
||||||
|
[
|
||||||
|
{ "id": 0, "name": "person" },
|
||||||
|
{ "id": 1, "name": "bicycle" },
|
||||||
|
{ "id": 2, "name": "car" },
|
||||||
|
{ "id": 3, "name": "motorbike" },
|
||||||
|
{ "id": 4, "name": "aeroplane" },
|
||||||
|
{ "id": 5, "name": "bus" },
|
||||||
|
{ "id": 6, "name": "train" },
|
||||||
|
{ "id": 7, "name": "truck" },
|
||||||
|
{ "id": 8, "name": "boat" },
|
||||||
|
{ "id": 9, "name": "traffic light" },
|
||||||
|
{ "id": 10, "name": "fire hydrant" },
|
||||||
|
{ "id": 11, "name": "stop sign" },
|
||||||
|
{ "id": 12, "name": "parking meter" },
|
||||||
|
{ "id": 13, "name": "bench" },
|
||||||
|
{ "id": 14, "name": "bird" },
|
||||||
|
{ "id": 15, "name": "cat" },
|
||||||
|
{ "id": 16, "name": "dog" },
|
||||||
|
{ "id": 17, "name": "horse" },
|
||||||
|
{ "id": 18, "name": "sheep" },
|
||||||
|
{ "id": 19, "name": "cow" },
|
||||||
|
{ "id": 20, "name": "elephant" },
|
||||||
|
{ "id": 21, "name": "bear" },
|
||||||
|
{ "id": 22, "name": "zebra" },
|
||||||
|
{ "id": 23, "name": "giraffe" },
|
||||||
|
{ "id": 24, "name": "backpack" },
|
||||||
|
{ "id": 25, "name": "umbrella" },
|
||||||
|
{ "id": 26, "name": "handbag" },
|
||||||
|
{ "id": 27, "name": "tie" },
|
||||||
|
{ "id": 28, "name": "suitcase" },
|
||||||
|
{ "id": 29, "name": "frisbee" },
|
||||||
|
{ "id": 30, "name": "skis" },
|
||||||
|
{ "id": 31, "name": "snowboard" },
|
||||||
|
{ "id": 32, "name": "sports ball" },
|
||||||
|
{ "id": 33, "name": "kite" },
|
||||||
|
{ "id": 34, "name": "baseball bat" },
|
||||||
|
{ "id": 35, "name": "baseball glove" },
|
||||||
|
{ "id": 36, "name": "skateboard" },
|
||||||
|
{ "id": 37, "name": "surfboard" },
|
||||||
|
{ "id": 38, "name": "tennis racket" },
|
||||||
|
{ "id": 39, "name": "bottle" },
|
||||||
|
{ "id": 40, "name": "wine glass" },
|
||||||
|
{ "id": 41, "name": "cup" },
|
||||||
|
{ "id": 42, "name": "fork" },
|
||||||
|
{ "id": 43, "name": "knife" },
|
||||||
|
{ "id": 44, "name": "spoon" },
|
||||||
|
{ "id": 45, "name": "bowl" },
|
||||||
|
{ "id": 46, "name": "banana" },
|
||||||
|
{ "id": 47, "name": "apple" },
|
||||||
|
{ "id": 48, "name": "sandwich" },
|
||||||
|
{ "id": 49, "name": "orange" },
|
||||||
|
{ "id": 50, "name": "broccoli" },
|
||||||
|
{ "id": 51, "name": "carrot" },
|
||||||
|
{ "id": 52, "name": "hot dog" },
|
||||||
|
{ "id": 53, "name": "pizza" },
|
||||||
|
{ "id": 54, "name": "donut" },
|
||||||
|
{ "id": 55, "name": "cake" },
|
||||||
|
{ "id": 56, "name": "chair" },
|
||||||
|
{ "id": 57, "name": "sofa" },
|
||||||
|
{ "id": 58, "name": "pottedplant" },
|
||||||
|
{ "id": 59, "name": "bed" },
|
||||||
|
{ "id": 60, "name": "diningtable" },
|
||||||
|
{ "id": 61, "name": "toilet" },
|
||||||
|
{ "id": 62, "name": "tvmonitor" },
|
||||||
|
{ "id": 63, "name": "laptop" },
|
||||||
|
{ "id": 64, "name": "mouse" },
|
||||||
|
{ "id": 65, "name": "remote" },
|
||||||
|
{ "id": 66, "name": "keyboard" },
|
||||||
|
{ "id": 67, "name": "cell phone" },
|
||||||
|
{ "id": 68, "name": "microwave" },
|
||||||
|
{ "id": 69, "name": "oven" },
|
||||||
|
{ "id": 70, "name": "toaster" },
|
||||||
|
{ "id": 71, "name": "sink" },
|
||||||
|
{ "id": 72, "name": "refrigerator" },
|
||||||
|
{ "id": 73, "name": "book" },
|
||||||
|
{ "id": 74, "name": "clock" },
|
||||||
|
{ "id": 75, "name": "vase" },
|
||||||
|
{ "id": 76, "name": "scissors" },
|
||||||
|
{ "id": 77, "name": "teddy bear" },
|
||||||
|
{ "id": 78, "name": "hair drier" },
|
||||||
|
{ "id": 79, "name": "toothbrush" }
|
||||||
|
]
|
||||||
|
|
||||||
|
spec:
|
||||||
|
description: YOLO v7 via onnx-runtime
|
||||||
|
runtime: 'python:3.8'
|
||||||
|
handler: main:handler
|
||||||
|
eventTimeout: 30s
|
||||||
|
build:
|
||||||
|
image: cvat/onnx-yolov7
|
||||||
|
baseImage: ultralytics/yolov5:latest
|
||||||
|
|
||||||
|
directives:
|
||||||
|
preCopy:
|
||||||
|
- kind: USER
|
||||||
|
value: root
|
||||||
|
- kind: RUN
|
||||||
|
value: apt update && apt install --no-install-recommends -y libglib2.0-0
|
||||||
|
- kind: WORKDIR
|
||||||
|
value: /opt/nuclio
|
||||||
|
# GPU flavour: install the CUDA-enabled onnxruntime build. The handler only
# selects CUDAExecutionProvider when onnxruntime reports a GPU device, which the
# CPU-only `onnxruntime` wheel never does, so the reserved GPU would stay idle.
# NOTE(review): confirm the wheel matches the CUDA/cuDNN version of the base image.
- kind: RUN
  value: pip install onnxruntime-gpu
|
||||||
|
- kind: WORKDIR
|
||||||
|
value: /opt/nuclio
|
||||||
|
- kind: RUN
|
||||||
|
value: wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-nms-640.onnx
|
||||||
|
- kind: RUN
|
||||||
|
value: ln -s /usr/bin/python3 /usr/bin/python
|
||||||
|
|
||||||
|
triggers:
|
||||||
|
myHttpTrigger:
|
||||||
|
maxWorkers: 1
|
||||||
|
kind: 'http'
|
||||||
|
workerAvailabilityTimeoutMilliseconds: 10000
|
||||||
|
attributes:
|
||||||
|
maxRequestBodySize: 33554432 # 32MB
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: 1
|
||||||
|
|
||||||
|
platform:
|
||||||
|
attributes:
|
||||||
|
restartPolicy:
|
||||||
|
name: always
|
||||||
|
maximumRetryCount: 3
|
||||||
|
mountMode: volume
|
||||||
@ -0,0 +1,128 @@
|
|||||||
|
metadata:
|
||||||
|
name: onnx-yolov7
|
||||||
|
namespace: cvat
|
||||||
|
annotations:
|
||||||
|
name: YOLO v7
|
||||||
|
type: detector
|
||||||
|
framework: onnx
|
||||||
|
spec: |
|
||||||
|
[
|
||||||
|
{ "id": 0, "name": "person" },
|
||||||
|
{ "id": 1, "name": "bicycle" },
|
||||||
|
{ "id": 2, "name": "car" },
|
||||||
|
{ "id": 3, "name": "motorbike" },
|
||||||
|
{ "id": 4, "name": "aeroplane" },
|
||||||
|
{ "id": 5, "name": "bus" },
|
||||||
|
{ "id": 6, "name": "train" },
|
||||||
|
{ "id": 7, "name": "truck" },
|
||||||
|
{ "id": 8, "name": "boat" },
|
||||||
|
{ "id": 9, "name": "traffic light" },
|
||||||
|
{ "id": 10, "name": "fire hydrant" },
|
||||||
|
{ "id": 11, "name": "stop sign" },
|
||||||
|
{ "id": 12, "name": "parking meter" },
|
||||||
|
{ "id": 13, "name": "bench" },
|
||||||
|
{ "id": 14, "name": "bird" },
|
||||||
|
{ "id": 15, "name": "cat" },
|
||||||
|
{ "id": 16, "name": "dog" },
|
||||||
|
{ "id": 17, "name": "horse" },
|
||||||
|
{ "id": 18, "name": "sheep" },
|
||||||
|
{ "id": 19, "name": "cow" },
|
||||||
|
{ "id": 20, "name": "elephant" },
|
||||||
|
{ "id": 21, "name": "bear" },
|
||||||
|
{ "id": 22, "name": "zebra" },
|
||||||
|
{ "id": 23, "name": "giraffe" },
|
||||||
|
{ "id": 24, "name": "backpack" },
|
||||||
|
{ "id": 25, "name": "umbrella" },
|
||||||
|
{ "id": 26, "name": "handbag" },
|
||||||
|
{ "id": 27, "name": "tie" },
|
||||||
|
{ "id": 28, "name": "suitcase" },
|
||||||
|
{ "id": 29, "name": "frisbee" },
|
||||||
|
{ "id": 30, "name": "skis" },
|
||||||
|
{ "id": 31, "name": "snowboard" },
|
||||||
|
{ "id": 32, "name": "sports ball" },
|
||||||
|
{ "id": 33, "name": "kite" },
|
||||||
|
{ "id": 34, "name": "baseball bat" },
|
||||||
|
{ "id": 35, "name": "baseball glove" },
|
||||||
|
{ "id": 36, "name": "skateboard" },
|
||||||
|
{ "id": 37, "name": "surfboard" },
|
||||||
|
{ "id": 38, "name": "tennis racket" },
|
||||||
|
{ "id": 39, "name": "bottle" },
|
||||||
|
{ "id": 40, "name": "wine glass" },
|
||||||
|
{ "id": 41, "name": "cup" },
|
||||||
|
{ "id": 42, "name": "fork" },
|
||||||
|
{ "id": 43, "name": "knife" },
|
||||||
|
{ "id": 44, "name": "spoon" },
|
||||||
|
{ "id": 45, "name": "bowl" },
|
||||||
|
{ "id": 46, "name": "banana" },
|
||||||
|
{ "id": 47, "name": "apple" },
|
||||||
|
{ "id": 48, "name": "sandwich" },
|
||||||
|
{ "id": 49, "name": "orange" },
|
||||||
|
{ "id": 50, "name": "broccoli" },
|
||||||
|
{ "id": 51, "name": "carrot" },
|
||||||
|
{ "id": 52, "name": "hot dog" },
|
||||||
|
{ "id": 53, "name": "pizza" },
|
||||||
|
{ "id": 54, "name": "donut" },
|
||||||
|
{ "id": 55, "name": "cake" },
|
||||||
|
{ "id": 56, "name": "chair" },
|
||||||
|
{ "id": 57, "name": "sofa" },
|
||||||
|
{ "id": 58, "name": "pottedplant" },
|
||||||
|
{ "id": 59, "name": "bed" },
|
||||||
|
{ "id": 60, "name": "diningtable" },
|
||||||
|
{ "id": 61, "name": "toilet" },
|
||||||
|
{ "id": 62, "name": "tvmonitor" },
|
||||||
|
{ "id": 63, "name": "laptop" },
|
||||||
|
{ "id": 64, "name": "mouse" },
|
||||||
|
{ "id": 65, "name": "remote" },
|
||||||
|
{ "id": 66, "name": "keyboard" },
|
||||||
|
{ "id": 67, "name": "cell phone" },
|
||||||
|
{ "id": 68, "name": "microwave" },
|
||||||
|
{ "id": 69, "name": "oven" },
|
||||||
|
{ "id": 70, "name": "toaster" },
|
||||||
|
{ "id": 71, "name": "sink" },
|
||||||
|
{ "id": 72, "name": "refrigerator" },
|
||||||
|
{ "id": 73, "name": "book" },
|
||||||
|
{ "id": 74, "name": "clock" },
|
||||||
|
{ "id": 75, "name": "vase" },
|
||||||
|
{ "id": 76, "name": "scissors" },
|
||||||
|
{ "id": 77, "name": "teddy bear" },
|
||||||
|
{ "id": 78, "name": "hair drier" },
|
||||||
|
{ "id": 79, "name": "toothbrush" }
|
||||||
|
]
|
||||||
|
spec:
|
||||||
|
description: YOLO v7 via onnx
|
||||||
|
runtime: 'python:3.8'
|
||||||
|
handler: main:handler
|
||||||
|
eventTimeout: 30s
|
||||||
|
build:
|
||||||
|
image: cvat/onnx-yolov7
|
||||||
|
baseImage: ultralytics/yolov5:latest-cpu
|
||||||
|
|
||||||
|
directives:
|
||||||
|
preCopy:
|
||||||
|
- kind: USER
|
||||||
|
value: root
|
||||||
|
# Install everything in one non-interactive apt call: `-y` keeps the image build
# from stopping on a confirmation prompt, and wget is needed by the later RUN
# directive that downloads the ONNX model.
- kind: RUN
  value: apt update && apt install --no-install-recommends -y libglib2.0-0 wget
|
||||||
|
- kind: RUN
|
||||||
|
value: pip install onnxruntime
|
||||||
|
- kind: WORKDIR
|
||||||
|
value: /opt/nuclio
|
||||||
|
- kind: RUN
|
||||||
|
value: wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-nms-640.onnx
|
||||||
|
- kind: RUN
|
||||||
|
value: ln -s /usr/bin/python3 /usr/bin/python
|
||||||
|
|
||||||
|
triggers:
|
||||||
|
myHttpTrigger:
|
||||||
|
maxWorkers: 2
|
||||||
|
kind: 'http'
|
||||||
|
workerAvailabilityTimeoutMilliseconds: 10000
|
||||||
|
attributes:
|
||||||
|
maxRequestBodySize: 33554432 # 32MB
|
||||||
|
|
||||||
|
platform:
|
||||||
|
attributes:
|
||||||
|
restartPolicy:
|
||||||
|
name: always
|
||||||
|
maximumRetryCount: 3
|
||||||
|
mountMode: volume
|
||||||
@ -0,0 +1,37 @@
|
|||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from model_handler import ModelHandler
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
def init_context(context):
    """Prepare the function context: load the label map and the model.

    The label list is read from the ``metadata.annotations.spec`` entry of
    ``/opt/nuclio/function.yaml`` (a JSON string of ``{"id", "name"}`` items)
    and turned into an ``id -> name`` dictionary. A ``ModelHandler`` built
    from that dictionary is stored on ``context.user_data.model``.
    """
    context.logger.info("Init context... 0%")

    # Load the function manifest that carries the label specification.
    with open("/opt/nuclio/function.yaml", 'rb') as config_stream:
        config = yaml.safe_load(config_stream)

    # The spec is a JSON document embedded in the YAML as a string.
    raw_spec = config['metadata']['annotations']['spec']
    id_to_name = {}
    for entry in json.loads(raw_spec):
        id_to_name[entry['id']] = entry['name']

    # Instantiate the model wrapper and keep it for later requests.
    context.user_data.model = ModelHandler(id_to_name)

    context.logger.info("Init context...100%")
|
||||||
|
|
||||||
|
|
||||||
|
def handler(context, event):
    """Run detection on one base64-encoded image and return the result as JSON.

    Reads ``event.body["image"]`` (base64 string) and the optional
    ``event.body["threshold"]`` (defaults to 0.5), runs the model stored on
    ``context.user_data.model`` and returns a 200 ``context.Response`` whose
    body is the JSON-serialised list produced by ``infer``.
    """
    context.logger.info("Run YoloV7 ONNX model")
    data = event.body
    buf = io.BytesIO(base64.b64decode(data["image"]))
    threshold = float(data.get("threshold", 0.5))
    # Normalise to 3-channel RGB. The model wrapper indexes the pixel array as
    # height x width x channels and reverses the channel axis, so grayscale or
    # palette images would raise and RGBA images would get their channels
    # mis-ordered if passed through in their native mode.
    image = Image.open(buf).convert("RGB")

    results = context.user_data.model.infer(image, threshold)

    return context.Response(
        body=json.dumps(results),
        headers={},
        content_type='application/json',
        status_code=200,
    )
|
||||||
@ -0,0 +1,119 @@
|
|||||||
|
# Copyright (C) 2023 CVAT.ai Corporation
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import onnxruntime as ort
|
||||||
|
|
||||||
|
|
||||||
|
class ModelHandler:
    """Wrap an ONNX detection model (exported with NMS) behind ``infer``.

    The handler owns an onnxruntime session, letterboxes incoming images to
    the network input size, and maps the raw detections back to the original
    image coordinates.
    """

    def __init__(self, labels, model_path="yolov7-nms-640.onnx"):
        """Load the network and remember the label map.

        Args:
            labels: mapping of integer class id to label name.
            model_path: path of the ONNX file to load. Defaults to the
                model file name this class originally hard-coded, so a custom
                model can be used without editing the class.
        """
        self.model = None
        self.load_network(model=model_path)
        self.labels = labels

    def load_network(self, model):
        """Create the onnxruntime session for ``model``.

        CUDA is requested (with CPU as fallback) only when onnxruntime reports
        ``'GPU'`` as its device; otherwise the CPU provider alone is used.

        Raises:
            Exception: if the session cannot be created; the original error
                is chained as the cause.
        """
        cuda = ort.get_device() == 'GPU'
        try:
            if cuda:
                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            else:
                providers = ['CPUExecutionProvider']
            so = ort.SessionOptions()
            # NOTE(review): 3 is presumably onnxruntime's "error" severity,
            # i.e. warnings and below are silenced - confirm against the docs.
            so.log_severity_level = 3

            self.model = ort.InferenceSession(model, providers=providers, sess_options=so)
            self.output_details = [i.name for i in self.model.get_outputs()]
            self.input_details = [i.name for i in self.model.get_inputs()]

            # The attribute name is misspelled in the original; it is kept
            # unchanged in case external code reads it.
            self.is_inititated = True
        except Exception as e:
            raise Exception(f"Cannot load model {model}: {e}") from e

    def letterbox(self, im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
        """Resize ``im`` keeping its aspect ratio and pad it to ``new_shape``.

        Args:
            im: image array whose first two dimensions are height and width.
            new_shape: target ``(height, width)``, or a single int for a square.
            color: constant border colour used for the padding.
            auto: when true, pad only up to the next multiple of ``stride``
                instead of the full ``new_shape``.
            scaleup: when false, the image is never enlarged (ratio <= 1).
            stride: modulus applied to the padding when ``auto`` is set.

        Returns:
            ``(padded_image, ratio, (dw, dh))`` where ``ratio`` is the scale
            factor applied and ``dw`` / ``dh`` are the per-side paddings
            (half of the total padding along width / height).
        """
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up
            r = min(r, 1.0)

        # Size after scaling, as (width, height), and the total padding left over.
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]

        if auto:  # minimum rectangle
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)

        # Split the padding evenly between the two sides.
        dw /= 2
        dh /= 2

        if shape[::-1] != new_unpad:  # resize only when the size changes
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +-0.1 nudges make an odd total padding round to n and n+1.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return im, r, (dw, dh)

    def _infer(self, inputs: np.ndarray):
        """Run the network on a BGR image and return raw detections.

        The image is converted to RGB, letterboxed, laid out as a
        1 x C x H x W float32 tensor scaled to [0, 1] and fed to the session.
        The first output is indexed as rows of detections where columns 1-4
        are the box, column 5 the class id and the last column the score.
        Boxes are mapped back to the coordinates of ``inputs`` by removing
        the letterbox padding and dividing by the scale ratio.

        Returns:
            ``[boxes, labels, scores]`` (boxes rounded to int32), or ``None``
            if anything fails - failures are reported on stdout and the
            caller treats ``None`` as "no detections".
        """
        try:
            img = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB)
            image = img.copy()
            image, ratio, dwdh = self.letterbox(image, auto=False)
            image = image.transpose((2, 0, 1))  # HWC -> CHW
            image = np.expand_dims(image, 0)  # add the batch dimension
            image = np.ascontiguousarray(image)

            im = image.astype(np.float32)
            im /= 255

            inp = {self.input_details[0]: im}
            # ONNX inference
            detections = self.model.run(self.output_details, inp)[0]

            boxes = detections[:, 1:5]
            labels = detections[:, 5]
            scores = detections[:, -1]

            # dwdh * 2 repeats the tuple -> (dw, dh, dw, dh), one offset per
            # box coordinate.
            boxes -= np.array(dwdh * 2)
            boxes /= ratio
            boxes = boxes.round().astype(np.int32)
            return [boxes, labels, scores]

        except Exception as e:
            # Best-effort behaviour kept from the original implementation:
            # report the failure and let the caller return an empty result.
            print(f"YOLOv7 inference failed: {e!r}")
            return None

    def _build_results(self, boxes, labels, scores, threshold, width, height):
        """Turn raw detections into the list of rectangle annotations.

        Detections with ``score < threshold`` are dropped. Box corners are
        clamped to ``[0, width]`` / ``[0, height]``. Class ids are cast to
        ``int`` before the label lookup so the dictionary is queried with the
        same key type it was built with; unknown ids map to ``"unknown"``.
        """
        results = []
        for label, score, box in zip(labels, scores, boxes):
            if score < threshold:
                continue
            xtl = max(int(box[0]), 0)
            ytl = max(int(box[1]), 0)
            xbr = min(int(box[2]), width)
            ybr = min(int(box[3]), height)

            results.append({
                "confidence": str(score),
                "label": self.labels.get(int(label), "unknown"),
                "points": [xtl, ytl, xbr, ybr],
                "type": "rectangle",
            })
        return results

    def infer(self, image, threshold):
        """Detect objects in ``image`` and return rectangle annotations.

        Args:
            image: anything ``np.array`` turns into a height x width x channel
                array; the channel axis is reversed before inference, so the
                input is expected to be in RGB order.
            threshold: minimum score a detection needs to be reported.

        Returns:
            A list of dicts with ``confidence`` (string), ``label``,
            ``points`` (``[xtl, ytl, xbr, ybr]``) and ``type`` keys; empty
            when the network produced nothing or inference failed.
        """
        image = np.array(image)
        image = image[:, :, ::-1].copy()  # reverse the channel order (RGB -> BGR)
        h, w, _ = image.shape
        detections = self._infer(image)

        if not detections:
            return []

        boxes, labels, scores = detections
        return self._build_results(boxes, labels, scores, threshold, w, h)
|
||||||
Loading…
Reference in New Issue