How to align with ultralytics yolov5¶
[1]:
import os
import cv2
import torch
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
[2]:
from yolort.models.yolo import YOLO
from yolort.utils import cv2_imshow, get_image_from_url, read_image_to_tensor
from yolort.utils.image_utils import plot_one_box, color_list
from yolort.v5 import load_yolov5_model, letterbox, non_max_suppression, scale_coords, attempt_download
Prepare image and model weights to test¶
[3]:
img_source = "https://huggingface.co/spaces/zhiqwang/assets/resolve/main/bus.jpg"
# img_source = "https://huggingface.co/spaces/zhiqwang/assets/resolve/main/zidane.jpg"
img_raw = get_image_from_url(img_source)
# yolov5n6.pt is downloaded from 'https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5n6.pt'
model_path = 'yolov5n6.pt'
checkpoint_path = attempt_download(model_path)
[4]:
img_size = 640
stride = 64
score_thresh = 0.35
iou = 0.45
[5]:
# Preprocess
image = letterbox(img_raw, new_shape=(img_size, img_size), stride=stride)[0]
image = read_image_to_tensor(image)
image = image.to(device)
image = image[None]
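As an optional sanity check: letterbox keeps the aspect ratio and pads the image to a multiple of stride, and read_image_to_tensor converts it to a CHW float tensor, so the batched input should look like this
# Expect a batched, 3-channel float tensor whose spatial size is a multiple of `stride`
print(image.shape, image.dtype)  # e.g. torch.Size([1, 3, 640, 512]) torch.float32
assert image.shape[-1] % stride == 0 and image.shape[-2] % stride == 0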
Load the model as ultralytics does and run inference¶
[6]:
model_yolov5 = load_yolov5_model(checkpoint_path, autoshape=False, verbose=True)
model_yolov5 = model_yolov5.to(device)
model_yolov5.conf = score_thresh # confidence threshold (0-1)
model_yolov5.iou = iou # NMS IoU threshold (0-1)
model_yolov5 = model_yolov5.eval()
from n params module arguments
0 -1 1 1760 yolort.v5.models.common.Conv [3, 16, 6, 2, 2]
1 -1 1 4672 yolort.v5.models.common.Conv [16, 32, 3, 2]
2 -1 1 4800 yolort.v5.models.common.C3 [32, 32, 1]
3 -1 1 18560 yolort.v5.models.common.Conv [32, 64, 3, 2]
4 -1 2 29184 yolort.v5.models.common.C3 [64, 64, 2]
5 -1 1 73984 yolort.v5.models.common.Conv [64, 128, 3, 2]
6 -1 3 156928 yolort.v5.models.common.C3 [128, 128, 3]
7 -1 1 221568 yolort.v5.models.common.Conv [128, 192, 3, 2]
8 -1 1 167040 yolort.v5.models.common.C3 [192, 192, 1]
9 -1 1 442880 yolort.v5.models.common.Conv [192, 256, 3, 2]
10 -1 1 296448 yolort.v5.models.common.C3 [256, 256, 1]
11 -1 1 164608 yolort.v5.models.common.SPPF [256, 256, 5]
12 -1 1 49536 yolort.v5.models.common.Conv [256, 192, 1, 1]
13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
14 [-1, 8] 1 0 yolort.v5.models.common.Concat [1]
15 -1 1 203904 yolort.v5.models.common.C3 [384, 192, 1, False]
16 -1 1 24832 yolort.v5.models.common.Conv [192, 128, 1, 1]
17 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
18 [-1, 6] 1 0 yolort.v5.models.common.Concat [1]
19 -1 1 90880 yolort.v5.models.common.C3 [256, 128, 1, False]
20 -1 1 8320 yolort.v5.models.common.Conv [128, 64, 1, 1]
21 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
22 [-1, 4] 1 0 yolort.v5.models.common.Concat [1]
23 -1 1 22912 yolort.v5.models.common.C3 [128, 64, 1, False]
24 -1 1 36992 yolort.v5.models.common.Conv [64, 64, 3, 2]
25 [-1, 20] 1 0 yolort.v5.models.common.Concat [1]
26 -1 1 74496 yolort.v5.models.common.C3 [128, 128, 1, False]
27 -1 1 147712 yolort.v5.models.common.Conv [128, 128, 3, 2]
28 [-1, 16] 1 0 yolort.v5.models.common.Concat [1]
29 -1 1 179328 yolort.v5.models.common.C3 [256, 192, 1, False]
30 -1 1 332160 yolort.v5.models.common.Conv [192, 192, 3, 2]
31 [-1, 12] 1 0 yolort.v5.models.common.Concat [1]
32 -1 1 329216 yolort.v5.models.common.C3 [384, 256, 1, False]
33 [23, 26, 29, 32] 1 164220 yolort.v5.models.yolo.Detect [80, [[19, 27, 44, 40, 38, 94], [96, 68, 86, 152, 180, 137], [140, 301, 303, 264, 238, 542], [436, 615, 739, 380, 925, 792]], [64, 128, 192, 256]]
/opt/conda/lib/python3.8/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Model Summary: 355 layers, 3246940 parameters, 3246940 gradients, 4.6 GFLOPs
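As an optional check, the loaded model exposes its detection strides (assuming the stride attribute from upstream YOLOv5 is present); the maximum should match the stride = 64 used for letterboxing above
# The P6 variant detects on strides 8/16/32/64, hence stride = 64 in the preprocessing
print(int(model_yolov5.stride.max()))  # expected: 64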
[7]:
with torch.no_grad():
    ultralytics_dets = model_yolov5(image)[0]
    ultralytics_dets = non_max_suppression(ultralytics_dets, score_thresh, iou, agnostic=False)[0]
[8]:
ultralytics_dets
[8]:
tensor([[ 35.26947, 133.40977, 496.86469, 439.13141, 0.88235, 5.00000],
[ 45.87330, 229.64430, 159.15872, 531.91492, 0.84495, 0.00000],
[145.77780, 228.77318, 220.91098, 514.98694, 0.72589, 0.00000],
[417.83066, 221.98871, 495.86893, 518.30176, 0.70359, 0.00000]], device='cuda:0')
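Each row of the ultralytics output follows the [x1, y1, x2, y2, confidence, class] layout; splitting it up makes the comparison with yolort below easier to read (the variable names are only for illustration)
# Split the (N, 6) NMS output into the same fields that yolort returns
boxes_ultralytics = ultralytics_dets[:, :4]
scores_ultralytics = ultralytics_dets[:, 4]
labels_ultralytics = ultralytics_dets[:, 5].to(dtype=torch.int64)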
Update the model weights from ultralytics to yolort and run inference¶
[9]:
model_yolort = YOLO.load_from_yolov5(
    checkpoint_path,
    score_thresh=score_thresh,
    nms_thresh=iou,
    version="r6.0",
)
model_yolort = model_yolort.eval()
model_yolort = model_yolort.to(device)
from n params module arguments
0 -1 1 1760 yolort.v5.models.common.Conv [3, 16, 6, 2, 2]
1 -1 1 4672 yolort.v5.models.common.Conv [16, 32, 3, 2]
2 -1 1 4800 yolort.v5.models.common.C3 [32, 32, 1]
3 -1 1 18560 yolort.v5.models.common.Conv [32, 64, 3, 2]
4 -1 2 29184 yolort.v5.models.common.C3 [64, 64, 2]
5 -1 1 73984 yolort.v5.models.common.Conv [64, 128, 3, 2]
6 -1 3 156928 yolort.v5.models.common.C3 [128, 128, 3]
7 -1 1 221568 yolort.v5.models.common.Conv [128, 192, 3, 2]
8 -1 1 167040 yolort.v5.models.common.C3 [192, 192, 1]
9 -1 1 442880 yolort.v5.models.common.Conv [192, 256, 3, 2]
10 -1 1 296448 yolort.v5.models.common.C3 [256, 256, 1]
11 -1 1 164608 yolort.v5.models.common.SPPF [256, 256, 5]
12 -1 1 49536 yolort.v5.models.common.Conv [256, 192, 1, 1]
13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
14 [-1, 8] 1 0 yolort.v5.models.common.Concat [1]
15 -1 1 203904 yolort.v5.models.common.C3 [384, 192, 1, False]
16 -1 1 24832 yolort.v5.models.common.Conv [192, 128, 1, 1]
17 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
18 [-1, 6] 1 0 yolort.v5.models.common.Concat [1]
19 -1 1 90880 yolort.v5.models.common.C3 [256, 128, 1, False]
20 -1 1 8320 yolort.v5.models.common.Conv [128, 64, 1, 1]
21 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
22 [-1, 4] 1 0 yolort.v5.models.common.Concat [1]
23 -1 1 22912 yolort.v5.models.common.C3 [128, 64, 1, False]
24 -1 1 36992 yolort.v5.models.common.Conv [64, 64, 3, 2]
25 [-1, 20] 1 0 yolort.v5.models.common.Concat [1]
26 -1 1 74496 yolort.v5.models.common.C3 [128, 128, 1, False]
27 -1 1 147712 yolort.v5.models.common.Conv [128, 128, 3, 2]
28 [-1, 16] 1 0 yolort.v5.models.common.Concat [1]
29 -1 1 179328 yolort.v5.models.common.C3 [256, 192, 1, False]
30 -1 1 332160 yolort.v5.models.common.Conv [192, 192, 3, 2]
31 [-1, 12] 1 0 yolort.v5.models.common.Concat [1]
32 -1 1 329216 yolort.v5.models.common.C3 [384, 256, 1, False]
33 [23, 26, 29, 32] 1 164220 yolort.v5.models.yolo.Detect [80, [[19, 27, 44, 40, 38, 94], [96, 68, 86, 152, 180, 137], [140, 301, 303, 264, 238, 542], [436, 615, 739, 380, 925, 792]], [64, 128, 192, 256]]
Model Summary: 355 layers, 3246940 parameters, 3246940 gradients, 4.6 GFLOPs
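YOLO.load_from_yolov5 returns an ordinary torch.nn.Module, so the converted weights can be saved as a plain state dict and reloaded later without repeating the conversion (the file name is only an example)
# Persist the converted yolort weights as a regular PyTorch state dict
torch.save(model_yolort.state_dict(), 'yolov5n6_yolort.pt')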
[10]:
with torch.no_grad():
    yolort_dets = model_yolort(image)
[11]:
print(f"Detection boxes with yolort:\n{yolort_dets[0]['boxes']}")
Detection boxes with yolort:
tensor([[ 35.26947, 133.40977, 496.86469, 439.13141],
[ 45.87330, 229.64430, 159.15872, 531.91492],
[145.77780, 228.77318, 220.91098, 514.98694],
[417.83066, 221.98871, 495.86893, 518.30176]], device='cuda:0')
[12]:
print(f"Detection scores with yolort:\n{yolort_dets[0]['scores']}")
Detection scores with yolort:
tensor([0.88235, 0.84495, 0.72589, 0.70359], device='cuda:0')
[13]:
print(f"Detection labels with yolort:\n{yolort_dets[0]['labels']}")
Detection labels with yolort:
tensor([5, 0, 0, 0], device='cuda:0')
Verify the detection results between yolort and ultralytics¶
[14]:
# Testing boxes
torch.testing.assert_allclose(yolort_dets[0]['boxes'], ultralytics_dets[:, :4])
# Testing scores
torch.testing.assert_allclose(yolort_dets[0]['scores'], ultralytics_dets[:, 4])
# Testing labels
torch.testing.assert_allclose(yolort_dets[0]['labels'], ultralytics_dets[:, 5].to(dtype=torch.int64))
print("Exported model has been tested, and the result looks good!")
Exported model has been tested, and the result looks good!
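On recent PyTorch releases torch.testing.assert_allclose is deprecated in favour of torch.testing.assert_close; an equivalent check for the boxes would be (the tolerances here are illustrative)
# Both tensors are float32 on the same device, so assert_close can compare them directly
torch.testing.assert_close(yolort_dets[0]['boxes'], ultralytics_dets[:, :4], rtol=1e-5, atol=1e-5)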
Detection output visualisation¶
[15]:
# Get label names
import requests
# label_path = "https://raw.githubusercontent.com/zhiqwang/yolov5-rt-stack/main/notebooks/assets/coco.names"
label_path = "https://huggingface.co/spaces/zhiqwang/assets/resolve/main/coco.names"
response = requests.get(label_path)
names = response.text
LABELS = []
for label in names.strip().split('\n'):
    LABELS.append(label)
COLORS = color_list()
The labels can also be obtained by
from yolort.utils.image_utils import load_names
labels = load_names('./assets/coco.names')
Note that scale_coords is the trick that rescales the boxes back to the original image size correctly
[16]:
boxes = scale_coords(image.shape[2:], yolort_dets[0]['boxes'], img_raw.shape[:-1])
labels = yolort_dets[0]['labels']
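For reference, scale_coords undoes the letterbox transform: it subtracts the padding, divides by the resize gain and clips the boxes to the original image bounds. A minimal sketch of the same mapping (without the clipping, and assuming the default letterbox behaviour used above)
# Recompute the letterbox gain and padding, then map the boxes back manually
gain = min(image.shape[2] / img_raw.shape[0], image.shape[3] / img_raw.shape[1])
pad_w = (image.shape[3] - img_raw.shape[1] * gain) / 2
pad_h = (image.shape[2] - img_raw.shape[0] * gain) / 2
boxes_manual = yolort_dets[0]['boxes'].clone()
boxes_manual[:, [0, 2]] = (boxes_manual[:, [0, 2]] - pad_w) / gain
boxes_manual[:, [1, 3]] = (boxes_manual[:, [1, 3]] - pad_h) / gain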
[17]:
for box, label in zip(boxes.tolist(), labels.tolist()):
    img_raw = plot_one_box(box, img_raw, color=COLORS[label % len(COLORS)], label=LABELS[label])
cv2_imshow(img_raw, imshow_scale=0.5)
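The annotated image can also be written to disk instead of (or in addition to) being shown inline; the file name below is just an example
# Save the image with the drawn boxes (the array is assumed to be in BGR order, as OpenCV expects)
cv2.imwrite('bus_prediction.jpg', img_raw)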
View this document as a notebook: https://github.com/zhiqwang/yolov5-rt-stack/blob/main/notebooks/how-to-align-with-ultralytics-yolov5.ipynb