-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo_webcam_yolo.py
112 lines (87 loc) · 3.89 KB
/
demo_webcam_yolo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from IPython import display
from sam2.build_sam import build_sam2_camera_predictor
from ultralytics import YOLO
# Detector used by get_bbox() to find the initial box prompt.
yolomodel = YOLO("./checkpoints/yolo/yolo11n.pt")

# Use bfloat16 autocast for the whole script: the context manager is entered
# here and intentionally never exited, so it stays active until the process ends.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only query GPU properties when CUDA was actually selected above;
# torch.cuda.get_device_properties(0) raises on a machine without CUDA,
# which would defeat the CPU fallback.
if device.type == "cuda" and torch.cuda.get_device_properties(0).major >= 8:
    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
def get_bbox(frame, model=None, conf=0.5, classes=(0,)):
    """Return the largest detected bounding box in *frame*, or ``None``.

    Args:
        frame: Image forwarded unchanged as ``source`` to the detector.
        model: Object exposing a YOLO-style ``track(...)`` method. When
            ``None`` (the default) the module-level ``yolomodel`` is used.
        conf: Minimum detection confidence forwarded to the detector.
        classes: Class ids to keep. The default ``(0,)`` keeps only class 0
            (presumably "person" for COCO-trained checkpoints -- confirm
            against the loaded weights).

    Returns:
        A tuple ``(x1, y1, x2, y2, conf, cls)`` for the box with the largest
        pixel area, where the coordinates and ``cls`` are ``int`` and ``conf``
        is whatever ``box.conf[0]`` yields. ``None`` when nothing with a
        positive area was detected. On equal areas the first box wins.
    """
    detector = yolomodel if model is None else model
    results = detector.track(
        source=frame,
        classes=list(classes),
        conf=conf,
        show=False,
        stream=True,
        verbose=False,
    )

    largest_box = None  # best candidate seen so far
    largest_area = 0  # area of that candidate; 0 rejects degenerate boxes

    # Walk every detection of every result and keep the biggest one.
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # corner coordinates
            area = (x2 - x1) * (y2 - y1)
            # Strict '>' keeps the earliest box when areas tie.
            if area > largest_area:
                largest_area = area
                # coordinates, confidence, class id
                largest_box = (x1, y1, x2, y2, box.conf[0], int(box.cls[0]))
    return largest_box
# SAM2 release name; both the checkpoint path and the config name are derived
# from it so the two always refer to the same release.
model_version = "sam2"

# Weights file on disk and the matching model config name.
sam2_checkpoint = f"./checkpoints/{model_version}/{model_version}_hiera_small.pt"
model_cfg = f"{model_version}/{model_version}_hiera_s.yaml"

# Camera predictor used by the capture loop for first-frame prompting and
# per-frame tracking.
predictor = build_sam2_camera_predictor(
    model_cfg,
    sam2_checkpoint,
)
# Global state for the demo loop.
if_init = False  # True once SAM2 has received its first frame and box prompt
largest_bbox = None  # (x1, y1, x2, y2, conf, cls) from get_bbox(), None until found
bbox_show = True  # draw the bounding box of the tracked mask
seg_show = False  # blend the segmentation mask over the frame

# Open the default camera.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # Check the grab result before touching the frame: on failure the
        # frame is not usable and cv2.flip / frame.shape would raise.
        if not ret:
            print("Failed to grab frame")
            break

        frame = cv2.flip(frame, 1)  # mirror horizontally
        height, width = frame.shape[:2]

        if not if_init:
            # Keep running the detector until it yields a box to prompt with.
            if largest_bbox is None:
                largest_bbox = get_bbox(frame)

            if largest_bbox is not None:
                x1, y1, x2, y2 = largest_bbox[:4]
                predictor.load_first_frame(frame)
                bbox = np.array([[x1, y1], [x2, y2]], dtype=np.float32)
                _, out_obj_ids, out_mask_logits = predictor.add_new_prompt(
                    frame_idx=0, obj_id=1, bbox=bbox
                )
                if_init = True
                # Draw the detection only after prompting so the green
                # rectangle is not part of the image SAM2 is seeded with.
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        else:
            out_obj_ids, out_mask_logits = predictor.track(frame)

            # Union of all per-object binary masks (logit > 0 means foreground).
            # This post-processing runs only here, where tracking outputs exist.
            all_mask = torch.zeros((height, width), dtype=torch.uint8, device=device)
            for i, _ in enumerate(out_obj_ids):
                out_mask = (out_mask_logits[i] > 0.0).byte()
                all_mask = torch.bitwise_or(all_mask, out_mask.squeeze(0))

            # Single device-to-host copy shared by both visualisations;
            # the array is uint8 with values 0 or 1.
            mask_np = all_mask.cpu().numpy()

            if bbox_show:
                coords = np.argwhere(mask_np)  # rows of (y, x) for foreground pixels
                if coords.size != 0:
                    y_min, x_min = coords.min(axis=0)
                    y_max, x_max = coords.max(axis=0)
                    # Plain ints: NumPy scalar types are not accepted as
                    # point coordinates by every OpenCV build.
                    cv2.rectangle(
                        frame,
                        (int(x_min), int(y_min)),
                        (int(x_max), int(y_max)),
                        (255, 0, 0),
                        2,
                    )

            # Apply the mask overlay.
            if seg_show:
                overlay = cv2.cvtColor(mask_np * 255, cv2.COLOR_GRAY2RGB)
                frame = cv2.addWeighted(frame, 1, overlay, 0.5, 0)

        # Show the annotated frame with OpenCV.
        cv2.imshow("YOLO Object Detection", frame)

        # Quit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()