import argparse
import cv2
from collections import defaultdict, deque
from ultralytics import YOLO
import supervision as sv
import numpy as np
SOURCE = np.array([[360, 63], [550, 63], [1282, 823], [-90, 823]])
TARGET_WIDTH = 5
TARGET_HEIGHT = 50
TARGET = np.array(
[
[0, 0],
[TARGET_WIDTH - 1, 0],
[TARGET_WIDTH - 1, TARGET_HEIGHT - 1],
[0, TARGET_HEIGHT - 1],
]
)
class ViewTransformer:
def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
source = source.astype(np.float32)
target = target.astype(np.float32)
self.m = cv2.getPerspectiveTransform(source, target)
def transform_points(self, points: np.ndarray) -> np.ndarray:
if points.size == 0:
return points
reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
transformed_points = cv2.perspectiveTransform(reshaped_points, self.m)
return transformed_points.reshape(-1, 2)
def parse_arguments() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Vehicle Speed Estimation using Inference and Supervision")
parser.add_argument(
"--camera_index",
default=0,
help="index of the camera to use",
type=int,
)
return parser.parse_args()
if __name__ == "__main__":
args = parse_arguments()
cap = cv2.VideoCapture(args.camera_index)
if not cap.isOpened():
print("Error: Could not open camera.")
exit()
model = YOLO("yolov8s.pt")
# Get the frame rate and resolution from the camera
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_info = sv.VideoInfo(fps=fps, width=width, height=height)
byte_track = sv.ByteTrack(frame_rate=fps)
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=1, color_lookup=sv.ColorLookup.TRACK)
label_annotator = sv.LabelAnnotator(
text_scale=0.5,
text_thickness=1,
color_lookup=sv.ColorLookup.TRACK,
)
polygon_zone = sv.PolygonZone(SOURCE, frame_resolution_wh=(width, height))
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)
coordinates = defaultdict(lambda: deque(maxlen=int(fps)))
while True:
ret, frame = cap.read()
if not ret:
break
# Resize the frame before processing
resized_frame = cv2.resize(frame, (1280, 720))
result = model(resized_frame)[0]
detections = sv.Detections.from_ultralytics(result)
detections = detections[polygon_zone.trigger(detections)]
detections = byte_track.update_with_detections(detections=detections)
points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
points = view_transformer.transform_points(points=points).astype(int)
zero_speed_detected = False
labels = []
for tracker_id, [_, y] in zip(detections.tracker_id, points):
coordinates[tracker_id].append(y)
if len(coordinates[tracker_id]) < fps / 2:
labels.append(f"#{tracker_id}")
else:
coordinate_start = coordinates[tracker_id][-1]
coordinate_end = coordinates[tracker_id][0]
distance = abs(coordinate_start - coordinate_end)
time = len(coordinates[tracker_id]) / fps
speed = distance / time * 3.6
if speed == 0:
zero_speed_detected = True
labels.append(f"#{tracker_id} {int(speed)} km/h")
annotated_frame = resized_frame.copy()
annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
if zero_speed_detected:
cv2.putText(annotated_frame, "Speed is zero", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow("annotated_frame", annotated_frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
import cv2
from collections import defaultdict, deque
from ultralytics import YOLO
import supervision as sv
import numpy as np
SOURCE = np.array([[360, 63], [550, 63], [1282, 823], [-90, 823]])
TARGET_WIDTH = 5
TARGET_HEIGHT = 50
TARGET = np.array(
[
[0, 0],
[TARGET_WIDTH - 1, 0],
[TARGET_WIDTH - 1, TARGET_HEIGHT - 1],
[0, TARGET_HEIGHT - 1],
]
)
class ViewTransformer:
def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
source = source.astype(np.float32)
target = target.astype(np.float32)
self.m = cv2.getPerspectiveTransform(source, target)
def transform_points(self, points: np.ndarray) -> np.ndarray:
if points.size == 0:
return points
reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
transformed_points = cv2.perspectiveTransform(reshaped_points, self.m)
return transformed_points.reshape(-1, 2)
def parse_arguments() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Vehicle Speed Estimation using Inference and Supervision")
parser.add_argument(
"--camera_index",
default=0,
help="index of the camera to use",
type=int,
)
return parser.parse_args()
if __name__ == "__main__":
args = parse_arguments()
cap = cv2.VideoCapture(args.camera_index)
if not cap.isOpened():
print("Error: Could not open camera.")
exit()
model = YOLO("yolov8s.pt")
# Get the frame rate and resolution from the camera
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_info = sv.VideoInfo(fps=fps, width=width, height=height)
byte_track = sv.ByteTrack(frame_rate=fps)
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=1, color_lookup=sv.ColorLookup.TRACK)
label_annotator = sv.LabelAnnotator(
text_scale=0.5,
text_thickness=1,
color_lookup=sv.ColorLookup.TRACK,
)
polygon_zone = sv.PolygonZone(SOURCE, frame_resolution_wh=(width, height))
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)
coordinates = defaultdict(lambda: deque(maxlen=int(fps)))
while True:
ret, frame = cap.read()
if not ret:
break
# Resize the frame before processing
resized_frame = cv2.resize(frame, (1280, 720))
result = model(resized_frame)[0]
detections = sv.Detections.from_ultralytics(result)
detections = detections[polygon_zone.trigger(detections)]
detections = byte_track.update_with_detections(detections=detections)
points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
points = view_transformer.transform_points(points=points).astype(int)
zero_speed_detected = False
labels = []
for tracker_id, [_, y] in zip(detections.tracker_id, points):
coordinates[tracker_id].append(y)
if len(coordinates[tracker_id]) < fps / 2:
labels.append(f"#{tracker_id}")
else:
coordinate_start = coordinates[tracker_id][-1]
coordinate_end = coordinates[tracker_id][0]
distance = abs(coordinate_start - coordinate_end)
time = len(coordinates[tracker_id]) / fps
speed = distance / time * 3.6
if speed == 0:
zero_speed_detected = True
labels.append(f"#{tracker_id} {int(speed)} km/h")
annotated_frame = resized_frame.copy()
annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
if zero_speed_detected:
cv2.putText(annotated_frame, "Speed is zero", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow("annotated_frame", annotated_frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
Statistics: Posted by Sumeet019 — Thu Aug 01, 2024 7:15 am