I am a newbie so I don't know what to do. I am expecting it to open a new tab or at least save the output in a video file to show the objects being detected.
I got successful in doing it on pycharm. since I do not a fast GPU, I am doing it on Kaggle.
help a newbie out here
Code:
from ultralytics import YOLO
import cv2
import cvzone
import math
from IPython.display import HTML
model = YOLO('yolov8n.pt')
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
"dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
"handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
"carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
"diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush"]
# Open the video file
cap = cv2.VideoCapture('/kaggle/input/demo-videos/car2.mp4')
# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video_path = '/kaggle/working/output.avi'
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
while True:
success, img = cap.read()
if not success:
break
# Perform object detection
results = model(img)
# Print the results to understand its structure
print(results)
# Draw bounding boxes and annotations
for pred in results:
for det in pred:
x1, y1, x2, y2 = map(int, det[:4])
conf = det[4]
cls = int(det[5])
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(img, f'{classNames[cls]} {conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Write the annotated frame to the output video
out.write(img)
# Release the video capture and writer objects
cap.release()
out.release()
# Display the video in a separate output cell
output_video_html = f'<video controls src="{output_video_path}" width="{frame_width}" height="{frame_height}" type="video/avi"></video>'
display(HTML(output_video_html))
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[25], line 47
45 for pred in results:
46 for det in pred:
---> 47 x1, y1, x2, y2 = map(int, det[:4])
48 conf = det[4]
49 cls = int(det[5])
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'Results'
I am trying to make a project of object detection on kaggle notebook using yolo. and i am facing this error. here is my code and my error