diff --git a/Computer Vision/Object Detection Using YOLO/Readme.md b/Computer Vision/Object Detection Using YOLO/Readme.md
new file mode 100644
index 00000000..3d6e91e1
--- /dev/null
+++ b/Computer Vision/Object Detection Using YOLO/Readme.md
@@ -0,0 +1,36 @@
+# Object Detection using YOLO
+## Introduction
+Object detection involves identifying and localizing multiple objects within an image or video. This project uses the YOLO (You Only Look Once) algorithm, a widely used deep learning detector, to detect and classify objects in an image. YOLO is known for combining high speed with good accuracy, which makes it suitable for real-time object detection applications.
+
+## Algorithms Used
+**YOLO (You Only Look Once):**
+YOLO is a deep learning-based object detection algorithm that frames detection as a single regression problem, mapping image pixels directly to bounding box coordinates and class probabilities. It divides the image into a grid, and each grid cell predicts bounding boxes and class probabilities. Because the entire image is processed in one forward pass, YOLO is fast and efficient, making it well suited to real-time detection tasks.
+
+**Convolutional Neural Networks (CNNs):**
+YOLO relies on a CNN backbone to extract features from images and classify objects. The network is pre-trained on a large dataset (COCO) and can be fine-tuned on new datasets for specific tasks.
+
+**Non-Maximum Suppression (NMS):**
+NMS filters overlapping bounding boxes. It keeps the highest-confidence prediction for each object and suppresses weaker, overlapping boxes, reducing redundancy and improving detection quality. A minimal illustrative sketch of the algorithm is included in the appendix at the end of this README.
+
+## Performance Analysis
+
+**Accuracy:** Detection accuracy depends on factors such as image quality, resolution, and object size. The script discards predictions with confidence below 0.5, so only reasonably confident detections are kept and displayed.
+
+**Speed:** YOLO is known for its real-time detection capabilities. With YOLOv3, inference on a single image generally takes under a second on typical hardware, making the model usable for video streams or high-throughput image detection tasks.
+
+**Challenges:** False positives or missed detections may occur when objects are small or partially occluded. Results also vary with the confidence threshold and the non-maximum suppression settings.
+
+## Result
+The script processes an input image and detects the objects within it. Detected objects are highlighted with bounding boxes, and class names are displayed together with confidence scores. The result is visualized with matplotlib, which shows the image with the detections that survive non-maximum suppression.
+
+## Future Work
+
+**Custom Dataset Fine-tuning:**
+Fine-tuning the YOLO model on a custom dataset for a specific use case (e.g., bird species detection) could improve accuracy in specialized domains.
+
+**Integration with Real-Time Systems:**
+Deploying the model on live video streams would make it useful for applications such as surveillance, traffic monitoring, or wildlife observation.
+
+**Improved Data Augmentation:**
+Augmentation techniques such as rotation, flipping, and cropping could be applied to the training set to increase the model's robustness to variations in lighting, viewing angle, and object position.
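+
+## Appendix: How NMS Works (Illustrative Sketch)
+To make the NMS step concrete, the snippet below is a minimal, illustrative pure-Python implementation; the project itself uses OpenCV's built-in `cv2.dnn.NMSBoxes`. Boxes are assumed to be in the same `[x, y, w, h]` format that `main.py` collects.
+
+```python
+def iou(box_a, box_b):
+    # Convert [x, y, w, h] boxes to corner coordinates.
+    ax1, ay1 = box_a[0], box_a[1]
+    ax2, ay2 = box_a[0] + box_a[2], box_a[1] + box_a[3]
+    bx1, by1 = box_b[0], box_b[1]
+    bx2, by2 = box_b[0] + box_b[2], box_b[1] + box_b[3]
+    # Area of the intersection rectangle (zero if the boxes do not overlap).
+    inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
+    inter_h = max(0, min(ay2, by2) - max(ay1, by1))
+    inter = inter_w * inter_h
+    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
+    return inter / union if union > 0 else 0.0
+
+
+def nms(boxes, scores, iou_threshold=0.4):
+    # Repeatedly keep the highest-scoring box, then drop every remaining
+    # box that overlaps it by more than the IoU threshold.
+    order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
+    keep = []
+    while order:
+        best = order.pop(0)
+        keep.append(best)
+        order = [i for i in order if iou(boxes[best], boxes[i]) < iou_threshold]
+    return keep  # indices of the boxes to keep
+```
+
+`cv2.dnn.NMSBoxes` additionally filters out boxes below a score threshold before suppressing, which is why `main.py` passes both a score threshold (0.5) and an IoU threshold (0.4).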
\ No newline at end of file
diff --git a/Computer Vision/Object Detection Using YOLO/Result/Output.png b/Computer Vision/Object Detection Using YOLO/Result/Output.png
new file mode 100644
index 00000000..4138d7be
Binary files /dev/null and b/Computer Vision/Object Detection Using YOLO/Result/Output.png differ
diff --git a/Computer Vision/Object Detection Using YOLO/Result/input.png b/Computer Vision/Object Detection Using YOLO/Result/input.png
new file mode 100644
index 00000000..e6608f13
Binary files /dev/null and b/Computer Vision/Object Detection Using YOLO/Result/input.png differ
diff --git a/Computer Vision/Object Detection Using YOLO/main.py b/Computer Vision/Object Detection Using YOLO/main.py
new file mode 100644
index 00000000..215c44e7
--- /dev/null
+++ b/Computer Vision/Object Detection Using YOLO/main.py
@@ -0,0 +1,84 @@
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Load the YOLO model.
+# The yolov3.weights and yolov3.cfg files can be downloaded from the official YOLO website.
+net = cv2.dnn.readNet("C:\\Users\\billa\\OneDrive\\Desktop\\Programs\\ML_DL\\yolov3.weights",
+                      "C:\\Users\\billa\\OneDrive\\Desktop\\Programs\\ML_DL\\yolov3.cfg")
+
+# Load the input image
+img = cv2.imread("C:\\Users\\billa\\OneDrive\\Desktop\\Programs\\ML_DL\\YO_img.jpg")
+
+# Prepare the image for the network: scale pixel values by 1/255 (~0.00392),
+# resize to 416x416, and swap BGR to RGB (swapRB=True)
+blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
+net.setInput(blob)
+
+# Get the layer names from the YOLO model.
+# getUnconnectedOutLayers() returns 1-based indices, hence the i - 1.
+layer_names = net.getLayerNames()
+output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
+
+# Perform the forward pass to get predictions from the output layers
+layer_outputs = net.forward(output_layers)
+
+# Load COCO class labels
+with open("C:\\Users\\billa\\OneDrive\\Desktop\\Programs\\ML_DL\\coco (1).names", "r") as f:
+    classes = [line.strip() for line in f.readlines()]
+
+# Get image height and width
+height, width, channels = img.shape
+
+# Lists to hold detected class IDs, confidence scores, and bounding boxes
+class_ids = []
+confidences = []
+boxes = []
+
+# Process each output layer
+for output in layer_outputs:
+    for detection in output:
+        # Each detection is [center_x, center_y, w, h, objectness, class scores...];
+        # take the class with the highest score as the prediction.
+        scores = detection[5:]
+        class_id = np.argmax(scores)
+        confidence = scores[class_id]
+
+        # Filter out weak predictions below the confidence threshold
+        if confidence > 0.5:
+            # Scale the normalized box coordinates back to the image size
+            center_x = int(detection[0] * width)
+            center_y = int(detection[1] * height)
+            w = int(detection[2] * width)
+            h = int(detection[3] * height)
+
+            # Calculate the top-left corner of the bounding box
+            x = int(center_x - w / 2)
+            y = int(center_y - h / 2)
+
+            # Record the bounding box, confidence score, and class ID
+            boxes.append([x, y, w, h])
+            confidences.append(float(confidence))
+            class_ids.append(class_id)
+
+# Apply Non-Maximum Suppression (score threshold 0.5, IoU threshold 0.4)
+# to remove overlapping bounding boxes
+indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
+
+# Draw the bounding boxes and labels on the image
+if len(indices) > 0:
+    for i in indices.flatten():  # Flatten in case indices is a nested array
+        x, y, w, h = boxes[i]
+        label = str(classes[class_ids[i]])
+        confidence = confidences[i]
+
+        # Draw a bounding box rectangle and put the label text
+        color = (0, 255, 0)  # Green color for the box
+        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
+        cv2.putText(img, f"{label} {confidence:.2f}", (x, y - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+# Show the final output image.
+# Convert from BGR (OpenCV format) to RGB so matplotlib displays it correctly.
+img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+# Display the image using matplotlib
+plt.imshow(img_rgb)
+plt.axis('off')  # Hide axis
+plt.show()
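+
+# Optional: also save the annotated image to disk. This is an illustrative
+# extra step, not part of the original script; the filename is assumed to
+# match the Output.png added under Result/ in this change.
+cv2.imwrite("Output.png", img)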