-
-
Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
import cv2 | |
import time | |
CONFIDENCE_THRESHOLD = 0.2 | |
NMS_THRESHOLD = 0.4 | |
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)] | |
class_names = [] | |
with open("classes.txt", "r") as f: | |
class_names = [cname.strip() for cname in f.readlines()] | |
vc = cv2.VideoCapture("demo.mp4") | |
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg") | |
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) | |
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16) | |
model = cv2.dnn_DetectionModel(net) | |
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True) | |
while cv2.waitKey(1) < 1: | |
(grabbed, frame) = vc.read() | |
if not grabbed: | |
exit() | |
start = time.time() | |
classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD) | |
end = time.time() | |
start_drawing = time.time() | |
for (classid, score, box) in zip(classes, scores, boxes): | |
color = COLORS[int(classid) % len(COLORS)] | |
label = "%s : %f" % (class_names[classid[0]], score) | |
cv2.rectangle(frame, box, color, 2) | |
cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) | |
end_drawing = time.time() | |
fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (1 / (end - start), (end_drawing - start_drawing) * 1000) | |
cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2) | |
cv2.imshow("detections", frame) |
#include <iostream> | |
#include <queue> | |
#include <iterator> | |
#include <sstream> | |
#include <fstream> | |
#include <iomanip> | |
#include <chrono> | |
#include <opencv2/core.hpp> | |
#include <opencv2/dnn.hpp> | |
#include <opencv2/dnn/all_layers.hpp> | |
#include <opencv2/imgproc.hpp> | |
#include <opencv2/highgui.hpp> | |
constexpr float CONFIDENCE_THRESHOLD = 0; | |
constexpr float NMS_THRESHOLD = 0.4; | |
constexpr int NUM_CLASSES = 80; | |
// colors for bounding boxes | |
const cv::Scalar colors[] = { | |
{0, 255, 255}, | |
{255, 255, 0}, | |
{0, 255, 0}, | |
{255, 0, 0} | |
}; | |
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]); | |
int main() | |
{ | |
std::vector<std::string> class_names; | |
{ | |
std::ifstream class_file("classes.txt"); | |
if (!class_file) | |
{ | |
std::cerr << "failed to open classes.txt\n"; | |
return 0; | |
} | |
std::string line; | |
while (std::getline(class_file, line)) | |
class_names.push_back(line); | |
} | |
cv::VideoCapture source("demo.mp4"); | |
auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights"); | |
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); | |
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); | |
// net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); | |
// net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); | |
auto output_names = net.getUnconnectedOutLayersNames(); | |
cv::Mat frame, blob; | |
std::vector<cv::Mat> detections; | |
while(cv::waitKey(1) < 1) | |
{ | |
source >> frame; | |
if (frame.empty()) | |
{ | |
cv::waitKey(); | |
break; | |
} | |
auto total_start = std::chrono::steady_clock::now(); | |
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F); | |
net.setInput(blob); | |
auto dnn_start = std::chrono::steady_clock::now(); | |
net.forward(detections, output_names); | |
auto dnn_end = std::chrono::steady_clock::now(); | |
std::vector<int> indices[NUM_CLASSES]; | |
std::vector<cv::Rect> boxes[NUM_CLASSES]; | |
std::vector<float> scores[NUM_CLASSES]; | |
for (auto& output : detections) | |
{ | |
const auto num_boxes = output.rows; | |
for (int i = 0; i < num_boxes; i++) | |
{ | |
auto x = output.at<float>(i, 0) * frame.cols; | |
auto y = output.at<float>(i, 1) * frame.rows; | |
auto width = output.at<float>(i, 2) * frame.cols; | |
auto height = output.at<float>(i, 3) * frame.rows; | |
cv::Rect rect(x - width/2, y - height/2, width, height); | |
for (int c = 0; c < NUM_CLASSES; c++) | |
{ | |
auto confidence = *output.ptr<float>(i, 5 + c); | |
if (confidence >= CONFIDENCE_THRESHOLD) | |
{ | |
boxes[c].push_back(rect); | |
scores[c].push_back(confidence); | |
} | |
} | |
} | |
} | |
for (int c = 0; c < NUM_CLASSES; c++) | |
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]); | |
for (int c= 0; c < NUM_CLASSES; c++) | |
{ | |
for (size_t i = 0; i < indices[c].size(); ++i) | |
{ | |
const auto color = colors[c % NUM_COLORS]; | |
auto idx = indices[c][i]; | |
const auto& rect = boxes[c][idx]; | |
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3); | |
std::ostringstream label_ss; | |
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx]; | |
auto label = label_ss.str(); | |
int baseline; | |
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED); | |
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0)); | |
} | |
} | |
auto total_end = std::chrono::steady_clock::now(); | |
float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count(); | |
float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count(); | |
std::ostringstream stats_ss; | |
stats_ss << std::fixed << std::setprecision(2); | |
stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps; | |
auto stats = stats_ss.str(); | |
int baseline; | |
auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED); | |
cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255)); | |
cv::namedWindow("output"); | |
cv::imshow("output", frame); | |
} | |
return 0; | |
} |
@YashasSamaga @vvmatorin @marvision-ai @Fetulhak
Explicitly setting the thresh value in the test cfg file worked for me! Now it's giving me exact detections!
Here's what I think is happening. Feel free to correct me but it seems like OpenCV has a default confidence value it checks against during the detection process. Almost like a minimum threshold, so anything lower than this value won't get detected. We can overwrite this value by adding thresh = 0.01 or any default value you would like. Add this value for all three yolo layers.
Ex:
[yolo]
thresh = 0.01
.
.
[yolo]
thresh = 0.01
.
.
[yolo]
thresh = 0.01
I'm curious to see if this works for others.
@dgp52 this seems very interesting if it works. I will check it for my dataset.
When I changed yolov3 to yolov4's cfg and weights, a cv::Exception error occurred during dnn::readNetFromDarknet. I think it may be because of the opencv version problem, because 4.2 was proposed at the end of last year, but yolov4 was proposed in April this year.
i got the same error before...
i just ignored the FromDarknet and just used dnn.readNet
starting from opencv 4.5.4 classid is no longer list, so code needs to be fixed
from
label = "%s : %f" % (class_names[classid[0]], score)
to
label = "%s : %f" % (class_names[classid], score)
@dgp52 very interesting! You didn't actually have to train the network with thresh=0.01
correct? Only set it in the .cfg
file for a trained model?
@marvision-ai Sorry for not getting back to you sooner. That is correct. Just needed to update the .cfg file.
Hi Yashas, I need to access GPU power when running YOLOv4. What CUDA and Cudnn version should I install?
Because I installed CUDA 11.5 and cudnn 8.3.2, but when I install OpenCV, it does not find cudnn, as a result, it does not run fast.
I haven't checked the latest versions of cuDNN but cuDNN 7.6.5 gave the best performance six months ago.
Hi @YashasSamaga , I wrote this code for YOLO V5 / OpenCV / DNN
C++
#include <fstream>
#include <opencv2/opencv.hpp>
std::vector<std::string> load_class_list()
{
std::vector<std::string> class_list;
std::ifstream ifs("config_files/classes.txt");
std::string line;
while (getline(ifs, line))
{
class_list.push_back(line);
}
return class_list;
}
void load_net(cv::dnn::Net &net, bool is_cuda)
{
auto result = cv::dnn::readNet("config_files/yolov5s.onnx");
if (is_cuda)
{
std::cout << "Attempty to use CUDA\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
}
else
{
std::cout << "Running on CPU\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
net = result;
}
const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
cv::Mat format_yolov5(const cv::Mat &source) {
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
cv::Mat blob;
auto input_image = format_yolov5(image);
cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
float x_factor = input_image.cols / INPUT_WIDTH;
float y_factor = input_image.rows / INPUT_HEIGHT;
float *data = (float *)outputs[0].data;
const int dimensions = 85;
const int rows = 25200;
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i) {
float confidence = data[4];
if (confidence >= CONFIDENCE_THRESHOLD) {
float * classes_scores = data + 5;
cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > SCORE_THRESHOLD) {
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
data += 85;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++) {
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
int main(int argc, char **argv)
{
std::vector<std::string> class_list = load_class_list();
cv::Mat frame;
cv::VideoCapture capture("sample.mp4");
if (!capture.isOpened())
{
std::cerr << "Error opening video file\n";
return -1;
}
bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;
cv::dnn::Net net;
load_net(net, is_cuda);
auto start = std::chrono::high_resolution_clock::now();
int frame_count = 0;
float fps = -1;
int total_frames = 0;
while (true)
{
capture.read(frame);
if (frame.empty())
{
std::cout << "End of stream\n";
break;
}
std::vector<Detection> output;
detect(frame, net, output, class_list);
frame_count++;
total_frames++;
int detections = output.size();
for (int i = 0; i < detections; ++i)
{
auto detection = output[i];
auto box = detection.box;
auto classId = detection.class_id;
const auto color = colors[classId % colors.size()];
cv::rectangle(frame, box, color, 3);
cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
}
if (frame_count >= 30)
{
auto end = std::chrono::high_resolution_clock::now();
fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
frame_count = 0;
start = std::chrono::high_resolution_clock::now();
}
if (fps > 0)
{
std::ostringstream fps_label;
fps_label << std::fixed << std::setprecision(2);
fps_label << "FPS: " << fps;
std::string fps_label_str = fps_label.str();
cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
}
cv::imshow("output", frame);
if (cv::waitKey(1) != -1)
{
capture.release();
std::cout << "finished by user\n";
break;
}
}
std::cout << "Total frames: " << total_frames << "\n";
return 0;
}
Python
import cv2
import time
import sys
import numpy as np
def build_model(is_cuda):
net = cv2.dnn.readNet("config_files/yolov5s.onnx")
if is_cuda:
print("Attempty to use CUDA")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
else:
print("Running on CPU")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
return net
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
CONFIDENCE_THRESHOLD = 0.4
def detect(image, net):
blob = cv2.dnn.blobFromImage(image, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
net.setInput(blob)
preds = net.forward()
return preds
def load_capture():
capture = cv2.VideoCapture("sample.mp4")
return capture
def load_classes():
class_list = []
with open("config_files/classes.txt", "r") as f:
class_list = [cname.strip() for cname in f.readlines()]
return class_list
class_list = load_classes()
def wrap_detection(input_image, output_data):
class_ids = []
confidences = []
boxes = []
rows = output_data.shape[0]
image_width, image_height, _ = input_image.shape
x_factor = image_width / INPUT_WIDTH
y_factor = image_height / INPUT_HEIGHT
for r in range(rows):
row = output_data[r]
confidence = row[4]
if confidence >= 0.4:
classes_scores = row[5:]
_, _, _, max_indx = cv2.minMaxLoc(classes_scores)
class_id = max_indx[1]
if (classes_scores[class_id] > .25):
confidences.append(confidence)
class_ids.append(class_id)
x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
left = int((x - 0.5 * w) * x_factor)
top = int((y - 0.5 * h) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
box = np.array([left, top, width, height])
boxes.append(box)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)
result_class_ids = []
result_confidences = []
result_boxes = []
for i in indexes:
result_confidences.append(confidences[i])
result_class_ids.append(class_ids[i])
result_boxes.append(boxes[i])
return result_class_ids, result_confidences, result_boxes
def format_yolov5(frame):
row, col, _ = frame.shape
_max = max(col, row)
result = np.zeros((_max, _max, 3), np.uint8)
result[0:row, 0:col] = frame
return result
colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]
is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda"
net = build_model(is_cuda)
capture = load_capture()
start = time.time_ns()
frame_count = 0
total_frames = 0
fps = -1
while True:
_, frame = capture.read()
if frame is None:
print("End of stream")
break
inputImage = format_yolov5(frame)
outs = detect(inputImage, net)
class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])
frame_count += 1
total_frames += 1
for (classid, confidence, box) in zip(class_ids, confidences, boxes):
color = colors[int(classid) % len(colors)]
cv2.rectangle(frame, box, color, 2)
cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,0,0))
if frame_count >= 30:
end = time.time_ns()
fps = 1000000000 * frame_count / (end - start)
frame_count = 0
start = time.time_ns()
if fps > 0:
fps_label = "FPS: %.2f" % fps
cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
cv2.imshow("output", frame)
if cv2.waitKey(1) > -1:
print("finished by user")
break
print("Total frames: " + str(total_frames))
The more update version and instructions to run code can be found here: https://github.com/doleron/yolov4-opencv-cpp-python
I want to use the output from the opencv object detections how can i do that?
@YashasSamaga
How to achieve batched inference using cv2.dnn_DetectionModel.detect for yolo
The code in python is fine for me, but its c++ version doesn't work for me.
Compile:
/usr/bin/c++ -lstdc++ -g -pg -O0 -pthread -lpthread -lstdc++fs -std=c++14 -std=c++17 -fPIC -std=gnu++11 -rdynamic -I/usr/local/include/tkDNN/ -I/usr/local/cuda/include -std=c++1z -std=gnu++1z CMakeFiles/predev2.dir/src/yolov4_opencv_dnn_cuda.cpp.o -o predev2 -L/usr/local/cuda/lib64 -Wl,-rpath,/usr/local/cuda/lib64:/usr/local/lib -ltkDNN -lcurl /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/lib/libopencv_gapi.so.4.6.0 /usr/local/lib/libopencv_stitching.so.4.6.0 /usr/local/lib/libopencv_alphamat.so.4.6.0 /usr/local/lib/libopencv_aruco.so.4.6.0 /usr/local/lib/libopencv_barcode.so.4.6.0 /usr/local/lib/libopencv_bgsegm.so.4.6.0 /usr/local/lib/libopencv_bioinspired.so.4.6.0 /usr/local/lib/libopencv_ccalib.so.4.6.0 /usr/local/lib/libopencv_cudabgsegm.so.4.6.0 /usr/local/lib/libopencv_cudafeatures2d.so.4.6.0 /usr/local/lib/libopencv_cudaobjdetect.so.4.6.0 /usr/local/lib/libopencv_cudastereo.so.4.6.0 /usr/local/lib/libopencv_dnn_objdetect.so.4.6.0 /usr/local/lib/libopencv_dnn_superres.so.4.6.0 /usr/local/lib/libopencv_dpm.so.4.6.0 /usr/local/lib/libopencv_face.so.4.6.0 /usr/local/lib/libopencv_freetype.so.4.6.0 /usr/local/lib/libopencv_fuzzy.so.4.6.0 /usr/local/lib/libopencv_hdf.so.4.6.0 /usr/local/lib/libopencv_hfs.so.4.6.0 /usr/local/lib/libopencv_img_hash.so.4.6.0 /usr/local/lib/libopencv_intensity_transform.so.4.6.0 /usr/local/lib/libopencv_line_descriptor.so.4.6.0 /usr/local/lib/libopencv_mcc.so.4.6.0 /usr/local/lib/libopencv_quality.so.4.6.0 /usr/local/lib/libopencv_rapid.so.4.6.0 /usr/local/lib/libopencv_reg.so.4.6.0 /usr/local/lib/libopencv_rgbd.so.4.6.0 /usr/local/lib/libopencv_saliency.so.4.6.0 /usr/local/lib/libopencv_stereo.so.4.6.0 /usr/local/lib/libopencv_structured_light.so.4.6.0 /usr/local/lib/libopencv_superres.so.4.6.0 /usr/local/lib/libopencv_surface_matching.so.4.6.0 /usr/local/lib/libopencv_tracking.so.4.6.0 /usr/local/lib/libopencv_videostab.so.4.6.0 /usr/local/lib/libopencv_wechat_qrcode.so.4.6.0 /usr/local/lib/libopencv_xfeatures2d.so.4.6.0 /usr/local/lib/libopencv_xobjdetect.so.4.6.0 /usr/local/lib/libopencv_xphoto.so.4.6.0 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so -lpthread /usr/local/lib/libopencv_shape.so.4.6.0 /usr/local/lib/libopencv_highgui.so.4.6.0 /usr/local/lib/libopencv_datasets.so.4.6.0 /usr/local/lib/libopencv_plot.so.4.6.0 /usr/local/lib/libopencv_text.so.4.6.0 /usr/local/lib/libopencv_ml.so.4.6.0 /usr/local/lib/libopencv_phase_unwrapping.so.4.6.0 /usr/local/lib/libopencv_cudacodec.so.4.6.0 /usr/local/lib/libopencv_videoio.so.4.6.0 /usr/local/lib/libopencv_cudaoptflow.so.4.6.0 /usr/local/lib/libopencv_cudalegacy.so.4.6.0 /usr/local/lib/libopencv_cudawarping.so.4.6.0 /usr/local/lib/libopencv_optflow.so.4.6.0 /usr/local/lib/libopencv_ximgproc.so.4.6.0 /usr/local/lib/libopencv_video.so.4.6.0 /usr/local/lib/libopencv_imgcodecs.so.4.6.0 /usr/local/lib/libopencv_objdetect.so.4.6.0 /usr/local/lib/libopencv_calib3d.so.4.6.0 /usr/local/lib/libopencv_dnn.so.4.6.0 /usr/local/lib/libopencv_features2d.so.4.6.0 /usr/local/lib/libopencv_flann.so.4.6.0 /usr/local/lib/libopencv_photo.so.4.6.0 /usr/local/lib/libopencv_cudaimgproc.so.4.6.0 /usr/local/lib/libopencv_cudafilters.so.4.6.0 /usr/local/lib/libopencv_imgproc.so.4.6.0 /usr/local/lib/libopencv_cudaarithm.so.4.6.0 /usr/local/lib/libopencv_core.so.4.6.0 /usr/local/lib/libopencv_cudev.so.4.6.0
Run:
$ gdb -batch -ex run -ex where -ex list -ex quit --args ./predev2
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 2 0, bytestream 213160
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 69 0, bytestream 669
[New Thread 0x7f97f79b50 (LWP 3049)]
[New Thread 0x7f97778b50 (LWP 3050)]
[New Thread 0x7f96f77b50 (LWP 3051)]
[New Thread 0x7f96776b50 (LWP 3052)]
[New Thread 0x7f95f75b50 (LWP 3053)]
[New Thread 0x7f95774b50 (LWP 3054)]
[h264 @ 0x5555792890] top block unavailable for requested intra mode -1
[h264 @ 0x5555792890] error while decoding MB 2 0, bytestream 213160
Thread 1 "predev2" received signal SIGSEGV, Segmentation fault.
0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () from /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#0 0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () at /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#1 0x0000007fb6f8edfc in cv::resize(cv::_InputArray const&, cv::_OutputArray const&, cv::Size_<int>, double, double, int) () at /usr/local/lib/libopencv_imgproc.so.406
#2 0x0000007fb72f5d04 in cv::dnn::dnn4_v20220524::blobFromImages(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int>, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#3 0x0000007fb72f6864 in cv::dnn::dnn4_v20220524::blobFromImage(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int> const&, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#4 0x0000005555560498 in main() () at /home/a/ai22/src/yolov4_opencv_dnn_cuda.cpp:66
16 constexpr float CONFIDENCE_THRESHOLD = 0;
17 constexpr float NMS_THRESHOLD = 0.4;
18 constexpr int NUM_CLASSES = 80;
19
20 // colors for bounding boxes
21 const cv::Scalar colors[] = {
22 {0, 255, 255},
23 {255, 255, 0},
24 {0, 255, 0},
25 {255, 0, 0}
A debugging session is active.
Inferior 1 [process 3043] will be killed.
Quit anyway? (y or n) [answered Y; input not from terminal]
The following line makes the issue. What's the problem? How to solve this problem?
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
Even I change the size to cv::Size(512, 512) to fit my yolov4.cfg, the issue consists.
dfvcd
@dgp52 @Fetulhak @zpmmehrdad @vvmatorin and others
OpenCV has internal tests most of which are shared across all backends. The CUDA backend always trails the OpenCV CPU backend in terms of correctness and features; it just mimics what OpenCV CPU backend does (this is like the reference implementation). Since it also happens in the CPU backend, the patch must first go there. I think an issue should be opened at OpenCV repository reporting this issue in the OCV CPU backend (and mention as a sidenote that the same behavior is observed in the CUDA backend).
Please also check AlexeyAB's comment: