
Why does my OpenCV C++ code have worse accuracy than the Python-OpenCV version?


I found some Python code online that runs really well, but I wanted it to be faster, so I translated it to C++. The translation works: it draws rectangles in a separate window just like the Python version does. However, it misses some objects that the Python version detects. I ran both side by side, and only the Python version found them. I don't know what the problem is, since I wrote the same function calls in C++, and I'm hoping someone more experienced with both versions can help.

Here are both versions:

import numpy as np
import win32gui, win32ui, win32con
from PIL import Image
from time import sleep
import cv2 as cv
import os
import random


class WindowCapture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name):
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

    def get_screenshot(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        # np.fromstring is deprecated for binary data; np.frombuffer reads the same bytes
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img

    def generate_image_dataset(self):
        if not os.path.exists("images"):
            os.mkdir("images")
        while True:
            img = self.get_screenshot()
            im = Image.fromarray(img[..., [2, 1, 0]])
            im.save(f"./images/img_{len(os.listdir('images'))}.jpeg")
            sleep(1)

    def get_window_size(self):
        return (self.w, self.h)


class ImageProcessor:
    W = 0
    H = 0
    net = None
    ln = None
    classes = {}
    colors = []

    def __init__(self, img_size, cfg_file, weights_file):
        np.random.seed(42)
        self.net = cv.dnn.readNetFromDarknet(cfg_file, weights_file)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i - 1] for i in self.net.getUnconnectedOutLayers()]
        self.W = img_size[0]
        self.H = img_size[1]
        with open('Models/classes.txt', 'r') as file:
            lines = file.readlines()
        for i, line in enumerate(lines):
            self.classes[i] = line.strip()
        # If you plan to utilize more than six classes, please include additional colors in this list.
        self.colors = [
            (0, 0, 255),
            (0, 255, 0),
            (255, 0, 0),
            (255, 255, 0),
            (255, 0, 255),
            (0, 255, 255)
        ]

    def proccess_image(self, img):
        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)
        outputs = np.vstack(outputs)
        coordinates = self.get_coordinates(outputs, 0.1)
        self.draw_identified_objects(img, coordinates)
        return coordinates

    def get_coordinates(self, outputs, conf):
        boxes = []
        confidences = []
        classIDs = []
        print(outputs)
        for output in outputs:
            scores = output[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > conf:
                x, y, w, h = output[:4] * np.array([self.W, self.H, self.W, self.H])
                p0 = int(x - w//2), int(y - h//2)
                boxes.append([*p0, int(w), int(h)])
                confidences.append(float(confidence))
                classIDs.append(classID)
        indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf - 0.1)
        if len(indices) == 0:
            return []
        coordinates = []
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            coordinates.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': classIDs[i], 'class_name': self.classes[classIDs[i]]})
        return coordinates

    def draw_identified_objects(self, img, coordinates):
        for coordinate in coordinates:
            x = coordinate['x']
            y = coordinate['y']
            w = coordinate['w']
            h = coordinate['h']
            classID = coordinate['class']
            color = self.colors[classID]
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv.putText(img, self.classes[classID], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.imshow('window', img)


# Run this cell to initiate detections using the trained model.
window_name = "Trek"
cfg_file_name = "./Models/yolov4_train.cfg"
weights_file_name = "./Models/yolov4_train_final.weights"

wincap = WindowCapture(window_name)
improc = ImageProcessor(wincap.get_window_size(), cfg_file_name, weights_file_name)

while True:
    ss = wincap.get_screenshot()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
    coordinates = improc.proccess_image(ss)
    sleep(2)
    # for coordinate in coordinates:
    #     print(coordinate)
    # print()
    # sleep(0.2)

print('Finished.')

C++ code (my version):

#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <Z_Utils.h>

using namespace cv;
using namespace std;
using namespace dnn;

#define lol long long
#define ld double
#define CONF 0.1

int main()
{
    LPCWSTR window_title = L"Trek";
    HWND handle = FindWindow(NULL, window_title);

    std::string model = "./Models/yolov4_train_final.weights";
    std::string config = "./Models/yolov4_train.cfg";
    Net network = readNet(model, config, "Darknet");
    network.setPreferableBackend(DNN_BACKEND_OPENCV);
    network.setPreferableTarget(DNN_TARGET_OPENCL);

    //std::vector<cv::String> ln = network.getUnconnectedOutLayersNames();
    //std::vector<cv::String> ln;
    //auto layers = network.getLayerNames();
    //for (auto i : network.getUnconnectedOutLayers()) {
    //    ln.push_back(layers[i]);
    //}

    for (;;)
    {
        //if (!cap.isOpened()) {
        //    cout << "Video Capture Fail" << endl;
        //    break;
        //}
        Mat img = hwnd2mat(handle);
        cvtColor(img, img, COLOR_RGBA2RGB);

        static Mat blobFromImg;
        bool swapRB = true;
        blobFromImage(img, blobFromImg, 1/255.0, Size(416, 416), Scalar(), swapRB, false);
        //cout << blobFromImg.size() << endl;

        network.setInput(blobFromImg);
        Mat outMat;
        network.forward(outMat);

        int rowsNoOfDetection = outMat.rows;
        int colsCoordinatesPlusClassScore = outMat.cols;

        std::vector<cv::Rect> boxes;
        std::vector<float> confidences;
        for (int j = 0; j < rowsNoOfDetection; ++j)
        {
            Mat scores = outMat.row(j).colRange(5, colsCoordinatesPlusClassScore);
            Point PositionOfMax;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &PositionOfMax);
            if (confidence > CONF)
            {
                ld centerX = (outMat.at<float>(j, 0) * img.cols);
                ld centerY = (outMat.at<float>(j, 1) * img.rows);
                ld width = (outMat.at<float>(j, 2) * img.cols);
                ld height = (outMat.at<float>(j, 3) * img.rows);
                ld left = centerX - width / 2;
                ld top = centerY - height / 2;
                cv::Rect2d box_(left, top, width, height);
                boxes.push_back(box_);
                confidences.push_back(confidence);
                //putText(img, "tank", Point(left, top), FONT_HERSHEY_SIMPLEX, 1.4, Scalar(0, 0, 255), 2, false);
                //rectangle(img, Rect(left, top, width, height), Scalar(0, 0, 255), 2, 8, 0);
            }
        }

        std::vector<int> good;
        cv::dnn::NMSBoxes(boxes, confidences, CONF, 0, good);
        for (auto ind : good) {
            Rect r = boxes[ind];
            int left = r.x;
            int top = r.y;
            int width = r.width;
            int height = r.height;
            putText(img, "tank", Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255), 2);
            rectangle(img, Rect(left, top, width, height), Scalar(0, 0, 255), 2, 8, 0);
        }

        namedWindow("C++", WINDOW_AUTOSIZE);
        cv::imshow("C++", img);
        cv::waitKey(25);
        Sleep(2000);
    }
    return 0;
}
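For reference, the Python version forwards through every unconnected output layer (net.forward(self.ln)) and stacks the per-scale results with np.vstack, while my loop calls network.forward(outMat) with a single Mat. If I read the docs right, the single-Mat forward returns only the last output layer, so the rows from the other YOLO scales would never reach NMS. Here is a minimal, untested sketch of the multi-output equivalent, reusing the network object from the code above:

// Sketch only: gather one Mat per YOLO output layer, mirroring the Python
// net.forward(self.ln) followed by np.vstack(outputs).
std::vector<cv::String> outNames = network.getUnconnectedOutLayersNames();
std::vector<cv::Mat> outs;
network.forward(outs, outNames);  // one (rows x 5+classes) Mat per output scale

cv::Mat stacked;
cv::vconcat(outs, stacked);       // row-wise concatenation, like np.vstack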

The function hwnd2mat() in Z_Utils.h (the only function called from there; the rest are OpenCV API functions):

Mat hwnd2mat(HWND hwnd)
{
    HDC hwindowDC, hwindowCompatibleDC;
    int height, width, srcheight, srcwidth;
    HBITMAP hbwindow;
    Mat src;
    BITMAPINFOHEADER bi;

    hwindowDC = GetDC(hwnd);
    hwindowCompatibleDC = CreateCompatibleDC(hwindowDC);
    SetStretchBltMode(hwindowCompatibleDC, COLORONCOLOR);

    RECT windowsize;  // get the height and width of the screen
    GetClientRect(hwnd, &windowsize);
    srcheight = windowsize.bottom;
    srcwidth = windowsize.right;
    height = windowsize.bottom;  // change this to whatever size you want to resize to
    width = windowsize.right;

    src.create(height, width, CV_8UC4);

    // create a bitmap
    hbwindow = CreateCompatibleBitmap(hwindowDC, width, height);
    bi.biSize = sizeof(BITMAPINFOHEADER);  // http://msdn.microsoft.com/en-us/library/windows/window/dd183402%28v=vs.85%29.aspx
    bi.biWidth = width;
    bi.biHeight = -height;  // this is the line that makes it draw upside down or not
    bi.biPlanes = 1;
    bi.biBitCount = 32;
    bi.biCompression = BI_RGB;
    bi.biSizeImage = 0;
    bi.biXPelsPerMeter = 0;
    bi.biYPelsPerMeter = 0;
    bi.biClrUsed = 0;
    bi.biClrImportant = 0;

    // use the previously created device context with the bitmap
    SelectObject(hwindowCompatibleDC, hbwindow);
    // copy from the window device context to the bitmap device context
    StretchBlt(hwindowCompatibleDC, 0, 0, width, height, hwindowDC, 0, 0, srcwidth, srcheight, SRCCOPY);  // change SRCCOPY to NOTSRCCOPY for wacky colors!
    GetDIBits(hwindowCompatibleDC, hbwindow, 0, height, src.data, (BITMAPINFO*)&bi, DIB_RGB_COLORS);  // copy from hwindowCompatibleDC to hbwindow

    // avoid memory leak
    DeleteObject(hbwindow);
    DeleteDC(hwindowCompatibleDC);
    ReleaseDC(hwnd, hwindowDC);

    return src;
}
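To rule out the capture path, one frame from each side can be dumped and compared. A sketch (the file name is just an example for diffing against a screenshot saved by the Python WindowCapture):

// Sketch only: sanity-check and dump a single captured frame.
HWND handle = FindWindow(NULL, L"Trek");
cv::Mat frame = hwnd2mat(handle);
CV_Assert(frame.type() == CV_8UC4);     // GetDIBits with biBitCount = 32 fills a BGRA buffer
cv::imwrite("cpp_capture.png", frame);  // compare against a frame from the Python side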

The objects it needs to find change size because the game is 3D, but I took care of that with the model when I trained it. The only problem is that the Python version, while slower, has better accuracy, even though I am using the same model and the same game. I have no idea why this happens.

Note: I use the same confidence thresholds, and the windows are the same size.
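One way to narrow this down might be to compare the raw network outputs before any thresholding, similar to the print(outputs) in the Python version. A sketch that could go inside the C++ loop (it reuses outMat from above):

// Sketch: log the output shape and the best class score of the frame,
// for comparison with the Python print(outputs) dump.
std::cout << "rows=" << outMat.rows << " cols=" << outMat.cols << "\n";
double best = 0.0;
for (int j = 0; j < outMat.rows; ++j) {
    cv::Mat scores = outMat.row(j).colRange(5, outMat.cols);
    double conf;
    cv::minMaxLoc(scores, 0, &conf, 0, 0);
    if (conf > best) best = conf;
}
std::cout << "best class score this frame: " << best << "\n";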

