I found some Python code online that captures a game window and runs YOLO detections on it, and it works really well, but I wanted it to run faster, so I translated it to C++. The C++ version runs and draws rectangles in a separate window just like the Python one, but it fails to find some objects that the Python version finds. I ran both side by side on the same game, and the Python version consistently detects things the C++ version misses. I don't understand why, because I believe I made the same function calls in C++, so I'm hoping someone experienced with both can help.

Here is the Python version:
```python
import numpy as np
import win32gui, win32ui, win32con
from PIL import Image
from time import sleep
import cv2 as cv
import os
import random


class WindowCapture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name):
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # Trim the window border and title bar so only the client area is captured.
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

    def get_screenshot(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')  # np.fromstring is deprecated
        img.shape = (self.h, self.w, 4)

        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        img = img[..., :3]  # drop the alpha channel (BGRA -> BGR)
        img = np.ascontiguousarray(img)
        return img

    def generate_image_dataset(self):
        if not os.path.exists("images"):
            os.mkdir("images")
        while True:
            img = self.get_screenshot()
            im = Image.fromarray(img[..., [2, 1, 0]])  # BGR -> RGB for PIL
            im.save(f"./images/img_{len(os.listdir('images'))}.jpeg")
            sleep(1)

    def get_window_size(self):
        return (self.w, self.h)


class ImageProcessor:
    W = 0
    H = 0
    net = None
    ln = None
    classes = {}
    colors = []

    def __init__(self, img_size, cfg_file, weights_file):
        np.random.seed(42)
        self.net = cv.dnn.readNetFromDarknet(cfg_file, weights_file)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i - 1] for i in self.net.getUnconnectedOutLayers()]
        self.W = img_size[0]
        self.H = img_size[1]

        with open('Models/classes.txt', 'r') as file:
            lines = file.readlines()
        for i, line in enumerate(lines):
            self.classes[i] = line.strip()

        # If you plan to use more than six classes, add more colors to this list.
        self.colors = [
            (0, 0, 255), (0, 255, 0), (255, 0, 0),
            (255, 255, 0), (255, 0, 255), (0, 255, 255)
        ]

    def proccess_image(self, img):
        blob = cv.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)  # run every unconnected output layer
        outputs = np.vstack(outputs)
        coordinates = self.get_coordinates(outputs, 0.1)
        self.draw_identified_objects(img, coordinates)
        return coordinates

    def get_coordinates(self, outputs, conf):
        boxes = []
        confidences = []
        classIDs = []
        for output in outputs:
            scores = output[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > conf:
                x, y, w, h = output[:4] * np.array([self.W, self.H, self.W, self.H])
                p0 = int(x - w // 2), int(y - h // 2)
                boxes.append([*p0, int(w), int(h)])
                confidences.append(float(confidence))
                classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf - 0.1)
        if len(indices) == 0:
            return []

        coordinates = []
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            coordinates.append({
                'x': x, 'y': y, 'w': w, 'h': h,
                'class': classIDs[i],
                'class_name': self.classes[classIDs[i]]
            })
        return coordinates

    def draw_identified_objects(self, img, coordinates):
        for coordinate in coordinates:
            x = coordinate['x']
            y = coordinate['y']
            w = coordinate['w']
            h = coordinate['h']
            classID = coordinate['class']
            color = self.colors[classID]
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv.putText(img, self.classes[classID], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.imshow('window', img)


# Run this to start detections with the trained model.
window_name = "Trek"
cfg_file_name = "./Models/yolov4_train.cfg"
weights_file_name = "./Models/yolov4_train_final.weights"

wincap = WindowCapture(window_name)
improc = ImageProcessor(wincap.get_window_size(), cfg_file_name, weights_file_name)

while True:
    ss = wincap.get_screenshot()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
    coordinates = improc.proccess_image(ss)
    sleep(2)

print('Finished.')
```
My C++ version:

```cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <Z_Utils.h>

using namespace cv;
using namespace std;
using namespace dnn;

#define CONF 0.1

int main()
{
    LPCWSTR window_title = L"Trek";
    HWND handle = FindWindow(NULL, window_title);

    std::string model = "./Models/yolov4_train_final.weights";
    std::string config = "./Models/yolov4_train.cfg";
    Net network = readNet(model, config, "Darknet");
    network.setPreferableBackend(DNN_BACKEND_OPENCV);
    network.setPreferableTarget(DNN_TARGET_OPENCL);

    //std::vector<cv::String> ln = network.getUnconnectedOutLayersNames();
    //std::vector<cv::String> ln;
    //auto layers = network.getLayerNames();
    //for (auto i : network.getUnconnectedOutLayers()) {
    //    ln.push_back(layers[i]);
    //}

    for (;;) {
        Mat img = hwnd2mat(handle);
        // The capture is BGRA; COLOR_RGBA2RGB is an alias of COLOR_BGRA2BGR,
        // so this just drops the alpha channel without reordering.
        cvtColor(img, img, COLOR_RGBA2RGB);

        Mat blobFromImg;
        bool swapRB = true;
        blobFromImage(img, blobFromImg, 1 / 255.0, Size(416, 416), Scalar(), swapRB, false);
        network.setInput(blobFromImg);

        Mat outMat;
        network.forward(outMat);

        int rowsNoOfDetection = outMat.rows;
        int colsCoordinatesPlusClassScore = outMat.cols;
        std::vector<cv::Rect> boxes;
        std::vector<float> confidences;

        for (int j = 0; j < rowsNoOfDetection; ++j) {
            // Class scores start after the 4 box coordinates and the objectness score.
            Mat scores = outMat.row(j).colRange(5, colsCoordinatesPlusClassScore);
            Point PositionOfMax;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &PositionOfMax);

            if (confidence > CONF) {
                double centerX = outMat.at<float>(j, 0) * img.cols;
                double centerY = outMat.at<float>(j, 1) * img.rows;
                double width = outMat.at<float>(j, 2) * img.cols;
                double height = outMat.at<float>(j, 3) * img.rows;
                double left = centerX - width / 2;
                double top = centerY - height / 2;

                cv::Rect2d box_(left, top, width, height);
                boxes.push_back(box_);
                confidences.push_back(confidence);
            }
        }

        std::vector<int> good;
        cv::dnn::NMSBoxes(boxes, confidences, CONF, 0, good);
        for (auto ind : good) {
            Rect r = boxes[ind];
            putText(img, "tank", Point(r.x, r.y), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255), 2);
            rectangle(img, r, Scalar(0, 0, 255), 2, 8, 0);
        }

        namedWindow("C++", WINDOW_AUTOSIZE);
        cv::imshow("C++", img);
        cv::waitKey(25);
        Sleep(2000);
    }
    return 0;
}
```
The function hwnd2mat() from Z_Utils.h (the only function called from there; everything else is the OpenCV API):

```cpp
Mat hwnd2mat(HWND hwnd)
{
    HDC hwindowDC, hwindowCompatibleDC;
    int height, width, srcheight, srcwidth;
    HBITMAP hbwindow;
    Mat src;
    BITMAPINFOHEADER bi;

    hwindowDC = GetDC(hwnd);
    hwindowCompatibleDC = CreateCompatibleDC(hwindowDC);
    SetStretchBltMode(hwindowCompatibleDC, COLORONCOLOR);

    // Get the height and width of the client area.
    RECT windowsize;
    GetClientRect(hwnd, &windowsize);
    srcheight = windowsize.bottom;
    srcwidth = windowsize.right;
    height = windowsize.bottom;  // change these to resize the capture
    width = windowsize.right;

    src.create(height, width, CV_8UC4);

    // Create a bitmap to receive the pixels.
    hbwindow = CreateCompatibleBitmap(hwindowDC, width, height);
    bi.biSize = sizeof(BITMAPINFOHEADER);
    bi.biWidth = width;
    bi.biHeight = -height;  // negative height = top-down rows, so the image is not upside down
    bi.biPlanes = 1;
    bi.biBitCount = 32;
    bi.biCompression = BI_RGB;
    bi.biSizeImage = 0;
    bi.biXPelsPerMeter = 0;
    bi.biYPelsPerMeter = 0;
    bi.biClrUsed = 0;
    bi.biClrImportant = 0;

    // Select the bitmap into the memory DC, remembering the old one.
    HGDIOBJ oldbmp = SelectObject(hwindowCompatibleDC, hbwindow);

    // Copy from the window DC to the bitmap DC, then read the pixels out.
    StretchBlt(hwindowCompatibleDC, 0, 0, width, height, hwindowDC, 0, 0, srcwidth, srcheight, SRCCOPY);
    GetDIBits(hwindowCompatibleDC, hbwindow, 0, height, src.data, (BITMAPINFO*)&bi, DIB_RGB_COLORS);

    // Avoid leaking GDI handles: deselect the bitmap before deleting it.
    SelectObject(hwindowCompatibleDC, oldbmp);
    DeleteObject(hbwindow);
    DeleteDC(hwindowCompatibleDC);
    ReleaseDC(hwnd, hwindowDC);

    return src;
}
```
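To rule out the capture itself, one check I can think of (not part of the original code) is to dump a frame from this C++ path and diff it against an image saved by the Python `generate_image_dataset()` loop, since different cropping or channel order would feed the network different images. A minimal sketch, with a filename of my own choosing:

```cpp
// Diagnostic sketch (mine, not from the original project): save one frame
// from the C++ capture so it can be compared pixel-by-pixel with a Python
// screenshot of the same window.
#include <Windows.h>
#include <opencv2/opencv.hpp>
#include <Z_Utils.h>  // hwnd2mat()

int main()
{
    HWND handle = FindWindowW(NULL, L"Trek");
    cv::Mat frame = hwnd2mat(handle);
    // GetDIBits fills BGRA, so dropping alpha here matches the Python img[..., :3]
    cv::cvtColor(frame, frame, cv::COLOR_BGRA2BGR);
    cv::imwrite("cpp_capture.png", frame);  // hypothetical output file
    return 0;
}
```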
The objects it needs to find change size because the game is 3D, but I took care of that when training the model. The only problem is that the Python version, while slower, has better accuracy, even though I am using the same model files and the same game... I have no idea why this happens. Note: I use the same confidence thresholds, and the windows are the same size.
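One concrete difference I can spot between the two listings: the Python version forwards through every unconnected output layer (`net.forward(self.ln)`) and stacks the results, so it collects detections from all three YOLO output scales, while my C++ version calls `network.forward(outMat)` once, which returns only a single output blob. I'm not sure this explains the accuracy gap, but a sketch of a multi-scale version (the function name and structure are mine, untested) would look like this:

```cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

// Sketch: mirror the Python net.forward(self.ln) by running every
// unconnected output layer and looping over all of them. "network" and
// "img" are assumed to be set up exactly as in my main() above.
std::vector<cv::Rect> detect_all_scales(cv::dnn::Net& network, const cv::Mat& img, float conf)
{
    cv::Mat blob;
    cv::dnn::blobFromImage(img, blob, 1 / 255.0, cv::Size(416, 416), cv::Scalar(), true, false);
    network.setInput(blob);

    // Forward through *all* unconnected output layers (all 3 YOLO scales).
    std::vector<cv::Mat> outs;
    network.forward(outs, network.getUnconnectedOutLayersNames());

    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    for (const cv::Mat& outMat : outs) {
        for (int j = 0; j < outMat.rows; ++j) {
            cv::Mat scores = outMat.row(j).colRange(5, outMat.cols);
            double confidence;
            cv::minMaxLoc(scores, 0, &confidence, 0, 0);
            if (confidence > conf) {
                double cx = outMat.at<float>(j, 0) * img.cols;
                double cy = outMat.at<float>(j, 1) * img.rows;
                double w  = outMat.at<float>(j, 2) * img.cols;
                double h  = outMat.at<float>(j, 3) * img.rows;
                boxes.emplace_back((int)(cx - w / 2), (int)(cy - h / 2), (int)w, (int)h);
                confidences.push_back((float)confidence);
            }
        }
    }

    // Same thresholds the Python get_coordinates() uses: conf and conf - 0.1.
    std::vector<int> keep;
    cv::dnn::NMSBoxes(boxes, confidences, conf, conf - 0.1f, keep);

    std::vector<cv::Rect> result;
    for (int ind : keep) result.push_back(boxes[ind]);
    return result;
}
```

The only other difference I can see is the target: my C++ code sets `DNN_TARGET_OPENCL` while the Python code runs on the default CPU target, which could shift confidences slightly, though I wouldn't expect it to make detections disappear entirely.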