Python:如何使用 OpenCV 的 waitKey 函数检测组合键?

Python: how to detect key combinations with OpenCV's waitKey function?

我有一个机器学习项目,正在为 Linux 下的 Speed Dreams 游戏设计自动驾驶。为此,我必须找到一种方法,把键盘输入转换成如下所示的一维数组(每个位置对应一种按键状态)。

 up - down - right - left - upleft - upright - downleft - downright - do nothing
[0     0       0       0       0         0          0           0          1]

我使用这段代码作为截图和处理的起始代码:

import time
import cv2
import mss
import numpy as np

def process_img(original_img):
    """Return the Canny edge map of a captured frame.

    The frame is first converted with ``cv2.COLOR_BGR2GRAY`` and the
    resulting grayscale image is passed to ``cv2.Canny`` with thresholds
    200 and 300.
    """
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=200, threshold2=300)

with mss.mss() as sct:
    # Screen region that is grabbed on every iteration
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}

    while True:
        started = time.time()

        # Grab the region as a NumPy array and run the edge detector on it
        frame = np.array(sct.grab(monitor))
        edges = process_img(original_img=frame)

        # Show the processed frame
        cv2.imshow("Window", edges)

        elapsed = time.time() - started
        print("Loop took {} seconds".format(elapsed))

        # Poll the keyboard for up to 12 ms and echo any positive key code
        k = cv2.waitKey(12)
        if k > 0:
            print(k)

        # Keep looping until "q" is pressed
        if (k & 0xFF) != ord("q"):
            continue
        cv2.destroyAllWindows()
        break

我知道使用 cv2.waitKey() 函数可以捕获键码,所以我能想出办法判断是否按下了上、下、左或右。但是,有什么方法可以用 cv2.waitKey 捕捉 up-left、up-right 等组合键吗?

在循环中用 cv2.waitKey 捕捉按键对我来说非常重要,因为它在准确性方面极大地提高了我的神经网络的性能。

我认为 cv2.waitKey 无法同时捕获多个按键。一个简单的捕捉两键组合的方法是:记录上一次捕捉到的键,并与当前捕捉到的键进行比较,检查这两个键是否构成你想要的组合键。

import cv2

cap = cv2.VideoCapture(0)
k = last_key = -1
up_left_is_pressed = up_right_is_pressed = False

# A combination is two different keys seen on consecutive polls, in either
# order, so each one is stored as an unordered pair of key codes.
UP_LEFT_KEYS = {ord('w'), ord('a')}
UP_RIGHT_KEYS = {ord('w'), ord('d')}

while True:
    ok, image = cap.read()
    if not ok:
        break

    # Shift the previous poll result into last_key, then poll again
    last_key, k = k, cv2.waitKey(1)

    # No key during this poll: clear both combination flags
    if k == -1:
        up_left_is_pressed = up_right_is_pressed = False

    recent_keys = {k, last_key}
    if recent_keys == UP_LEFT_KEYS:
        up_left_is_pressed = True
        caption, colour, thickness = "up left press", (0, 0, 255), 2
    elif recent_keys == UP_RIGHT_KEYS:
        up_right_is_pressed = True
        caption, colour, thickness = "up right press", (0, 0, 255), 2
    else:
        # thickness 1 is putText's documented default, which the original
        # call relied on by omitting the argument
        caption, colour, thickness = "no key combination pressed", (0, 255, 0), 1

    cv2.putText(image, caption, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, colour, thickness)
    cv2.imshow("hi", image)

    # Esc (key code 27) ends the loop
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

当你切换到另一个程序并继续按键时,'cv2.waitKey' 似乎不是一个好的选择。我找到了这些示例,并写了一段示例代码来捕获按键,它在 Windows 中完美运行,在 Linux 中也运行良好。

import time
import cv2
import mss
import numpy as np
from pynput.keyboard import Key, Listener

def up():
    """Handler for the "up" key state; prints a placeholder message."""
    print("Go up")


def down():
    """Handler for the "down" key state; prints a placeholder message."""
    print("Go down")


def left():
    """Handler for the "left" key state; prints a placeholder message."""
    print("Go left")


def right():
    """Handler for the "right" key state; prints a placeholder message."""
    print("Go right")


def up_left():
    """Handler for the "up + left" combination; prints a placeholder message."""
    print("Go up_left")


def up_right():
    """Handler for the "up + right" combination; prints a placeholder message."""
    print("Go up_right")


def down_left():
    """Handler for the "down + left" combination; prints a placeholder message."""
    print("Go down_left")


def down_right():
    """Handler for the "down + right" combination; prints a placeholder message."""
    print("Go down_right")


def do_nothing():
    """Handler for the idle state; prints a placeholder message."""
    print("Do Nothing")


# Map every supported set of simultaneously held arrow keys to its handler.
# The keys are frozensets because ordinary sets are unhashable and therefore
# cannot be dictionary keys. The values are the handler functions themselves
# (no `()`), so a handler only runs when it is looked up and called.
combination_to_function = {
    # Single arrow keys
    frozenset({Key.up}): up,
    frozenset({Key.down}): down,
    frozenset({Key.left}): left,
    frozenset({Key.right}): right,
    # Two-key diagonals
    frozenset({Key.up, Key.left}): up_left,
    frozenset({Key.up, Key.right}): up_right,
    frozenset({Key.down, Key.left}): down_left,
    frozenset({Key.down, Key.right}): down_right,
}

# Keys that are held down right now; updated by on_press / on_release
current_keys = set()


def on_press(key):
    """Record *key* as held and run the handler for the held combination.

    The handler is looked up in ``combination_to_function`` by the frozen
    set of all currently held keys; nothing happens when that exact set has
    no entry.
    """
    current_keys.add(key)
    handler = combination_to_function.get(frozenset(current_keys))
    if handler is not None:
        handler()


def on_release(key):
    """Forget *key* once it is released; unknown keys are ignored."""
    current_keys.discard(key)


def process_img(original_img):
    """Return the Canny edge map of a captured frame.

    The frame is first converted with ``cv2.COLOR_BGR2GRAY`` and the
    resulting grayscale image is passed to ``cv2.Canny`` with thresholds
    200 and 300.
    """
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=200, threshold2=300)


# Run ONE keyboard listener for the whole capture session.
# The listener is entered as a context manager next to the screen grabber so
# that it is started once before the loop and stopped on every exit path
# (pressing "q", an exception, Ctrl+C). Creating and starting a new Listener
# on every frame, as was done before, spawns a thread per iteration, can drop
# key events that arrive between one listener stopping and the next starting,
# and left the final listener running because `break` skipped `stop()`.
with mss.mss() as sct, Listener(on_press=on_press, on_release=on_release) as listener:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}

    try:
        while True:
            # Get raw pixels from the screen, save it to a Numpy array
            screen = np.array(sct.grab(monitor))
            new_screen = process_img(original_img=screen)

            # Display the picture
            cv2.imshow("Window", new_screen)

            # Wait up to 10 ms for a key in the OpenCV window; "q" quits.
            # Arrow-key combinations are handled by the pynput listener,
            # not by this call.
            k = cv2.waitKey(10)
            if k & 0xFF == ord("q"):
                break
    finally:
        # Close the preview window however the loop ended
        cv2.destroyAllWindows()