python 如何使用opencv waitkey 库检测组合键?
python how to detect key combinations with opencv waitkey library?
我有一个机器学习项目,正在为 Linux 上的 Speed Dreams 游戏设计自动驾驶。为此,我必须找到一种方法,把键盘输入转换成如下所示的一维数组。
up - down - right - left - upleft - upright - downleft - downright - do nothing
[0 0 0 0 0 0 0 0 1]
我使用这段代码作为截图和处理的起始代码:
import time
import cv2
import mss
import numpy as np
def process_img(original_img, threshold1=200, threshold2=300):
    """Return a Canny edge map of a captured frame.

    Args:
        original_img: Image array accepted by ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.
        threshold1: First threshold forwarded to ``cv2.Canny``.
        threshold2: Second threshold forwarded to ``cv2.Canny``.

    Returns:
        The edge image returned by ``cv2.Canny``.
    """
    # Canny is run on the grayscale version of the frame.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=threshold1, threshold2=threshold2)
with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
    while True:
        last_time = time.time()
        # Get raw pixels from the screen, save it to a Numpy array
        screen = np.array(sct.grab(monitor))
        new_screen = process_img(original_img=screen)
        # Display the picture
        cv2.imshow("Window", new_screen)
        print("Loop took {} seconds".format(time.time() - last_time))
        # Poll the keyboard for up to 12 ms; echo any positive key code
        k = cv2.waitKey(12)
        if k > 0:
            print(k)
        # Press "q" to quit (only the low byte of the code is compared)
        if k & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break
我知道使用 cv2.waitKey() 函数可以捕获键码,所以我可以想办法检测是否按下了上、下、左或右。但是有什么方法可以用 cv2.waitKey 捕捉 up-left、up-right 等组合键吗?
在循环中用 cv2.waitKey 捕捉按键对我来说非常重要,因为它极大地提高了我的神经网络的准确性。
我认为 cv2.waitKey 无法同时捕获多个按键。一个捕捉两键组合的简单方法是:记录上一次捕捉到的键,并与当前捕捉到的键进行比较,检查这两个键是否构成你想要的组合键。
import cv2
# Camera frames serve as the canvas on which the detected combination is drawn.
cap = cv2.VideoCapture(0)
k = last_key = -1
up_left_is_pressed = up_right_is_pressed = False
try:
    while True:
        ok, image = cap.read()
        if not ok:
            break
        last_key = k  # key caught on the previous iteration
        k = cv2.waitKey(1)  # key caught on this iteration
        if k == -1:
            # No key this iteration: clear both combination flags
            up_left_is_pressed = up_right_is_pressed = False
        # A combination is two different keys seen on consecutive polls, in
        # either order, so compare the pair as an unordered set.
        recent_keys = {k, last_key}
        if recent_keys == {ord('a'), ord('w')}:
            up_left_is_pressed = True
            cv2.putText(image, "up left press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        elif recent_keys == {ord('d'), ord('w')}:
            up_right_is_pressed = True
            cv2.putText(image, "up right press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            cv2.putText(image, "no key combination pressed", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
        cv2.imshow("hi", image)
        if k == 27:  # 27 is the ASCII code of Esc
            break
finally:
    # Release the camera and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
当你切换到另一个程序并继续按键时,cv2.waitKey 似乎不是一个好的选择。我找到了这些示例并编写了一段捕获按键的示例代码,它在 Windows 上完美运行,在 Linux 上也运行良好。
import time
import cv2
import mss
import numpy as np
from pynput.keyboard import Key, Listener
def up():
    """Print the message for the "up" action."""
    print("Go up")


def down():
    """Print the message for the "down" action."""
    print("Go down")


def left():
    """Print the message for the "left" action."""
    print("Go left")


def right():
    """Print the message for the "right" action."""
    print("Go right")


def up_left():
    """Print the message for the "up + left" action."""
    print("Go up_left")


def up_right():
    """Print the message for the "up + right" action."""
    print("Go up_right")


def down_left():
    """Print the message for the "down + left" action."""
    print("Go down_left")


def down_right():
    """Print the message for the "down + right" action."""
    print("Go down_right")


def do_nothing():
    """Print the message for the "no key" action."""
    print("Do Nothing")
# Dispatch table from a set of simultaneously held arrow keys to its handler.
# The dict keys are frozensets because a plain set is not hashable.  The
# values are the function objects themselves (no call parentheses), so a
# handler only runs when it is looked up and invoked.
combination_to_function = {
    frozenset({Key.up}): up,
    frozenset({Key.down}): down,
    frozenset({Key.left}): left,
    frozenset({Key.right}): right,
    frozenset({Key.up, Key.left}): up_left,
    frozenset({Key.up, Key.right}): up_right,
    frozenset({Key.down, Key.left}): down_left,
    frozenset({Key.down, Key.right}): down_right,
}
# Keys that are held down right now
current_keys = set()
def on_press(key):
    """Record a pressed key and run the handler for the held combination.

    The key is added to ``current_keys``; if the resulting set of held keys
    is a key of ``combination_to_function``, the mapped function is called.

    Args:
        key: The key object passed in by the keyboard listener.
    """
    current_keys.add(key)
    # Build the lookup key once; combinations without a mapping are ignored.
    handler = combination_to_function.get(frozenset(current_keys))
    if handler is not None:
        handler()
def on_release(key):
    """Stop tracking a key once it has been released.

    Args:
        key: The key object passed in by the keyboard listener.
    """
    # discard() does nothing when the key was never recorded as pressed
    current_keys.discard(key)
def process_img(original_img, threshold1=200, threshold2=300):
    """Return a Canny edge map of a captured frame.

    Args:
        original_img: Image array accepted by ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.
        threshold1: First threshold forwarded to ``cv2.Canny``.
        threshold2: Second threshold forwarded to ``cv2.Canny``.

    Returns:
        The edge image returned by ``cv2.Canny``.
    """
    # Canny is run on the grayscale version of the frame.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=threshold1, threshold2=threshold2)
with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
    # Start exactly one keyboard listener for the whole session.  Creating
    # and starting a Listener on every frame would leave one more listener
    # thread running per iteration, and each key press would then invoke the
    # handlers once per live listener.
    listener = Listener(on_press=on_press, on_release=on_release)
    listener.start()
    try:
        while True:
            # Get raw pixels from the screen, save it to a Numpy array
            screen = np.array(sct.grab(monitor))
            new_screen = process_img(original_img=screen)
            # Display the picture
            cv2.imshow("Window", new_screen)
            # Press "q" to quit
            k = cv2.waitKey(10)
            if k & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
    finally:
        # Stop the listener thread even if the loop exits with an exception
        listener.stop()
我有一个机器学习项目,正在为 Linux 上的 Speed Dreams 游戏设计自动驾驶。为此,我必须找到一种方法,把键盘输入转换成如下所示的一维数组。
up - down - right - left - upleft - upright - downleft - downright - do nothing
[0 0 0 0 0 0 0 0 1]
我使用这段代码作为截图和处理的起始代码:
import time
import cv2
import mss
import numpy as np
def process_img(original_img, threshold1=200, threshold2=300):
    """Return a Canny edge map of a captured frame.

    Args:
        original_img: Image array accepted by ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.
        threshold1: First threshold forwarded to ``cv2.Canny``.
        threshold2: Second threshold forwarded to ``cv2.Canny``.

    Returns:
        The edge image returned by ``cv2.Canny``.
    """
    # Canny is run on the grayscale version of the frame.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=threshold1, threshold2=threshold2)
with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
    while True:
        last_time = time.time()
        # Get raw pixels from the screen, save it to a Numpy array
        screen = np.array(sct.grab(monitor))
        new_screen = process_img(original_img=screen)
        # Display the picture
        cv2.imshow("Window", new_screen)
        print("Loop took {} seconds".format(time.time() - last_time))
        # Poll the keyboard for up to 12 ms; echo any positive key code
        k = cv2.waitKey(12)
        if k > 0:
            print(k)
        # Press "q" to quit (only the low byte of the code is compared)
        if k & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break
我知道使用 cv2.waitKey() 函数可以捕获键码,所以我可以想办法检测是否按下了上、下、左或右。但是有什么方法可以用 cv2.waitKey 捕捉 up-left、up-right 等组合键吗?
在循环中用 cv2.waitKey 捕捉按键对我来说非常重要,因为它极大地提高了我的神经网络的准确性。
我认为 cv2.waitKey 无法同时捕获多个按键。一个捕捉两键组合的简单方法是:记录上一次捕捉到的键,并与当前捕捉到的键进行比较,检查这两个键是否构成你想要的组合键。
import cv2
# Camera frames serve as the canvas on which the detected combination is drawn.
cap = cv2.VideoCapture(0)
k = last_key = -1
up_left_is_pressed = up_right_is_pressed = False
try:
    while True:
        ok, image = cap.read()
        if not ok:
            break
        last_key = k  # key caught on the previous iteration
        k = cv2.waitKey(1)  # key caught on this iteration
        if k == -1:
            # No key this iteration: clear both combination flags
            up_left_is_pressed = up_right_is_pressed = False
        # A combination is two different keys seen on consecutive polls, in
        # either order, so compare the pair as an unordered set.
        recent_keys = {k, last_key}
        if recent_keys == {ord('a'), ord('w')}:
            up_left_is_pressed = True
            cv2.putText(image, "up left press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        elif recent_keys == {ord('d'), ord('w')}:
            up_right_is_pressed = True
            cv2.putText(image, "up right press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            cv2.putText(image, "no key combination pressed", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
        cv2.imshow("hi", image)
        if k == 27:  # 27 is the ASCII code of Esc
            break
finally:
    # Release the camera and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
当你切换到另一个程序并继续按键时,cv2.waitKey 似乎不是一个好的选择。我找到了这些示例并编写了一段捕获按键的示例代码,它在 Windows 上完美运行,在 Linux 上也运行良好。
import time
import cv2
import mss
import numpy as np
from pynput.keyboard import Key, Listener
def up():
    """Print the message for the "up" action."""
    print("Go up")


def down():
    """Print the message for the "down" action."""
    print("Go down")


def left():
    """Print the message for the "left" action."""
    print("Go left")


def right():
    """Print the message for the "right" action."""
    print("Go right")


def up_left():
    """Print the message for the "up + left" action."""
    print("Go up_left")


def up_right():
    """Print the message for the "up + right" action."""
    print("Go up_right")


def down_left():
    """Print the message for the "down + left" action."""
    print("Go down_left")


def down_right():
    """Print the message for the "down + right" action."""
    print("Go down_right")


def do_nothing():
    """Print the message for the "no key" action."""
    print("Do Nothing")
# Dispatch table from a set of simultaneously held arrow keys to its handler.
# The dict keys are frozensets because a plain set is not hashable.  The
# values are the function objects themselves (no call parentheses), so a
# handler only runs when it is looked up and invoked.
combination_to_function = {
    frozenset({Key.up}): up,
    frozenset({Key.down}): down,
    frozenset({Key.left}): left,
    frozenset({Key.right}): right,
    frozenset({Key.up, Key.left}): up_left,
    frozenset({Key.up, Key.right}): up_right,
    frozenset({Key.down, Key.left}): down_left,
    frozenset({Key.down, Key.right}): down_right,
}
# Keys that are held down right now
current_keys = set()
def on_press(key):
    """Record a pressed key and run the handler for the held combination.

    The key is added to ``current_keys``; if the resulting set of held keys
    is a key of ``combination_to_function``, the mapped function is called.

    Args:
        key: The key object passed in by the keyboard listener.
    """
    current_keys.add(key)
    # Build the lookup key once; combinations without a mapping are ignored.
    handler = combination_to_function.get(frozenset(current_keys))
    if handler is not None:
        handler()
def on_release(key):
    """Stop tracking a key once it has been released.

    Args:
        key: The key object passed in by the keyboard listener.
    """
    # discard() does nothing when the key was never recorded as pressed
    current_keys.discard(key)
def process_img(original_img, threshold1=200, threshold2=300):
    """Return a Canny edge map of a captured frame.

    Args:
        original_img: Image array accepted by ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.
        threshold1: First threshold forwarded to ``cv2.Canny``.
        threshold2: Second threshold forwarded to ``cv2.Canny``.

    Returns:
        The edge image returned by ``cv2.Canny``.
    """
    # Canny is run on the grayscale version of the frame.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray, threshold1=threshold1, threshold2=threshold2)
with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
    # Start exactly one keyboard listener for the whole session.  Creating
    # and starting a Listener on every frame would leave one more listener
    # thread running per iteration, and each key press would then invoke the
    # handlers once per live listener.
    listener = Listener(on_press=on_press, on_release=on_release)
    listener.start()
    try:
        while True:
            # Get raw pixels from the screen, save it to a Numpy array
            screen = np.array(sct.grab(monitor))
            new_screen = process_img(original_img=screen)
            # Display the picture
            cv2.imshow("Window", new_screen)
            # Press "q" to quit
            k = cv2.waitKey(10)
            if k & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
    finally:
        # Stop the listener thread even if the loop exits with an exception
        listener.stop()