Python: how to detect key combinations with OpenCV's cv2.waitKey()?

11.2k Views Asked by At

I am working on a machine learning project: designing an autonomous driver for the Speed Dreams game on Linux. For this I need a way to turn keyboard input into a 1-dimensional array like this:

 up - down - right - left - upleft - upright - downleft - downright - do nothing
[0     0       0       0       0         0          0           0          1]

I used this code as the starter code for taking screenshots and processing:

import time
import cv2
import mss
import numpy as np

def process_img(original_img):
    """Return a Canny edge map of a captured frame.

    The frame is first converted to a single grayscale channel with
    ``cv2.COLOR_BGR2GRAY``; edges are then extracted by ``cv2.Canny``
    using the thresholds 200 and 300.
    """
    grayscale = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(grayscale, threshold1=200, threshold2=300)

with mss.mss() as sct:
    # Screen region that is grabbed on every iteration.
    monitor = dict(top=0, left=70, width=640, height=480)
    # Key code that ends the loop (press "q" in the preview window).
    quit_code = ord("q")

    while True:
        last_time = time.time()

        # Grab the raw pixels as a NumPy array and reduce them to an edge map.
        frame = np.array(sct.grab(monitor))
        edges = process_img(original_img=frame)

        # Show the processed frame.
        cv2.imshow("Window", edges)

        elapsed = time.time() - last_time
        print("Loop took {} seconds".format(elapsed))

        # Poll the keyboard for 12 ms; echo any positive key code.
        k = cv2.waitKey(12)
        if k > 0:
            print(k)
        if k & 0xFF == quit_code:
            break

    # Only reached after "q" was pressed: close the preview window.
    cv2.destroyAllWindows()

I know that catching key codes is possible with the cv2.waitKey() function, so I can figure out a way to detect whether up, down, left or right is pressed. But is there any way to catch key combinations such as up-left, up-right, ... with cv2.waitKey()?

Catching key presses inside the loop with cv2.waitKey() is very important to me because it greatly improves the accuracy of my neural network.

2

There are 2 best solutions below

0
On BEST ANSWER

It seems that cv2.waitKey() is not a good option when you switch to another program and keep pressing keys. I found these examples and wrote sample code to catch key presses; it works perfectly on Windows and reasonably well on Linux.

import time
import cv2
import mss
import numpy as np
from pynput.keyboard import Key, Listener

def up():
    """Report the "up" action by printing it to standard output."""
    message = "Go up"
    print(message)


def down():
    """Report the "down" action by printing it to standard output."""
    message = "Go down"
    print(message)


def left():
    """Report the "left" action by printing it to standard output."""
    message = "Go left"
    print(message)


def right():
    """Report the "right" action by printing it to standard output."""
    message = "Go right"
    print(message)


def up_left():
    """Report the "up_left" action by printing it to standard output."""
    message = "Go up_left"
    print(message)


def up_right():
    """Report the "up_right" action by printing it to standard output."""
    message = "Go up_right"
    print(message)


def down_left():
    """Report the "down_left" action by printing it to standard output."""
    message = "Go down_left"
    print(message)


def down_right():
    """Report the "down_right" action by printing it to standard output."""
    message = "Go down_right"
    print(message)


def do_nothing():
    """Report that no action is taken by printing it to standard output."""
    message = "Do Nothing"
    print(message)


# Dispatch table: the exact set of held keys -> the handler to run for it.
# The dictionary keys are frozensets because a plain set is not hashable and
# therefore cannot be used as a dictionary key. The values are the function
# objects themselves (no call parentheses), so they can be invoked later.
combination_to_function = {
    frozenset({Key.up}): up,
    frozenset({Key.down}): down,
    frozenset({Key.left}): left,
    frozenset({Key.right}): right,
    frozenset({Key.up, Key.left}): up_left,
    frozenset({Key.up, Key.right}): up_right,
    frozenset({Key.down, Key.left}): down_left,
    frozenset({Key.down, Key.right}): down_right,
}

# Keys that are currently held down
current_keys = set()


def on_press(key):
    """Record *key* as held and run the handler for the held combination.

    The key is added to ``current_keys``. If the resulting set of held keys
    is a key of ``combination_to_function``, the mapped function is called
    with no arguments; otherwise nothing else happens.
    """
    current_keys.add(key)
    held = frozenset(current_keys)
    try:
        handler = combination_to_function[held]
    except KeyError:
        # This combination has no handler registered.
        return
    handler()


def on_release(key):
    """Forget *key* once it has been released.

    Removes the key from ``current_keys``; a key that is not being tracked
    is ignored.
    """
    current_keys.discard(key)


def process_img(original_img):
    """Return a Canny edge map of a captured frame.

    The frame is first converted to a single grayscale channel with
    ``cv2.COLOR_BGR2GRAY``; edges are then extracted by ``cv2.Canny``
    using the thresholds 200 and 300.
    """
    grayscale = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(grayscale, threshold1=200, threshold2=300)


# Capture a screen region, show its edge map, and track arrow-key combinations
# in the background until "q" is pressed in the preview window.
#
# A single keyboard listener is started once for the whole session. Creating,
# starting and stopping a new Listener thread on every frame is wasteful, can
# miss key events between two listeners, and left the last listener running
# when the loop was left through `break`. Used as a context manager, the
# listener is started on entry and stopped on every exit path.
with mss.mss() as sct, Listener(on_press=on_press, on_release=on_release) as listener:
    # Part of the screen to capture
    monitor = {"top": 0, "left": 70, "width": 640, "height": 480}

    while True:
        # Get raw pixels from the screen, save it to a Numpy array
        screen = np.array(sct.grab(monitor))
        new_screen = process_img(original_img=screen)

        # Display the picture
        cv2.imshow("Window", new_screen)

        # waitKey also services the preview window's event queue; the key
        # combinations themselves are reported by the listener callbacks.
        k = cv2.waitKey(10)

        # Press "q" to quit
        if k & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break
0
On

I think cv2.waitKey() can't catch multiple simultaneous key presses. A simple way to catch a two-key combination is to record the last caught key and compare it with the currently caught key, checking whether the two together match your desired key combination.

import cv2

# Overlay a label on each webcam frame saying whether the two most recent
# waitKey readings form a "w"+"a" (up left) or "w"+"d" (up right) pair.
cap = cv2.VideoCapture(0)
k = last_key = -1
up_left_is_pressed = up_right_is_pressed = False

# Unordered key-code pairs that count as a combination; comparing sets makes
# the order in which the two keys were read irrelevant.
up_left_pair = {ord('w'), ord('a')}
up_right_pair = {ord('w'), ord('d')}

while True:
    ok, image = cap.read()
    if not ok:
        # No frame could be read from the capture device.
        break

    # Shift the previous reading into last_key, then poll for a new key.
    last_key, k = k, cv2.waitKey(1)

    # -1 means no key was read during this poll: clear both flags.
    if k == -1:
        up_left_is_pressed = up_right_is_pressed = False

    recent_keys = {k, last_key}
    if recent_keys == up_left_pair:
        up_left_is_pressed = True
        cv2.putText(image, "up left press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    elif recent_keys == up_right_pair:
        up_right_is_pressed = True
        cv2.putText(image, "up right press", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    else:
        cv2.putText(image, "no key combination pressed", (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))

    cv2.imshow("hi", image)
    # Key code 27 ends the loop.
    if k == 27:
        break
cap.release()
cv2.destroyAllWindows()