-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathimageProcessing.py
More file actions
195 lines (153 loc) · 6.42 KB
/
Copy pathimageProcessing.py
File metadata and controls
195 lines (153 loc) · 6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import csv
from datetime import datetime
from PIL import ImageFont, ImageDraw, Image
import urllib.request
from pathlib import Path
from HandDetection import HandDetector
from spotifyIntegration import SpotifyClient
import SkinSegmentation
from GestureRecognition import GestureAccepter
# Feature flags for the script below.
# Draw track name, artist, account name and album cover onto the preview
# (only takes effect when CONNECT_TO_SPOTIFY is also True).
SHOW_SPOTIFY_INFO = True
# Create a SpotifyClient at startup; when False the gesture accepter is
# constructed with None instead of a client.
CONNECT_TO_SPOTIFY = True
# Add a 'Label' trackbar and save the current hand mask as a labelled PNG
# under ./dataset whenever a key other than space/esc is pressed.
CREATE_DATA_SET = False
# Overlay the per-class model outputs on the preview window.
SHOW_MODEL_PREDICTIONS = True
def nothing(arg):
    """No-op callback: accept one argument, ignore it, and return None.

    Passed to ``cv2.createTrackbar`` in this file, which requires a callback
    even though the trackbar values are polled with ``getTrackbarPos``.
    """
    return None
def drawtext(img, osd_list, bgracolor=(255,255,255,0)):
    """Draw every on-screen-display entry onto an image and return the result.

    The image is round-tripped through PIL so that text is rendered with a
    TrueType font ('Arial.ttf', size 24).

    Args:
        img: Array accepted by ``Image.fromarray``.
        osd_list: Iterable of ``(text, coords)`` pairs; ``coords`` is passed
            to ``ImageDraw.text`` as the text position.
        bgracolor: Fill colour handed to ``ImageDraw.text``.
            NOTE(review): the name suggests BGRA channel order to match
            OpenCV frames -- confirm against callers.

    Returns:
        A new numpy array built from the annotated PIL image.
    """
    canvas = Image.fromarray(img)
    pen = ImageDraw.Draw(canvas)
    osd_font = ImageFont.truetype('Arial.ttf', 24, encoding="utf-8")
    for text, position in osd_list:
        pen.text(position, text, fill=bgracolor, font=osd_font)
    return np.array(canvas)
if __name__ == '__main__':
    # Script entry point: read webcam frames, detect a hand, segment it into a
    # binary mask, classify the gesture with the Keras model, and show a
    # preview window with optional model-output and Spotify overlays.
    curr_dir = os.getcwd()
    palm_model_path = curr_dir + "/models/palm_detection.tflite"
    anchors_path = curr_dir + "/models/anchors.csv"

    # Load the face cascade; detected faces are used below to discard hand
    # boxes that HandDetector.checkIfFace reports as faces.
    face_cascade = cv2.CascadeClassifier('models/haarcascade_frontalface_default.xml')

    # Load models
    detector = HandDetector(palm_model_path, anchors_path)
    gesture_model = keras.models.load_model('models/model_v1')

    # Load Spotify client
    if CONNECT_TO_SPOTIFY:
        spclient = SpotifyClient()
        me = spclient.me()
        st = spclient.status()
        if st is None:
            raise ConnectionError("Can't connect to Spotify")
    else:
        spclient = None

    # Load GestureAccepter
    gesture_accepter = GestureAccepter(spclient, 5, 15)

    capture = cv2.VideoCapture(0)
    background = None  # reference frame for skin/background segmentation
    mask = None
    photo_counter = 0
    album_cover = None  # cached BGR thumbnail of the current album cover
    album_cover_src = None  # image descriptor the cached thumbnail came from

    # try/finally so the camera and windows are released even when a frame
    # fails to process or a network call raises.
    try:
        cv2.namedWindow('source')
        cv2.createTrackbar('Threshhold', 'source', 60, 254, nothing)
        if CREATE_DATA_SET:
            cv2.createTrackbar('Label', 'source', 0, 6, nothing)
            Path(curr_dir, 'dataset').mkdir(exist_ok=True)

        while True:
            # Get camera feed; stop cleanly when no frame could be grabbed
            # (camera unplugged / busy) instead of crashing in cvtColor.
            ret, frame = capture.read()
            if not ret or frame is None:
                break

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Save the first frame as the background
            if background is None:
                background = frame

            # Detect hand keypoints
            keypoints, center = detector(image)
            # Detect the faces
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)

            source = np.copy(frame)
            osd = []  # (text, (x, y)) entries rendered by drawtext

            if keypoints is not None:
                # Get hand box
                (x, y, w, h) = detector.getBBox(keypoints, center, 4)

                # Skip detections that are actually a face
                if not HandDetector.checkIfFace(x, y, w, h, faces, 0.3):
                    # Visualize detection
                    for px, py in keypoints:
                        source = cv2.circle(source, (int(px), int(py)), 5, (0, 0, 255))
                    source = cv2.circle(source, (int(center[0]), int(center[1])), 5, (255, 0, 255))
                    source = cv2.rectangle(source, (x, y), (x + w, y + h), (0, 255, 0), 2)

                    # Extract skin
                    handImage = SkinSegmentation.getSkinBackground(frame, background, x, y, w, h, 256)
                    if handImage is not None:
                        thresh = cv2.getTrackbarPos('Threshhold', 'source')
                        mask = SkinSegmentation.getSkinMask(handImage, thresh)
                        cv2.imshow('hand', mask)

                        # Prediction: integer-divide the mask by 255 and
                        # shrink it to the 28x28x1 input fed to the model.
                        mask_norm = mask // 255
                        im = cv2.resize(mask_norm, (28, 28))
                        rshp = np.reshape(im, (1, 28, 28, 1))

                        # Model output (array of class probabilities)
                        probabilities = gesture_model.predict(rshp)[0]

                        recognised_gesture = gesture_accepter.recognise_gesture(probabilities)
                        if recognised_gesture is not None:
                            osd.append((recognised_gesture, (450, 50)))

                        # Display the output, marking the most likely class
                        if SHOW_MODEL_PREDICTIONS:
                            argmax = np.argmax(probabilities)
                            first_row_y, row_height = 50, 20
                            for i, probability in enumerate(probabilities):
                                maxind = 'MAX' if i == argmax else ' '
                                txt = '{} {} {:f}'.format(maxind, i, probability)
                                osd.append((txt, (50, first_row_y + i * row_height)))

            # Apply Spotify info
            if CONNECT_TO_SPOTIFY and SHOW_SPOTIFY_INFO:
                # NOTE(review): this polls the client on every frame; consider
                # throttling if SpotifyClient.status() is a network round trip.
                st = spclient.status()
                # status() can return None (see the startup check above);
                # 'item' is presumably empty when nothing is playing -- in
                # both cases skip the overlay for this frame instead of
                # raising.
                item = st['item'] if st is not None else None
                if item is not None:
                    track_name = item['name']
                    artist_name = item['artists'][0]['name']
                    album_cover_src_new = item['album']['images'][2]

                    # Download the cover only when the track's image changes
                    if album_cover_src_new != album_cover_src:
                        album_cover_src = album_cover_src_new
                        with urllib.request.urlopen(album_cover_src['url']) as response:
                            album_cover_pil = Image.open(response)
                            album_cover_rgb = np.array(album_cover_pil.convert('RGB'))
                        # PIL decodes to RGB while the preview frame is BGR;
                        # convert so the cover is shown in its true colours.
                        album_cover = cv2.cvtColor(album_cover_rgb, cv2.COLOR_RGB2BGR)

                    # Paste the cover 50 px from the top-right corner
                    x_offset = source.shape[1] - 50 - album_cover.shape[1]
                    y_offset = 50
                    source[y_offset:y_offset + album_cover.shape[0],
                           x_offset:x_offset + album_cover.shape[1]] = album_cover

                    x_pos = source.shape[1] - 450
                    y_pos = 50
                    line_height = 30
                    osd.append((track_name, (x_pos, y_pos)))
                    osd.append((artist_name, (x_pos, y_pos + line_height)))
                    osd.append(("Logged as: " + me['display_name'], (x_pos, y_pos + 2 * line_height)))

            source = drawtext(source, osd)
            cv2.imshow('source', source)

            # Get key code if pressed (-1 when no key was pressed)
            key = cv2.waitKey(1)
            if key == 27:  # esc
                break
            if key == 32:  # space: re-capture the background
                background = frame

            # Save the latest mask on any other key press
            if CREATE_DATA_SET and mask is not None and key != -1 and key != 32:
                date_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
                lbl = cv2.getTrackbarPos('Label', 'source')
                filename = '{}_{}.png'.format(lbl, date_time)
                path = os.path.join(curr_dir, 'dataset', filename)
                cv2.imwrite(path, mask)
                photo_counter += 1
                print(f"Frame saved! nr {photo_counter} {path}")
    finally:
        capture.release()
        cv2.destroyAllWindows()