-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstream.py
112 lines (95 loc) · 3.35 KB
/
stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from threading import Thread
import time
import numpy as np
import cv2
import cv2.aruco as aruco
from dotenv import load_dotenv
import os
import keras_ocr
from google.auth.credentials import Credentials
from google.cloud import texttospeech
import threading
import os.path
from playsound import playsound
from imutils.video import WebcamVideoStream
import imutils
from fps import FPS
def findArucoMarkers(img, dict_name='DICT_4X4_50'):
    """Detect ArUco markers in *img* and draw their outlines in place.

    Args:
        img: BGR frame; modified in place (detected marker outlines drawn).
        dict_name: name of the ArUco dictionary attribute on ``cv2.aruco``.
            Defaults to the previously hard-coded ``DICT_4X4_50``, so
            existing callers are unaffected.

    Returns:
        True if at least one marker was detected, else False.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Bug fix: the original used getattr(aruco, f'DICT_4X4_50') — an f-string
    # with no placeholder. getattr is kept only because the dictionary is now
    # selectable by name.
    key = getattr(aruco, dict_name)
    arucoDict = aruco.Dictionary_get(key)
    arucoParam = aruco.DetectorParameters_create()
    bboxs, ids, rejected = aruco.detectMarkers(gray, arucoDict, parameters=arucoParam)
    aruco.drawDetectedMarkers(img, bboxs)
    return ids is not None
def do_predictions(pipeline, shrunk):
    """Run OCR on *shrunk*, annotate it in place, and return the first word.

    Each recognized group's text is drawn 30 px above its bounding box's
    first corner, and the box polygon is outlined on the frame.

    Args:
        pipeline: keras_ocr pipeline exposing ``.recognize([image])``.
        shrunk: BGR image array, annotated in place.

    Returns:
        The text of the first recognized group, or None if nothing was found.
    """
    box_color = (255, 0, 0)
    line_width = 4
    detections = pipeline.recognize([shrunk])[0]
    for detection in detections:
        word = detection[0]
        box = detection[1]
        anchor = np.int32(box[0])
        cv2.putText(
            shrunk, word, (anchor[0], anchor[1] - 30),
            cv2.FONT_HERSHEY_SIMPLEX, 1, box_color, line_width,
        )
        cv2.polylines(
            shrunk, np.int32([box]), isClosed=True,
            color=box_color, thickness=line_width,
        )
    if not detections:
        return None
    return detections[0][0]
def textToSpeech(text):
    """Speak *text* letter-by-letter, caching synthesized MP3s on disk.

    Checks ``./cached/<text>.mp3`` first; on a miss, calls Google Cloud
    Text-to-Speech (credentials from ``key.json``) to synthesize a phrase
    like ``c-a-t spells "cat"`` and writes the MP3 to the cache. Finally
    plays the cached or freshly fetched file.

    Args:
        text: the word to spell out loud.
            NOTE(review): used verbatim as a filename — a word containing a
            path separator would escape the cache directory; confirm inputs
            are plain OCR words before hardening further.
    """
    path = f'./cached/{text}.mp3'
    # Bug fix: ensure the cache directory exists — the original crashed with
    # FileNotFoundError on the first synthesis if ./cached was absent.
    os.makedirs('./cached', exist_ok=True)
    if not os.path.isfile(path):
        client = texttospeech.TextToSpeechClient.from_service_account_json('key.json')
        # str.join over the string itself spells the word: "cat" -> "c-a-t".
        hyphenated = "-".join(text)
        synthesis_input = texttospeech.SynthesisInput(text=f'{hyphenated} spells "{text}"')
        voice = texttospeech.VoiceSelectionParams(
            language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
        )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        )
        response = client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=audio_config
        )
        with open(path, "wb") as out:
            out.write(response.audio_content)
        print(f'Fetched Audio content written to file "{path}"')
    else:
        print(f'using cache for {text}')
    playsound(path)
# --- Main loop: stream frames, watch for ArUco markers, OCR when they vanish ---

# Bug fix: load_dotenv was imported but never called, so a STREAM_URL set in a
# .env file never reached os.environ and capture_url silently became None.
load_dotenv()
capture_url = os.environ.get("STREAM_URL")
print(capture_url)
cam = WebcamVideoStream(capture_url).start()
pipeline = keras_ocr.pipeline.Pipeline(max_size=1000)
fps = FPS(5).start()
# Latch: once a marker-free frame has been OCR'd and spoken, stay quiet until a
# marker is seen again (re-arms the latch).
alreadyPredicted = False
try:
    while True:
        frame = cam.read()
        shrunk = imutils.resize(frame, 1000)
        arucoFound = findArucoMarkers(shrunk)
        if arucoFound:
            alreadyPredicted = False
        if not arucoFound and not alreadyPredicted:
            text = do_predictions(pipeline, shrunk)
            if text is not None:
                # Speak in a background thread so the capture loop keeps running.
                th = threading.Thread(target=textToSpeech, args=(text,))
                th.start()
                alreadyPredicted = True
        cv2.imshow('frame', shrunk)
        fps.updateAndPrintAndReset()
        if cv2.waitKey(1) == ord('q'):
            break
except Exception as exc:
    print(f'Exception: {exc}')
finally:
    # Robustness: cleanup now runs in a finally block, so the camera thread and
    # stream are released even on KeyboardInterrupt, not just 'q' or Exception.
    cam.stop()
    cam.stream.release()
    cv2.destroyAllWindows()