-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorganizer.py
81 lines (64 loc) · 2.05 KB
/
organizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import time
import os
import pickle
# Set the service-account key file location in the environment at import
# time, before any Speech client is constructed below.
# NOTE(review): the path is relative, so it is resolved against the current
# working directory of the process - confirm the script is always started
# from the project root.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./TakeNote-89a084a7097d.json"
# adapted from GCP GitHub
def transcribe_gcs_with_word_time_offsets(mcontent):
    """Transcribe audio content and return the recognized words with timings.

    Sends ``mcontent`` to Google Cloud Speech through
    ``long_running_recognize`` (FLAC encoding, 48000 Hz, ``en-US``, word time
    offsets enabled) and waits up to 90 seconds for the operation to finish.
    For every result, the transcript, confidence and per-word timings of its
    first alternative are printed.

    Args:
        mcontent: Audio data passed as the ``content`` of the recognition
            request (the caller in this file passes the bytes of a file read
            in binary mode).

    Returns:
        A list of the word-info objects (each exposing ``word``,
        ``start_time`` and ``end_time``) from the first alternative of every
        result, in response order. The list is empty when the response
        contains no results.

    Raises:
        Nothing is caught here: any error raised by the client library,
        including while waiting for the operation, propagates to the caller.
    """
    # Imported at call time, exactly as the original did.
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    client = speech.SpeechClient()
    audio = types.RecognitionAudio(content=mcontent)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=48000,
        language_code='en-US',
        enable_word_time_offsets=True)

    operation = client.long_running_recognize(config, audio)
    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Accumulate across results: returning only one result's words dropped
    # the rest, and referencing the loop variable after an empty loop raised
    # UnboundLocalError when nothing was recognized.
    words = []
    for result in response.results:
        if not result.alternatives:
            continue
        alternative = result.alternatives[0]
        print(u'Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))
        for word_info in alternative.words:
            start_time = word_info.start_time
            end_time = word_info.end_time
            print('Word: {}, start_time: {}, end_time: {}'.format(
                word_info.word,
                start_time.seconds + start_time.nanos * 1e-9,
                end_time.seconds + end_time.nanos * 1e-9))
            words.append(word_info)
    return words
def transcribe():
    """Transcribe every audio chunk and pickle the resulting transcripts.

    Walks ``<cwd>/chunks/flac/``, sends the bytes of each file to
    ``transcribe_gcs_with_word_time_offsets`` and deletes the file once its
    transcription has returned. The words of each file are then joined into
    one string (every word followed by a single space) and the list of these
    strings is pickled to ``<cwd>/pickles/words.pkl``.

    Returns:
        None. The result is the pickle file written to disk.

    Raises:
        FileNotFoundError: If the ``pickles`` directory does not exist under
            the current working directory.
    """
    base_dir = os.getcwd()
    chunk_dir = os.path.join(base_dir, "chunks", "flac")

    info = []
    for dirpath, dirnames, filenames in os.walk(chunk_dir):
        # os.walk yields names in arbitrary order; sort both lists so chunks
        # are processed in a deterministic (lexicographic) order.
        dirnames.sort()
        for filename in sorted(filenames):
            # Join with dirpath, not the root: os.walk descends into
            # subdirectories and root + filename does not exist for those.
            chunk_path = os.path.join(dirpath, filename)
            with open(chunk_path, 'rb') as fd:
                mcontent = fd.read()
            # The file is closed before the (slow) remote call is made.
            info.append(transcribe_gcs_with_word_time_offsets(mcontent))
            # Only reached when transcription succeeded, so a failed chunk
            # stays on disk.
            os.remove(chunk_path)

    # One string per chunk; the trailing space after the last word is kept
    # so the pickled data has the same format as before.
    strings = [
        "".join(word_info.word + " " for word_info in word_infos)
        for word_infos in info
    ]

    # Write through an explicit path instead of chdir-ing into the output
    # directory, so the process working directory is never left changed when
    # writing fails, and the file is always closed.
    pickle_path = os.path.join(base_dir, "pickles", "words.pkl")
    with open(pickle_path, "wb") as outfile:
        pickle.dump(strings, outfile)