Skip to content

Commit

Permalink
Add IBM Watson as a STT option
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjreed committed Apr 5, 2016
1 parent e6789d7 commit a061766
Showing 1 changed file with 145 additions and 0 deletions.
145 changes: 145 additions & 0 deletions client/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,151 @@ def is_available(cls):
return diagnose.check_network_connection()


class WatsonSTT(AbstractSTTEngine):
    """
    Speech-To-Text implementation which relies on the IBM Watson
    Speech-To-Text API. This requires an IBM Bluemix account, but the first
    1000 minutes of transcribing per month are free.

    To obtain a login:

    1. Register for IBM Bluemix here:
       https://console.ng.bluemix.net/registration/
    2. Once you've logged in, click the "Use Services & APIs" link on the
       dashboard
    3. Click the "Speech To Text" icon
    4. In the form on the right, leave all options as defaults and click
       Create
    5. You'll now have a new service listed on your dashboard. If you click
       that service there will be a navigation option for "Service
       Credentials" in the left hand nav. Find your username and password
       there.

    Excerpt from sample profile.yml:

        ...
        timezone: US/Pacific
        stt_engine: watson
        watson:
            username: $YOUR_USERNAME_HERE
            password: $YOUR_PASSWORD_HERE
    """

    SLUG = 'watson'

    # Endpoint that transcribe() posts the audio data to.
    _REQUEST_URL = ('https://stream.watsonplatform.net/' +
                    'speech-to-text/api/v1/recognize?continuous=true')

    def __init__(self, username=None, password=None, language='en-us'):
        # FIXME: get init args from config
        """
        Arguments:
        username -- the watson api username credential
        password -- the watson api password credential
        language -- currently unused; it is not sent with the request
        """
        self._logger = logging.getLogger(__name__)
        # Backing field for the request_url property; without this
        # assignment reading the property raises AttributeError.
        self._request_url = self._REQUEST_URL
        self._username = None
        self._password = None
        self._http = requests.Session()
        self.username = username
        self.password = password

    @property
    def request_url(self):
        """The URL that transcription requests are posted to."""
        return self._request_url

    @property
    def username(self):
        """The watson api username credential."""
        return self._username

    @username.setter
    def username(self, value):
        self._username = value

    @property
    def password(self):
        """The watson api password credential."""
        return self._password

    @password.setter
    def password(self, value):
        self._password = value

    @classmethod
    def get_config(cls):
        """
        Reads the 'username' and 'password' entries of the 'watson' section
        of profile.yml.

        Returns:
        A dict that contains only those of the two keys which are present
        in the profile; empty if the file or the section is missing.
        """
        # FIXME: Replace this as soon as we have a config module
        config = {}
        profile_path = jasperpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
            # An empty profile.yml loads as None and a bare `watson:` key
            # maps to None, so check the types before indexing.
            watson = None
            if isinstance(profile, dict):
                watson = profile.get('watson')
            if isinstance(watson, dict):
                for key in ('username', 'password'):
                    if key in watson:
                        config[key] = watson[key]
        return config

    def transcribe(self, fp):
        """
        Performs STT via the Watson Speech-to-Text API, transcribing an audio
        file and returning the transcription alternatives.

        Arguments:
        fp -- a file object (opened in binary mode) of the .wav file to be
              transcribed; it is passed to wave.open() and then read

        Returns:
        A tuple of stripped, uppercased transcript strings on success. An
        empty list if credentials are missing, the request fails, or the
        response is empty or cannot be parsed.
        """

        if not self.username:
            self._logger.critical('Username missing, transcription request ' +
                                  'aborted.')
            return []
        if not self.password:
            self._logger.critical('Password missing, transcription ' +
                                  'request aborted.')
            return []

        wav = wave.open(fp, 'rb')
        try:
            frame_rate = wav.getframerate()
        finally:
            wav.close()
        # fp is read from wherever wave.open() left the file position.
        data = fp.read()

        headers = {'content-type':
                   'audio/l16; rate=%s; channels=1' % frame_rate}
        try:
            r = self._http.post(self.request_url, data=data, headers=headers,
                                auth=(self.username, self.password))
        except requests.exceptions.RequestException:
            # Connection errors, timeouts etc. are reported the same way as
            # every other failure in this method: log and return nothing.
            self._logger.critical('Request failed.', exc_info=True)
            return []

        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with http status %d',
                                  r.status_code)
            if r.status_code == requests.codes['forbidden']:
                self._logger.warning('Status 403 is probably caused by ' +
                                     'invalid credentials.')
            return []

        r.encoding = 'utf-8'
        try:
            response = r.json()
            if not response['results']:
                # Response result is empty
                raise ValueError('Nothing has been transcribed.')
            # Only the alternatives of the first result are used.
            results = [alt['transcript'] for alt
                       in response['results'][0]['alternatives']]
        except ValueError as e:
            # Raised above for an empty result list, or by r.json() when
            # the body is not valid JSON.
            self._logger.warning('Empty response: %s', e)
            results = []
        except (KeyError, IndexError, TypeError):
            self._logger.warning('Cannot parse response.', exc_info=True)
            results = []
        else:
            # Convert all results to uppercase
            results = tuple(result.strip().upper() for result in results)
            self._logger.info('Transcribed: %r', results)
        return results

    @classmethod
    def is_available(cls):
        """Returns the result of diagnose.check_network_connection()."""
        return diagnose.check_network_connection()


class AttSTT(AbstractSTTEngine):
"""
Speech-To-Text implementation which relies on the AT&T Speech API.
Expand Down

0 comments on commit a061766

Please sign in to comment.