Skip to content

Commit 334b861

Browse files
Merge pull request #31 from Mohamed00/viavoice
ViaVoice
2 parents f77edc6 + 16efc4e commit 334b861

File tree

2 files changed

+57
-12
lines changed

2 files changed

+57
-12
lines changed

addon/synthDrivers/_ibmeci.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ class ECIMessage:
4444

4545
class ECICallbackReturn:
4646
eciDataNotProcessed, eciDataProcessed, eciDataAbort= range(3)
47+
isIBM=False
4748

4849
# constants
4950
samples=3300
@@ -70,7 +71,11 @@ class ECICallbackReturn:
7071
'deu': (262144, _('German'), 'de_DE', 'de'),
7172
'ita': (327680, _('Italian'), 'it_IT', 'it'),
7273
'enu': (65536, _('American English'), 'en_US', 'en'),
73-
'eng': (65537, _('British English'), 'en_UK', '')
74+
'eng': (65537, _('British English'), 'en_UK', ''),
75+
'swe': (917504, _('Swedish'), 'sv_SE', 'sv'),
76+
'nor': (851968, _('Norwegian'), 'nb_NO', 'nb'),
77+
'dan': (983040, _('Danish'), 'da_DK', 'da'),
78+
'ctt': (720897, _('Hong Kong Cantonese'), 'yue', '')
7479
}
7580

7681
audioStream = BytesIO()
@@ -104,7 +109,7 @@ class ECICallbackReturn:
104109

105110
class EciThread(threading.Thread):
106111
def run(self):
107-
global vparams, params, speaking, endMarkersCount
112+
global vparams, params, speaking, endMarkersCount, isIBM
108113
global eciThreadId, dll, handle
109114
eciThreadId = windll.kernel32.GetCurrentThreadId()
110115
msg = wintypes.MSG()
@@ -119,6 +124,11 @@ def run(self):
119124
if v is not None:
120125
dictHandles[v[0]]=v[1]
121126
dll.eciSetDict(handle,v[1])
127+
version=eciVersion()
128+
if version>'6.4':
129+
isIBM=True
130+
else:
131+
isIBM=False
122132
started.set()
123133
while True:
124134
user32.GetMessageA(byref(msg), 0, 0, 0)
@@ -178,10 +188,10 @@ def eciCheck():
178188
global ttsPath, dllName, dll
179189
dllName = config.conf.profiles[0]['ibmeci']['dllName']
180190
ttsPath = config.conf.profiles[0]['ibmeci']['TTSPath']
181-
if path.exists(path.abspath(path.join(path.abspath(path.dirname(__file__)), 'ibmtts'))): ttsPath='ibmtts'
191+
# if path.exists(path.abspath(path.join(path.abspath(path.dirname(__file__)), 'ibmtts'))): ttsPath='ibmtts'
182192
if not path.isabs(ttsPath):
183193
ttsPath = path.abspath(path.join(path.abspath(path.dirname(__file__)), ttsPath))
184-
if path.exists(ttsPath): iniCheck()
194+
if path.exists(ttsPath) and not isIBM: iniCheck()
185195
if not path.exists(ttsPath): return False
186196
if dll: return True
187197
try:
@@ -387,7 +397,7 @@ def process():
387397
def eciVersion():
388398
ptr=b" "
389399
dll.eciVersion(ptr)
390-
return ptr
400+
return ptr.decode('ascii')
391401

392402
def getVoiceByLanguage(lang):
393403
""" Return the voice corresponding to the given language

addon/synthDrivers/ibmeci.py

Lines changed: 42 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from logHandler import log
1111
from synthDrivers import _ibmeci
1212
from synthDrivers._ibmeci import ECIVoiceParam
13+
from synthDrivers._ibmeci import isIBM
1314
import addonHandler
1415
addonHandler.initTranslation()
1516

@@ -27,7 +28,7 @@ def notify (cls, synth=None, index=None): pass
2728
def unicode(s): return s
2829

2930
minRate=40
30-
maxRate=150
31+
maxRate=156
3132
punctuation = b"-,.:;)(?!\x96\x97"
3233
pause_re = re.compile(br'([a-zA-Z0-9]|\s)([%s])(\2*?)(\s|[\\/]|$)' %punctuation)
3334
time_re = re.compile(br"(\d):(\d+):(\d+)")
@@ -55,16 +56,37 @@ def unicode(s): return s
5556
re.compile(br"\b(\d+|\W+)?(\w+\_+)?(\_+)?([bcdfghjklmnpqrstvwxz]+)?(\d+)?t+z[s]che", re.I): br"\1 \2 \3 \4 \5 tz sche",
5657
re.compile(br"\b(juar[aeou]s)([aeiou]{6,})", re.I): br"\1 \2"
5758
}
58-
59+
english_ibm_fixes = {
60+
#Prevents the synth from spelling out everything if a punctuation mark follows a word.
61+
re.compile(br"([a-z]+)([\x7e\x23\x24\x25\x5e\x2a\x28\x7b\x7c\x5c\x5b\x3c\x25\x95])", re.I): br"\1 \2",
62+
#Don't break phrases like book(s).
63+
re.compile(br"([a-z]+)\s+(\(s\))", re.I): br"\1\2",
64+
#Removes spaces if a string is followed by a punctuation mark, since ViaVoice doesn't tolerate that.
65+
re.compile(br"([a-z]+|\d+|\W+)\s+([\x3a\x2e\x21\x3b\x2c])", re.I): br"\1\2",
66+
re.compile(br"(http://|ftp://)([a-z]+)(\W){1,3}([a-z]+)(/*\W){1,3}([a-z]){1}", re.I): br"\1\2\3\4 \5\6",
67+
re.compile(br"(\d+)([\x2d\x2b\x2a\x5e\x2f])(\d+)(\.)(\d+)(\.)(0{2,})", re.I): br"\1\2\3\4\5\6 \7",
68+
re.compile(br"(\d+)([\x2d\x2b\x2a\x5e\x2f])(\d+)(\.)(\d+)(\.)(0\W)", re.I): br"\1\2\3\4 \5\6\7",
69+
re.compile(br"(\d+)([\x2d\x2b\x2a\x5e\x2f]+)(\d+)([\x2d\x2b\x2a\x5e\x2f]+)([\x2c\x2e+])(0{2,})", re.I): br"\1\2\3\4\5 \6",
70+
re.compile(br"(\d+)(\.+)(\d+)(\.+)(0{2,})\s*\.*([\x2d\x2b\x2a\x5e\x2f])", re.I): br"\1\2\3\4 \5\6",
71+
re.compile(br"(\d+)\s*([\x2d\x2b\x2a\x5e\x2f])\s*(\d+)(,)(0{2,})", re.I): br"\1\2\3\4 \5",
72+
}
5973
spanish_fixes = {
6074
# Euros
6175
re.compile(b'([\x80$]\\d{1,3})((\\s\\d{3})+\\.\\d{2})'): r'\1 \2',
6276
}
77+
spanish_ibm_fixes = {
78+
#ViaVoice's time parser is slightly broken in Spanish, and will crash if the minute part goes from 20 to 59.
79+
#For these times, convert the periods to colons.
80+
re.compile(br'([0-2][0-4])\.([2-5][0-9])\.([0-5][0-9])'): br'\1:\2:\3',
81+
}
6382
german_fixes = {
6483
# Crash words.
6584
re.compile(br'dane-ben', re.I): br'dane- ben',
6685
re.compile(br'dage-gen', re.I): br'dage- gen',
6786
}
87+
portuguese_ibm_fixes = {
88+
re.compile(br'(\d{1,2}):(00):(\d{1,2})'): br'\1:\2 \3',
89+
}
6890

6991
# fixme: These are only the variant names for enu. Does ECI have a way to obtain names for other languages?
7092
variants = {
@@ -101,10 +123,17 @@ def unicode(s): return s
101123
"pt_PT":b"`l7.1",
102124
"ja":b"`l8",
103125
"ja_JP":b"`l8.0",
126+
"fi":b"`l9",
127+
"fi_FI":b"`l9.0",
104128
"ko":b"`l10",
105129
"ko_KR":b"`l10.0",
106-
"fi":b"`l9",
107-
"fi_FI":b"`l9.0"
130+
"yue":b"`l11.1",
131+
"nb":b"`l13",
132+
"nb_NO":b"`l13.0",
133+
"sv":b"`l14",
134+
"sv_SE":b"`l14.0",
135+
"da":b"`l15",
136+
"da_DK":b"`l15.0"
108137
}
109138

110139
class SynthDriver(synthDriverHandler.SynthDriver):
@@ -234,17 +263,22 @@ def processText(self,text):
234263
#this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
235264
text = text.encode(self.currentEncoding, 'replace') # special unicode symbols may encode to backquote. For this reason, backquote processing is after this.
236265
text = text.rstrip()
237-
if _ibmeci.params[9] in (65536, 65537, 393216, 655360): text = resub(english_fixes, text) #Applies to Chinese and Korean as they can read English text and thus inherit the English bugs.
238-
if _ibmeci.params[9] in (131072, 131073): text = resub(spanish_fixes, text)
266+
if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897): text = resub(english_fixes, text) #Applies to all languages with dual language support.
267+
if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897) and _ibmeci.isIBM: text = resub(english_ibm_fixes, text)
268+
if _ibmeci.params[9] in (131072, 131073) and not _ibmeci.isIBM: text = resub(spanish_fixes, text)
269+
if _ibmeci.params[9] in ('esp', 131072) and _ibmeci.isIBM: text = resub(spanish_ibm_fixes, text)
239270
if _ibmeci.params[9] in (196609, 196608):
240271
text = text.replace(br'quil', br'qil') #Sometimes this string make everything buggy with IBMTTS in French
241272
if _ibmeci.params[9] in ('deu', 262144):
242273
text = resub(german_fixes, text)
274+
if _ibmeci.params[9] in ('ptb', 458752) and _ibmeci.isIBM:
275+
text = resub(portuguese_ibm_fixes, text)
243276
if not self._backquoteVoiceTags:
244277
text=text.replace(b'`', b' ') # no embedded commands
245278
if self._shortpause:
246279
text = pause_re.sub(br'\1 `p1\2\3\4', text) # this enforces short, JAWS-like pauses.
247-
text = time_re.sub(br'\1:\2 \3', text) # apparently if this isn't done strings like 2:30:15 will only announce 2:30
280+
if not _ibmeci.isIBM:
281+
text = time_re.sub(br'\1:\2 \3', text) # apparently if this isn't done strings like 2:30:15 will only announce 2:30
248282
embeds=b''
249283
if self._ABRDICT:
250284
embeds+=b"`da1 "
@@ -389,6 +423,7 @@ def updateEncoding(self, lang): # lang must be a number asociated with IBMTTS la
389423
elif lang == 524288: self.currentEncoding = "cp932"
390424
# korean
391425
elif lang == 655360: self.currentEncoding = "cp949"
426+
elif lang == 720897: self.currentEncoding = "big5"
392427
else: self.currentEncoding = "mbcs"
393428

394429
def _get_lastIndex(self):

0 commit comments

Comments
 (0)