Skip to content

Commit cb7ae97

Browse files
committed
Changes done for v2.3.2.
1 parent f17e1e3 commit cb7ae97

File tree

14 files changed

+214
-24
lines changed

14 files changed

+214
-24
lines changed

com.ibm.streamsx.sttgateway/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changes
22

3+
## v2.3.2
4+
* Jan/10/2022
5+
* Fixed a problem where the call start date time values were not always correctly included in the STT result.
6+
* Added these three new parameters to the WatsonSTT operator: speechDetectorSensitivity, backgroundAudioSuppression, characterInsertionBias
7+
38
## v2.3.1
49
* Sep/20/2021
510
* Dynamically change the maximum concurrent calls allowed value.

com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT.xml

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@
510510

511511
<parameter>
512512
<name>customizationWeight</name>
513-
<description>This parameter specifies a relative weight for a custom language model as a float64 between 0.0 to 1.0 (Default is 0.0)</description>
513+
<description>This parameter specifies a relative weight for a custom language model as a float64 from 0.0 to 1.0 (Default is 0.0)</description>
514514
<optional>true</optional>
515515
<rewriteAllowed>true</rewriteAllowed>
516516
<expressionMode>AttributeFree</expressionMode>
@@ -666,6 +666,36 @@
666666
<cardinality>1</cardinality>
667667
</parameter>
668668

669+
<parameter>
670+
<name>speechDetectorSensitivity</name>
671+
<description>This parameter specifies a float64 value from 0.0 to 1.0 to adjust the sensitivity of speech activity detection (Default is 0.5)</description>
672+
<optional>true</optional>
673+
<rewriteAllowed>true</rewriteAllowed>
674+
<expressionMode>AttributeFree</expressionMode>
675+
<type>float64</type>
676+
<cardinality>1</cardinality>
677+
</parameter>
678+
679+
<parameter>
680+
<name>backgroundAudioSuppression</name>
681+
<description>This parameter specifies a float64 value from 0.0 to 1.0 to suppress side conversations or background noise (Default is 0.0)</description>
682+
<optional>true</optional>
683+
<rewriteAllowed>true</rewriteAllowed>
684+
<expressionMode>AttributeFree</expressionMode>
685+
<type>float64</type>
686+
<cardinality>1</cardinality>
687+
</parameter>
688+
689+
<parameter>
690+
<name>characterInsertionBias</name>
691+
<description>This parameter specifies a float64 value from -0.5 to 1.0 to change how prone the STT engine is to insert more transcribed characters (Default is 0.0)</description>
692+
<optional>true</optional>
693+
<rewriteAllowed>true</rewriteAllowed>
694+
<expressionMode>AttributeFree</expressionMode>
695+
<type>float64</type>
696+
<cardinality>1</cardinality>
697+
</parameter>
698+
669699
</parameters>
670700
<inputPorts>
671701
<inputPortSet>

com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT_cpp.cgt

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/*
99
============================================================
1010
First created on: Jul/01/2018
11-
Last modified on: Sep/12/2021
11+
Last modified on: Jan/04/2022
1212

1313
Please refer to the sttgateway-tech-brief.txt file in the
1414
top-level directory of this toolkit to read about
@@ -459,6 +459,18 @@ https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-websocke
459459

460460
my $sttLiveMetricsUpdateNeeded = $model->getParameterByName("sttLiveMetricsUpdateNeeded");
461461
$sttLiveMetricsUpdateNeeded = $sttLiveMetricsUpdateNeeded ? $sttLiveMetricsUpdateNeeded->getValueAt(0)->getCppExpression() : 1;
462+
463+
my $speechDetectorSensitivity = $model->getParameterByName("speechDetectorSensitivity");
464+
# Default: 0.5
465+
$speechDetectorSensitivity = $speechDetectorSensitivity ? $speechDetectorSensitivity->getValueAt(0)->getCppExpression() : 0.5;
466+
467+
my $backgroundAudioSuppression = $model->getParameterByName("backgroundAudioSuppression");
468+
# Default: 0.0
469+
$backgroundAudioSuppression = $backgroundAudioSuppression ? $backgroundAudioSuppression->getValueAt(0)->getCppExpression() : 0.0;
470+
471+
my $characterInsertionBias = $model->getParameterByName("characterInsertionBias");
472+
# Default: 0.0
473+
$characterInsertionBias = $characterInsertionBias ? $characterInsertionBias->getValueAt(0)->getCppExpression() : 0.0;
462474
%>
463475

464476
#include <type_traits>
@@ -501,7 +513,10 @@ MY_OPERATOR::MY_OPERATOR()
501513
<%=$redactionNeeded%>,
502514
<%=$keywordsSpottingThreshold%>,
503515
<%=$keywordsToBeSpotted%>,
504-
<%=$isTranscriptionCompletedRequested%>
516+
<%=$isTranscriptionCompletedRequested%>,
517+
<%=$speechDetectorSensitivity%>,
518+
<%=$backgroundAudioSuppression%>,
519+
<%=$characterInsertionBias%>
505520
}
506521
)
507522
{}

com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ struct WatsonSTTConfig {
4747
SPL::float64 keywordsSpottingThreshold;
4848
const SPL::list<SPL::rstring> keywordsToBeSpotted;
4949
const bool isTranscriptionCompletedRequested;
50+
SPL::float64 speechDetectorSensitivity;
51+
SPL::float64 backgroundAudioSuppression;
52+
SPL::float64 characterInsertionBias;
5053

5154
// Some definitions
5255
//This time becomes effective, when the connectionAttemptsThreshold limit is exceeded

com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,18 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
232232
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::keywordsSpottingThreshold, "keywordsSpottingThreshold"));
233233
}
234234

235+
if (Conf::speechDetectorSensitivity < 0.0 || Conf::speechDetectorSensitivity > 1.0) {
236+
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::speechDetectorSensitivity, "speechDetectorSensitivity"));
237+
}
238+
239+
if (Conf::backgroundAudioSuppression < 0.0 || Conf::backgroundAudioSuppression > 1.0) {
240+
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::backgroundAudioSuppression, "backgroundAudioSuppression"));
241+
}
242+
243+
if (Conf::characterInsertionBias < -0.5 || Conf::characterInsertionBias > 1.0) {
244+
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::characterInsertionBias, "characterInsertionBias"));
245+
}
246+
235247
// If the keywords to be spotted list is empty, then disable keywords_spotting.
236248
if (Conf::keywordsToBeSpotted.size() == 0) {
237249
Conf::keywordsSpottingThreshold = 0.0;
@@ -246,9 +258,9 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
246258
}
247259

248260
// The parameters maxUtteranceAlternatives, wordAlternativesThreshold, keywordsSpottingThreshold, keywordsToBeSpotted
249-
// are not available in sttResultMose complete
261+
// are not available in sttResultMode complete
250262
// The COF getUtteranceNumber, isFinalizedUtterance, getConfidence, getUtteranceAlternatives
251-
// are not available in sttResultMose complete
263+
// are not available in sttResultMode complete
252264

253265
// Update the operator metric.
254266
sttOutputResultModeMetric->setValueNoLock(Conf::sttOutputResultMode);
@@ -284,6 +296,9 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
284296
<< "\nkeywordsSpottingThreshold = " << Conf::keywordsSpottingThreshold
285297
<< "\nkeywordsToBeSpotted = " << Conf::keywordsToBeSpotted
286298
<< "\nisTranscriptionCompletedRequested = " << Conf::isTranscriptionCompletedRequested
299+
<< "\nspeechDetectorSensitivity = " << Conf::speechDetectorSensitivity
300+
<< "\nbackgroundAudioSuppression = " << Conf::backgroundAudioSuppression
301+
<< "\ncharacterInsertionBias = " << Conf::characterInsertionBias
287302
<< "\nconnectionState.wsState.is_lock_free() = " << Rec::wsState.is_lock_free()
288303
<< "\nrecentOTuple.is_lock_free() = " << Rec::recentOTuple.is_lock_free()
289304
<< "\n----------------------------------------------------------------" << std::endl;

com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,20 @@ void WatsonSTTImplReceiver<OP, OT>::on_open(client* c, websocketpp::connection_h
578578
msg += ", \"redaction\" : true";
579579
}
580580

581+
// https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
582+
if (speechDetectorSensitivity >= 0.0) {
583+
msg += ", \"speech_detector_sensitivity\" : " + boost::to_string(speechDetectorSensitivity);
584+
}
585+
586+
// https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
587+
if (backgroundAudioSuppression >= 0.0) {
588+
msg += ", \"background_audio_suppression\" : " + boost::to_string(backgroundAudioSuppression);
589+
}
590+
591+
// Add character_insertion_bias to the start message. The WatsonSTT operator
// documents and validates this parameter as ranging from -0.5 to 1.0, so the
// guard uses that lower bound (-0.5), in the same way the neighboring
// speech_detector_sensitivity and background_audio_suppression guards use
// the lower bound (0.0) of their own ranges.
if (characterInsertionBias >= -0.5) {
592+
msg += ", \"character_insertion_bias\" : " + boost::to_string(characterInsertionBias);
593+
}
594+
581595
// https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#keyword_spotting
582596
if (keywordsSpottingThreshold > 0.0) {
583597
msg += ", \"keywords_threshold\" : " + boost::to_string(keywordsSpottingThreshold);

com.ibm.streamsx.sttgateway/info.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
**Note:** This toolkit requires c++11 support.
1616
</description>
17-
<version>2.3.1</version>
17+
<version>2.3.2</version>
1818
<requiredProductVersion>4.2.1.6</requiredProductVersion>
1919
</identity>
2020
<dependencies>

samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
/*
22
==============================================
33
# Licensed Materials - Property of IBM
4-
# Copyright IBM Corp. 2018, 2021
4+
# Copyright IBM Corp. 2018, 2022
55
==============================================
66
*/
77

88
/*
99
==============================================
1010
First created on: Nov/27/2020
11-
Last modified on: Sep/20/2021
11+
Last modified on: Jan/08/2022
1212

1313
IMPORTANT NOTE
1414
--------------
@@ -846,6 +846,10 @@ public composite VgwDataRouterToWatsonSTT {
846846
// After getting released, such UDP channels will become available for
847847
// doing speech to text work for any new voice calls.
848848
mutable map<rstring, int32> _vgwSessionToCompletedUdpChannelMap = {};
849+
// This map tells us the call start date time string for a given vgwSessionId.
850+
mutable map<rstring, rstring> _vgwSessionToCallStartDateTime = {};
851+
// This map tells us the call start time in epoch seconds for a given vgwSessionId.
852+
mutable map<rstring, int64> _vgwSessionToCallStartTimeInEpochSeconds = {};
849853
mutable BinarySpeech_t _oTuple = {};
850854
mutable rstring _key = "";
851855
}
@@ -994,6 +998,10 @@ public composite VgwDataRouterToWatsonSTT {
994998
if(BSD.callStartTimeInEpochSeconds == 0l) {
995999
BSD.callStartTimeInEpochSeconds = getSeconds(getTimestamp());
9961000
}
1001+
1002+
// Insert the call start date time values in the state variables.
1003+
insertM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId, BSD.callStartDateTime);
1004+
insertM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId, BSD.callStartTimeInEpochSeconds);
9971005

9981006
rstring socsFileName = dataDirectory() + "/" +
9991007
BSD.vgwSessionId + "-call-started.txt";
@@ -1055,6 +1063,12 @@ public composite VgwDataRouterToWatsonSTT {
10551063
(rstring)BSD.totalSpeechDataBytesReceived +
10561064
", speechEngineId=" + (rstring)BSD.speechEngineId +
10571065
", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId);
1066+
// Set the call start date time values to the tuple attributes.
1067+
if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
1068+
BSD.callStartDateTime = _vgwSessionToCallStartDateTime[BSD.vgwSessionId];
1069+
BSD.callStartTimeInEpochSeconds = _vgwSessionToCallStartTimeInEpochSeconds[BSD.vgwSessionId];
1070+
}
1071+
10581072
// Submit this tuple.
10591073
submit(BSD, BSDF);
10601074
} else {
@@ -1209,6 +1223,12 @@ public composite VgwDataRouterToWatsonSTT {
12091223
// We are done. Remove it from the map as well.
12101224
removeM(_vgwSessionToCompletedUdpChannelMap, key2);
12111225
}
1226+
1227+
// Remove the call start date time values from the state variables.
1228+
if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
1229+
removeM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId);
1230+
removeM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId);
1231+
}
12121232

12131233
// At this time, the voice call for this VGW session id has ended.
12141234
// We can now write an "End of Call" indicator file in the
@@ -1353,6 +1373,15 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
13531373
(boolean)getSubmissionTimeValue("smartFormattingNeeded", "false");
13541374
expression<boolean> $redactionNeeded :
13551375
(boolean)getSubmissionTimeValue("redactionNeeded", "false");
1376+
// Allowed value range for this is from 0.0 to 1.0.
1377+
expression<float64> $speechDetectorSensitivity :
1378+
(float64)getSubmissionTimeValue("speechDetectorSensitivity", "0.5");
1379+
// Allowed value range for this is from 0.0 to 1.0.
1380+
expression<float64> $backgroundAudioSuppression :
1381+
(float64)getSubmissionTimeValue("backgroundAudioSuppression", "0.0");
1382+
// Allowed value range for this is from -0.5 to 1.0.
1383+
expression<float64> $characterInsertionBias :
1384+
(float64)getSubmissionTimeValue("characterInsertionBias", "0.0");
13561385
expression<float64> $keywordsSpottingThreshold :
13571386
(float64)getSubmissionTimeValue("keywordsSpottingThreshold", "0.0");
13581387
expression<list<rstring>> $keywordsToBeSpotted :
@@ -1466,6 +1495,9 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
14661495
wordAlternativesThreshold: $wordAlternativesThreshold;
14671496
smartFormattingNeeded: $smartFormattingNeeded;
14681497
redactionNeeded: $redactionNeeded;
1498+
speechDetectorSensitivity: $speechDetectorSensitivity;
1499+
backgroundAudioSuppression: $backgroundAudioSuppression;
1500+
characterInsertionBias: $characterInsertionBias;
14691501
keywordsSpottingThreshold: $keywordsSpottingThreshold;
14701502
keywordsToBeSpotted: $keywordsToBeSpotted;
14711503
websocketLoggingNeeded: $sttWebsocketLoggingNeeded;
@@ -1685,11 +1717,16 @@ public composite STTResultProcessor(input MyTranscriptionResult, BinarySpeechDat
16851717
MTR.callStartTimeInEpochSeconds = _callStartTimeInEpochSeconds;
16861718
}
16871719

1688-
// If the user opted for include the time of the
1720+
// If the user opted to include the
16891721
// utterance result reception time, let us add it to
16901722
// the transcription result.
16911723
if($includeUtteranceResultReceptionTime == true) {
1724+
// This is the current time expressed in ctime format.
16921725
MTR.utteranceResultReceptionTime = ctime(getTimestamp());
1726+
// We will also compute the utterance reception time expressed
1727+
// in seconds elapsed since the start of the call.
1728+
MTR.utteranceRxTime =
1729+
getSeconds(getTimestamp()) - MTR.callStartTimeInEpochSeconds;
16931730
}
16941731

16951732
// We will write the transcription results to

samples/VgwDataRouterToWatsonSTT/info.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
<info:identity>
55
<info:name>VgwDataRouterToWatsonSTT</info:name>
66
<info:description>Example that showcases STT on Cloud and STT on CP4D</info:description>
7-
<info:version>1.0.4</info:version>
7+
<info:version>1.0.5</info:version>
88
<info:requiredProductVersion>4.2.1.6</info:requiredProductVersion>
99
</info:identity>
1010
<info:dependencies>
1111
<info:toolkit>
1212
<common:name>com.ibm.streamsx.sttgateway</common:name>
13-
<common:version>[2.3.1,7.0.0]</common:version>
13+
<common:version>[2.3.2,7.0.0]</common:version>
1414
</info:toolkit>
1515
<info:toolkit>
1616
<common:name>com.ibm.streamsx.json</common:name>

0 commit comments

Comments
 (0)