Skip to content

Commit 27c7bad

Browse files
asus007charles_moulhaudFabilin
authored and
Morgan Diverrez
committed
#1606 Add Deepl translator module (#1671)
* #1606 Add Deepl translator module * #1606 Add Deepl translator module - first set of corrections * #1606 Add Deepl translator module - first set of corrections * #1606 Add Deepl translator module - second set of corrections * #1606 : Use of TockProxyAuthenticator + use of glossary map ids for all languages * #1606 translator: clean up module * #1606 : Correction of the glossary id name --------- Co-authored-by: charles_moulhaud <charles_moulhaud@connect-tech.sncf> Co-authored-by: Fabilin <fabilin.dev@pm.me>
1 parent 707e3a7 commit 27c7bad

File tree

7 files changed

+376
-0
lines changed

7 files changed

+376
-0
lines changed

translator/deepl-translate/README.md

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# TOCK Deepl Translation
2+
3+
Here are the configurable variables:
4+
5+
- `tock_translator_deepl_target_languages`: set of supported languages - ex : en,es
6+
- `tock_translator_deepl_api_url`: Deepl api url (default pro api url : https://api.deepl.com/v2/translate).
7+
If you have problems with pro api, you can use free api : https://api-free.deepl.com/v2/translate
8+
- `tock_translator_deepl_api_key` : Deepl api key to use (see your account)
9+
- `tock_translator_deepl_glossary_map_ids`: map of glossary identifiers to use in translation, keyed by target language code
10+
11+
> Deepl documentation: https://developers.deepl.com/docs
12+
13+
To integrate the module into a custom Tock Admin, pass the module as a parameter to the `ai.tock.nlp.admin.startAdminServer()` function.
14+
15+
Example:
16+
17+
```kt
18+
package ai.tock.bot.admin
19+
20+
import ai.tock.nlp.admin.startAdminServer
21+
import ai.tock.translator.deepl.deeplTranslatorModule
22+
23+
fun main() {
24+
startAdminServer(deeplTranslatorModule)
25+
}
26+
```
27+
28+
## Http Client Configuration
29+
30+
You can configure the Deepl client, including proxy settings, by passing a custom client to `configureDeeplTranslatorModule`:
31+
32+
```kt
33+
startAdminServer(configureDeeplTranslatorModule(OkHttpDeeplClient {
34+
proxyAuthenticator { _: Route?, response: Response ->
35+
// https://square.github.io/okhttp/3.x/okhttp/index.html?okhttp3/Authenticator.html
36+
if (response.challenges().any { it.scheme.equals("OkHttp-Preemptive", ignoreCase = true) }) {
37+
response.request.newBuilder()
38+
.header("Proxy-Authorization", credential)
39+
.build()
40+
} else {
41+
null
42+
}
43+
}
44+
}))
45+
```

translator/deepl-translate/pom.xml

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Copyright (C) 2017/2021 e-voyageurs technologies
4+
~
5+
~ Licensed under the Apache License, Version 2.0 (the "License");
6+
~ you may not use this file except in compliance with the License.
7+
~ You may obtain a copy of the License at
8+
~
9+
~ http://www.apache.org/licenses/LICENSE-2.0
10+
~
11+
~ Unless required by applicable law or agreed to in writing, software
12+
~ distributed under the License is distributed on an "AS IS" BASIS,
13+
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
~ See the License for the specific language governing permissions and
15+
~ limitations under the License.
16+
-->
17+
18+
<project xmlns="http://maven.apache.org/POM/4.0.0"
19+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
20+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
21+
<modelVersion>4.0.0</modelVersion>
22+
<parent>
23+
<groupId>ai.tock</groupId>
24+
<artifactId>tock-translator</artifactId>
25+
<version>24.3.5-SNAPSHOT</version>
26+
</parent>
27+
28+
<artifactId>tock-deepl-translate</artifactId>
29+
<name>Tock Deepl Translator</name>
30+
<description>Deepl translator implementation</description>
31+
32+
<dependencies>
33+
<dependency>
34+
<groupId>org.apache.commons</groupId>
35+
<artifactId>commons-text</artifactId>
36+
</dependency>
37+
<dependency>
38+
<groupId>ai.tock</groupId>
39+
<artifactId>tock-translator-core</artifactId>
40+
</dependency>
41+
<dependency>
42+
<groupId>com.squareup.okhttp3</groupId>
43+
<artifactId>okhttp</artifactId>
44+
<version>4.12.0</version>
45+
</dependency>
46+
<dependency>
47+
<groupId>com.fasterxml.jackson.core</groupId>
48+
<artifactId>jackson-core</artifactId>
49+
</dependency>
50+
</dependencies>
51+
52+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Copyright (C) 2017/2021 e-voyageurs technologies
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ai.tock.translator.deepl
18+
19+
import ai.tock.shared.TockProxyAuthenticator
20+
import ai.tock.shared.jackson.mapper
21+
import ai.tock.shared.property
22+
import ai.tock.shared.propertyOrNull
23+
import com.fasterxml.jackson.module.kotlin.readValue
24+
import java.io.IOException
25+
import java.util.regex.Pattern
26+
import okhttp3.FormBody
27+
import okhttp3.OkHttpClient
28+
import okhttp3.Request
29+
30+
/**
 * Body of a Deepl translate response, as deserialized from JSON.
 *
 * @property translations the translations returned by the API
 */
internal data class TranslationResponse(val translations: List<Translation>)
33+
34+
/**
 * A single translation entry of a Deepl translate response.
 *
 * @property text the translated text
 */
internal data class Translation(val text: String)
37+
38+
/** Value sent as the `tag_handling` form field of Deepl translate requests. */
const val TAG_HANDLING: String = "xml"
39+
40+
/**
 * Client able to translate a text with Deepl.
 */
interface DeeplClient {
    /**
     * Translates [text] from [sourceLang] to [targetLang].
     *
     * @param text the text to translate
     * @param sourceLang language code of [text]
     * @param targetLang language code of the expected translation
     * @param preserveFormatting value of the `preserve_formatting` option of the translation
     * @param glossaryId identifier of the glossary to use, or null for none
     * @return the translated text, or null when no translation is available
     */
    fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String?
}
49+
50+
/**
 * [DeeplClient] implementation calling the Deepl HTTP API with OkHttp.
 *
 * Placeholders of the form `{:name}` are swapped for a neutral token before the text is sent,
 * then restored in the translated text.
 *
 * @param apiURL url of the translate endpoint (property `tock_translator_deepl_api_url`)
 * @param apiKey Deepl authentication key (property `tock_translator_deepl_api_key`); when null,
 * no request is sent and [translate] returns its input unchanged
 * @param okHttpCustomizer additional configuration applied to the OkHttp builder after
 * [TockProxyAuthenticator] has been installed
 */
class OkHttpDeeplClient(
    private val apiURL: String = property("tock_translator_deepl_api_url", "https://api.deepl.com/v2/translate"),
    private val apiKey: String? = propertyOrNull("tock_translator_deepl_api_key"),
    okHttpCustomizer: OkHttpClient.Builder.() -> Unit = {}
) : DeeplClient {
    private val client = OkHttpClient.Builder()
        .apply(TockProxyAuthenticator::install)
        .apply(okHttpCustomizer)
        .build()

    /**
     * Replaces every `{:name}` placeholder of [text] with the neutral token.
     *
     * @return the text with tokens, and the placeholder names in order of appearance
     */
    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
        val matcher = PLACEHOLDER_PATTERN.matcher(text)

        // Store original placeholder names for later restoration
        val placeholders = mutableListOf<String>()
        while (matcher.find()) {
            placeholders.add(matcher.group(1))
        }

        // replaceAll resets the matcher first, so every occurrence is substituted
        return Pair(matcher.replaceAll(PLACEHOLDER_TOKEN), placeholders)
    }

    /**
     * Puts the original placeholders back into [text], one token at a time.
     *
     * Tokens are restored in order of appearance, so the n-th token receives the n-th name
     * of [placeholders].
     */
    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String =
        placeholders.fold(text) { restored, placeholder ->
            restored.replaceFirst(PLACEHOLDER_TOKEN, "{:$placeholder}")
        }

    /**
     * Translates [text] with the Deepl API.
     *
     * @return [text] unchanged when no api key is configured, otherwise the first translation
     * returned by the API with its placeholders restored, or null when the API returns none
     * @throws IOException when the HTTP call is not successful or the response has no body
     */
    override fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String? {
        if (apiKey == null) return text

        val (textWithPlaceholders, originalPlaceholders) = replaceSpecificPlaceholders(text)

        val formBuilder = FormBody.Builder()
            .add("text", textWithPlaceholders)
            .add("source_lang", sourceLang)
            .add("target_lang", targetLang)
            .add("preserve_formatting", preserveFormatting.toString())
            .add("tag_handling", TAG_HANDLING)

        // The glossary has to be added before build(): fields added to the builder
        // afterwards are not part of an already built body.
        glossaryId?.let { formBuilder.add("glossary_id", it) }

        val request = Request.Builder()
            .url(apiURL)
            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
            .post(formBuilder.build())
            .build()

        return client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            val responseBody = response.body?.string()
                ?: throw IOException("Empty response body for $response")
            val translationResponse = mapper.readValue<TranslationResponse>(responseBody)

            translationResponse.translations.firstOrNull()?.text
                ?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
        }
    }

    private companion object {
        /** Token substituted for each `{:name}` placeholder in the text sent to Deepl. */
        const val PLACEHOLDER_TOKEN = "_PLACEHOLDER_"

        /** Matches `{:name}` placeholders; group 1 captures the name. */
        val PLACEHOLDER_PATTERN: Pattern = Pattern.compile("\\{:([^}]*)}")
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright (C) 2017/2021 e-voyageurs technologies
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ai.tock.translator.deepl
18+
19+
import ai.tock.shared.mapProperty
20+
import ai.tock.shared.propertyOrNull
21+
import ai.tock.translator.TranslatorEngine
22+
import java.util.Locale
23+
import org.apache.commons.text.StringEscapeUtils
24+
25+
/**
 * [TranslatorEngine] that delegates translations to a [DeeplClient].
 *
 * @param client client used to call Deepl
 */
internal class DeeplTranslatorEngine(private val client: DeeplClient) : TranslatorEngine {

    // Optional allow-list of target language codes; null means every target language is translated
    private val supportedLanguages: Set<String>? =
        propertyOrNull("tock_translator_deepl_target_languages")?.split(",")?.map { it.trim() }?.toSet()

    // Glossary identifiers, looked up by target language code
    private val glossaryMapIds = mapProperty("tock_translator_deepl_glossary_map_ids", emptyMap())

    override val supportAdminTranslation: Boolean = true

    /**
     * Translates [text] from the language of [source] to the language of [target].
     *
     * @return the translation with HTML 4 entities unescaped, or an empty string when the target
     * language is filtered out or the client returns no translation
     */
    override fun translate(text: String, source: Locale, target: Locale): String {
        // Allows to filter translation on a specific language
        if (supportedLanguages != null && target.language !in supportedLanguages) return ""

        // The client may return null; fall back to the same empty result as a filtered language
        // instead of passing null on to the unescaping step.
        val translatedText = client.translate(
            text = text,
            sourceLang = source.language,
            targetLang = target.language,
            preserveFormatting = true,
            glossaryId = glossaryMapIds[target.language]
        ) ?: return ""

        return StringEscapeUtils.unescapeHtml4(translatedText)
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright (C) 2017/2021 e-voyageurs technologies
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ai.tock.translator.deepl
18+
19+
import ai.tock.translator.TranslatorEngine
20+
import com.github.salomonbrys.kodein.Kodein
21+
import com.github.salomonbrys.kodein.bind
22+
import com.github.salomonbrys.kodein.provider
23+
24+
/**
 * Kodein module registering the Deepl translator with its default client.
 */
val deeplTranslatorModule: Kodein.Module = configureDeeplTranslatorModule()
28+
29+
/**
 * Builds a Kodein module binding [TranslatorEngine] to a Deepl implementation.
 *
 * The binding is declared with `overrides = true`.
 *
 * @param client the Deepl client handed to each provided engine
 */
fun configureDeeplTranslatorModule(client: DeeplClient = OkHttpDeeplClient()): Kodein.Module =
    Kodein.Module {
        bind<TranslatorEngine>(overrides = true) with provider { DeeplTranslatorEngine(client) }
    }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright (C) 2017/2021 e-voyageurs technologies
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ai.tock.translator.deepl
18+
19+
import java.util.Locale
20+
import kotlin.test.assertEquals
21+
import org.junit.jupiter.api.Disabled
22+
import org.junit.jupiter.api.Test
23+
24+
/**
 * Integration tests of [DeeplTranslatorEngine] with an [OkHttpDeeplClient].
 *
 * Every test is disabled because it calls the Deepl pro API, which can be expensive.
 */
class DeeplTranslateIntegrationTest {
    private val engine = DeeplTranslatorEngine(OkHttpDeeplClient())

    /** Translates [text] from French to [target] with the engine under test. */
    private fun translateFromFrench(text: String, target: Locale): String =
        engine.translate(text, Locale.FRENCH, target)

    @Test
    @Disabled
    fun simpleTest() {
        val actual = translateFromFrench(
            "Bonjour, je voudrais me rendre à New-York Mardi prochain",
            Locale.ENGLISH
        )

        assertEquals("Hello, I would like to go to New York next Tuesday.", actual)
    }

    @Test
    @Disabled
    fun testWithEmoticonAndAntislash() {
        val frenchText = "Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" +
            "Je vous informe sur l'état du trafic en temps réel.\n" +
            "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ..."
        val expected = "Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" +
            "I inform you about traffic conditions in real time.\n" +
            "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ..."

        val actual = translateFromFrench(frenchText, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithParameters() {
        val actual = translateFromFrench(
            "Bonjour, je voudrais me rendre à {:city} {:date}",
            Locale.GERMAN
        )

        assertEquals("Hallo, ich würde gerne nach {:city} {:date} fahren.", actual)
    }

    @Test
    @Disabled
    fun testWithHTML() {
        val actual = translateFromFrench(
            "Bonjour, je voudrais me rendre à Paris <br><br/> demain soir",
            Locale.GERMAN
        )

        assertEquals("Hallo, ich möchte morgen Abend nach Paris <br><br/> fahren", actual)
    }
}

translator/pom.xml

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
<module>core</module>
3434
<module>noop</module>
3535
<module>google-translate</module>
36+
<module>deepl-translate</module>
3637
</modules>
3738

3839
<dependencies>

0 commit comments

Comments
 (0)