Skip to content

Commit 2ad0de0

Browse files
authored
Merge pull request #100 from frzsombor/token-patch-2
Fix problems with special Unicode characters
2 parents fc5d3c7 + 90aacd8 commit 2ad0de0

File tree

2 files changed

+28
-16
lines changed

2 files changed

+28
-16
lines changed

src/Stichoza/GoogleTranslate/Tokens/GoogleTokenGenerator.php

+20-16
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,16 @@ private function TL($a)
3636
$tkk = $this->TKK();
3737
$b = $tkk[0];
3838

39-
for ($d = [], $e = 0, $f = 0; $f < mb_strlen($a, 'UTF-8'); $f++) {
40-
$g = $this->charCodeAt($a, $f);
39+
for ($d = [], $e = 0, $f = 0; $f < $this->JS_length($a); $f++) {
40+
$g = $this->JS_charCodeAt($a, $f);
4141
if (128 > $g) {
4242
$d[$e++] = $g;
4343
} else {
4444
if (2048 > $g) {
4545
$d[$e++] = $g >> 6 | 192;
4646
} else {
47-
if (55296 == ($g & 64512) && $f + 1 < mb_strlen($a, 'UTF-8') && 56320 == ($this->charCodeAt($a, $f + 1) & 64512)) {
48-
$g = 65536 + (($g & 1023) << 10) + ($this->charCodeAt($a, ++$f) & 1023);
47+
if (55296 == ($g & 64512) && $f + 1 < $this->JS_length($a) && 56320 == ($this->JS_charCodeAt($a, $f + 1) & 64512)) {
48+
$g = 65536 + (($g & 1023) << 10) + ($this->JS_charCodeAt($a, ++$f) & 1023);
4949
$d[$e++] = $g >> 18 | 240;
5050
$d[$e++] = $g >> 12 & 63 | 128;
5151
} else {
@@ -138,23 +138,27 @@ private function unsignedRightShift($a, $b)
138138
}
139139

140140
/**
141-
* Get the Unicode of the character at the specified index in a string.
141+
* Get JS charCodeAt equivalent result with UTF-16 encoding
142142
*
143143
* @param string $str
144144
* @param int $index
145145
*
146-
* @return null|number
146+
* @return number
147147
*/
148-
private function charCodeAt($str, $index)
149-
{
150-
$char = mb_substr($str, $index, 1, 'UTF-8');
151-
if (mb_check_encoding($char, 'UTF-8')) {
152-
$ret = mb_convert_encoding($char, 'UTF-32BE', 'UTF-8');
153-
$result = hexdec(bin2hex($ret));
154-
155-
return $result;
156-
}
148+
private function JS_charCodeAt($str, $index)
{
    // Mirrors JavaScript's String.prototype.charCodeAt(): returns the UTF-16
    // code unit (0-65535) found at position $index of the UTF-8 string $str,
    // or NAN when $index lies outside the string, as JavaScript does.

    // The token loop asks for every index of the same string in turn, so keep
    // the most recent UTF-16LE conversion instead of re-encoding the whole
    // string on each call (which made that loop quadratic).
    static $cachedSource = null;
    static $cachedUtf16 = '';

    if ($cachedSource !== $str) {
        $cachedUtf16 = mb_convert_encoding($str, 'UTF-16LE', 'UTF-8');
        $cachedSource = $str;
    }

    // Each UTF-16 code unit occupies exactly two bytes.
    $offset = 2 * (int) $index;

    // Reject out-of-range positions explicitly: a negative string offset
    // would otherwise read from the end of the string, and an offset past the
    // end would raise an "Uninitialized string offset" diagnostic.
    if ($offset < 0 || $offset + 1 >= strlen($cachedUtf16)) {
        return NAN;
    }

    // 'v' decodes an unsigned 16-bit little-endian integer.
    $unit = unpack('v', substr($cachedUtf16, $offset, 2));

    return $unit[1];
}
157152

158-
return;
153+
/**
 * Count the UTF-16 code units in a UTF-8 string, matching what JavaScript's
 * String.prototype.length reports for the same text.
 *
 * @param string $str UTF-8 encoded text
 *
 * @return int number of 16-bit code units (characters outside the BMP count twice)
 */
private function JS_length($str)
{
    // Every UTF-16 code unit is exactly two bytes, so halve the byte count.
    return strlen(mb_convert_encoding($str, 'UTF-16LE', 'UTF-8')) >> 1;
}
160164
}

tests/TranslationTest.php

+8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ public function testTranslationEquality()
1919
$this->assertEquals($resultOne, $resultTwo, 'გამარჯობა');
2020
}
2121

22+
public function testUTF16Translation()
{
    // The emoji and its skin-tone modifier both lie outside the Basic
    // Multilingual Plane, so each one becomes a surrogate pair in UTF-16.
    $text = 'yes 👍🏽';

    $staticResult = TranslateClient::translate('en', 'de', $text);
    $instanceResult = $this->tr
        ->setSource('en')
        ->setTarget('de')
        ->translate($text);

    // Checks only that the static and instance APIs agree. Assuming PHPUnit's
    // assertEquals($expected, $actual, $message), the third argument is the
    // failure message rather than an expected translation.
    $this->assertEquals($staticResult, $instanceResult, 'ja 👍🏽');
}
29+
2230
public function testArrayTranslation()
2331
{
2432
$this->tr->setSource('en')->setTarget('ka');

0 commit comments

Comments
 (0)