@@ -165,48 +165,82 @@ function encode(text) {
165
165
}
166
166
167
167
/**
168
- * @param {number[] } tokens
168
+ * @param {number } token
169
+ * @param {number[] } buffer
170
+ * @returns {string | undefined }
171
+ */
172
+ function decodeToken ( token , buffer ) {
173
+ const decodedToken = decoder [ token ] ;
174
+ if ( typeof decodedToken === "undefined" ) {
175
+ return ;
176
+ }
177
+ const decodedBytes = splitToken ( decodedToken ) . map ( ( x ) => byteDecoder . get ( x ) ) ;
178
+
179
+ let decodedString = "" ;
180
+ for ( const byte of decodedBytes ) {
181
+ if ( typeof byte === "undefined" ) {
182
+ continue ;
183
+ }
184
+ buffer . push ( byte ) ;
185
+ const utf16str = textDecoder . decode ( new Uint8Array ( buffer ) , {
186
+ stream : true ,
187
+ } ) ;
188
+
189
+ // Check if the last character is a high surrogate
190
+ const lastCharCode = utf16str . charCodeAt ( utf16str . length - 1 ) ;
191
+ if (
192
+ utf16str . length > 0 &&
193
+ lastCharCode >= 0xd8_00 &&
194
+ lastCharCode <= 0xdb_ff
195
+ ) {
196
+ // Keep the high surrogate in the buffer and continue with the next token
197
+ continue ;
198
+ } else {
199
+ decodedString += utf16str ;
200
+ // reset buffer
201
+ buffer . length = 0 ;
202
+ }
203
+ }
204
+ if ( decodedString . length > 0 ) {
205
+ return decodedString ;
206
+ }
207
+ }
208
+
209
+ /**
210
+ * @param {Iterable<number> } tokens
169
211
* @returns {Generator<string, void, undefined> }
170
212
*/
171
213
function * decodeGenerator ( tokens ) {
172
214
/** @type {number[] } */
173
- let buffer = [ ] ;
215
+ const buffer = [ ] ;
174
216
175
217
for ( const token of tokens ) {
176
- const decodedToken = decoder [ token ] ;
177
- if ( typeof decodedToken = == "undefined" ) {
178
- continue ;
218
+ const result = decodeToken ( token , buffer ) ;
219
+ if ( typeof result ! == "undefined" ) {
220
+ yield result ;
179
221
}
180
- const decodedBytes = splitToken ( decodedToken ) . map ( ( x ) =>
181
- byteDecoder . get ( x ) ,
182
- ) ;
183
-
184
- let decodedString = "" ;
185
- for ( const byte of decodedBytes ) {
186
- if ( typeof byte === "undefined" ) {
187
- continue ;
188
- }
189
- buffer . push ( byte ) ;
190
- const utf16str = textDecoder . decode ( new Uint8Array ( buffer ) , {
191
- stream : true ,
192
- } ) ;
193
-
194
- // Check if the last character is a high surrogate
195
- const lastCharCode = utf16str . charCodeAt ( utf16str . length - 1 ) ;
196
- if (
197
- utf16str . length > 0 &&
198
- lastCharCode >= 0xd8_00 &&
199
- lastCharCode <= 0xdb_ff
200
- ) {
201
- // Keep the high surrogate in the buffer and continue with the next token
202
- continue ;
203
- } else {
204
- decodedString += utf16str ;
205
- buffer = [ ] ;
206
- }
207
- }
208
- if ( decodedString . length > 0 ) {
209
- yield decodedString ;
222
+ }
223
+
224
+ // Yield any remaining characters in the buffer
225
+ if ( buffer . length > 0 ) {
226
+ yield textDecoder . decode ( new Uint8Array ( buffer ) ) ;
227
+ }
228
+ }
229
+
230
+ /**
231
+ * Decode tokens asynchronously and yield the decoded strings, one by one.
232
+ * Will not yield for tokens that include a high surrogate, but wait for the next token.
233
+ * @param {AsyncIterable<number> } tokens
234
+ * @returns {AsyncGenerator<string, void, undefined> }
235
+ */
236
+ async function * decodeAsyncGenerator ( tokens ) {
237
+ /** @type {number[] } */
238
+ const buffer = [ ] ;
239
+
240
+ for await ( const token of tokens ) {
241
+ const result = decodeToken ( token , buffer ) ;
242
+ if ( typeof result !== "undefined" ) {
243
+ yield result ;
210
244
}
211
245
}
212
246
@@ -228,4 +262,5 @@ module.exports.encode = encode;
228
262
module . exports . decode = decode ;
229
263
module . exports . encodeGenerator = encodeGenerator ;
230
264
module . exports . decodeGenerator = decodeGenerator ;
265
+ module . exports . decodeAsyncGenerator = decodeAsyncGenerator ;
231
266
module . exports . isWithinTokenLimit = isWithinTokenLimit ;
0 commit comments