|
1734 | 1734 | }),
|
1735 | 1735 | signal,
|
1736 | 1736 | });
|
1737 |
| - if (!res.ok) |
| 1737 | + |
| 1738 | + if (!res.ok) { |
1738 | 1739 | throw new Error(`HTTP ${res.status}`);
|
1739 |
| - if (options.stream) { |
1740 |
| - for await (const chunk of parseEventStream(res.body)) { |
| 1740 | + } |
| 1741 | + |
| 1742 | + async function* yieldTokens(chunks) { |
| 1743 | + for await (const chunk of chunks) { |
| 1744 | + const token = chunk.content || chunk.token; |
1741 | 1745 | const choice = chunk.completion_probabilities?.[0];
|
1742 | 1746 |
|
1743 |
| - let probs = []; |
1744 |
| - if (choice?.probs) { |
1745 |
| - probs = choice.probs ?? []; |
1746 |
| - } else if (choice?.top_logprobs) { |
1747 |
| - probs = Object.values(choice.top_logprobs).map(({ token, logprob }) => ({ |
1748 |
| - tok_str: token, |
1749 |
| - prob: Math.exp(logprob) |
1750 |
| - })); |
1751 |
| - } |
1752 |
| - const prob = probs.find(p => p.tok_str === chunk.content)?.prob; |
1753 |
| - |
1754 |
| - yield { |
1755 |
| - content: chunk.content, |
1756 |
| - ...(probs.length > 0 ? { |
1757 |
| - prob: prob ?? -1, |
1758 |
| - completion_probabilities: [{ |
1759 |
| - content: chunk.content, |
1760 |
| - probs |
1761 |
| - }] |
1762 |
| - } : {}) |
1763 |
| - }; |
1764 |
| - } |
1765 |
| - } else { |
1766 |
| - const { completion_probabilities } = await res.json(); |
1767 |
| - for (const chunk of completion_probabilities) { |
1768 |
| - const token = chunk.content ? chunk.content : chunk.token; |
1769 |
| - |
1770 |
| - let probs = []; |
1771 |
| - if (chunk.probs) { |
1772 |
| - probs = chunk.probs ?? []; |
1773 |
| - } else if (chunk.top_logprobs) { |
1774 |
| - probs = Object.values(chunk.top_logprobs).map(({ token, logprob }) => ({ |
| 1747 | + const probs = choice?.probs ?? |
| 1748 | + Object.values(choice?.top_logprobs || chunk.top_logprobs || {}).map(({ token, logprob }) => ({ |
1775 | 1749 | tok_str: token,
|
1776 | 1750 | prob: Math.exp(logprob)
|
1777 | 1751 | }));
|
1778 |
| - } |
1779 | 1752 | const prob = probs.find(p => p.tok_str === token)?.prob;
|
1780 | 1753 |
|
1781 | 1754 | yield {
|
|
1790 | 1763 | };
|
1791 | 1764 | }
|
1792 | 1765 | }
|
| 1766 | + |
| 1767 | + if (options.stream) { |
| 1768 | + yield* await yieldTokens(parseEventStream(res.body)); |
| 1769 | + } else { |
| 1770 | + const { completion_probabilities } = await res.json(); |
| 1771 | + yield* await yieldTokens(completion_probabilities); |
| 1772 | + } |
1793 | 1773 | }
|
1794 | 1774 |
|
1795 | 1775 | async function koboldCppTokenCount({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
|
|
1872 | 1852 | }),
|
1873 | 1853 | signal,
|
1874 | 1854 | });
|
1875 |
| - if (!res.ok) |
| 1855 | + |
| 1856 | + if (!res.ok) { |
1876 | 1857 | throw new Error(`HTTP ${res.status}`);
|
1877 |
| - if (options.stream) { |
1878 |
| - for await (const chunk of parseEventStream(res.body)) { |
1879 |
| - yield { content: chunk.token }; |
1880 |
| - } |
1881 |
| - } else { |
1882 |
| - const { results } = await res.json(); |
1883 |
| - const chunks = results?.[0].logprobs?.content ?? []; |
1884 |
| - for (const chunk of chunks) { |
| 1858 | + } |
| 1859 | + |
| 1860 | + async function* yieldTokens(chunks) { |
| 1861 | + for await (const chunk of chunks) { |
1885 | 1862 | const { token, top_logprobs } = chunk;
|
1886 |
| - |
1887 |
| - const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({ |
| 1863 | + |
| 1864 | + const probs = Object.values(top_logprobs ?? {}).map(({ token, logprob }) => ({ |
1888 | 1865 | tok_str: token,
|
1889 | 1866 | prob: Math.exp(logprob)
|
1890 | 1867 | }));
|
|
1902 | 1879 | };
|
1903 | 1880 | }
|
1904 | 1881 | }
|
| 1882 | + |
| 1883 | + if (options.stream) { |
| 1884 | + yield* await yieldTokens(parseEventStream(res.body)); |
| 1885 | + } else { |
| 1886 | + const { results } = await res.json(); |
| 1887 | + yield* await yieldTokens(results?.[0].logprobs?.content ?? []); |
| 1888 | + } |
1905 | 1889 | }
|
1906 | 1890 |
|
1907 | 1891 | async function koboldCppAbortCompletion({ endpoint, proxyEndpoint, ...options }) {
|
|
2193 | 2177 | throw new Error(`HTTP ${res.status}`);
|
2194 | 2178 | }
|
2195 | 2179 |
|
2196 |
| - if (options.stream) { |
2197 |
| - for await (const chunk of parseEventStream(res.body)) { |
| 2180 | + async function* yieldTokens(chunks) { |
| 2181 | + for await (const chunk of chunks) { |
2198 | 2182 | if (!chunk.choices || chunk.choices.length === 0) {
|
2199 |
| - if (chunk.content) { |
2200 |
| - yield { content: chunk.content }; |
2201 |
| - } |
| 2183 | + if (chunk.content) yield { content: chunk.content }; |
2202 | 2184 | continue;
|
2203 | 2185 | }
|
2204 | 2186 |
|
2205 | 2187 | const { text, logprobs } = chunk.choices[0];
|
2206 | 2188 | const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
|
2207 |
| - |
| 2189 | + |
2208 | 2190 | const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
|
2209 | 2191 | tok_str: tok,
|
2210 | 2192 | prob: Math.exp(logprob)
|
|
2222 | 2204 | } : {})
|
2223 | 2205 | };
|
2224 | 2206 | }
|
| 2207 | + } |
| 2208 | + |
| 2209 | + if (options.stream) { |
| 2210 | + yield* await yieldTokens(parseEventStream(res.body)); |
2225 | 2211 | } else {
|
2226 | 2212 | const { content, choices } = await res.json();
|
2227 | 2213 | if (choices?.[0].logprobs?.tokens) {
|
2228 |
| - const logprobs = choices?.[0].logprobs; |
2229 |
| - const chunks = Object.values(logprobs.tokens).map((token, i) => { |
2230 |
| - return { text: token, logprobs: { top_logprobs: [ logprobs.top_logprobs[i] ] } }; |
2231 |
| - }); |
2232 |
| - for (const chunk of chunks) { |
2233 |
| - const { text, logprobs } = chunk; |
2234 |
| - const top_logprobs = logprobs?.top_logprobs?.[0] ?? {}; |
2235 |
| - |
2236 |
| - const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({ |
2237 |
| - tok_str: tok, |
2238 |
| - prob: Math.exp(logprob) |
2239 |
| - })); |
2240 |
| - const prob = probs.find(p => p.tok_str === text)?.prob; |
2241 |
| - |
2242 |
| - yield { |
2243 |
| - content: text, |
2244 |
| - ...(probs.length > 0 ? { |
2245 |
| - prob: prob ?? -1, |
2246 |
| - completion_probabilities: [{ |
2247 |
| - content: text, |
2248 |
| - probs |
2249 |
| - }] |
2250 |
| - } : {}) |
2251 |
| - }; |
2252 |
| - } |
| 2214 | + const logprobs = choices[0].logprobs; |
| 2215 | + const chunks = Object.values(logprobs.tokens).map((token, i) => ({ |
| 2216 | + choices: [{ |
| 2217 | + text: token, |
| 2218 | + logprobs: { top_logprobs: [logprobs.top_logprobs[i]] } |
| 2219 | + }] |
| 2220 | + })); |
| 2221 | + yield* await yieldTokens(chunks); |
2253 | 2222 | } else if (choices?.[0].text) {
|
2254 | 2223 | yield { content: choices[0].text };
|
2255 | 2224 | } else if (content) { // llama.cpp specific?
|
2256 |
| - yield { content: content }; |
| 2225 | + yield { content }; |
2257 | 2226 | }
|
2258 | 2227 | }
|
2259 | 2228 | }
|
|
2300 | 2269 | }
|
2301 | 2270 | }
|
2302 | 2271 |
|
2303 |
| - |
2304 | 2272 | async function* openaiChatCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
|
2305 | 2273 | const res = await fetch(`${proxyEndpoint ?? endpoint}/v1/chat/completions`, {
|
2306 | 2274 | method: 'POST',
|
|
2326 | 2294 | throw new Error(`HTTP ${res.status}`);
|
2327 | 2295 | }
|
2328 | 2296 |
|
2329 |
| - if (options.stream) { |
2330 |
| - for await (const chunk of parseEventStream(res.body)) { |
| 2297 | + async function* yieldTokens(chunks) { |
| 2298 | + for await (const chunk of chunks) { |
2331 | 2299 | const token = chunk.choices[0].delta.content;
|
2332 | 2300 | const top_logprobs = chunk.choices[0].logprobs?.content?.[0]?.top_logprobs ?? {};
|
2333 |
| - if (!token) { |
2334 |
| - continue |
2335 |
| - } |
2336 |
| - |
| 2301 | + if (!token) continue; |
| 2302 | + |
2337 | 2303 | const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({
|
2338 | 2304 | tok_str: token,
|
2339 | 2305 | prob: Math.exp(logprob)
|
|
2351 | 2317 | } : {})
|
2352 | 2318 | };
|
2353 | 2319 | }
|
| 2320 | + } |
| 2321 | + |
| 2322 | + if (options.stream) { |
| 2323 | + yield* await yieldTokens(parseEventStream(res.body)); |
2354 | 2324 | } else {
|
2355 | 2325 | const { choices } = await res.json();
|
2356 |
| - const chunks = choices?.[0].logprobs?.content ?? []; |
2357 |
| - if (chunks.length) { |
| 2326 | + const chunks = choices?.[0].logprobs?.content; |
| 2327 | + |
| 2328 | + if (chunks?.length) { |
2358 | 2329 | const formattedChunks = chunks.map(chunk => ({
|
2359 | 2330 | choices: [{
|
2360 | 2331 | delta: { content: chunk.token },
|
2361 |
| - logprobs: { |
2362 |
| - content: [{ |
2363 |
| - top_logprobs: chunk.top_logprobs |
2364 |
| - }] |
2365 |
| - } |
| 2332 | + logprobs: { content: [{ top_logprobs: chunk.top_logprobs }] } |
2366 | 2333 | }]
|
2367 | 2334 | }));
|
2368 |
| - for await (const chunk of openaiBufferUtf8Stream(formattedChunks)) { |
2369 |
| - const token = chunk.choices[0].delta.content; |
2370 |
| - const top_logprobs = chunk.choices[0].logprobs?.content?.[0]?.top_logprobs ?? {}; |
2371 |
| - if (!token) { |
2372 |
| - continue |
2373 |
| - } |
2374 |
| - |
2375 |
| - const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({ |
2376 |
| - tok_str: token, |
2377 |
| - prob: Math.exp(logprob) |
2378 |
| - })); |
2379 |
| - const prob = probs.find(p => p.tok_str === token)?.prob; |
2380 |
| - |
2381 |
| - yield { |
2382 |
| - content: token, |
2383 |
| - ...(probs.length > 0 ? { |
2384 |
| - prob: prob ?? -1, |
2385 |
| - completion_probabilities: [{ |
2386 |
| - content: token, |
2387 |
| - probs |
2388 |
| - }] |
2389 |
| - } : {}) |
2390 |
| - }; |
2391 |
| - } |
2392 |
| - } else { |
2393 |
| - const content = choices?.[0].message?.content; |
2394 |
| - if (content) { |
2395 |
| - yield { content: content }; |
2396 |
| - } |
| 2335 | + yield* await yieldTokens(openaiBufferUtf8Stream(formattedChunks)); |
| 2336 | + } else if (choices?.[0].message?.content) { |
| 2337 | + yield { content: choices[0].message.content }; |
2397 | 2338 | }
|
2398 | 2339 | }
|
2399 | 2340 | }
|
|
0 commit comments