# Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
#
# Files touched:
#   controllers/jsctrl/samples/aici-types.d.ts and controllers/jsctrl/ts/native.d.ts
#     (both receive the same three hunks inside `declare module "_aici"`):
#       - declare `function tokenRepr(token: number): string`, documented as returning a
#         debug string representation of a token index;
#       - declare `toString(): string` right after `constructor()` on the token-set class
#         (the class header is outside the hunks; presumably TokenSet, matching the Rust
#         `impl TokenSet` below -- confirm);
#       - reword the doc comment on `length` to "Number of all possible tokens (regardless of
#         whether they are in the set or not)";
#       - declare `numSet(): number`, documented as the number of tokens in the set.
#   controllers/jsctrl/src/jsctrl.rs:
#       - `TokenSet::toString` locks GLOBAL_STATE and returns `trie.token_set_dbg(&self.inner)`;
#       - `TokenSet::numSet` returns `self.inner.num_set()`;
#       - `tokenRepr` (marked `#[rquickjs::function]` in `mod aici_mod`) locks GLOBAL_STATE and
#         returns `trie.token_dbg(token)`.
#   controllers/jsctrl/ts/aici.ts:
#       - in the ConstrainedToken hunk, after `allowTokens(bias)`: log "ALLOW:" with
#         `bias.toString()`, and when `bias.numSet() === 0` log a message and return
#         `MidProcessResult.stop()` instead of `MidProcessResult.bias(bias)`;
#       - in `genTokens`: log "GEN-STEP:" with `_aici.tokenRepr(t)` for each token of the step.
#
# NOTE(review): the log text says "adding EOS" while the code returns MidProcessResult.stop();
#   confirm the two are equivalent or adjust the message in the source change.
# NOTE(review): both new console.log calls are unconditional; confirm per-step debug output is intended.
# NOTE(review): `&mut` is taken on the trie in toString/tokenRepr although both only return a
#   debug string -- TODO confirm whether token_set_dbg/token_dbg need a mutable receiver.
# NOTE(review): the line breaks of this patch have been collapsed, and the context lines
#   `Option {` / `Promise {` look like they lost their generic arguments; restore the original
#   line structure before attempting to apply it.
diff --git a/controllers/jsctrl/samples/aici-types.d.ts b/controllers/jsctrl/samples/aici-types.d.ts index 1e94e5aa..9b0fe7dd 100644 --- a/controllers/jsctrl/samples/aici-types.d.ts +++ b/controllers/jsctrl/samples/aici-types.d.ts @@ -143,6 +143,11 @@ declare module "_aici" { */ function detokenize(tokens: number[]): Buffer; + /** + * Return debug string representation of a given token index + */ + function tokenRepr(token: number): string; + /** * Return identifier of the current sequence. * Most useful with fork_group parameter in mid_process() callback. @@ -200,13 +205,15 @@ declare module "_aici" { */ constructor(); + toString(): string; + add(t: number): void; delete(t: number): void; has(t: number): boolean; clear(): void; /** - * Number of all tokens (not only in the set). + * Number of all possible tokens (regardless of whether they are in the set or not). */ length: number; @@ -214,6 +221,11 @@ declare module "_aici" { * Include or exclude all tokens from the set. */ setAll(value: boolean): void; + + /** + * Number of tokens in the set. 
+ */ + numSet(): number; } /** diff --git a/controllers/jsctrl/src/jsctrl.rs b/controllers/jsctrl/src/jsctrl.rs index 5e6cf98c..c6894d21 100644 --- a/controllers/jsctrl/src/jsctrl.rs +++ b/controllers/jsctrl/src/jsctrl.rs @@ -125,6 +125,11 @@ impl TokenSet { self.inner.len() } + pub fn toString(&self) -> String { + let trie = &mut GLOBAL_STATE.lock().unwrap().trie; + trie.token_set_dbg(&self.inner) + } + pub fn add(&mut self, tok: u32) { self.inner.allow_token(tok); } @@ -144,6 +149,10 @@ impl TokenSet { pub fn setAll(&mut self, val: bool) { self.inner.set_all(val); } + + pub fn numSet(&self) -> usize { + self.inner.num_set() + } } impl Default for TokenSet { @@ -261,6 +270,12 @@ mod aici_mod { Buffer(bytes) } + #[rquickjs::function] + pub fn tokenRepr(token: TokenId) -> String { + let trie = &mut GLOBAL_STATE.lock().unwrap().trie; + trie.token_dbg(token) + } + #[rquickjs::function] pub fn getVar(name: String) -> Option { let name = name.as_str(); diff --git a/controllers/jsctrl/ts/aici.ts b/controllers/jsctrl/ts/aici.ts index da61e9a1..1f0be610 100644 --- a/controllers/jsctrl/ts/aici.ts +++ b/controllers/jsctrl/ts/aici.ts @@ -340,6 +340,11 @@ export class ConstrainedToken extends NextToken { this._constraint = this.mkConstraint(); } this._constraint.allowTokens(bias); + console.log("ALLOW:", bias.toString()); + if (bias.numSet() === 0) { + console.log("Constraint doesn't allow any tokens; adding EOS") + return MidProcessResult.stop(); + } return MidProcessResult.bias(bias); } @@ -677,6 +682,8 @@ export async function genTokens(options: GenOptions): Promise { const tokens = await next_token.run(); res.push(...tokens); + console.log("GEN-STEP:", tokens.map(t => _aici.tokenRepr(t)).join(", ")); + const text = detokenize(res).decode(); if (stopAt !== undefined && text.includes(stopAt)) { diff --git a/controllers/jsctrl/ts/native.d.ts b/controllers/jsctrl/ts/native.d.ts index 2d0413e5..60c7840a 100644 --- a/controllers/jsctrl/ts/native.d.ts +++ 
b/controllers/jsctrl/ts/native.d.ts @@ -143,6 +143,11 @@ declare module "_aici" { */ function detokenize(tokens: number[]): Buffer; + /** + * Return debug string representation of a given token index + */ + function tokenRepr(token: number): string; + /** * Return identifier of the current sequence. * Most useful with fork_group parameter in mid_process() callback. @@ -200,13 +205,15 @@ declare module "_aici" { */ constructor(); + toString(): string; + add(t: number): void; delete(t: number): void; has(t: number): boolean; clear(): void; /** - * Number of all tokens (not only in the set). + * Number of all possible tokens (regardless of whether they are in the set or not). */ length: number; @@ -214,6 +221,11 @@ declare module "_aici" { * Include or exclude all tokens from the set. */ setAll(value: boolean): void; + + /** + * Number of tokens in the set. + */ + numSet(): number; } /**