Skip to content

Commit

Permalink
add TokenSet.numSet/toString and tokenRepr in jsctrl (#76)
Browse files Browse the repository at this point in the history
This PR adds support for `TokenSet.numSet/toString` and `tokenRepr` in
`jsctrl`.

Example:
```js
// samples/hello.js

async function main() {
    await $`Ultimate answer is to the life, universe and everything is `
    await gen({ regex: /abc^/ })
}

start(main)
```

Output:
```
% ../../aici.sh run --build . samples/hello.js
...
[0]: FIXED "Ultimate answer is to the life, universe and everything is "
[0]: GEN-OPT {regex: /abc^/}
[0]: regex constraint: "abc^"
[0]: dfa: 244 bytes
[0]: ALLOW: TokenSet: 3/50295; "a", "ab", "abc"
[0]: GEN-STEP: "a"
[0]: ALLOW: TokenSet: 2/50295; "b", "bc"
[0]: GEN-STEP: "b"
[0]: ALLOW: TokenSet: 1/50295; "c"
[0]: GEN-STEP: "c"
[0]: ALLOW: TokenSet: 0/50295;
[0]: Constraint doesn't allow any tokens; adding EOS
[0]: GEN-STEP: EOS
[0]: GEN "abc"
[0]: JsCtrl: done
[DONE]
[Response] abc
```

Closes #64
  • Loading branch information
kevinmingtarja authored Mar 15, 2024
1 parent 5a83316 commit c662637
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 2 deletions.
14 changes: 13 additions & 1 deletion controllers/jsctrl/samples/aici-types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ declare module "_aici" {
*/
function detokenize(tokens: number[]): Buffer;

/**
* Return a debug string representation of the token with the given index.
*/
function tokenRepr(token: number): string;

/**
* Return identifier of the current sequence.
* Most useful with fork_group parameter in mid_process() callback.
Expand Down Expand Up @@ -200,20 +205,27 @@ declare module "_aici" {
*/
constructor();

/**
* Return a debug string for this token set,
* e.g. `TokenSet: 3/50295; "a", "ab", "abc"`.
*/
toString(): string;

add(t: number): void;
delete(t: number): void;
has(t: number): boolean;
clear(): void;

/**
* Number of all tokens (not only in the set).
* Number of all possible tokens (regardless of whether they are in the set or not).
*/
length: number;

/**
* Include or exclude all tokens from the set.
*/
setAll(value: boolean): void;

/**
* Number of tokens in the set.
*/
numSet(): number;
}

/**
Expand Down
15 changes: 15 additions & 0 deletions controllers/jsctrl/src/jsctrl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ impl TokenSet {
self.inner.len()
}

// Debug rendering of this set: locks GLOBAL_STATE and asks its trie to
// format the inner set via `token_set_dbg`.
// Panics if the GLOBAL_STATE lock cannot be acquired (`unwrap`).
pub fn toString(&self) -> String {
    GLOBAL_STATE
        .lock()
        .unwrap()
        .trie
        .token_set_dbg(&self.inner)
}

// Adds `tok` to the set by calling `allow_token` on the inner set.
pub fn add(&mut self, tok: u32) {
    let allowed = &mut self.inner;
    allowed.allow_token(tok);
}
Expand All @@ -144,6 +149,10 @@ impl TokenSet {
// Forwards `val` to `set_all` on the inner set, applying it to every token.
pub fn setAll(&mut self, val: bool) {
    let every_token = &mut self.inner;
    every_token.set_all(val);
}

pub fn numSet(&self) -> usize {
self.inner.num_set()
}
}

impl Default for TokenSet {
Expand Down Expand Up @@ -261,6 +270,12 @@ mod aici_mod {
Buffer(bytes)
}

// Debug string for a single token: locks GLOBAL_STATE and delegates to the
// trie's `token_dbg`. Panics if the lock cannot be acquired (`unwrap`).
#[rquickjs::function]
pub fn tokenRepr(token: TokenId) -> String {
    GLOBAL_STATE.lock().unwrap().trie.token_dbg(token)
}

#[rquickjs::function]
pub fn getVar(name: String) -> Option<Buffer> {
let name = name.as_str();
Expand Down
7 changes: 7 additions & 0 deletions controllers/jsctrl/ts/aici.ts
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ export class ConstrainedToken extends NextToken {
this._constraint = this.mkConstraint();
}
this._constraint.allowTokens(bias);
console.log("ALLOW:", bias.toString());
if (bias.numSet() === 0) {
console.log("Constraint doesn't allow any tokens; adding EOS")
return MidProcessResult.stop();
}
return MidProcessResult.bias(bias);
}

Expand Down Expand Up @@ -677,6 +682,8 @@ export async function genTokens(options: GenOptions): Promise<Token[]> {
const tokens = await next_token.run();
res.push(...tokens);

console.log("GEN-STEP:", tokens.map(t => _aici.tokenRepr(t)).join(", "));

const text = detokenize(res).decode();

if (stopAt !== undefined && text.includes(stopAt)) {
Expand Down
14 changes: 13 additions & 1 deletion controllers/jsctrl/ts/native.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ declare module "_aici" {
*/
function detokenize(tokens: number[]): Buffer;

/**
* Return a debug string representation of the token with the given index.
*/
function tokenRepr(token: number): string;

/**
* Return identifier of the current sequence.
* Most useful with fork_group parameter in mid_process() callback.
Expand Down Expand Up @@ -200,20 +205,27 @@ declare module "_aici" {
*/
constructor();

/**
* Return a debug string for this token set,
* e.g. `TokenSet: 3/50295; "a", "ab", "abc"`.
*/
toString(): string;

add(t: number): void;
delete(t: number): void;
has(t: number): boolean;
clear(): void;

/**
* Number of all tokens (not only in the set).
* Number of all possible tokens (regardless of whether they are in the set or not).
*/
length: number;

/**
* Include or exclude all tokens from the set.
*/
setAll(value: boolean): void;

/**
* Number of tokens in the set.
*/
numSet(): number;
}

/**
Expand Down

0 comments on commit c662637

Please sign in to comment.