From cc4d48a29802c9169b0c0b11c10d799a5ad06090 Mon Sep 17 00:00:00 2001
From: franktip
Date: Sun, 30 Jun 2024 18:21:00 -0400
Subject: [PATCH 01/13] retry when encountering 429 errors

---
 src/chatmodel.ts     |   6 ++-
 src/promise-utils.ts | 115 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 src/promise-utils.ts

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index 9758d9e..a29d1ea 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,6 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
+import { retry } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -27,7 +28,8 @@ export class ChatModel implements ICompletionModel {
 
   constructor(
     private readonly model: string,
-    private readonly instanceOptions: PostOptions = {}
+    private readonly instanceOptions: PostOptions = {},
+    private readonly nrAttempts: number = 3
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
@@ -75,7 +77,7 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await axios.post(this.apiEndpoint, postOptions, { headers });
+    const res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
 
     performance.measure(
       `llm-query:${JSON.stringify({
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
new file mode 100644
index 0000000..762d7f4
--- /dev/null
+++ b/src/promise-utils.ts
@@ -0,0 +1,115 @@
+/**
+ * This function provides support for retrying the creation of a promise
+ * up to a given number of times in case the promise is rejected.
+ * This is useful for, e.g., retrying a request to a server that is temporarily unavailable.
+ *
+ */
+export async function retry<T>(
+  f: () => Promise<T>,
+  howManyTimes: number
+): Promise<T> {
+  let i = 1;
+  let promise: Promise<T> = f(); // create the promise, but don't wait for its fulfillment yet..
+  while (i <= howManyTimes) {
+    try {
+      if (i > 1) {
+        console.log(` retry ${i}/${howManyTimes}`);
+      }
+      let val: T = await promise; // throws an exception if the promise is rejected
+      return val; // if the promise was fulfilled, return another promise that is fulfilled with the same value
+    } catch (e) {
+      i++;
+      console.log(`Promise rejected with ${e}.`);
+      promise = f(); // next attempt: create the promise, but don't wait for its fulfillment yet..
+    }
+  }
+  return promise; // if the promise was rejected howManyTimes times, return the last promise
+}
+
+/**
+ * This class provides support for asynchronous rate limiting by
+ * limiting the number of requests to the server to at most one
+ * in N milliseconds. This is useful for throttling requests to
+ * a server that has a limit on the number of requests per second.
+ */
+export abstract class RateLimiter {
+  constructor(protected howManyMilliSeconds: number) {
+    this.timer = this.resetTimer();
+  }
+  /**
+   * the timer is a promise that is resolved after a certain number of milliseconds
+   * have elapsed. The timer is reset after each request.
+   */
+  private timer: Promise<void>;
+
+  /**
+   * Waits until the timer has expired, then evaluate the function that
+   * produces the promise
+   * @param p a function that produces a promise
+   * @returns returns the promise produced by the function p (after the timer has expired)
+   */
+  public async next<T>(p: () => Promise<T>): Promise<T> {
+    await this.timer; // wait until timer has expired
+    this.timer = this.resetTimer(); // reset timer (for the next request)
+    return p(); // return the promise
+  }
+
+  /**
+   * resets the timer
+   * @returns a promise that is resolved after the number of milliseconds
+   * specified in the constructor have elapsed
+   */
+  protected resetTimer = () =>
+    new Promise<void>((resolve, reject) => {
+      setTimeout(() => {
+        resolve();
+      }, this.howManyMilliSeconds);
+    });
+}
+
+/**
+ * A rate limiter that limits the number of requests to the server to a
+ * maximum of one per N milliseconds.
+ *
+ */
+export class FixedRateLimiter extends RateLimiter {
+  public constructor(N: number) {
+    super(N);
+  }
+}
+
+/**
+ * A custom rate limiter for use during benchmark runs. It increases
+ * the pace of requests after two designated thresholds have been reached.
+ */
+export class BenchmarkRateLimiter extends RateLimiter {
+  private requestCount: number;
+
+  private static INITIAL_PACE = 10000;
+  private static PACE_AFTER_150_REQUESTS = 5000;
+  private static PACE_AFTER_300_REQUESTS = 2500;
+
+  constructor() {
+    console.log(
+      `BenchmarkRateLimiter: initial pace is ${BenchmarkRateLimiter.INITIAL_PACE}`
+    );
+    super(BenchmarkRateLimiter.INITIAL_PACE);
+    this.requestCount = 0;
+  }
+
+  public next<T>(p: () => Promise<T>): Promise<T> {
+    this.requestCount++;
+    if (this.requestCount === 150) {
+      this.howManyMilliSeconds = BenchmarkRateLimiter.PACE_AFTER_150_REQUESTS;
+      console.log(
+        `BenchmarkRateLimiter: increasing pace to ${BenchmarkRateLimiter.PACE_AFTER_150_REQUESTS}`
+      );
+    } else if (this.requestCount === 300) {
+      this.howManyMilliSeconds = BenchmarkRateLimiter.PACE_AFTER_300_REQUESTS;
+      console.log(
+        `BenchmarkRateLimiter: increasing pace to ${BenchmarkRateLimiter.PACE_AFTER_300_REQUESTS}`
+      );
+    }
+    return super.next(p);
+  }
+}
\ No newline at end of file
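To make the intended use of `retry` concrete, here is a minimal call-site sketch. The `fetchCompletion` helper and its endpoint URL are hypothetical, invented for illustration; only `retry` itself comes from the patch above.

```ts
// Hypothetical helper: posts a prompt and returns the response body.
// (fetchCompletion and the URL are illustrative, not part of the patch.)
import axios from "axios";
import { retry } from "./promise-utils";

async function fetchCompletion(prompt: string): Promise<string> {
  // axios rejects on a 429 (or any non-2xx status), which is what retry() reacts to
  const res = await axios.post("https://llm.example.com/v1/completions", { prompt });
  return res.data;
}

// Make up to three attempts; each rejection is logged and a fresh request is issued.
retry(() => fetchCompletion("write a unit test for zip()"), 3)
  .then((completion) => console.log(completion))
  .catch((err) => console.error(`all attempts failed: ${err}`));
```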
From eef754982838422fb26927ad050c3f50193bd006 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 06:25:46 -0400
Subject: [PATCH 02/13] adopt rate limiter

---
 src/chatmodel.ts | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index a29d1ea..c398932 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,7 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
-import { retry } from "./promise-utils";
+import { retry, RateLimiter, BenchmarkRateLimiter } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -25,6 +25,7 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
+  protected rateLimiter: RateLimiter;
 
   constructor(
     private readonly model: string,
@@ -33,7 +34,8 @@ export class ChatModel implements ICompletionModel {
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    console.log(`Using Chat Model API at ${this.apiEndpoint}`);
+    this.rateLimiter = new BenchmarkRateLimiter();
+    console.log(`Using ${this.model} at ${this.apiEndpoint}`);
   }
 
   /**
@@ -77,7 +79,10 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
+    const res = await retry( () =>
+      this.rateLimiter.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
+      this.nrAttempts
+    );
 
     performance.measure(
       `llm-query:${JSON.stringify({

From e4b5e6f0c009d0387b143add11702fae2b0780d3 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 08:07:44 -0400
Subject: [PATCH 03/13] update comment

---
 src/chatmodel.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index c398932..d84f814 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -114,8 +114,7 @@ export class ChatModel implements ICompletionModel {
   }
 
   /**
-   * Get completions from the LLM, extract the code fragments enclosed in a fenced code block,
-   * and postprocess them as needed; print a warning if it did not produce any
+   * Get completions from the LLM; issue a warning if it did not produce any
    *
    * @param prompt the prompt to use
    */

From 99106a61cec25f61304e9262d8d1999fe9585a05 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 10:03:52 -0400
Subject: [PATCH 04/13] add parameters for rate limiting and retry

---
 .github/workflows/run-experiment.yml |  7 +++++-
 benchmark/run.ts                     | 27 +++++++++++++++++++++-
 src/chatmodel.ts                     | 34 ++++++++++++++++++++--------
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 307a59b..07ed97c 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -29,6 +29,10 @@ on:
         description: "Skip slow benchmarks"
         type: boolean
         default: false
+      benchmarkMode:
+        type: boolean
+        description: "Use custom rate limiting for running benchmarks"
+        default: false
       debug_enabled:
         type: boolean
         description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
@@ -175,7 +179,8 @@ jobs:
             --temperatures "${{ needs.setup.outputs.temperatures }}" \
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
-            --retryTemplate ${{ needs.setup.outputs.retryTemplate }}"
+            --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
+            --benchmark ${{ github.event.inputs.benchmarkMode }} \"
           echo "command: $command"
           $command
           mv stats.json $outputdir
diff --git a/benchmark/run.ts b/benchmark/run.ts
index e91a8b7..d74003a 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -121,6 +121,12 @@ if (require.main === module) {
         default: 20,
         description: "maximum length of each snippet in lines",
       },
+      maxTokens: {
+        type: "number",
+        default: 1000,
+        demandOption: false,
+        description: "maximum number of tokens in a completion",
+      },
       temperatures: {
         type: "string",
         default: "0.0",
@@ -156,6 +162,25 @@ if (require.main === module) {
         default: "./templates/retry-template.hb",
         description: "Handlebars template file to use",
      },
+      nrAttempts: {
+        type: "number",
+        default: 3,
+        description: "number of attempts to make for each request",
+      },
+      rateLimit: {
+        type: "number",
+        default: 0,
+        demandOption: false,
+        description:
+          "number of milliseconds between requests to the model (0 is no rate limit)",
+      },
+      benchmark: {
+        type: "boolean",
+        default: false,
+        demandOption: false,
+        description:
+          "use custom rate-limiting for benchmarking (if specified, this supersedes the rateLimit option)",
+      }
     });
     const argv = await parser.argv;
@@ -166,7 +191,7 @@ if (require.main === module) {
         "Warning: --strictResponses has no effect when not using --responses"
       );
     }
-    model = new ChatModel(argv.model);
+    model = new ChatModel(argv.model, argv.nrAttempts, argv.rateLimit, argv.benchmark, { max_tokens: argv.maxTokens } );
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index d84f814..6ac1b7f 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,7 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
-import { retry, RateLimiter, BenchmarkRateLimiter } from "./promise-utils";
+import { retry, RateLimiter, BenchmarkRateLimiter, FixedRateLimiter } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -25,17 +25,28 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
-  protected rateLimiter: RateLimiter;
+  protected rateLimiter: RateLimiter | undefined;
 
   constructor(
     private readonly model: string,
+    private readonly nrAttempts: number,
+    private readonly rateLimit: number,
+    private readonly benchmark: boolean,
     private readonly instanceOptions: PostOptions = {},
-    private readonly nrAttempts: number = 3
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    this.rateLimiter = new BenchmarkRateLimiter();
-    console.log(`Using ${this.model} at ${this.apiEndpoint}`);
+    if (this.benchmark) {
+      this.rateLimiter = new BenchmarkRateLimiter();
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`);
+    } else if (this.rateLimit > 0) {
+      this.rateLimiter = new FixedRateLimiter(this.rateLimit);
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`);
+    } else {
+      this.rateLimiter = undefined;
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`);
+    }
+
   }
 
   /**
@@ -79,10 +90,15 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await retry( () =>
-      this.rateLimiter.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
-      this.nrAttempts
-    );
+    let res;
+    if (this.rateLimiter) {
+      res = await retry( () =>
+        this.rateLimiter!.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
+        this.nrAttempts
+      );
+    } else {
+      res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
+    }
 
     performance.measure(
       `llm-query:${JSON.stringify({

From 6004c8d1af829153c3f5bba61692e74ee447fa9a Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 10:08:42 -0400
Subject: [PATCH 05/13] debug

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 07ed97c..3d6c101 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -180,7 +180,7 @@ jobs:
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
             --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
-            --benchmark ${{ github.event.inputs.benchmarkMode }} \"
+            --benchmark ${{ github.event.inputs.benchmarkMode }} "
$command" $command mv stats.json $outputdir From c19dc125b06ba5b3ad985f2995f7fc0587629871 Mon Sep 17 00:00:00 2001 From: franktip Date: Mon, 1 Jul 2024 10:36:02 -0400 Subject: [PATCH 06/13] autoformat --- benchmark/run.ts | 10 ++++++++-- src/chatmodel.ts | 34 +++++++++++++++++++++++++--------- src/promise-utils.ts | 2 +- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/benchmark/run.ts b/benchmark/run.ts index d74003a..6db1917 100644 --- a/benchmark/run.ts +++ b/benchmark/run.ts @@ -180,7 +180,7 @@ if (require.main === module) { demandOption: false, description: "use custom rate-limiting for benchmarking (if specified, this supercedes the rateLimit option)", - } + }, }); const argv = await parser.argv; @@ -191,7 +191,13 @@ if (require.main === module) { "Warning: --strictResponses has no effect when not using --responses" ); } - model = new ChatModel(argv.model, argv.nrAttempts, argv.rateLimit, argv.benchmark, { max_tokens: argv.maxTokens } ); + model = new ChatModel( + argv.model, + argv.nrAttempts, + argv.rateLimit, + argv.benchmark, + { max_tokens: argv.maxTokens } + ); } else { model = MockCompletionModel.fromFile( argv.responses, diff --git a/src/chatmodel.ts b/src/chatmodel.ts index 6ac1b7f..b9733e4 100644 --- a/src/chatmodel.ts +++ b/src/chatmodel.ts @@ -1,7 +1,12 @@ import axios from "axios"; import { performance } from "perf_hooks"; import { ICompletionModel } from "./completionModel"; -import { retry, RateLimiter, BenchmarkRateLimiter, FixedRateLimiter } from "./promise-utils"; +import { + retry, + RateLimiter, + BenchmarkRateLimiter, + FixedRateLimiter, +} from "./promise-utils"; const defaultPostOptions = { max_tokens: 1000, // maximum number of tokens to return @@ -32,21 +37,26 @@ export class ChatModel implements ICompletionModel { private readonly nrAttempts: number, private readonly rateLimit: number, private readonly benchmark: boolean, - private readonly instanceOptions: PostOptions = {}, + private readonly instanceOptions: PostOptions = {} ) { this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT"); this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS"); if (this.benchmark) { this.rateLimiter = new BenchmarkRateLimiter(); - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.` + ); } else if (this.rateLimit > 0) { this.rateLimiter = new FixedRateLimiter(this.rateLimit); - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.` + ); } else { this.rateLimiter = undefined; - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.` + ); } - } /** @@ -92,12 +102,18 @@ export class ChatModel implements ICompletionModel { let res; if (this.rateLimiter) { - res = await retry( () => - this.rateLimiter!.next(() => axios.post(this.apiEndpoint, postOptions, { headers })), + res = await retry( + () => + this.rateLimiter!.next(() => + axios.post(this.apiEndpoint, postOptions, { headers }) + ), this.nrAttempts ); } else { - res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), 
+      res = await retry(
+        () => axios.post(this.apiEndpoint, postOptions, { headers }),
+        this.nrAttempts
+      );
     }
 
     performance.measure(
       `llm-query:${JSON.stringify({
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index 762d7f4..b94a109 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -112,4 +112,4 @@ export class BenchmarkRateLimiter extends RateLimiter {
     }
     return super.next(p);
   }
-}
\ No newline at end of file
+}

From 14d6c16eda0529ef6fb967d23b43eee4177a5173 Mon Sep 17 00:00:00 2001
From: Frank Tip
Date: Mon, 1 Jul 2024 13:04:30 -0400
Subject: [PATCH 07/13] update json format used for prompt and response

---
 .github/benchmarks11.txt | 11 +++++++++++
 src/chatmodel.ts         |  9 +++------
 2 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 .github/benchmarks11.txt

diff --git a/.github/benchmarks11.txt b/.github/benchmarks11.txt
new file mode 100644
index 0000000..8efda6d
--- /dev/null
+++ b/.github/benchmarks11.txt
@@ -0,0 +1,11 @@
+https://github.com/manuelmhtr/countries-and-timezones/tree/e34cb4b6832795cbac8d44f6f9c97eb1038b831b
+https://github.com/infusion/Complex.js/tree/d995ca105e8adef4c38d0ace50643daf84e0dd1c
+https://gitlab.com/autokent/crawler-url-parser/tree/202c5b25ad693d284804261e2b3815fe66e0723e
+https://gitlab.com/demsking/image-downloader/tree/19a53f652824bd0c612cc5bcd3a2eb173a16f938
+https://github.com/rainder/node-geo-point/tree/c839d477ff7a48d1fc6574495cbbc6196161f494
+https://github.com/jprichardson/node-jsonfile/tree/9c6478a85899a9318547a6e9514b0403166d8c5c
+https://github.com/chakrit/node-uneval/tree/7578dc67090f650a171610a08ea529eba9d27438
+https://github.com/swang/plural/tree/f0027d66ecb37ce0108c8bcb4a6a448d1bf64047
+https://github.com/pull-stream/pull-stream/tree/29b4868bb3864c427c3988855c5d65ad5cb2cb1c
+https://gitlab.com/cptpackrat/spacl-core/tree/fcb8511a0d01bdc206582cfacb3e2b01a0288f6a
+https://github.com/maugenst/zip-a-folder/tree/5089113647753d5086ea20f052f9d29840866ee1
\ No newline at end of file
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index b9733e4..e90ab8d 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -87,11 +87,8 @@ export class ChatModel implements ICompletionModel {
 
     const postOptions = {
       model: this.model,
+      system: "You are a programming assistant.",
       messages: [
-        {
-          role: "system",
-          content: "You are a programming assistant.",
-        },
         {
           role: "user",
           content: prompt,
@@ -138,8 +135,8 @@ export class ChatModel implements ICompletionModel {
     }
 
     const completions = new Set<string>();
-    for (const choice of json.choices) {
-      const content = choice.message.content;
+    for (const choice of json.content) {
+      const content = choice.text;
       completions.add(content);
     }
     return completions;
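The hunk above switches the request/response JSON from an OpenAI-style chat shape (a `system` role inside `messages`, completions under `json.choices[*].message.content`) to an Anthropic-style shape (a top-level `system` field, completions under `json.content[*].text`). The following sketch contrasts the two shapes with a tolerant extractor; the patch itself commits to one shape at a time, so this unified function is illustrative only.

```ts
// Abbreviated response shapes, following the OpenAI and Anthropic chat APIs.
interface OpenAIStyleResponse {
  choices: { message: { content: string } }[];
}
interface AnthropicStyleResponse {
  content: { type: string; text: string }[];
}

// Illustrative extractor that accepts either shape.
function extractCompletions(
  json: OpenAIStyleResponse | AnthropicStyleResponse
): Set<string> {
  const completions = new Set<string>();
  if ("choices" in json) {
    for (const choice of json.choices) {
      completions.add(choice.message.content); // OpenAI-style
    }
  } else {
    for (const block of json.content) {
      completions.add(block.text); // Anthropic-style
    }
  }
  return completions;
}
```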
From e04c8cf27fdc214c6470a20996f77e0a53f5bff9 Mon Sep 17 00:00:00 2001
From: Frank Tip
Date: Mon, 1 Jul 2024 13:15:36 -0400
Subject: [PATCH 08/13] update default LLM to use

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 3d6c101..ddd3d44 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -21,7 +21,7 @@ on:
       model:
         description: "Which LLM API to use"
         type: "string"
-        default: "meta-llama-3-70b-instruct"
+        default: "claude-3-sonnet-20240229"
       compareTo:
         description: "Run number of previous run to compare to (leave empty to skip comparison)"
         default: ""

From c8fc854418050fce22cbc84bfecfb7b9932eaeeb Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 13:20:55 -0400
Subject: [PATCH 09/13] Revert "update default LLM to use"

This reverts commit e04c8cf27fdc214c6470a20996f77e0a53f5bff9.
---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index ddd3d44..3d6c101 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -21,7 +21,7 @@ on:
       model:
         description: "Which LLM API to use"
         type: "string"
-        default: "claude-3-sonnet-20240229"
+        default: "meta-llama-3-70b-instruct"
       compareTo:
         description: "Run number of previous run to compare to (leave empty to skip comparison)"
         default: ""

From 4b4bfa744c85b0d68c00e304d1e533412a41ef92 Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:10:06 -0400
Subject: [PATCH 10/13] rationalize command line options for rate limiting

---
 benchmark/run.ts     | 36 ++++++++++++++++++++++------------
 src/chatmodel.ts     | 57 ++++++++++++++----------------------
 src/promise-utils.ts | 53 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 92 insertions(+), 54 deletions(-)

diff --git a/benchmark/run.ts b/benchmark/run.ts
index 6db1917..d6e30b9 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -14,12 +14,11 @@ import {
   TestValidator,
 } from "..";
 import { ChatModel } from "../src/chatmodel";
-import yargs from "yargs";
+import yargs, { parse } from "yargs";
 import { hideBin } from "yargs/helpers";
 import { PerformanceMeasurer } from "./performanceMeasurer";
 import { TestResultCollector } from "./testResultCollector";
-require("console-stamp")(console);
-
+import { BenchmarkRateLimiter, FixedRateLimiter, IRateLimiter, NoRateLimiter } from "../src/promise-utils";
 /**
  * Run an end-to-end experiment.
 * Given a package generate tests for its methods, run them, and generate a report.
@@ -168,18 +167,11 @@ if (require.main === module) {
         description: "number of attempts to make for each request",
       },
       rateLimit: {
-        type: "number",
-        default: 0,
-        demandOption: false,
-        description:
-          "number of milliseconds between requests to the model (0 is no rate limit)",
-      },
-      benchmark: {
-        type: "boolean",
-        default: false,
+        type: "string",
+        default: "",
         demandOption: false,
         description:
-          "use custom rate-limiting for benchmarking (if specified, this supersedes the rateLimit option)",
+          "number of milliseconds between prompts or \"benchmark\"",
       },
     });
     const argv = await parser.argv;
@@ -191,13 +183,27 @@ if (require.main === module) {
       "Warning: --strictResponses has no effect when not using --responses"
       );
     }
+
+    let rateLimiter: IRateLimiter;
+    if (argv.rateLimit === "benchmark") {
+      rateLimiter = new BenchmarkRateLimiter();
+    } else if (argv.rateLimit) {
+      const rateLimit: number = parseInt(argv.rateLimit, 10);
+      if (!Number.isNaN(rateLimit)) {
+        rateLimiter = new FixedRateLimiter(+argv.rateLimit);
+      } else {
+        throw new Error(`Invalid rate limit: ${argv.rateLimit}`);
+      }
+    } else {
+      rateLimiter = new NoRateLimiter();
+    }
+
     model = new ChatModel(
       argv.model,
       argv.nrAttempts,
-      argv.rateLimit,
-      argv.benchmark,
+      rateLimiter,
       { max_tokens: argv.maxTokens }
     );
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index e90ab8d..9f16926 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -6,6 +6,7 @@ import {
   RateLimiter,
   BenchmarkRateLimiter,
   FixedRateLimiter,
+  IRateLimiter,
 } from "./promise-utils";
 
 const defaultPostOptions = {
@@ -30,33 +31,19 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
-  protected rateLimiter: RateLimiter | undefined;
 
   constructor(
     private readonly model: string,
     private readonly nrAttempts: number,
-    private readonly rateLimit: number,
-    private readonly benchmark: boolean,
+    private readonly rateLimiter: IRateLimiter,
     private readonly instanceOptions: PostOptions = {}
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    if (this.benchmark) {
-      this.rateLimiter = new BenchmarkRateLimiter();
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`
-      );
-    } else if (this.rateLimit > 0) {
-      this.rateLimiter = new FixedRateLimiter(this.rateLimit);
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`
-      );
-    } else {
-      this.rateLimiter = undefined;
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`
-      );
-    }
+
+    console.log(
+      `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and ${this.rateLimiter.getDescription()}`
+    );
   }
 
   /**
@@ -74,8 +61,11 @@ export class ChatModel implements ICompletionModel {
 
     const postOptions = {
      model: this.model,
-      system: "You are a programming assistant.",
      messages: [
+        {
+          role: "system",
+          content: "You are a programming assistant."
+        },
         {
           role: "user",
           content: prompt,
@@ -97,21 +87,14 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    let res;
-    if (this.rateLimiter) {
-      res = await retry(
-        () =>
-          this.rateLimiter!.next(() =>
-            axios.post(this.apiEndpoint, postOptions, { headers })
-          ),
-        this.nrAttempts
-      );
-    } else {
-      res = await retry(
-        () => axios.post(this.apiEndpoint, postOptions, { headers }),
-        this.nrAttempts
-      );
-    }
+    const res = await retry(
+      () =>
+        this.rateLimiter!.next(() =>
+          axios.post(this.apiEndpoint, postOptions, { headers })
+        ),
+      this.nrAttempts
+    );
+
 
     performance.measure(
       `llm-query:${JSON.stringify({
@@ -135,8 +118,8 @@ export class ChatModel implements ICompletionModel {
     }
 
     const completions = new Set<string>();
-    for (const choice of json.content) {
-      const content = choice.text;
+    for (const choice of json.choices) {
+      const content = choice.message.content;
       completions.add(content);
     }
     return completions;
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index b94a109..7cfd713 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -26,13 +26,30 @@ export async function retry<T>(
   return promise; // if the promise was rejected howManyTimes times, return the last promise
 }
 
+/**
+ * This interface provides support for rate-limiting the creation of promises
+ */
+export interface IRateLimiter {
+
+  /**
+   * Waits until the rate limiter allows the next request, then evaluate the function that
+   * produces the promise
+   */
+  next<T>(p: () => Promise<T>): Promise<T>;
+
+  /**
+   * returns a description of the rate limiter
+   */
+  getDescription(): string;
+}
+
 /**
  * This class provides support for asynchronous rate limiting by
  * limiting the number of requests to the server to at most one
  * in N milliseconds. This is useful for throttling requests to
  * a server that has a limit on the number of requests per second.
  */
-export abstract class RateLimiter {
+export abstract class RateLimiter implements IRateLimiter {
   constructor(protected howManyMilliSeconds: number) {
     this.timer = this.resetTimer();
   }
@@ -54,6 +71,8 @@ export abstract class RateLimiter {
     return p(); // return the promise
   }
 
+  public abstract getDescription(): string;
+
   /**
    * resets the timer
    * @returns a promise that is resolved after the number of milliseconds
@@ -72,10 +91,17 @@ export abstract class RateLimiter {
  * maximum of one per N milliseconds.
  *
  */
-export class FixedRateLimiter extends RateLimiter {
+export class FixedRateLimiter extends RateLimiter implements IRateLimiter {
   public constructor(N: number) {
     super(N);
   }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `FixedRateLimiter (1 request per ${this.howManyMilliSeconds} ms)`;
+  }
 }
 
 /**
@@ -112,4 +138,27 @@ export class BenchmarkRateLimiter extends RateLimiter {
     }
     return super.next(p);
   }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `BenchmarkRateLimiter (increasing pace after 150 and 300 requests)`;
+  }
+}
+
+/**
+ * A rate limiter that does not limit the rate of requests to the server.
+ */
+export class NoRateLimiter implements IRateLimiter {
+  public async next<T>(p: () => Promise<T>): Promise<T> {
+    return p();
+  }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `NoRateLimiter`;
+  }
 }
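Since `ChatModel` now depends only on the `IRateLimiter` interface, limiters can be exercised (and swapped) in isolation. A small timing sketch under that assumption; the 500 ms figure is arbitrary:

```ts
import { FixedRateLimiter, NoRateLimiter } from "./promise-utils";

async function demo(): Promise<void> {
  // Each call waits out the timer started by the constructor or the previous
  // call, so two paced calls take roughly 2 x 500 ms in total.
  const paced = new FixedRateLimiter(500);
  const t0 = Date.now();
  await paced.next(() => Promise.resolve("first"));
  await paced.next(() => Promise.resolve("second"));
  console.log(`elapsed: ${Date.now() - t0} ms (about 1000)`);

  // NoRateLimiter forwards immediately, which keeps tests fast.
  const unlimited = new NoRateLimiter();
  console.log(await unlimited.next(() => Promise.resolve("immediate")));
}

demo().catch(console.error);
```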
From bb56a253cf6ed2c026c650bd79b89e0e27dec82b Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:12:51 -0400
Subject: [PATCH 11/13] autoformat

---
 benchmark/run.ts     | 19 ++++++++++---------
 src/chatmodel.ts     |  9 +++++----
 src/promise-utils.ts |  3 +--
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/benchmark/run.ts b/benchmark/run.ts
index d6e30b9..22e9a02 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -18,7 +18,12 @@ import yargs, { parse } from "yargs";
 import { hideBin } from "yargs/helpers";
 import { PerformanceMeasurer } from "./performanceMeasurer";
 import { TestResultCollector } from "./testResultCollector";
-import { BenchmarkRateLimiter, FixedRateLimiter, IRateLimiter, NoRateLimiter } from "../src/promise-utils";
+import {
+  BenchmarkRateLimiter,
+  FixedRateLimiter,
+  IRateLimiter,
+  NoRateLimiter,
+} from "../src/promise-utils";
 /**
  * Run an end-to-end experiment.
  * Given a package generate tests for its methods, run them, and generate a report.
@@ -170,8 +175,7 @@ if (require.main === module) {
         type: "string",
         default: "",
         demandOption: false,
-        description:
-          "number of milliseconds between prompts or \"benchmark\"",
+        description: 'number of milliseconds between prompts or "benchmark"',
       },
     });
     const argv = await parser.argv;
@@ -198,12 +202,9 @@ if (require.main === module) {
       rateLimiter = new NoRateLimiter();
     }
 
-    model = new ChatModel(
-      argv.model,
-      argv.nrAttempts,
-      rateLimiter,
-      { max_tokens: argv.maxTokens }
-    );
+    model = new ChatModel(argv.model, argv.nrAttempts, rateLimiter, {
+      max_tokens: argv.maxTokens,
+    });
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index 9f16926..9b62788 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -40,9 +40,11 @@ export class ChatModel implements ICompletionModel {
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-
+
     console.log(
-      `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and ${this.rateLimiter.getDescription()}`
+      `Using ${this.model} at ${this.apiEndpoint} with ${
+        this.nrAttempts
+      } attempts and ${this.rateLimiter.getDescription()}`
     );
   }
 
@@ -79,7 +81,7 @@ export class ChatModel implements ICompletionModel {
       {
         role: "system",
-        content: "You are a programming assistant."
+        content: "You are a programming assistant.",
       },
       {
         role: "user",
         content: prompt,
@@ -94,7 +96,6 @@ export class ChatModel implements ICompletionModel {
         ),
       this.nrAttempts
     );
-
     performance.measure(
       `llm-query:${JSON.stringify({
         model: this.model,
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index 7cfd713..0ba9546 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -30,13 +30,12 @@ export async function retry<T>(
  * This interface provides support for rate-limiting the creation of promises
  */
 export interface IRateLimiter {
-
   /**
    * Waits until the rate limiter allows the next request, then evaluate the function that
    * produces the promise
    */
   next<T>(p: () => Promise<T>): Promise<T>;
-
+
   /**
    * returns a description of the rate limiter
    */

From 401acd86c23f53e21ea6428551e88a86f9b954fb Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:20:28 -0400
Subject: [PATCH 12/13] update workflow to reflect new rate-limiting mechanism

---
 .github/workflows/run-experiment.yml | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 3d6c101..df40945 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -29,9 +29,9 @@ on:
         description: "Skip slow benchmarks"
         type: boolean
         default: false
-      benchmarkMode:
+      rateLimiting:
         type: boolean
-        description: "Use custom rate limiting for running benchmarks"
+        description: "Use rate limiting for running benchmarks"
         default: false
       debug_enabled:
         type: boolean
@@ -173,14 +173,26 @@ jobs:
           echo "Computing package statistics"
           node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json
           echo "Generating tests for $TESTPILOT_PACKAGE_NAME"
-          export command="node benchmark/run.js \
+
+          if (${{ github.event.inputs.rateLimiting }}); then
+            export command="node benchmark/run.js \
             --outputDir $outputdir \
             --package "$TESTPILOT_PACKAGE_PATH" \
             --temperatures "${{ needs.setup.outputs.temperatures }}" \
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
             --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
-            --benchmark ${{ github.event.inputs.benchmarkMode }} "
+            --rateLimit benchmark"
+          else
+            export command="node benchmark/run.js \
+            --outputDir $outputdir \
+            --package "$TESTPILOT_PACKAGE_PATH" \
+            --temperatures "${{ needs.setup.outputs.temperatures }}" \
+            --model ${{ needs.setup.outputs.model }} \
+            --template ${{ needs.setup.outputs.template }} \
+            --retryTemplate ${{ needs.setup.outputs.retryTemplate }}"
+          fi
+
           echo "command: $command"
           $command
           mv stats.json $outputdir

From efefbb9f6dfde32ac6f7d31161fcd04430c1601e Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:31:59 -0400
Subject: [PATCH 13/13] update workflow to reflect new rate-limiting mechanism

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index df40945..1929a4f 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -31,7 +31,7 @@ on:
       rateLimiting:
         type: boolean
-        description: "Use rate limiting for running benchmarks"
+        description: "Use rate limiting"
         default: false
       debug_enabled:
         type: boolean