From cc4d48a29802c9169b0c0b11c10d799a5ad06090 Mon Sep 17 00:00:00 2001
From: franktip
Date: Sun, 30 Jun 2024 18:21:00 -0400
Subject: [PATCH 01/13] retry when encountering 429 errors

---
 src/chatmodel.ts     |   6 ++-
 src/promise-utils.ts | 115 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 src/promise-utils.ts

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index 9758d9e..a29d1ea 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,6 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
+import { retry } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -27,7 +28,8 @@ export class ChatModel implements ICompletionModel {
 
   constructor(
     private readonly model: string,
-    private readonly instanceOptions: PostOptions = {}
+    private readonly instanceOptions: PostOptions = {},
+    private readonly nrAttempts: number = 3
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
@@ -75,7 +77,7 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await axios.post(this.apiEndpoint, postOptions, { headers });
+    const res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
 
     performance.measure(
       `llm-query:${JSON.stringify({
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
new file mode 100644
index 0000000..762d7f4
--- /dev/null
+++ b/src/promise-utils.ts
@@ -0,0 +1,115 @@
+/**
+ * This function provides support for retrying the creation of a promise
+ * up to a given number of times in case the promise is rejected.
+ * This is useful for, e.g., retrying a request to a server that is temporarily unavailable.
+ *
+ */
+export async function retry<T>(
+  f: () => Promise<T>,
+  howManyTimes: number
+): Promise<T> {
+  let i = 1;
+  let promise: Promise<T> = f(); // create the promise, but don't wait for its fulfillment yet..
+  while (i <= howManyTimes) {
+    try {
+      if (i > 1) {
+        console.log(` retry ${i}/${howManyTimes}`);
+      }
+      let val: T = await promise; // throws an exception if the promise is rejected
+      return val; // if the promise was fulfilled, return another promise that is fulfilled with the same value
+    } catch (e) {
+      i++;
+      console.log(`Promise rejected with ${e}.`);
+      promise = f(); // next attempt: create the promise, but don't wait for its fulfillment yet..
+    }
+  }
+  return promise; // if the promise was rejected howManyTimes times, return the last promise
+}
+
+/**
+ * This class provides support for asynchronous rate limiting by
+ * limiting the number of requests to the server to at most one
+ * in N milliseconds. This is useful for throttling requests to
+ * a server that has a limit on the number of requests per second.
+ */
+export abstract class RateLimiter {
+  constructor(protected howManyMilliSeconds: number) {
+    this.timer = this.resetTimer();
+  }
+  /**
+   * the timer is a promise that is resolved after a certain number of milliseconds
+   * have elapsed. The timer is reset after each request.
+   */
+  private timer: Promise<void>;
+
+  /**
+   * Waits until the timer has expired, then evaluate the function that
+   * produces the promise
+   * @param p a function that produces a promise
+   * @returns returns the promise produced by the function p (after the timer has expired)
+   */
+  public async next<T>(p: () => Promise<T>): Promise<T> {
+    await this.timer; // wait until timer has expired
+    this.timer = this.resetTimer(); // reset timer (for the next request)
+    return p(); // return the promise
+  }
+
+  /**
+   * resets the timer
+   * @returns a promise that is resolved after the number of milliseconds
+   * specified in the constructor have elapsed
+   */
+  protected resetTimer = () =>
+    new Promise<void>((resolve, reject) => {
+      setTimeout(() => {
+        resolve();
+      }, this.howManyMilliSeconds);
+    });
+}
+
+/**
+ * A rate limiter that limits the number of requests to the server to a
+ * maximum of one per N milliseconds.
+ *
+ */
+export class FixedRateLimiter extends RateLimiter {
+  public constructor(N: number) {
+    super(N);
+  }
+}
+
+/**
+ * A custom rate limiter for use during benchmark runs. It increases
+ * the pace of requests after two designated thresholds have been reached.
+ */
+export class BenchmarkRateLimiter extends RateLimiter {
+  private requestCount: number;
+
+  private static INITIAL_PACE = 10000;
+  private static PACE_AFTER_150_REQUESTS = 5000;
+  private static PACE_AFTER_300_REQUESTS = 2500;
+
+  constructor() {
+    console.log(
+      `BenchmarkRateLimiter: initial pace is ${BenchmarkRateLimiter.INITIAL_PACE}`
+    );
+    super(BenchmarkRateLimiter.INITIAL_PACE);
+    this.requestCount = 0;
+  }
+
+  public next<T>(p: () => Promise<T>): Promise<T> {
+    this.requestCount++;
+    if (this.requestCount === 150) {
+      this.howManyMilliSeconds = BenchmarkRateLimiter.PACE_AFTER_150_REQUESTS;
+      console.log(
+        `BenchmarkRateLimiter: increasing pace to ${BenchmarkRateLimiter.PACE_AFTER_150_REQUESTS}`
+      );
+    } else if (this.requestCount === 300) {
+      this.howManyMilliSeconds = BenchmarkRateLimiter.PACE_AFTER_300_REQUESTS;
+      console.log(
+        `BenchmarkRateLimiter: increasing pace to ${BenchmarkRateLimiter.PACE_AFTER_300_REQUESTS}`
+      );
+    }
+    return super.next(p);
+  }
+}
\ No newline at end of file
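To make the intended use of `retry` concrete, here is a minimal call-site sketch. The `fetchCompletion` helper and its endpoint URL are hypothetical, invented for illustration; only `retry` itself comes from the patch above.

```ts
// Hypothetical helper: posts a prompt and returns the response body.
// (fetchCompletion and the URL are illustrative, not part of the patch.)
import axios from "axios";
import { retry } from "./promise-utils";

async function fetchCompletion(prompt: string): Promise<string> {
  // axios rejects on a 429 (or any non-2xx status), which is what retry() reacts to
  const res = await axios.post("https://llm.example.com/v1/completions", { prompt });
  return res.data;
}

// Make up to three attempts; each rejection is logged and a fresh request is issued.
retry(() => fetchCompletion("write a unit test for zip()"), 3)
  .then((completion) => console.log(completion))
  .catch((err) => console.error(`all attempts failed: ${err}`));
```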
From eef754982838422fb26927ad050c3f50193bd006 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 06:25:46 -0400
Subject: [PATCH 02/13] adopt rate limiter

---
 src/chatmodel.ts | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index a29d1ea..c398932 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,7 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
-import { retry } from "./promise-utils";
+import { retry, RateLimiter, BenchmarkRateLimiter } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -25,6 +25,7 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
+  protected rateLimiter: RateLimiter;
 
   constructor(
     private readonly model: string,
@@ -33,7 +34,8 @@ export class ChatModel implements ICompletionModel {
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    console.log(`Using Chat Model API at ${this.apiEndpoint}`);
+    this.rateLimiter = new BenchmarkRateLimiter();
+    console.log(`Using ${this.model} at ${this.apiEndpoint}`);
   }
 
   /**
@@ -77,7 +79,10 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
+    const res = await retry( () =>
+      this.rateLimiter.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
+      this.nrAttempts
+    );
 
     performance.measure(
       `llm-query:${JSON.stringify({

From e4b5e6f0c009d0387b143add11702fae2b0780d3 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 08:07:44 -0400
Subject: [PATCH 03/13] update comment

---
 src/chatmodel.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index c398932..d84f814 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -114,8 +114,7 @@ export class ChatModel implements ICompletionModel {
   }
 
   /**
-   * Get completions from the LLM, extract the code fragments enclosed in a fenced code block,
-   * and postprocess them as needed; print a warning if it did not produce any
+   * Get completions from the LLM; issue a warning if it did not produce any
    *
    * @param prompt the prompt to use
    */

From 99106a61cec25f61304e9262d8d1999fe9585a05 Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 10:03:52 -0400
Subject: [PATCH 04/13] add parameters for rate limiting and retry

---
 .github/workflows/run-experiment.yml |  7 +++++-
 benchmark/run.ts                     | 27 +++++++++++++++++++++-
 src/chatmodel.ts                     | 34 ++++++++++++++++++++--------
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 307a59b..07ed97c 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -29,6 +29,10 @@ on:
         description: "Skip slow benchmarks"
         type: boolean
         default: false
+      benchmarkMode:
+        type: boolean
+        description: "Use custom rate limiting for running benchmarks"
+        default: false
       debug_enabled:
         type: boolean
         description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
@@ -175,7 +179,8 @@ jobs:
             --temperatures "${{ needs.setup.outputs.temperatures }}" \
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
-            --retryTemplate ${{ needs.setup.outputs.retryTemplate }}"
+            --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
+            --benchmark ${{ github.event.inputs.benchmarkMode }} \"
           echo "command: $command"
           $command
           mv stats.json $outputdir
diff --git a/benchmark/run.ts b/benchmark/run.ts
index e91a8b7..d74003a 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -121,6 +121,12 @@ if (require.main === module) {
         default: 20,
         description: "maximum length of each snippet in lines",
       },
+      maxTokens: {
+        type: "number",
+        default: 1000,
+        demandOption: false,
+        description: "maximum number of tokens in a completion",
+      },
       temperatures: {
         type: "string",
         default: "0.0",
@@ -156,6 +162,25 @@ if (require.main === module) {
         default: "./templates/retry-template.hb",
         description: "Handlebars template file to use",
      },
+      nrAttempts: {
+        type: "number",
+        default: 3,
+        description: "number of attempts to make for each request",
+      },
+      rateLimit: {
+        type: "number",
+        default: 0,
+        demandOption: false,
+        description:
+          "number of milliseconds between requests to the model (0 is no rate limit)",
+      },
+      benchmark: {
+        type: "boolean",
+        default: false,
+        demandOption: false,
+        description:
+          "use custom rate-limiting for benchmarking (if specified, this supersedes the rateLimit option)",
+      }
     });
     const argv = await parser.argv;
@@ -166,7 +191,7 @@ if (require.main === module) {
         "Warning: --strictResponses has no effect when not using --responses"
       );
     }
-    model = new ChatModel(argv.model);
+    model = new ChatModel(argv.model, argv.nrAttempts, argv.rateLimit, argv.benchmark, { max_tokens: argv.maxTokens } );
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index d84f814..6ac1b7f 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -1,7 +1,7 @@
 import axios from "axios";
 import { performance } from "perf_hooks";
 import { ICompletionModel } from "./completionModel";
-import { retry, RateLimiter, BenchmarkRateLimiter } from "./promise-utils";
+import { retry, RateLimiter, BenchmarkRateLimiter, FixedRateLimiter } from "./promise-utils";
 
 const defaultPostOptions = {
   max_tokens: 1000, // maximum number of tokens to return
@@ -25,17 +25,28 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
-  protected rateLimiter: RateLimiter;
+  protected rateLimiter: RateLimiter | undefined;
 
   constructor(
     private readonly model: string,
+    private readonly nrAttempts: number,
+    private readonly rateLimit: number,
+    private readonly benchmark: boolean,
     private readonly instanceOptions: PostOptions = {},
-    private readonly nrAttempts: number = 3
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    this.rateLimiter = new BenchmarkRateLimiter();
-    console.log(`Using ${this.model} at ${this.apiEndpoint}`);
+    if (this.benchmark) {
+      this.rateLimiter = new BenchmarkRateLimiter();
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`);
+    } else if (this.rateLimit > 0) {
+      this.rateLimiter = new FixedRateLimiter(this.rateLimit);
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`);
+    } else {
+      this.rateLimiter = undefined;
+      console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`);
+    }
+
   }
 
   /**
@@ -79,10 +90,15 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    const res = await retry( () =>
-      this.rateLimiter.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
-      this.nrAttempts
-    );
+    let res;
+    if (this.rateLimiter) {
+      res = await retry( () =>
+        this.rateLimiter!.next(() => axios.post(this.apiEndpoint, postOptions, { headers })),
+        this.nrAttempts
+      );
+    } else {
+      res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), this.nrAttempts);
+    }
 
     performance.measure(
       `llm-query:${JSON.stringify({

From 6004c8d1af829153c3f5bba61692e74ee447fa9a Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 10:08:42 -0400
Subject: [PATCH 05/13] debug

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 07ed97c..3d6c101 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -180,7 +180,7 @@ jobs:
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
             --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
-            --benchmark ${{ github.event.inputs.benchmarkMode }} \"
+            --benchmark ${{ github.event.inputs.benchmarkMode }} "
$command" $command mv stats.json $outputdir From c19dc125b06ba5b3ad985f2995f7fc0587629871 Mon Sep 17 00:00:00 2001 From: franktip Date: Mon, 1 Jul 2024 10:36:02 -0400 Subject: [PATCH 06/13] autoformat --- benchmark/run.ts | 10 ++++++++-- src/chatmodel.ts | 34 +++++++++++++++++++++++++--------- src/promise-utils.ts | 2 +- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/benchmark/run.ts b/benchmark/run.ts index d74003a..6db1917 100644 --- a/benchmark/run.ts +++ b/benchmark/run.ts @@ -180,7 +180,7 @@ if (require.main === module) { demandOption: false, description: "use custom rate-limiting for benchmarking (if specified, this supercedes the rateLimit option)", - } + }, }); const argv = await parser.argv; @@ -191,7 +191,13 @@ if (require.main === module) { "Warning: --strictResponses has no effect when not using --responses" ); } - model = new ChatModel(argv.model, argv.nrAttempts, argv.rateLimit, argv.benchmark, { max_tokens: argv.maxTokens } ); + model = new ChatModel( + argv.model, + argv.nrAttempts, + argv.rateLimit, + argv.benchmark, + { max_tokens: argv.maxTokens } + ); } else { model = MockCompletionModel.fromFile( argv.responses, diff --git a/src/chatmodel.ts b/src/chatmodel.ts index 6ac1b7f..b9733e4 100644 --- a/src/chatmodel.ts +++ b/src/chatmodel.ts @@ -1,7 +1,12 @@ import axios from "axios"; import { performance } from "perf_hooks"; import { ICompletionModel } from "./completionModel"; -import { retry, RateLimiter, BenchmarkRateLimiter, FixedRateLimiter } from "./promise-utils"; +import { + retry, + RateLimiter, + BenchmarkRateLimiter, + FixedRateLimiter, +} from "./promise-utils"; const defaultPostOptions = { max_tokens: 1000, // maximum number of tokens to return @@ -32,21 +37,26 @@ export class ChatModel implements ICompletionModel { private readonly nrAttempts: number, private readonly rateLimit: number, private readonly benchmark: boolean, - private readonly instanceOptions: PostOptions = {}, + private readonly instanceOptions: PostOptions = {} ) { this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT"); this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS"); if (this.benchmark) { this.rateLimiter = new BenchmarkRateLimiter(); - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.` + ); } else if (this.rateLimit > 0) { this.rateLimiter = new FixedRateLimiter(this.rateLimit); - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.` + ); } else { this.rateLimiter = undefined; - console.log(`Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`); + console.log( + `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.` + ); } - } /** @@ -92,12 +102,18 @@ export class ChatModel implements ICompletionModel { let res; if (this.rateLimiter) { - res = await retry( () => - this.rateLimiter!.next(() => axios.post(this.apiEndpoint, postOptions, { headers })), + res = await retry( + () => + this.rateLimiter!.next(() => + axios.post(this.apiEndpoint, postOptions, { headers }) + ), this.nrAttempts ); } else { - res = await retry( () => axios.post(this.apiEndpoint, postOptions, { headers }), 
+      res = await retry(
+        () => axios.post(this.apiEndpoint, postOptions, { headers }),
+        this.nrAttempts
+      );
     }
 
     performance.measure(
       `llm-query:${JSON.stringify({
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index 762d7f4..b94a109 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -112,4 +112,4 @@ export class BenchmarkRateLimiter extends RateLimiter {
     }
     return super.next(p);
   }
-}
\ No newline at end of file
+}

From 14d6c16eda0529ef6fb967d23b43eee4177a5173 Mon Sep 17 00:00:00 2001
From: Frank Tip
Date: Mon, 1 Jul 2024 13:04:30 -0400
Subject: [PATCH 07/13] update json format used for prompt and response

---
 .github/benchmarks11.txt | 11 +++++++++++
 src/chatmodel.ts         |  9 +++------
 2 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 .github/benchmarks11.txt

diff --git a/.github/benchmarks11.txt b/.github/benchmarks11.txt
new file mode 100644
index 0000000..8efda6d
--- /dev/null
+++ b/.github/benchmarks11.txt
@@ -0,0 +1,11 @@
+https://github.com/manuelmhtr/countries-and-timezones/tree/e34cb4b6832795cbac8d44f6f9c97eb1038b831b
+https://github.com/infusion/Complex.js/tree/d995ca105e8adef4c38d0ace50643daf84e0dd1c
+https://gitlab.com/autokent/crawler-url-parser/tree/202c5b25ad693d284804261e2b3815fe66e0723e
+https://gitlab.com/demsking/image-downloader/tree/19a53f652824bd0c612cc5bcd3a2eb173a16f938
+https://github.com/rainder/node-geo-point/tree/c839d477ff7a48d1fc6574495cbbc6196161f494
+https://github.com/jprichardson/node-jsonfile/tree/9c6478a85899a9318547a6e9514b0403166d8c5c
+https://github.com/chakrit/node-uneval/tree/7578dc67090f650a171610a08ea529eba9d27438
+https://github.com/swang/plural/tree/f0027d66ecb37ce0108c8bcb4a6a448d1bf64047
+https://github.com/pull-stream/pull-stream/tree/29b4868bb3864c427c3988855c5d65ad5cb2cb1c
+https://gitlab.com/cptpackrat/spacl-core/tree/fcb8511a0d01bdc206582cfacb3e2b01a0288f6a
+https://github.com/maugenst/zip-a-folder/tree/5089113647753d5086ea20f052f9d29840866ee1
\ No newline at end of file
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index b9733e4..e90ab8d 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -87,11 +87,8 @@ export class ChatModel implements ICompletionModel {
 
     const postOptions = {
       model: this.model,
+      system: "You are a programming assistant.",
       messages: [
-        {
-          role: "system",
-          content: "You are a programming assistant.",
-        },
         {
           role: "user",
           content: prompt,
@@ -138,8 +135,8 @@ export class ChatModel implements ICompletionModel {
     }
 
     const completions = new Set<string>();
-    for (const choice of json.choices) {
-      const content = choice.message.content;
+    for (const choice of json.content) {
+      const content = choice.text;
       completions.add(content);
     }
     return completions;
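The hunk above switches the request/response JSON from an OpenAI-style chat shape (a `system` role inside `messages`, completions under `json.choices[*].message.content`) to an Anthropic-style shape (a top-level `system` field, completions under `json.content[*].text`). The following sketch contrasts the two shapes with a tolerant extractor; the patch itself commits to one shape at a time, so this unified function is illustrative only.

```ts
// Abbreviated response shapes, following the OpenAI and Anthropic chat APIs.
interface OpenAIStyleResponse {
  choices: { message: { content: string } }[];
}
interface AnthropicStyleResponse {
  content: { type: string; text: string }[];
}

// Illustrative extractor that accepts either shape.
function extractCompletions(
  json: OpenAIStyleResponse | AnthropicStyleResponse
): Set<string> {
  const completions = new Set<string>();
  if ("choices" in json) {
    for (const choice of json.choices) {
      completions.add(choice.message.content); // OpenAI-style
    }
  } else {
    for (const block of json.content) {
      completions.add(block.text); // Anthropic-style
    }
  }
  return completions;
}
```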
From e04c8cf27fdc214c6470a20996f77e0a53f5bff9 Mon Sep 17 00:00:00 2001
From: Frank Tip
Date: Mon, 1 Jul 2024 13:15:36 -0400
Subject: [PATCH 08/13] update default LLM to use

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 3d6c101..ddd3d44 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -21,7 +21,7 @@ on:
       model:
         description: "Which LLM API to use"
         type: "string"
-        default: "meta-llama-3-70b-instruct"
+        default: "claude-3-sonnet-20240229"
       compareTo:
         description: "Run number of previous run to compare to (leave empty to skip comparison)"
         default: ""

From c8fc854418050fce22cbc84bfecfb7b9932eaeeb Mon Sep 17 00:00:00 2001
From: franktip
Date: Mon, 1 Jul 2024 13:20:55 -0400
Subject: [PATCH 09/13] Revert "update default LLM to use"

This reverts commit e04c8cf27fdc214c6470a20996f77e0a53f5bff9.
---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index ddd3d44..3d6c101 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -21,7 +21,7 @@ on:
       model:
         description: "Which LLM API to use"
         type: "string"
-        default: "claude-3-sonnet-20240229"
+        default: "meta-llama-3-70b-instruct"
       compareTo:
         description: "Run number of previous run to compare to (leave empty to skip comparison)"
         default: ""

From 4b4bfa744c85b0d68c00e304d1e533412a41ef92 Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:10:06 -0400
Subject: [PATCH 10/13] rationalize command line options for rate limiting

---
 benchmark/run.ts     | 36 ++++++++++++++++++++++------------
 src/chatmodel.ts     | 57 ++++++++++++++----------------------
 src/promise-utils.ts | 53 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 92 insertions(+), 54 deletions(-)

diff --git a/benchmark/run.ts b/benchmark/run.ts
index 6db1917..d6e30b9 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -14,12 +14,11 @@ import {
   TestValidator,
 } from "..";
 import { ChatModel } from "../src/chatmodel";
-import yargs from "yargs";
+import yargs, { parse } from "yargs";
 import { hideBin } from "yargs/helpers";
 import { PerformanceMeasurer } from "./performanceMeasurer";
 import { TestResultCollector } from "./testResultCollector";
-require("console-stamp")(console);
-
+import { BenchmarkRateLimiter, FixedRateLimiter, IRateLimiter, NoRateLimiter } from "../src/promise-utils";
 /**
  * Run an end-to-end experiment.
 * Given a package generate tests for its methods, run them, and generate a report.
@@ -168,18 +167,11 @@ if (require.main === module) {
         description: "number of attempts to make for each request",
       },
       rateLimit: {
-        type: "number",
-        default: 0,
-        demandOption: false,
-        description:
-          "number of milliseconds between requests to the model (0 is no rate limit)",
-      },
-      benchmark: {
-        type: "boolean",
-        default: false,
+        type: "string",
+        default: "",
         demandOption: false,
         description:
-          "use custom rate-limiting for benchmarking (if specified, this supersedes the rateLimit option)",
+          "number of milliseconds between prompts or \"benchmark\"",
       },
     });
     const argv = await parser.argv;
@@ -191,13 +183,27 @@ if (require.main === module) {
       "Warning: --strictResponses has no effect when not using --responses"
       );
     }
+
+    let rateLimiter: IRateLimiter;
+    if (argv.rateLimit === "benchmark") {
+      rateLimiter = new BenchmarkRateLimiter();
+    } else if (argv.rateLimit) {
+      const rateLimit: number = parseInt(argv.rateLimit, 10);
+      if (!Number.isNaN(rateLimit)) {
+        rateLimiter = new FixedRateLimiter(+argv.rateLimit);
+      } else {
+        throw new Error(`Invalid rate limit: ${argv.rateLimit}`);
+      }
+    } else {
+      rateLimiter = new NoRateLimiter();
+    }
+
     model = new ChatModel(
       argv.model,
       argv.nrAttempts,
-      argv.rateLimit,
-      argv.benchmark,
+      rateLimiter,
       { max_tokens: argv.maxTokens }
     );
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index e90ab8d..9f16926 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -6,6 +6,7 @@ import {
   RateLimiter,
   BenchmarkRateLimiter,
   FixedRateLimiter,
+  IRateLimiter,
 } from "./promise-utils";
 
 const defaultPostOptions = {
@@ -30,33 +31,19 @@ function getEnv(name: string): string {
 export class ChatModel implements ICompletionModel {
   private readonly apiEndpoint: string;
   private readonly authHeaders: string;
-  protected rateLimiter: RateLimiter | undefined;
 
   constructor(
     private readonly model: string,
     private readonly nrAttempts: number,
-    private readonly rateLimit: number,
-    private readonly benchmark: boolean,
+    private readonly rateLimiter: IRateLimiter,
     private readonly instanceOptions: PostOptions = {}
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-    if (this.benchmark) {
-      this.rateLimiter = new BenchmarkRateLimiter();
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and benchmark rate limit.`
-      );
-    } else if (this.rateLimit > 0) {
-      this.rateLimiter = new FixedRateLimiter(this.rateLimit);
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and fixed rate of ${this.rateLimit} ms.`
-      );
-    } else {
-      this.rateLimiter = undefined;
-      console.log(
-        `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and no rate limit.`
-      );
-    }
+
+    console.log(
+      `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and ${this.rateLimiter.getDescription()}`
+    );
   }
 
   /**
@@ -74,8 +61,11 @@ export class ChatModel implements ICompletionModel {
 
     const postOptions = {
      model: this.model,
-      system: "You are a programming assistant.",
      messages: [
+        {
+          role: "system",
+          content: "You are a programming assistant."
+        },
         {
           role: "user",
           content: prompt,
@@ -97,21 +87,14 @@ export class ChatModel implements ICompletionModel {
       ...options,
     };
 
-    let res;
-    if (this.rateLimiter) {
-      res = await retry(
-        () =>
-          this.rateLimiter!.next(() =>
-            axios.post(this.apiEndpoint, postOptions, { headers })
-          ),
-        this.nrAttempts
-      );
-    } else {
-      res = await retry(
-        () => axios.post(this.apiEndpoint, postOptions, { headers }),
-        this.nrAttempts
-      );
-    }
+    const res = await retry(
+      () =>
+        this.rateLimiter!.next(() =>
+          axios.post(this.apiEndpoint, postOptions, { headers })
+        ),
+      this.nrAttempts
+    );
+
 
     performance.measure(
       `llm-query:${JSON.stringify({
@@ -135,8 +118,8 @@ export class ChatModel implements ICompletionModel {
     }
 
     const completions = new Set<string>();
-    for (const choice of json.content) {
-      const content = choice.text;
+    for (const choice of json.choices) {
+      const content = choice.message.content;
       completions.add(content);
     }
     return completions;
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index b94a109..7cfd713 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -26,13 +26,30 @@ export async function retry<T>(
   return promise; // if the promise was rejected howManyTimes times, return the last promise
 }
 
+/**
+ * This interface provides support for rate-limiting the creation of promises
+ */
+export interface IRateLimiter {
+
+  /**
+   * Waits until the rate limiter allows the next request, then evaluate the function that
+   * produces the promise
+   */
+  next<T>(p: () => Promise<T>): Promise<T>;
+
+  /**
+   * returns a description of the rate limiter
+   */
+  getDescription(): string;
+}
+
 /**
  * This class provides support for asynchronous rate limiting by
  * limiting the number of requests to the server to at most one
  * in N milliseconds. This is useful for throttling requests to
  * a server that has a limit on the number of requests per second.
  */
-export abstract class RateLimiter {
+export abstract class RateLimiter implements IRateLimiter {
   constructor(protected howManyMilliSeconds: number) {
     this.timer = this.resetTimer();
   }
@@ -54,6 +71,8 @@ export abstract class RateLimiter {
     return p(); // return the promise
   }
 
+  public abstract getDescription(): string;
+
   /**
    * resets the timer
    * @returns a promise that is resolved after the number of milliseconds
@@ -72,10 +91,17 @@ export abstract class RateLimiter {
  * maximum of one per N milliseconds.
  *
  */
-export class FixedRateLimiter extends RateLimiter {
+export class FixedRateLimiter extends RateLimiter implements IRateLimiter {
   public constructor(N: number) {
     super(N);
   }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `FixedRateLimiter (1 request per ${this.howManyMilliSeconds} ms)`;
+  }
 }
 
 /**
@@ -112,4 +138,27 @@ export class BenchmarkRateLimiter extends RateLimiter {
     }
     return super.next(p);
   }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `BenchmarkRateLimiter (increasing pace after 150 and 300 requests)`;
+  }
+}
+
+/**
+ * A rate limiter that does not limit the rate of requests to the server.
+ */
+export class NoRateLimiter implements IRateLimiter {
+  public async next<T>(p: () => Promise<T>): Promise<T> {
+    return p();
+  }
+
+  /**
+   * returns a description of the rate limiter
+   */
+  public getDescription(): string {
+    return `NoRateLimiter`;
+  }
 }
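Since `ChatModel` now depends only on the `IRateLimiter` interface, limiters can be exercised (and swapped) in isolation. A small timing sketch under that assumption; the 500 ms figure is arbitrary:

```ts
import { FixedRateLimiter, NoRateLimiter } from "./promise-utils";

async function demo(): Promise<void> {
  // Each call waits out the timer started by the constructor or the previous
  // call, so two paced calls take roughly 2 x 500 ms in total.
  const paced = new FixedRateLimiter(500);
  const t0 = Date.now();
  await paced.next(() => Promise.resolve("first"));
  await paced.next(() => Promise.resolve("second"));
  console.log(`elapsed: ${Date.now() - t0} ms (about 1000)`);

  // NoRateLimiter forwards immediately, which keeps tests fast.
  const unlimited = new NoRateLimiter();
  console.log(await unlimited.next(() => Promise.resolve("immediate")));
}

demo().catch(console.error);
```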
From bb56a253cf6ed2c026c650bd79b89e0e27dec82b Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:12:51 -0400
Subject: [PATCH 11/13] autoformat

---
 benchmark/run.ts     | 19 ++++++++++---------
 src/chatmodel.ts     |  9 +++++----
 src/promise-utils.ts |  3 +--
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/benchmark/run.ts b/benchmark/run.ts
index d6e30b9..22e9a02 100644
--- a/benchmark/run.ts
+++ b/benchmark/run.ts
@@ -18,7 +18,12 @@ import yargs, { parse } from "yargs";
 import { hideBin } from "yargs/helpers";
 import { PerformanceMeasurer } from "./performanceMeasurer";
 import { TestResultCollector } from "./testResultCollector";
-import { BenchmarkRateLimiter, FixedRateLimiter, IRateLimiter, NoRateLimiter } from "../src/promise-utils";
+import {
+  BenchmarkRateLimiter,
+  FixedRateLimiter,
+  IRateLimiter,
+  NoRateLimiter,
+} from "../src/promise-utils";
 /**
  * Run an end-to-end experiment.
  * Given a package generate tests for its methods, run them, and generate a report.
@@ -170,8 +175,7 @@ if (require.main === module) {
         type: "string",
         default: "",
         demandOption: false,
-        description:
-          "number of milliseconds between prompts or \"benchmark\"",
+        description: 'number of milliseconds between prompts or "benchmark"',
       },
     });
     const argv = await parser.argv;
@@ -198,12 +202,9 @@ if (require.main === module) {
       rateLimiter = new NoRateLimiter();
     }
 
-    model = new ChatModel(
-      argv.model,
-      argv.nrAttempts,
-      rateLimiter,
-      { max_tokens: argv.maxTokens }
-    );
+    model = new ChatModel(argv.model, argv.nrAttempts, rateLimiter, {
+      max_tokens: argv.maxTokens,
+    });
   } else {
     model = MockCompletionModel.fromFile(
       argv.responses,
diff --git a/src/chatmodel.ts b/src/chatmodel.ts
index 9f16926..9b62788 100644
--- a/src/chatmodel.ts
+++ b/src/chatmodel.ts
@@ -40,9 +40,11 @@ export class ChatModel implements ICompletionModel {
   ) {
     this.apiEndpoint = getEnv("TESTPILOT_LLM_API_ENDPOINT");
     this.authHeaders = getEnv("TESTPILOT_LLM_AUTH_HEADERS");
-
+
     console.log(
-      `Using ${this.model} at ${this.apiEndpoint} with ${this.nrAttempts} attempts and ${this.rateLimiter.getDescription()}`
+      `Using ${this.model} at ${this.apiEndpoint} with ${
+        this.nrAttempts
+      } attempts and ${this.rateLimiter.getDescription()}`
     );
   }
 
@@ -79,7 +81,7 @@ export class ChatModel implements ICompletionModel {
       {
         role: "system",
-        content: "You are a programming assistant."
+        content: "You are a programming assistant.",
       },
       {
         role: "user",
         content: prompt,
@@ -94,7 +96,6 @@ export class ChatModel implements ICompletionModel {
         ),
       this.nrAttempts
     );
-
     performance.measure(
       `llm-query:${JSON.stringify({
         model: this.model,
diff --git a/src/promise-utils.ts b/src/promise-utils.ts
index 7cfd713..0ba9546 100644
--- a/src/promise-utils.ts
+++ b/src/promise-utils.ts
@@ -30,13 +30,12 @@ export async function retry<T>(
  * This interface provides support for rate-limiting the creation of promises
  */
 export interface IRateLimiter {
-
   /**
    * Waits until the rate limiter allows the next request, then evaluate the function that
    * produces the promise
    */
   next<T>(p: () => Promise<T>): Promise<T>;
-
+
   /**
    * returns a description of the rate limiter
    */

From 401acd86c23f53e21ea6428551e88a86f9b954fb Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:20:28 -0400
Subject: [PATCH 12/13] update workflow to reflect new rate-limiting mechanism

---
 .github/workflows/run-experiment.yml | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index 3d6c101..df40945 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -29,9 +29,9 @@ on:
         description: "Skip slow benchmarks"
         type: boolean
         default: false
-      benchmarkMode:
+      rateLimiting:
         type: boolean
-        description: "Use custom rate limiting for running benchmarks"
+        description: "Use rate limiting for running benchmarks"
         default: false
       debug_enabled:
         type: boolean
@@ -173,14 +173,26 @@ jobs:
           echo "Computing package statistics"
           node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json
           echo "Generating tests for $TESTPILOT_PACKAGE_NAME"
-          export command="node benchmark/run.js \
+
+          if (${{ github.event.inputs.rateLimiting }}); then
+            export command="node benchmark/run.js \
             --outputDir $outputdir \
             --package "$TESTPILOT_PACKAGE_PATH" \
             --temperatures "${{ needs.setup.outputs.temperatures }}" \
             --model ${{ needs.setup.outputs.model }} \
             --template ${{ needs.setup.outputs.template }} \
             --retryTemplate ${{ needs.setup.outputs.retryTemplate }} \
-            --benchmark ${{ github.event.inputs.benchmarkMode }} "
+            --rateLimit benchmark"
+          else
+            export command="node benchmark/run.js \
+            --outputDir $outputdir \
+            --package "$TESTPILOT_PACKAGE_PATH" \
+            --temperatures "${{ needs.setup.outputs.temperatures }}" \
+            --model ${{ needs.setup.outputs.model }} \
+            --template ${{ needs.setup.outputs.template }} \
+            --retryTemplate ${{ needs.setup.outputs.retryTemplate }}"
+          fi
+
           echo "command: $command"
           $command
           mv stats.json $outputdir

From efefbb9f6dfde32ac6f7d31161fcd04430c1601e Mon Sep 17 00:00:00 2001
From: franktip
Date: Tue, 2 Jul 2024 09:31:59 -0400
Subject: [PATCH 13/13] update workflow to reflect new rate-limiting mechanism

---
 .github/workflows/run-experiment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-experiment.yml b/.github/workflows/run-experiment.yml
index df40945..1929a4f 100644
--- a/.github/workflows/run-experiment.yml
+++ b/.github/workflows/run-experiment.yml
@@ -31,7 +31,7 @@ on:
       rateLimiting:
         type: boolean
-        description: "Use rate limiting for running benchmarks"
+        description: "Use rate limiting"
         default: false
       debug_enabled:
         type: boolean