import '@tensorflow/tfjs-node';
import { List } from "immutable";
import { parse } from "ts-command-line-args";
import { AutoTokenizer } from "@xenova/transformers";
import { fetchTasks, models, async_iterator, defaultTasks, processing, Task } from "@epfml/discojs";
import { loadModelFromDisk, loadText } from '@epfml/discojs-node'
import { Server } from "server";

interface CLIArguments {
  modelType?: string; // 'gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'
  contextLength?: number; // 128, 256, 512, 1024, 2048
  batchSize?: number; // 8, 16, 32, 64
  inference?: boolean; // benchmark inference if true, training otherwise
  modelPath?: string;
  help?: boolean; // print help
}

const parsedArgs = parse<CLIArguments>({
  modelType: { type: String, optional: true, description: "A GPT architecture: 'gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'" },
  contextLength: { type: Number, optional: true, description: "The maximum input sequence length to train the model on" },
  batchSize: { type: Number, optional: true, description: "The model training batch size" },
  inference: { type: Boolean, optional: true, description: "Whether to benchmark the model inference or training" },
  modelPath: { type: String, optional: true, description: "If benchmarking inference, the path to the trained model" },
  help: { type: Boolean, optional: true, alias: 'h', description: 'Prints this usage guide' },
}, { helpArg: 'help' });

const defaultArgs: Required<CLIArguments> = {
  modelType: 'gpt-nano',
  contextLength: 128,
  batchSize: 8,
  inference: false,
  modelPath: 'models/model.json',
  help: false,
}

// Fill in any CLI arguments that were not provided with the default values above
const args = { ...defaultArgs, ...parsedArgs }

/**
 * Benchmark results are reported in https://github.com/epfml/disco/pull/659
 */
async function main(args: Required<CLIArguments>): Promise<void> {
  const { inference: benchmarkInference, modelType,
    contextLength, batchSize, modelPath } = args

  // Launch a server instance
  const [server, url] = await new Server().serve(undefined, defaultTasks.wikitext);
  // Fetch the wikitext task from the server
  const tasks = await fetchTasks(url)
  const task = tasks.get('llm_task') as Task<'text'> | undefined
  if (task === undefined) { throw new Error('task not found') }

  const tokenizerName = task.trainingInformation.tokenizer
  if (typeof tokenizerName !== 'string') throw Error('no tokenizer name specified in the task training information')
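  // Load the pretrained tokenizer named by the task definition, via transformers.js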
  const tokenizer = await AutoTokenizer.from_pretrained(tokenizerName)

  /**
   * Training benchmark
   */
  if (!benchmarkInference) {
    // Benchmark parameters
    const epochsCount = 1
    const iterationsPerEpoch = 10

    const config: models.GPTConfig = {
      modelType: modelType as models.GPTConfig['modelType'],
      maxIter: iterationsPerEpoch,
      lr: 0.0001,
      contextLength,
    }
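    // maxIter limits the number of training batches per epoch, so each epoch processes
    // iterationsPerEpoch batches (the ms/token computation below assumes this)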

    // Load the dataset after setting the Task batch size and max sequence length
    // to make sure the dataset is batched and tokenized correctly
    task.trainingInformation.batchSize = batchSize
    task.trainingInformation.contextLength = contextLength
    const dataset = loadText('../datasets/wikitext/wiki.train.tokens')
      .map(text => processing.tokenize(tokenizer, text))
      .flatten()
      .batch(config.contextLength + 1, 1)
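    // Each window of contextLength + 1 tokens is split into the input sequence
    // (all but the last token) and the next-token label (the last token)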
    const preprocessedDataset = dataset
      .map((tokens) => [tokens.pop(), tokens.last()] as [List<number>, number])
      .batch(batchSize);

    // Init and train the model
    const model = new models.GPT(config)
    console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`)

    const startTime = performance.now()
    for (let epochsCounter = 1; epochsCounter <= epochsCount; epochsCounter++) {
      const [_, logs] = await async_iterator.gather(model.train(preprocessedDataset))
      // Cumulative training time divided by the number of tokens processed so far
      const elapsed = performance.now() - startTime
      const msPerToken = elapsed / (batchSize * contextLength * iterationsPerEpoch * epochsCounter)
      console.log(`\t\tTraining time: ${msPerToken.toFixed(2)} ms/token <br> ${logs.peakMemory.toFixed(2)} GB`)
    }

  /**
   * Inference benchmark
   */
  } else {
    const model = await loadModelFromDisk(modelPath)
    if (!(model instanceof models.GPT)) {
      throw new Error("Loaded model isn't a GPT model")
    }

    // Benchmark parameters
    const prompt = 'The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion,'
    const maxNewTokens = 200
    const iterations = 10
    console.log("Generating", maxNewTokens, "new tokens")

    let tokens = processing.tokenize(tokenizer, prompt);
    let inferenceTime = 0
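    // Generate maxNewTokens tokens autoregressively: each predicted token is appended
    // to the running context before predicting the next one; only wall-clock time is measured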
    for (let i = 0; i < iterations; i++) {
      const timeStart = performance.now()
      for (let n = 0; n < maxNewTokens; n++) {
        const next = (await model.predict(List.of(tokens))).first();
        if (next === undefined) throw new Error("empty prediction");
        tokens = tokens.push(next)
      }
      inferenceTime += performance.now() - timeStart
    }
    console.log(`Inference time: ${(inferenceTime / maxNewTokens / iterations).toFixed(2)} ms/token`)
  }

  // Shut down the local server before exiting
  await new Promise((resolve, reject) => {
    server.once('close', resolve)
    server.close(reject)
  })
}
// You can run this example with "npm start" from this folder
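// e.g., training benchmark:  npm start -- --modelType gpt-nano --contextLength 128 --batchSize 16
//      inference benchmark:  npm start -- --inference --modelPath models/model.json
// (this assumes the package's "start" script runs this file and forwards the flags given after `--`)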
main(args).catch(console.error)