Skip to content

Commit bcddd72

Browse files
peacefulotter and tharvik
authored and committed
discojs-core/models: add gpt
1 parent 10be5f1 commit bcddd72

File tree

6 files changed

+1020
-0
lines changed

6 files changed

+1020
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Identifiers of the supported GPT variants: the original OpenAI
// releases (gpt2*) plus small variants (gpt-mini/micro/nano) whose
// sizes follow below in getModelSizes.
type ModelType =
  | 'gpt2'
  | 'gpt2-medium'
  | 'gpt2-large'
  | 'gpt2-xl'
  | 'gpt-mini'
  | 'gpt-micro'
  | 'gpt-nano'

// Architecture hyper-parameters for a ModelType.
// All fields are optional; getModelSizes returns them fully specified.
type ModelSize = {
  nLayer?: number // number of transformer blocks
  nHead?: number // attention heads per block
  nEmbd?: number // embedding width
}

// Training/evaluation configuration for the GPT models.
// Optional fields fall back to DEFAULT_CONFIG.
export type GPTConfig = {
  lr: number // learning rate
  batchSize: number
  blockSize: number // context window length, in tokens
  vocabSize: number
  evaluate?: boolean // whether to run evaluation during training
  maxEvalBatches?: number // cap on batches consumed per evaluation
  evaluateEvery?: number // iterations between evaluations
  epochs?: number
  maxIter?: number // hard cap on training iterations
  weightDecay?: number
  verbose?: boolean
  bias?: boolean
  debug?: boolean
  dropout?: number
  residDrop?: number // residual-path dropout rate
  embdDrop?: number // embedding dropout rate
  tokEmb?: boolean // NOTE(review): presumably toggles the token-embedding layer — confirm in model.ts
  lmHead?: boolean // NOTE(review): presumably toggles the language-model head — confirm in model.ts
  modelType: ModelType
}
37+
38+
export const DEFAULT_CONFIG: Required<GPTConfig> = {
39+
lr: 0.001,
40+
weightDecay: 0,
41+
batchSize: 2,
42+
epochs: 9999,
43+
maxIter: 10_000,
44+
verbose: false,
45+
modelType: 'gpt-nano',
46+
evaluate: true,
47+
maxEvalBatches: 12,
48+
evaluateEvery: 100,
49+
blockSize: 128,
50+
vocabSize: 50258,
51+
bias: true,
52+
debug: false,
53+
dropout: 0.2,
54+
residDrop: 0.2,
55+
embdDrop: 0.2,
56+
tokEmb: true,
57+
lmHead: true,
58+
}
59+
60+
export const getModelSizes = (modelType: ModelType): Required<ModelSize> => {
61+
switch (modelType) {
62+
case 'gpt2':
63+
return { nLayer: 12, nHead: 12, nEmbd: 768 }
64+
case 'gpt2-medium':
65+
return { nLayer: 24, nHead: 16, nEmbd: 1024 }
66+
case 'gpt2-large':
67+
return { nLayer: 36, nHead: 20, nEmbd: 1280 }
68+
case 'gpt2-xl':
69+
return { nLayer: 48, nHead: 25, nEmbd: 1600 }
70+
case 'gpt-mini':
71+
return { nLayer: 6, nHead: 6, nEmbd: 192 }
72+
case 'gpt-micro':
73+
return { nLayer: 4, nHead: 4, nEmbd: 128 }
74+
case 'gpt-nano':
75+
return { nLayer: 3, nHead: 3, nEmbd: 48 }
76+
}
77+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import tf from '@tensorflow/tfjs'
2+
3+
import { data } from '../..'
4+
import { GPTConfig } from '.'
5+
6+
export default async function evaluate(
7+
model: any,
8+
dataset: data.Dataset,
9+
config: Required<GPTConfig>
10+
) {
11+
console.log('Evaluating..')
12+
13+
const iter = await dataset.iterator()
14+
15+
let total_loss = 0
16+
const acc: [number, number] = [0, 0]
17+
18+
let iteration = 0
19+
while (iteration < config.maxEvalBatches) {
20+
const next = await iter.next()
21+
if (!next) break
22+
const { xs, ys } = next.value
23+
const logits = model.apply(xs)
24+
25+
// Loss
26+
const loss = tf.losses.softmaxCrossEntropy(ys, logits)
27+
const lossVal = await loss.array()
28+
total_loss += lossVal as number
29+
30+
// Accuracy
31+
const acc_tensor = tf.metrics.categoricalAccuracy(ys, logits)
32+
const acc_sum = acc_tensor.sum()
33+
acc[0] += (await acc_sum.array()) as number
34+
acc[1] += acc_tensor.shape[0] * (acc_tensor.shape[1] as number)
35+
36+
tf.dispose([acc_tensor, acc_sum, loss, logits, xs, ys])
37+
38+
iteration++
39+
}
40+
41+
const loss = total_loss / iteration
42+
const pp = 2.71828 ** loss
43+
44+
return {
45+
'val/loss': loss,
46+
'val/perplexity': pp,
47+
'val/acc': acc[0] / acc[1],
48+
}
49+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Public surface of the GPT models package: training loop, optimizers,
// the model itself, and its configuration/defaults.
export * from './train'
export * from './optimizers'
export * from './model'
export * from './config'

0 commit comments

Comments
 (0)