# opennmt_tf_transformer_config.yml
# Placeholder: replace INSERT-OUTPUT-DIR with the output directory for this run.
model_dir: INSERT-OUTPUT-DIR

# Dataset locations and tokenization.
# Placeholder: replace INSERT-DATA-DIR with the directory holding the files below.
data:
  # Training pair (source features / target labels).
  train_features_file: INSERT-DATA-DIR/src-train.txt
  train_labels_file: INSERT-DATA-DIR/tgt-train.txt

  # Validation pair used for evaluation.
  eval_features_file: INSERT-DATA-DIR/src-val.txt
  eval_labels_file: INSERT-DATA-DIR/tgt-val.txt

  # Source and target sides share one vocabulary file.
  source_vocabulary: INSERT-DATA-DIR/vocab.txt
  target_vocabulary: INSERT-DATA-DIR/vocab.txt

  # Tokenization applied to the source side.
  source_tokenization:
    type: SpaceTokenizer
    # params:
    #   mode: space
    #   segment_case: true  # Does not work with mode: none

  # Tokenization applied to the target side (same settings as the source).
  target_tokenization:
    type: SpaceTokenizer
    # params:
    #   mode: space
    #   segment_case: true  # Does not work with mode: none
# Training options. The active limits are the "camelcase" values; the
# commented-out pairs record the limits used by other named runs.
train:
  maximum_features_length: 200  # For camelcase (115 file tokens), max=362, avg=138, std=14
  maximum_labels_length: 150  # For camelcase, max=487, avg=43, std=38

  # Used by imitate__100_tokens__standard__1:
  # maximum_features_length: 150  # avg=125, std=20, max=271
  # maximum_labels_length: 100  # avg=40, std=30, max=276

  # Used by extrapolate__100_tokens__standard__2:
  # maximum_features_length: 350  # avg=122, std=14, max=347
  # maximum_labels_length: 500  # avg=43, std=42, max=473
# Evaluation options.
eval:
  early_stopping:
    # (optional) The target metric name (default: "loss").
    # metric: bleu
    # (optional) The metric should improve at least by this much to be considered
    # as an improvement (default: 0)
    min_improvement: 0.01
    # NOTE(review): nesting was reconstructed; `steps` is kept inside
    # early_stopping alongside min_improvement, not directly under eval — confirm.
    steps: 4