# experiment/opennmt_tf_transformer_config.yml

# Directory for this run's model output. INSERT-OUTPUT-DIR is a placeholder
# (presumably substituted with a real path before training -- confirm against
# the script that consumes this template).
model_dir: INSERT-OUTPUT-DIR

# Dataset locations and tokenization settings. INSERT-DATA-DIR is a placeholder
# (presumably substituted with the real data directory before use -- confirm
# against the script that consumes this template).
data:
  # Training files: source side (features) and target side (labels).
  train_features_file: INSERT-DATA-DIR/src-train.txt
  train_labels_file: INSERT-DATA-DIR/tgt-train.txt
  # Validation files: source side (features) and target side (labels).
  eval_features_file: INSERT-DATA-DIR/src-val.txt
  eval_labels_file: INSERT-DATA-DIR/tgt-val.txt

  # Source and target point at the same vocabulary file.
  source_vocabulary: INSERT-DATA-DIR/vocab.txt
  target_vocabulary: INSERT-DATA-DIR/vocab.txt

  # Both sides use SpaceTokenizer.
  # NOTE(review): the commented-out params below look like OpenNMT Tokenizer
  # options; presumably they only apply with `type: OpenNMTTokenizer` --
  # confirm before re-enabling them.
  source_tokenization:
    type: SpaceTokenizer
    # params:
      # mode: space
      # segment_case: true  # Does not work with mode: none
  target_tokenization:
    type: SpaceTokenizer
    # params:
      # mode: space
      # segment_case: true  # Does not work with mode: none

# Training-time sequence length limits. The inline statistics are the observed
# token-length distributions of the corresponding dataset.
# NOTE(review): both active limits are below the observed maxima quoted next to
# them (200 < 362, 150 < 487), so the longest examples exceed the limits;
# presumably OpenNMT-tf drops such examples from training -- confirm.
train:
  maximum_features_length: 200  # For camelcase (115 file tokens), max=362, avg=138, std=14
  maximum_labels_length: 150   # For camelcase, max=487, avg=43, std=38

  # Alternative presets for other experiments, kept for reference. Only one
  # pair of limits may be active at a time (duplicate keys are invalid YAML).

  # Used by imitate__100_tokens__standard__1:
  # maximum_features_length: 150  # avg=125, std=20, max=271
  # maximum_labels_length: 100   # avg=40, std=30, max=276

  # Used by extrapolate__100_tokens__standard__2:
  # maximum_features_length: 350  # avg=122, std=14, max=347
  # maximum_labels_length: 500   # avg=43, std=42, max=473

# Evaluation settings.
eval:
  # Early stopping condition. Per the OpenNMT-tf configuration reference this
  # reads as: stop the training if "metric" did not improve by more than
  # "min_improvement" in the last "steps" evaluations.
  early_stopping:
    # (optional) The target metric name (default: "loss").
    # Left commented out here, so the default applies.
    # metric: bleu
    # (optional) The metric should improve at least by this much to be considered
    # as an improvement (default: 0)
    min_improvement: 0.01
    # Number of most recent evaluations considered by the condition above.
    steps: 4