-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunrelated-vs-some-difference-slurm-array-jobs.sh
71 lines (59 loc) · 2.83 KB
/
unrelated-vs-some-difference-slurm-array-jobs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env bash
#############################################################################
# #
# #
# Beyond Noise: Mitigating the Impact of Fine-grained #
# Semantic Divergences on Neural Machine Translation #
# #
# eleftheria #
# #
# ==== Step 3b ==== #
# #
# Token-level predictions on divergent Wikimatrix data #
# #
# #
#############################################################################
# This script has been configured to run with SLURM job arrays
# for job parallelization on the CLIP cluster
# Load the user's shell configuration and switch to the "semdiv" conda
# environment (presumably ~/.bashrc is what makes `conda` available -- confirm).
source ~/.bashrc
conda activate semdiv
##############################################################################
# Select the non-English side of the language pair from the first two
# positional arguments: when $1 is 'en' the other language is $2, otherwise
# it is $1.
# The arguments are quoted and default to the empty string so that a missing
# argument no longer turns the test into a "[: =: unary operator expected"
# error.
if [[ "${1:-}" == 'en' ]]; then
  non_en="${2:-}"
else
  non_en="${1:-}"
fi
# NOTE(review): non_en is not referenced again in the visible part of this
# script; presumably divergentmBERT_parameters.sh reads it -- confirm.
source divergentmBERT_parameters.sh
################################################################################
# Produce token-level predictions for the split handled by this array task.
# The work is skipped when the predictions file for the split already exists,
# so re-submitting the job array only recomputes missing splits.
# NOTE(review): scripts_dir, model, data_dir, output_dir and
# parallel_corpus_dir are not defined in this file; presumably they come from
# divergentmBERT_parameters.sh -- confirm.
set_=test
predictions_file="$parallel_corpus_dir/div-split-${SLURM_ARRAY_TASK_ID}.predictions"
if [[ ! -f "$predictions_file" ]]; then
  # Every expansion is quoted so paths containing spaces or glob characters
  # are passed as single arguments. Each option ends with " \" (note the
  # space) so it cannot fuse with the option on the following line.
  # The optional flag --overwrite_cache is intentionally left disabled.
  if python "$scripts_dir/run_div_multi.py" \
    --node "$SLURM_NODELIST" \
    --model_type SemDivMulti \
    --model_name_or_path "$model" \
    --task_name SemDiv \
    --do_eval \
    --best_checkpoint \
    --split "$SLURM_ARRAY_TASK_ID" \
    --evaluation_set "$set_" \
    --data_dir "$data_dir/" \
    --output_dir "$output_dir" \
    --synth_data_dir "$parallel_corpus_dir/"; then
    echo '> Save results prediction results'
    mv -- "$output_dir/${SLURM_ARRAY_TASK_ID}_test_predictions.txt" "$predictions_file"
    echo '> Done'
  else
    # Do not try to move a predictions file that was never written, and do
    # not report success; the diagnostic goes to stderr.
    echo "> Prediction failed for split ${SLURM_ARRAY_TASK_ID}" >&2
  fi
fi
# Optional extraction step: runs only when the third positional argument is
# "extract". It calls sd-vs-un.py to write the "unrelated vs some meaning
# difference" corpus, unless the first output file (…-unrelated1) is already
# present.
# "${3:-}" makes the test well-formed when no third argument is supplied
# (the unquoted form failed with "[: =: unary operator expected").
# NOTE(review): child_data_dir, child_scripts_dir, child_dir, src and tgt are
# not defined in this file; presumably they come from
# divergentmBERT_parameters.sh -- confirm.
if [[ "${3:-}" == 'extract' ]]; then
  if [[ ! -f "$child_data_dir/wikimatrix/wikimatrix-$src-$tgt-unrelated1" ]]; then
    mkdir -p -- "$child_data_dir/wikimatrix"
    echo '> Extract unrelated vs some meaning difference'
    python "$child_scripts_dir/sd-vs-un.py" \
      --data-dir "$child_dir/wikimatrix_for_huggingface/$src-$tgt" \
      --batch-mode \
      --split-dir wikimatrix-div-split \
      --number-batches 4 \
      --output-corpus-prefix "$child_data_dir/wikimatrix/wikimatrix-$src-$tgt-"
  fi
fi