From 18df31ad9af81b158965f319099c98ab9b119e15 Mon Sep 17 00:00:00 2001 From: dkorzinek Date: Tue, 26 Nov 2024 22:30:02 +0100 Subject: [PATCH 1/2] Fixed bugs to make compatible with openfst-1.8.3 --- src/bin/phonetisaurus-align.cc | 62 ++++++++-------- src/bin/phonetisaurus-arpa2wfst.cc | 14 ++-- src/bin/phonetisaurus-g2pfst.cc | 72 +++++++++--------- src/bin/phonetisaurus-g2prnn.cc | 114 ++++++++++++++--------------- src/include/LegacyRnnLMDecodable.h | 54 +++++++------- src/lib/LatticePruner.cc | 4 +- src/lib/util.cc | 14 ++-- 7 files changed, 167 insertions(+), 167 deletions(-) diff --git a/src/bin/phonetisaurus-align.cc b/src/bin/phonetisaurus-align.cc index 8812671..49c6a51 100644 --- a/src/bin/phonetisaurus-align.cc +++ b/src/bin/phonetisaurus-align.cc @@ -97,7 +97,7 @@ void write_alignments (M2MFstAligner* aligner, string ofile_name, for (unsigned int i = 0; i < aligner->fsas.size (); i++) { //Map to Tropical semiring VectorFst* tfst = new VectorFst (); - Map (aligner->fsas.at (i), tfst, LogToStdMapper ()); + ArcMap (aligner->fsas.at (i), tfst, LogToStdMapper ()); pruner.prune_fst (tfst); RmEpsilon (tfst); //Skip empty results. This should only happen @@ -160,7 +160,7 @@ void compileNBestFarArchive (M2MFstAligner* aligner, string key_suffix = ""; string key = ""; char keybuf[16]; - int32 generate_keys = 7; //Suitable for up to several million lattices + int32_t generate_keys = 7; //Suitable for up to several million lattices bool set_syms = false; //Have we set the isyms successfully yet?? //Build us a FarWriter to compile the archive FarWriter *far_writer = \ @@ -180,11 +180,11 @@ void compileNBestFarArchive (M2MFstAligner* aligner, VectorFst* ffst = new VectorFst (); //Map to the Tropical semiring - Map (fsts->at(i), tfst, LogToStdMapper ()); + ArcMap (fsts->at(i), tfst, LogToStdMapper ()); pruner.prune_fst (tfst); //Map back to the Log semiring - Map (*tfst, lfst, StdToLogMapper ()); + ArcMap (*tfst, lfst, StdToLogMapper ()); //Perform posterior normalization of the N-best lattice by pushing weights // in the log semiring and then removing the final weight. @@ -204,7 +204,7 @@ void compileNBestFarArchive (M2MFstAligner* aligner, if (pfst->NumStates () == 0) continue; //Finally map back to the Tropical semiring for the last time - Map (*pfst, ffst, LogToStdMapper ()); + ArcMap (*pfst, ffst, LogToStdMapper ()); if (set_syms == false) { ffst->SetInputSymbols (aligner->isyms); @@ -267,28 +267,28 @@ int main( int argc, char* argv[] ){ PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false); M2MFstAligner aligner; - if (FLAGS_load_model == true) { - aligner = *(new M2MFstAligner (FLAGS_model_file, FLAGS_penalize, - FLAGS_penalize_em, FLAGS_restrict)); - switch (load_input_file (&aligner, FLAGS_input, FLAGS_delim, - FLAGS_s1_char_delim, FLAGS_s2_char_delim, - FLAGS_load_model)) { + if (FST_FLAGS_load_model == true) { + aligner = *(new M2MFstAligner (FST_FLAGS_model_file, FST_FLAGS_penalize, + FST_FLAGS_penalize_em, FST_FLAGS_restrict)); + switch (load_input_file (&aligner, FST_FLAGS_input, FST_FLAGS_delim, + FST_FLAGS_s1_char_delim, FST_FLAGS_s2_char_delim, + FST_FLAGS_load_model)) { case 0: cerr << "Please provide a valid input file." << endl; case -1: return -1; } } else { - aligner = *(new M2MFstAligner (FLAGS_seq1_del, FLAGS_seq2_del, - FLAGS_seq1_max, FLAGS_seq2_max, - FLAGS_seq1_sep, FLAGS_seq2_sep, - FLAGS_s1s2_sep, FLAGS_eps, FLAGS_skip, - FLAGS_penalize, FLAGS_penalize_em, - FLAGS_restrict, FLAGS_grow + aligner = *(new M2MFstAligner (FST_FLAGS_seq1_del, FST_FLAGS_seq2_del, + FST_FLAGS_seq1_max, FST_FLAGS_seq2_max, + FST_FLAGS_seq1_sep, FST_FLAGS_seq2_sep, + FST_FLAGS_s1s2_sep, FST_FLAGS_eps, FST_FLAGS_skip, + FST_FLAGS_penalize, FST_FLAGS_penalize_em, + FST_FLAGS_restrict, FST_FLAGS_grow )); - switch (load_input_file (&aligner, FLAGS_input, FLAGS_delim, - FLAGS_s1_char_delim, FLAGS_s2_char_delim, - FLAGS_load_model)) { + switch (load_input_file (&aligner, FST_FLAGS_input, FST_FLAGS_delim, + FST_FLAGS_s1_char_delim, FST_FLAGS_s2_char_delim, + FST_FLAGS_load_model)) { case 0: cerr << "Please provide a valid input file." << endl; case -1: @@ -298,7 +298,7 @@ int main( int argc, char* argv[] ){ cerr << "Starting EM..." << endl; aligner.maximization (false); cerr << "Finished first iter..." << endl; - for (int i = 1; i <= FLAGS_iter; i++) { + for (int i = 1; i <= FST_FLAGS_iter; i++) { cerr << "Iteration: " << i << " Change: "; aligner.expectation (); cerr << aligner.maximization (false) << endl; @@ -309,21 +309,21 @@ int main( int argc, char* argv[] ){ aligner.maximization (true); } - StdArc::Weight pthresh = FLAGS_pthresh == -99.0 + StdArc::Weight pthresh = FST_FLAGS_pthresh == -99.0 ? LogWeight::Zero().Value() - : FLAGS_pthresh; - if (FLAGS_write_model.compare ("") != 0) { + : FST_FLAGS_pthresh; + if (FST_FLAGS_write_model.compare ("") != 0) { cerr << "Writing alignment model in OpenFst format to file: " - << FLAGS_write_model << endl; - aligner.write_model (FLAGS_write_model); + << FST_FLAGS_write_model << endl; + aligner.write_model (FST_FLAGS_write_model); } - if (FLAGS_lattice == true) - compileNBestFarArchive (&aligner, &aligner.fsas, FLAGS_ofile, pthresh, - FLAGS_nbest, FLAGS_fb, FLAGS_penalize); + if (FST_FLAGS_lattice == true) + compileNBestFarArchive (&aligner, &aligner.fsas, FST_FLAGS_ofile, pthresh, + FST_FLAGS_nbest, FST_FLAGS_fb, FST_FLAGS_penalize); else - write_alignments (&aligner, FLAGS_ofile, pthresh, FLAGS_nbest, - FLAGS_fb, FLAGS_penalize); + write_alignments (&aligner, FST_FLAGS_ofile, pthresh, FST_FLAGS_nbest, + FST_FLAGS_fb, FST_FLAGS_penalize); return 0; } diff --git a/src/bin/phonetisaurus-arpa2wfst.cc b/src/bin/phonetisaurus-arpa2wfst.cc index a74bb84..1c4507f 100644 --- a/src/bin/phonetisaurus-arpa2wfst.cc +++ b/src/bin/phonetisaurus-arpa2wfst.cc @@ -52,23 +52,23 @@ int main (int argc, char* argv []) { set_new_handler (FailedNewHandler); PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false); - if (FLAGS_lm.compare ("") == 0) { + if (FST_FLAGS_lm.compare ("") == 0) { cerr << "You must supply an ARPA format lm " "to --lm for conversion!" << endl; return 0; } cerr << "Initializing..." << endl; - ARPA2WFST* converter = new ARPA2WFST (FLAGS_lm, FLAGS_eps, FLAGS_sb, - FLAGS_se, FLAGS_split, FLAGS_skip, - FLAGS_tie); + ARPA2WFST* converter = new ARPA2WFST (FST_FLAGS_lm, FST_FLAGS_eps, FST_FLAGS_sb, + FST_FLAGS_se, FST_FLAGS_split, FST_FLAGS_skip, + FST_FLAGS_tie); cerr << "Converting..." << endl; converter->arpa_to_wfst (); - converter->arpafst.Write (FLAGS_ofile); + converter->arpafst.Write (FST_FLAGS_ofile); - if (FLAGS_ssyms.compare ("") != 0) { - converter->ssyms->WriteText (FLAGS_ssyms); + if (FST_FLAGS_ssyms.compare ("") != 0) { + converter->ssyms->WriteText (FST_FLAGS_ssyms); } delete converter; diff --git a/src/bin/phonetisaurus-g2pfst.cc b/src/bin/phonetisaurus-g2pfst.cc index 5ff0572..4b0ec9b 100644 --- a/src/bin/phonetisaurus-g2pfst.cc +++ b/src/bin/phonetisaurus-g2pfst.cc @@ -38,11 +38,11 @@ using namespace fst; typedef unordered_map > RMAP; -void PrintPathData (const vector& results, string FLAGS_word, +void PrintPathData (const vector& results, string FST_FLAGS_word, const SymbolTable* osyms, bool print_scores = true, bool nlog_probs = true) { for (int i = 0; i < results.size (); i++) { - cout << FLAGS_word << "\t"; + cout << FST_FLAGS_word << "\t"; if (print_scores == true) { if (nlog_probs == true) cout << results [i].PathWeight << "\t"; @@ -60,20 +60,20 @@ void PrintPathData (const vector& results, string FLAGS_word, } void EvaluateWordlist (PhonetisaurusScript& decoder, vector corpus, - int FLAGS_beam, int FLAGS_nbest, bool FLAGS_reverse, - string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep, - bool FLAGS_write_fsts, bool FLAGS_print_scores, - bool FLAGS_accumulate, double FLAGS_pmass, - bool FLAGS_nlog_probs) { + int FST_FLAGS_beam, int FST_FLAGS_nbest, bool FST_FLAGS_reverse, + string FST_FLAGS_skip, double FST_FLAGS_thresh, string FST_FLAGS_gsep, + bool FST_FLAGS_write_fsts, bool FST_FLAGS_print_scores, + bool FST_FLAGS_accumulate, double FST_FLAGS_pmass, + bool FST_FLAGS_nlog_probs) { for (int i = 0; i < corpus.size (); i++) { - vector results = decoder.Phoneticize (corpus [i], FLAGS_nbest, - FLAGS_beam, FLAGS_thresh, - FLAGS_write_fsts, - FLAGS_accumulate, FLAGS_pmass); + vector results = decoder.Phoneticize (corpus [i], FST_FLAGS_nbest, + FST_FLAGS_beam, FST_FLAGS_thresh, + FST_FLAGS_write_fsts, + FST_FLAGS_accumulate, FST_FLAGS_pmass); PrintPathData (results, corpus [i], decoder.osyms_, - FLAGS_print_scores, - FLAGS_nlog_probs); + FST_FLAGS_print_scores, + FST_FLAGS_nlog_probs); } } @@ -99,65 +99,65 @@ int main (int argc, char* argv []) { set_new_handler (FailedNewHandler); PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false); - if (FLAGS_model.compare ("") == 0) { + if (FST_FLAGS_model.compare ("") == 0) { cerr << "You must supply an FST model to --model" << endl; exit (1); } else { - std::ifstream model_ifp (FLAGS_model); + std::ifstream model_ifp (FST_FLAGS_model); if (!model_ifp.good ()) { cout << "Failed to open --model file '" - << FLAGS_model << "'" << endl; + << FST_FLAGS_model << "'" << endl; exit (1); } } - if (FLAGS_pmass < 0.0 || FLAGS_pmass > 1) { + if (FST_FLAGS_pmass < 0.0 || FST_FLAGS_pmass > 1) { cout << "--pmass must be a float value between 0.0 and 1.0." << endl; exit (1); } - if (FLAGS_pmass == 0.0) - FLAGS_pmass = 99.0; + if (FST_FLAGS_pmass == 0.0) + FST_FLAGS_pmass = 99.0; else - FLAGS_pmass = -log (FLAGS_pmass); + FST_FLAGS_pmass = -log (FST_FLAGS_pmass); bool use_wordlist = false; - if (FLAGS_wordlist.compare ("") != 0) { - std::ifstream wordlist_ifp (FLAGS_wordlist); + if (FST_FLAGS_wordlist.compare ("") != 0) { + std::ifstream wordlist_ifp (FST_FLAGS_wordlist); if (!wordlist_ifp.good ()) { cout << "Failed to open --wordlist file '" - << FLAGS_wordlist << "'" << endl; + << FST_FLAGS_wordlist << "'" << endl; exit (1); } else { use_wordlist = true; } } - if (FLAGS_wordlist.compare ("") == 0 && FLAGS_word.compare ("") == 0) { + if (FST_FLAGS_wordlist.compare ("") == 0 && FST_FLAGS_word.compare ("") == 0) { cout << "Either --wordlist or --word must be set!" << endl; exit (1); } if (use_wordlist == true) { vector corpus; - LoadWordList (FLAGS_wordlist, &corpus); + LoadWordList (FST_FLAGS_wordlist, &corpus); - PhonetisaurusScript decoder (FLAGS_model, FLAGS_gsep); + PhonetisaurusScript decoder (FST_FLAGS_model, FST_FLAGS_gsep); EvaluateWordlist ( - decoder, corpus, FLAGS_beam, FLAGS_nbest, FLAGS_reverse, - FLAGS_skip, FLAGS_thresh, FLAGS_gsep, FLAGS_write_fsts, - FLAGS_print_scores, FLAGS_accumulate, FLAGS_pmass, - FLAGS_nlog_probs + decoder, corpus, FST_FLAGS_beam, FST_FLAGS_nbest, FST_FLAGS_reverse, + FST_FLAGS_skip, FST_FLAGS_thresh, FST_FLAGS_gsep, FST_FLAGS_write_fsts, + FST_FLAGS_print_scores, FST_FLAGS_accumulate, FST_FLAGS_pmass, + FST_FLAGS_nlog_probs ); } else { - PhonetisaurusScript decoder (FLAGS_model, FLAGS_gsep); + PhonetisaurusScript decoder (FST_FLAGS_model, FST_FLAGS_gsep); vector results = decoder.Phoneticize ( - FLAGS_word, FLAGS_nbest, FLAGS_beam, FLAGS_thresh, - FLAGS_write_fsts, FLAGS_accumulate, FLAGS_pmass + FST_FLAGS_word, FST_FLAGS_nbest, FST_FLAGS_beam, FST_FLAGS_thresh, + FST_FLAGS_write_fsts, FST_FLAGS_accumulate, FST_FLAGS_pmass ); - PrintPathData (results, FLAGS_word, + PrintPathData (results, FST_FLAGS_word, decoder.osyms_, - FLAGS_print_scores, - FLAGS_nlog_probs); + FST_FLAGS_print_scores, + FST_FLAGS_nlog_probs); } return 0; diff --git a/src/bin/phonetisaurus-g2prnn.cc b/src/bin/phonetisaurus-g2prnn.cc index d50589d..c4a1173 100644 --- a/src/bin/phonetisaurus-g2prnn.cc +++ b/src/bin/phonetisaurus-g2prnn.cc @@ -17,33 +17,33 @@ typedef unordered_map RMAP; void ThreadedEvaluateWordlist (vector& corpus, RMAP& rmap, LegacyRnnLMHash& h, Decodable& s, - int FLAGS_threads, int FLAGS_beam, - int FLAGS_kmax, int FLAGS_nbest, - bool FLAGS_reverse, string FLAGS_gpdelim, - string FLAGS_gdelim, string FLAGS_skip, - double FLAGS_thresh, string FLAGS_gsep) { + int FST_FLAGS_threads, int FST_FLAGS_beam, + int FST_FLAGS_kmax, int FST_FLAGS_nbest, + bool FST_FLAGS_reverse, string FST_FLAGS_gpdelim, + string FST_FLAGS_gdelim, string FST_FLAGS_skip, + double FST_FLAGS_thresh, string FST_FLAGS_gsep) { int csize = corpus.size (); #ifdef _OPENMP #pragma omp parallel for #endif - for (int x = 0; x < FLAGS_threads; x++) { + for (int x = 0; x < FST_FLAGS_threads; x++) { RnnLMDecoder decoder (s); - int start = x * (csize / FLAGS_threads); - int end = (x == FLAGS_threads - 1) ? csize \ - : start + (csize / FLAGS_threads); + int start = x * (csize / FST_FLAGS_threads); + int end = (x == FST_FLAGS_threads - 1) ? csize \ + : start + (csize / FST_FLAGS_threads); for (int i = start; i < end; i++) { vector graphemes = tokenize_utf8_string (&corpus [i], - &FLAGS_gsep); - if (FLAGS_reverse == true) + &FST_FLAGS_gsep); + if (FST_FLAGS_reverse == true) reverse (graphemes.begin (), graphemes.end ()); graphemes.push_back (""); SimpleResult result = \ - decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax, - FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim, - FLAGS_gdelim, FLAGS_skip); + decoder.Decode (graphemes, FST_FLAGS_beam, FST_FLAGS_kmax, + FST_FLAGS_nbest, FST_FLAGS_thresh, FST_FLAGS_gpdelim, + FST_FLAGS_gdelim, FST_FLAGS_skip); rmap [i] = result; } } @@ -58,25 +58,25 @@ void ThreadedEvaluateWordlist (vector& corpus, RMAP& rmap, } void EvaluateWordlist (vector& corpus, - LegacyRnnLMHash& h, Decodable& s, int FLAGS_beam, - int FLAGS_kmax, int FLAGS_nbest, bool FLAGS_reverse, - string FLAGS_gpdelim, string FLAGS_gdelim, - string FLAGS_skip, double FLAGS_thresh, - string FLAGS_gsep) { + LegacyRnnLMHash& h, Decodable& s, int FST_FLAGS_beam, + int FST_FLAGS_kmax, int FST_FLAGS_nbest, bool FST_FLAGS_reverse, + string FST_FLAGS_gpdelim, string FST_FLAGS_gdelim, + string FST_FLAGS_skip, double FST_FLAGS_thresh, + string FST_FLAGS_gsep) { RnnLMDecoder decoder (s); for (int i = 0; i < corpus.size (); i++) { vector graphemes = tokenize_utf8_string (&corpus [i], - &FLAGS_gsep); - if (FLAGS_reverse == true) + &FST_FLAGS_gsep); + if (FST_FLAGS_reverse == true) reverse (graphemes.begin (), graphemes.end ()); graphemes.push_back (""); SimpleResult result = \ - decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax, - FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim, - FLAGS_gdelim, FLAGS_skip); + decoder.Decode (graphemes, FST_FLAGS_beam, FST_FLAGS_kmax, + FST_FLAGS_nbest, FST_FLAGS_thresh, FST_FLAGS_gpdelim, + FST_FLAGS_gdelim, FST_FLAGS_skip); for (int k = 0; k < result.pronunciations.size (); k++) cout << result.word << "\t" << result.scores [k] << "\t" @@ -85,22 +85,22 @@ void EvaluateWordlist (vector& corpus, } void EvaluateWord (string word, LegacyRnnLMHash& h, Decodable& s, - int FLAGS_beam, int FLAGS_kmax, int FLAGS_nbest, - bool FLAGS_reverse, string FLAGS_gpdelim, - string FLAGS_gdelim, string FLAGS_skip, - double FLAGS_thresh, string FLAGS_gsep) { + int FST_FLAGS_beam, int FST_FLAGS_kmax, int FST_FLAGS_nbest, + bool FST_FLAGS_reverse, string FST_FLAGS_gpdelim, + string FST_FLAGS_gdelim, string FST_FLAGS_skip, + double FST_FLAGS_thresh, string FST_FLAGS_gsep) { vector graphemes = tokenize_utf8_string (&word, - &FLAGS_gsep); - if (FLAGS_reverse == true) + &FST_FLAGS_gsep); + if (FST_FLAGS_reverse == true) reverse (graphemes.begin (), graphemes.end ()); graphemes.push_back (""); RnnLMDecoder decoder (s); SimpleResult result = \ - decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax, - FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim, - FLAGS_gdelim, FLAGS_skip); + decoder.Decode (graphemes, FST_FLAGS_beam, FST_FLAGS_kmax, + FST_FLAGS_nbest, FST_FLAGS_thresh, FST_FLAGS_gpdelim, + FST_FLAGS_gdelim, FST_FLAGS_skip); for (int k = 0; k < result.pronunciations.size (); k++) cout << result.word << "\t" << result.scores [k] << "\t" @@ -128,64 +128,64 @@ int main (int argc, char* argv []) { set_new_handler (FailedNewHandler); PhonetisaurusSetFlags (usage.c_str (), &argc, &argv, false); - if (FLAGS_rnnlm.compare ("") == 0) { + if (FST_FLAGS_rnnlm.compare ("") == 0) { cout << "--rnnlm model is required!" << endl; exit (1); } else { - std::ifstream rnnlm_ifp (FLAGS_rnnlm); + std::ifstream rnnlm_ifp (FST_FLAGS_rnnlm); if (!rnnlm_ifp.good ()) { cout << "Faile to open --rnnlm file '" - << FLAGS_rnnlm << "'" << endl; + << FST_FLAGS_rnnlm << "'" << endl; exit (1); } } bool use_wordlist = false; - if (FLAGS_wordlist.compare ("") != 0) { - std::ifstream wordlist_ifp (FLAGS_wordlist); + if (FST_FLAGS_wordlist.compare ("") != 0) { + std::ifstream wordlist_ifp (FST_FLAGS_wordlist); if (!wordlist_ifp.good ()) { cout << "Failed to open --wordlist file '" - << FLAGS_wordlist << "'" << endl; + << FST_FLAGS_wordlist << "'" << endl; exit (1); } else { use_wordlist = true; } } - if (FLAGS_wordlist.compare ("") == 0 && FLAGS_word.compare ("") == 0) { + if (FST_FLAGS_wordlist.compare ("") == 0 && FST_FLAGS_word.compare ("") == 0) { cout << "Either --wordlist or --word must be set!" << endl; } #ifdef _OPENMP - omp_set_num_threads (FLAGS_threads); + omp_set_num_threads (FST_FLAGS_threads); #endif vector corpus; - LoadWordList (FLAGS_wordlist, &corpus); + LoadWordList (FST_FLAGS_wordlist, &corpus); RMAP rmap; - LegacyRnnLMReader reader (FLAGS_rnnlm); - LegacyRnnLMHash h = reader.CopyVocabHash (FLAGS_gdelim, FLAGS_gpdelim); + LegacyRnnLMReader reader (FST_FLAGS_rnnlm); + LegacyRnnLMHash h = reader.CopyVocabHash (FST_FLAGS_gdelim, FST_FLAGS_gpdelim); Decodable s = reader.CopyLegacyRnnLM (h); if (use_wordlist == true) { - if (FLAGS_threads > 1) { - ThreadedEvaluateWordlist (corpus, rmap, h, s, FLAGS_threads, - FLAGS_beam, FLAGS_kmax, FLAGS_nbest, - FLAGS_reverse, FLAGS_gpdelim, - FLAGS_gdelim, FLAGS_skip, - FLAGS_thresh, FLAGS_gsep); + if (FST_FLAGS_threads > 1) { + ThreadedEvaluateWordlist (corpus, rmap, h, s, FST_FLAGS_threads, + FST_FLAGS_beam, FST_FLAGS_kmax, FST_FLAGS_nbest, + FST_FLAGS_reverse, FST_FLAGS_gpdelim, + FST_FLAGS_gdelim, FST_FLAGS_skip, + FST_FLAGS_thresh, FST_FLAGS_gsep); } else { - EvaluateWordlist (corpus, h, s, FLAGS_beam, - FLAGS_kmax, FLAGS_nbest, FLAGS_reverse, - FLAGS_gpdelim, FLAGS_gdelim, FLAGS_skip, - FLAGS_thresh, FLAGS_gsep); + EvaluateWordlist (corpus, h, s, FST_FLAGS_beam, + FST_FLAGS_kmax, FST_FLAGS_nbest, FST_FLAGS_reverse, + FST_FLAGS_gpdelim, FST_FLAGS_gdelim, FST_FLAGS_skip, + FST_FLAGS_thresh, FST_FLAGS_gsep); } } else { - EvaluateWord (FLAGS_word, h, s, FLAGS_beam, FLAGS_kmax, - FLAGS_nbest, FLAGS_reverse, FLAGS_gpdelim, - FLAGS_gdelim, FLAGS_skip, FLAGS_thresh, FLAGS_gsep); + EvaluateWord (FST_FLAGS_word, h, s, FST_FLAGS_beam, FST_FLAGS_kmax, + FST_FLAGS_nbest, FST_FLAGS_reverse, FST_FLAGS_gpdelim, + FST_FLAGS_gdelim, FST_FLAGS_skip, FST_FLAGS_thresh, FST_FLAGS_gsep); } return 0; diff --git a/src/include/LegacyRnnLMDecodable.h b/src/include/LegacyRnnLMDecodable.h index fc0a154..29f0ce8 100644 --- a/src/include/LegacyRnnLMDecodable.h +++ b/src/include/LegacyRnnLMDecodable.h @@ -44,8 +44,8 @@ static inline float FAST_EXP (float p) { template class LegacyRnnLMDecodable { public: - LegacyRnnLMDecodable (H& hash, int i, int h, int o, int d, int m) - : h (hash), isize (i), hsize (h), osize (o), order (d), max_order (m) { } + LegacyRnnLMDecodable (H& _hash, int i, int h, int o, int d, int m) + : h (_hash), isize (i), hsize (h), osize (o), order (d), max_order (m) { } double ComputeNet (const T& p, T* t) { vector olayer; @@ -73,28 +73,28 @@ class LegacyRnnLMDecodable { // Begin class direct connection activations if (synd.size () > 0) { - // Feature hash begin - vector hash; - hash.resize (max_order, 0); + // Feature _hash begin + vector _hash; + _hash.resize (max_order, 0); for (int i = 0; i < order; i++) { if (i > 0) if (t->history [i - 1] == -1) break; - hash [i] = h.primes_[0] * h.primes_[1]; + _hash [i] = h.primes_[0] * h.primes_[1]; for (int j = 1; j <= i; j++) - hash [i] += + _hash [i] += h.primes_[(i * h.primes_[j] + j) % h.primes_.size ()] - * static_cast(t->history [j - 1] + 1); + * static_cast(t->history [j - 1] + 1); - hash [i] = hash [i] % (synd.size () / 2); + _hash [i] = _hash [i] % (synd.size () / 2); } - // Feature hash end + // Feature _hash end for (int i = h.vocab_.size (); i < osize; i++) { for (int j = 0; j < order; j++) { - if (hash [j]) { - olayer [i] += synd [hash [j]]; - hash [j]++; + if (_hash [j]) { + olayer [i] += synd [_hash [j]]; + _hash [j]++; } else { break; } @@ -127,33 +127,33 @@ class LegacyRnnLMDecodable { // Begin word direct connection activations if (synd.size () > 0) { - // Begin feature hashing - uint64 hash [max_order]; + // Begin feature _hashing + uint64_t _hash [max_order]; for (int i = 0; i < order; i++) - hash [i] = 0; + _hash [i] = 0; for (int i = 0; i < order; i++) { if (i > 0) if (t->history [i - 1] == -1) break; - hash [i] = h.primes_[0] * h.primes_[1] - * static_cast (h.vocab_[t->word].class_index + 1); + _hash [i] = h.primes_[0] * h.primes_[1] + * static_cast (h.vocab_[t->word].class_index + 1); for (int j = 1; j <= i; j++) - hash [i] += h.primes_[(i * h.primes_[j] + j) % h.primes_.size ()] - * static_cast (t->history [j - 1] + 1); + _hash [i] += h.primes_[(i * h.primes_[j] + j) % h.primes_.size ()] + * static_cast (t->history [j - 1] + 1); - hash [i] = (hash [i] % (synd.size () / 2)) + (synd.size () / 2); + _hash [i] = (_hash [i] % (synd.size () / 2)) + (synd.size () / 2); } - // End feature hashing + // End feature _hashing for (int i = begin; i <= end; i++) { for (int j = 0; j < order; j++) { - if (hash [j]) { - olayer [i] += synd [hash [j]]; - hash [j]++; - hash [j] = hash [j] % synd.size (); + if (_hash [j]) { + olayer [i] += synd [_hash [j]]; + _hash [j]++; + _hash [j] = _hash [j] % synd.size (); } else { break; } @@ -179,7 +179,7 @@ class LegacyRnnLMDecodable { * olayer [h.vocab_.size () + h.vocab_[t->word].class_index]; } - // We need the synapses and the vocabulary hash + // We need the synapses and the vocabulary _hash H& h; int isize; int hsize; diff --git a/src/lib/LatticePruner.cc b/src/lib/LatticePruner.cc index 8ec2263..8f7a2b3 100644 --- a/src/lib/LatticePruner.cc +++ b/src/lib/LatticePruner.cc @@ -108,7 +108,7 @@ void LatticePruner::_forward_backward( VectorFst* fst ){ VectorFst* lfst = new VectorFst(); vector alpha, beta; - Map(*fst, lfst, StdToLogMapper()); + ArcMap(*fst, lfst, StdToLogMapper()); //Normalize so that subsequent operations don't go crazy Push(*lfst, pfst, kPushWeights); @@ -137,7 +137,7 @@ void LatticePruner::_forward_backward( VectorFst* fst ){ } } - Map(*pfst, fst, LogToStdMapper()); + ArcMap(*pfst, fst, LogToStdMapper()); delete lfst; delete pfst; diff --git a/src/lib/util.cc b/src/lib/util.cc index 01e2a1f..bcdf0e1 100644 --- a/src/lib/util.cc +++ b/src/lib/util.cc @@ -190,12 +190,12 @@ void PhonetisaurusSetFlags (const char* usage, int* argc, char*** argv, FlagRegister::GetRegister(); if (string_register->SetFlag(arg, val)) continue; - FlagRegister *int32_register = - FlagRegister::GetRegister(); + FlagRegister *int32_register = + FlagRegister::GetRegister(); if (int32_register->SetFlag(arg, val)) continue; - FlagRegister *int64_register = - FlagRegister::GetRegister(); + FlagRegister *int64_register = + FlagRegister::GetRegister(); if (int64_register->SetFlag(arg, val)) continue; FlagRegister *double_register = @@ -206,7 +206,7 @@ void PhonetisaurusSetFlags (const char* usage, int* argc, char*** argv, LOG(FATAL) << "SetFlags: Bad option: " << (*argv)[index]; } - if (FLAGS_help) { + if (FST_FLAGS_help) { //Just show program flags - NOT general OpenFst flags // There are too many and they are just confusing. std::set< pair > usage_set; @@ -217,9 +217,9 @@ void PhonetisaurusSetFlags (const char* usage, int* argc, char*** argv, bool_register->GetUsage(&usage_set); FlagRegister *string_register = FlagRegister::GetRegister(); string_register->GetUsage(&usage_set); - FlagRegister *int32_register = FlagRegister::GetRegister(); + FlagRegister *int32_register = FlagRegister::GetRegister(); int32_register->GetUsage(&usage_set); - FlagRegister *int64_register = FlagRegister::GetRegister(); + FlagRegister *int64_register = FlagRegister::GetRegister(); int64_register->GetUsage(&usage_set); FlagRegister *double_register = FlagRegister::GetRegister(); double_register->GetUsage(&usage_set); From 9a0c808a4ddaef42cf06750e23edccf9d8df3bb3 Mon Sep 17 00:00:00 2001 From: dkorzinek Date: Tue, 26 Nov 2024 22:37:16 +0100 Subject: [PATCH 2/2] Changed C++ std to 17 because openfst 1.8.3 doesn't compile on c++0x --- Makefile.am | 2 +- Makefile.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 8575ca4..a891636 100644 --- a/Makefile.am +++ b/Makefile.am @@ -71,7 +71,7 @@ bin_PROGRAMS = \ phonetisaurus-g2prnn \ rnnlm -AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++0x -Wall -Wno-sign-compare -Wno-unused-local-typedefs +AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++17 -Wall -Wno-sign-compare -Wno-unused-local-typedefs phonetisaurus_align_SOURCES = src/bin/phonetisaurus-align.cc src/include/PhonetisaurusRex.h src/lib/util.cc src/include/util.h src/lib/LatticePruner.cc src/include/LatticePruner.h src/lib/M2MFstAligner.cc src/include/M2MFstAligner.h phonetisaurus_align_CXXFLAGS = $(OPENFST_CXXFLAGS) $(UTFCPP_CXXFLAGS) diff --git a/Makefile.in b/Makefile.in index 29843ea..cb8d6eb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -532,7 +532,7 @@ dist_bin_SCRIPTS = \ src/scripts/phonetisaurus-apply \ src/scripts/phonetisaurus-train -AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++0x -Wall -Wno-sign-compare -Wno-unused-local-typedefs +AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++17 -Wall -Wno-sign-compare -Wno-unused-local-typedefs phonetisaurus_align_SOURCES = src/bin/phonetisaurus-align.cc src/include/PhonetisaurusRex.h src/lib/util.cc src/include/util.h src/lib/LatticePruner.cc src/include/LatticePruner.h src/lib/M2MFstAligner.cc src/include/M2MFstAligner.h phonetisaurus_align_CXXFLAGS = $(OPENFST_CXXFLAGS) $(UTFCPP_CXXFLAGS) phonetisaurus_align_LDADD = $(OPENFST_LDFLAGS)