Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to make the project work with OpenFST 1.8.3 #89

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ bin_PROGRAMS = \
phonetisaurus-g2prnn \
rnnlm

AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++0x -Wall -Wno-sign-compare -Wno-unused-local-typedefs
AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++17 -Wall -Wno-sign-compare -Wno-unused-local-typedefs

phonetisaurus_align_SOURCES = src/bin/phonetisaurus-align.cc src/include/PhonetisaurusRex.h src/lib/util.cc src/include/util.h src/lib/LatticePruner.cc src/include/LatticePruner.h src/lib/M2MFstAligner.cc src/include/M2MFstAligner.h
phonetisaurus_align_CXXFLAGS = $(OPENFST_CXXFLAGS) $(UTFCPP_CXXFLAGS)
Expand Down
2 changes: 1 addition & 1 deletion Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ dist_bin_SCRIPTS = \
src/scripts/phonetisaurus-apply \
src/scripts/phonetisaurus-train

AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++0x -Wall -Wno-sign-compare -Wno-unused-local-typedefs
AM_CPPFLAGS = -I$(top_srcdir)/src -DGIT_REVISION=\"$(GIT_REVISION)\" -std=c++17 -Wall -Wno-sign-compare -Wno-unused-local-typedefs
phonetisaurus_align_SOURCES = src/bin/phonetisaurus-align.cc src/include/PhonetisaurusRex.h src/lib/util.cc src/include/util.h src/lib/LatticePruner.cc src/include/LatticePruner.h src/lib/M2MFstAligner.cc src/include/M2MFstAligner.h
phonetisaurus_align_CXXFLAGS = $(OPENFST_CXXFLAGS) $(UTFCPP_CXXFLAGS)
phonetisaurus_align_LDADD = $(OPENFST_LDFLAGS)
Expand Down
62 changes: 31 additions & 31 deletions src/bin/phonetisaurus-align.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void write_alignments (M2MFstAligner* aligner, string ofile_name,
for (unsigned int i = 0; i < aligner->fsas.size (); i++) {
//Map to Tropical semiring
VectorFst<StdArc>* tfst = new VectorFst<StdArc> ();
Map (aligner->fsas.at (i), tfst, LogToStdMapper ());
ArcMap (aligner->fsas.at (i), tfst, LogToStdMapper ());
pruner.prune_fst (tfst);
RmEpsilon (tfst);
//Skip empty results. This should only happen
Expand Down Expand Up @@ -160,7 +160,7 @@ void compileNBestFarArchive (M2MFstAligner* aligner,
string key_suffix = "";
string key = "";
char keybuf[16];
int32 generate_keys = 7; //Suitable for up to several million lattices
int32_t generate_keys = 7; //Suitable for up to several million lattices
bool set_syms = false; //Have we set the isyms successfully yet??
//Build us a FarWriter to compile the archive
FarWriter<StdArc> *far_writer = \
Expand All @@ -180,11 +180,11 @@ void compileNBestFarArchive (M2MFstAligner* aligner,
VectorFst<StdArc>* ffst = new VectorFst<StdArc> ();

//Map to the Tropical semiring
Map (fsts->at(i), tfst, LogToStdMapper ());
ArcMap (fsts->at(i), tfst, LogToStdMapper ());
pruner.prune_fst (tfst);

//Map back to the Log semiring
Map (*tfst, lfst, StdToLogMapper ());
ArcMap (*tfst, lfst, StdToLogMapper ());

//Perform posterior normalization of the N-best lattice by pushing weights
// in the log semiring and then removing the final weight.
Expand All @@ -204,7 +204,7 @@ void compileNBestFarArchive (M2MFstAligner* aligner,
if (pfst->NumStates () == 0) continue;

//Finally map back to the Tropical semiring for the last time
Map (*pfst, ffst, LogToStdMapper ());
ArcMap (*pfst, ffst, LogToStdMapper ());

if (set_syms == false) {
ffst->SetInputSymbols (aligner->isyms);
Expand Down Expand Up @@ -267,28 +267,28 @@ int main( int argc, char* argv[] ){
PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false);
M2MFstAligner aligner;

if (FLAGS_load_model == true) {
aligner = *(new M2MFstAligner (FLAGS_model_file, FLAGS_penalize,
FLAGS_penalize_em, FLAGS_restrict));
switch (load_input_file (&aligner, FLAGS_input, FLAGS_delim,
FLAGS_s1_char_delim, FLAGS_s2_char_delim,
FLAGS_load_model)) {
if (FST_FLAGS_load_model == true) {
aligner = *(new M2MFstAligner (FST_FLAGS_model_file, FST_FLAGS_penalize,
FST_FLAGS_penalize_em, FST_FLAGS_restrict));
switch (load_input_file (&aligner, FST_FLAGS_input, FST_FLAGS_delim,
FST_FLAGS_s1_char_delim, FST_FLAGS_s2_char_delim,
FST_FLAGS_load_model)) {
case 0:
cerr << "Please provide a valid input file." << endl;
case -1:
return -1;
}
} else {
aligner = *(new M2MFstAligner (FLAGS_seq1_del, FLAGS_seq2_del,
FLAGS_seq1_max, FLAGS_seq2_max,
FLAGS_seq1_sep, FLAGS_seq2_sep,
FLAGS_s1s2_sep, FLAGS_eps, FLAGS_skip,
FLAGS_penalize, FLAGS_penalize_em,
FLAGS_restrict, FLAGS_grow
aligner = *(new M2MFstAligner (FST_FLAGS_seq1_del, FST_FLAGS_seq2_del,
FST_FLAGS_seq1_max, FST_FLAGS_seq2_max,
FST_FLAGS_seq1_sep, FST_FLAGS_seq2_sep,
FST_FLAGS_s1s2_sep, FST_FLAGS_eps, FST_FLAGS_skip,
FST_FLAGS_penalize, FST_FLAGS_penalize_em,
FST_FLAGS_restrict, FST_FLAGS_grow
));
switch (load_input_file (&aligner, FLAGS_input, FLAGS_delim,
FLAGS_s1_char_delim, FLAGS_s2_char_delim,
FLAGS_load_model)) {
switch (load_input_file (&aligner, FST_FLAGS_input, FST_FLAGS_delim,
FST_FLAGS_s1_char_delim, FST_FLAGS_s2_char_delim,
FST_FLAGS_load_model)) {
case 0:
cerr << "Please provide a valid input file." << endl;
case -1:
Expand All @@ -298,7 +298,7 @@ int main( int argc, char* argv[] ){
cerr << "Starting EM..." << endl;
aligner.maximization (false);
cerr << "Finished first iter..." << endl;
for (int i = 1; i <= FLAGS_iter; i++) {
for (int i = 1; i <= FST_FLAGS_iter; i++) {
cerr << "Iteration: " << i << " Change: ";
aligner.expectation ();
cerr << aligner.maximization (false) << endl;
Expand All @@ -309,21 +309,21 @@ int main( int argc, char* argv[] ){
aligner.maximization (true);
}

StdArc::Weight pthresh = FLAGS_pthresh == -99.0
StdArc::Weight pthresh = FST_FLAGS_pthresh == -99.0
? LogWeight::Zero().Value()
: FLAGS_pthresh;
if (FLAGS_write_model.compare ("") != 0) {
: FST_FLAGS_pthresh;
if (FST_FLAGS_write_model.compare ("") != 0) {
cerr << "Writing alignment model in OpenFst format to file: "
<< FLAGS_write_model << endl;
aligner.write_model (FLAGS_write_model);
<< FST_FLAGS_write_model << endl;
aligner.write_model (FST_FLAGS_write_model);
}

if (FLAGS_lattice == true)
compileNBestFarArchive (&aligner, &aligner.fsas, FLAGS_ofile, pthresh,
FLAGS_nbest, FLAGS_fb, FLAGS_penalize);
if (FST_FLAGS_lattice == true)
compileNBestFarArchive (&aligner, &aligner.fsas, FST_FLAGS_ofile, pthresh,
FST_FLAGS_nbest, FST_FLAGS_fb, FST_FLAGS_penalize);
else
write_alignments (&aligner, FLAGS_ofile, pthresh, FLAGS_nbest,
FLAGS_fb, FLAGS_penalize);
write_alignments (&aligner, FST_FLAGS_ofile, pthresh, FST_FLAGS_nbest,
FST_FLAGS_fb, FST_FLAGS_penalize);

return 0;
}
14 changes: 7 additions & 7 deletions src/bin/phonetisaurus-arpa2wfst.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,23 @@ int main (int argc, char* argv []) {
set_new_handler (FailedNewHandler);
PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false);

if (FLAGS_lm.compare ("") == 0) {
if (FST_FLAGS_lm.compare ("") == 0) {
cerr << "You must supply an ARPA format lm "
"to --lm for conversion!" << endl;
return 0;
}

cerr << "Initializing..." << endl;
ARPA2WFST* converter = new ARPA2WFST (FLAGS_lm, FLAGS_eps, FLAGS_sb,
FLAGS_se, FLAGS_split, FLAGS_skip,
FLAGS_tie);
ARPA2WFST* converter = new ARPA2WFST (FST_FLAGS_lm, FST_FLAGS_eps, FST_FLAGS_sb,
FST_FLAGS_se, FST_FLAGS_split, FST_FLAGS_skip,
FST_FLAGS_tie);
cerr << "Converting..." << endl;
converter->arpa_to_wfst ();

converter->arpafst.Write (FLAGS_ofile);
converter->arpafst.Write (FST_FLAGS_ofile);

if (FLAGS_ssyms.compare ("") != 0) {
converter->ssyms->WriteText (FLAGS_ssyms);
if (FST_FLAGS_ssyms.compare ("") != 0) {
converter->ssyms->WriteText (FST_FLAGS_ssyms);
}

delete converter;
Expand Down
72 changes: 36 additions & 36 deletions src/bin/phonetisaurus-g2pfst.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ using namespace fst;

typedef unordered_map<int, vector<PathData> > RMAP;

void PrintPathData (const vector<PathData>& results, string FLAGS_word,
void PrintPathData (const vector<PathData>& results, string FST_FLAGS_word,
const SymbolTable* osyms, bool print_scores = true,
bool nlog_probs = true) {
for (int i = 0; i < results.size (); i++) {
cout << FLAGS_word << "\t";
cout << FST_FLAGS_word << "\t";
if (print_scores == true) {
if (nlog_probs == true)
cout << results [i].PathWeight << "\t";
Expand All @@ -60,20 +60,20 @@ void PrintPathData (const vector<PathData>& results, string FLAGS_word,
}

void EvaluateWordlist (PhonetisaurusScript& decoder, vector<string> corpus,
int FLAGS_beam, int FLAGS_nbest, bool FLAGS_reverse,
string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep,
bool FLAGS_write_fsts, bool FLAGS_print_scores,
bool FLAGS_accumulate, double FLAGS_pmass,
bool FLAGS_nlog_probs) {
int FST_FLAGS_beam, int FST_FLAGS_nbest, bool FST_FLAGS_reverse,
string FST_FLAGS_skip, double FST_FLAGS_thresh, string FST_FLAGS_gsep,
bool FST_FLAGS_write_fsts, bool FST_FLAGS_print_scores,
bool FST_FLAGS_accumulate, double FST_FLAGS_pmass,
bool FST_FLAGS_nlog_probs) {
for (int i = 0; i < corpus.size (); i++) {
vector<PathData> results = decoder.Phoneticize (corpus [i], FLAGS_nbest,
FLAGS_beam, FLAGS_thresh,
FLAGS_write_fsts,
FLAGS_accumulate, FLAGS_pmass);
vector<PathData> results = decoder.Phoneticize (corpus [i], FST_FLAGS_nbest,
FST_FLAGS_beam, FST_FLAGS_thresh,
FST_FLAGS_write_fsts,
FST_FLAGS_accumulate, FST_FLAGS_pmass);
PrintPathData (results, corpus [i],
decoder.osyms_,
FLAGS_print_scores,
FLAGS_nlog_probs);
FST_FLAGS_print_scores,
FST_FLAGS_nlog_probs);
}
}

Expand All @@ -99,65 +99,65 @@ int main (int argc, char* argv []) {
set_new_handler (FailedNewHandler);
PhonetisaurusSetFlags (usage.c_str(), &argc, &argv, false);

if (FLAGS_model.compare ("") == 0) {
if (FST_FLAGS_model.compare ("") == 0) {
cerr << "You must supply an FST model to --model" << endl;
exit (1);
} else {
std::ifstream model_ifp (FLAGS_model);
std::ifstream model_ifp (FST_FLAGS_model);
if (!model_ifp.good ()) {
cout << "Failed to open --model file '"
<< FLAGS_model << "'" << endl;
<< FST_FLAGS_model << "'" << endl;
exit (1);
}
}

if (FLAGS_pmass < 0.0 || FLAGS_pmass > 1) {
if (FST_FLAGS_pmass < 0.0 || FST_FLAGS_pmass > 1) {
cout << "--pmass must be a float value between 0.0 and 1.0." << endl;
exit (1);
}
if (FLAGS_pmass == 0.0)
FLAGS_pmass = 99.0;
if (FST_FLAGS_pmass == 0.0)
FST_FLAGS_pmass = 99.0;
else
FLAGS_pmass = -log (FLAGS_pmass);
FST_FLAGS_pmass = -log (FST_FLAGS_pmass);

bool use_wordlist = false;
if (FLAGS_wordlist.compare ("") != 0) {
std::ifstream wordlist_ifp (FLAGS_wordlist);
if (FST_FLAGS_wordlist.compare ("") != 0) {
std::ifstream wordlist_ifp (FST_FLAGS_wordlist);
if (!wordlist_ifp.good ()) {
cout << "Failed to open --wordlist file '"
<< FLAGS_wordlist << "'" << endl;
<< FST_FLAGS_wordlist << "'" << endl;
exit (1);
} else {
use_wordlist = true;
}
}

if (FLAGS_wordlist.compare ("") == 0 && FLAGS_word.compare ("") == 0) {
if (FST_FLAGS_wordlist.compare ("") == 0 && FST_FLAGS_word.compare ("") == 0) {
cout << "Either --wordlist or --word must be set!" << endl;
exit (1);
}

if (use_wordlist == true) {
vector<string> corpus;
LoadWordList (FLAGS_wordlist, &corpus);
LoadWordList (FST_FLAGS_wordlist, &corpus);

PhonetisaurusScript decoder (FLAGS_model, FLAGS_gsep);
PhonetisaurusScript decoder (FST_FLAGS_model, FST_FLAGS_gsep);
EvaluateWordlist (
decoder, corpus, FLAGS_beam, FLAGS_nbest, FLAGS_reverse,
FLAGS_skip, FLAGS_thresh, FLAGS_gsep, FLAGS_write_fsts,
FLAGS_print_scores, FLAGS_accumulate, FLAGS_pmass,
FLAGS_nlog_probs
decoder, corpus, FST_FLAGS_beam, FST_FLAGS_nbest, FST_FLAGS_reverse,
FST_FLAGS_skip, FST_FLAGS_thresh, FST_FLAGS_gsep, FST_FLAGS_write_fsts,
FST_FLAGS_print_scores, FST_FLAGS_accumulate, FST_FLAGS_pmass,
FST_FLAGS_nlog_probs
);
} else {
PhonetisaurusScript decoder (FLAGS_model, FLAGS_gsep);
PhonetisaurusScript decoder (FST_FLAGS_model, FST_FLAGS_gsep);
vector<PathData> results = decoder.Phoneticize (
FLAGS_word, FLAGS_nbest, FLAGS_beam, FLAGS_thresh,
FLAGS_write_fsts, FLAGS_accumulate, FLAGS_pmass
FST_FLAGS_word, FST_FLAGS_nbest, FST_FLAGS_beam, FST_FLAGS_thresh,
FST_FLAGS_write_fsts, FST_FLAGS_accumulate, FST_FLAGS_pmass
);
PrintPathData (results, FLAGS_word,
PrintPathData (results, FST_FLAGS_word,
decoder.osyms_,
FLAGS_print_scores,
FLAGS_nlog_probs);
FST_FLAGS_print_scores,
FST_FLAGS_nlog_probs);
}

return 0;
Expand Down
Loading