|
TRF Language Model
|
#include "trf-sa-train.h"
Go to the source code of this file.
Macros | |
| #define | lout_exe lout<<"[TRF] " |
Functions | |
| double | CalculateLL (Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL=NULL) |
| void | WordStr2ID (Array< VocabID > &aIDs, Array< String > &aStrs, LHash< const char *, VocabID > &vocabhash) |
| void | LMRescore (Model &m, const char *pathTest) |
| void | ModelNorm (Model &m, const char *type) |
| void | ModelRevisePi (Model &m, const char *pathLenFile) |
| opt | Add (wbOPT_STRING, "vocab", &cfg_pathVocab, "The vocabulary") |
| opt | Add (wbOPT_STRING, "read", &cfg_pathModelRead, "Read the init model to train") |
| opt | Add (wbOPT_STRING, "write", &cfg_pathModelWrite, "output the normalizaed model") |
| opt | Add (wbOPT_INT, "thread", &cfg_nThread, "The thread number") |
| opt | Add (wbOPT_STRING, "test", &cfg_pathTest, "test corpus (TXT)") |
| opt | Add (wbOPT_STRING, "nbest", &cfg_pathNbest, "nbest list (kaldi output)") |
| opt | Add (wbOPT_STRING, "lmscore", &cfg_writeLmscore, "[LMrescore] output the lmsocre") |
| opt | Add (wbOPT_STRING, "lmscore-debug", &cfg_writeLmscoreDebug, "[LMrescore] output the lmscore of each word for word-level combination") |
| opt | Add (wbOPT_STRING, "lmscore-test-id", &cfg_writeTestID, "[LMrescore] output the vocab-id of test file") |
| opt | Add (wbOPT_STRING, "norm-method", &cfg_norm_method, "[Norm] method: Exact or AIS") |
| opt | Add (wbOPT_INT, "AIS-chain", &cfg_nAIS_chain_num, "[AIS] the chain number") |
| opt | Add (wbOPT_INT, "AIS-inter", &cfg_nAIS_inter_num, "[AIS] the intermediate distribution number") |
| opt | Add (wbOPT_INT, "norm-len-min", &cfg_norm_lenmin, "[Norm] min-length") |
| opt | Add (wbOPT_INT, "norm-len-max", &cfg_norm_lenmax, "[Norm] max-length") |
| opt | Add (wbOPT_STRING, "len-file", &cfg_pathLenFile, "[Revise pi] a txt-id-file used to summary pi") |
| opt | Parse (_argc, _argv) |
| aLL | Fill (0) |
| lout | Progress (0, true, nCorpusNum - 1, "omp GetLL") |
| for (int i=0;i< nCorpusNum;i++) | |
| lout_variable (nSent) | |
| lout_variable (nWord) | |
| if (pPPL) *pPPL | |
Variables | |
| char * | cfg_pathVocab = NULL |
| char * | cfg_pathModelRead = NULL |
| char * | cfg_pathModelWrite = NULL |
| int | cfg_nThread = 1 |
| char * | cfg_pathTest = NULL |
| char * | cfg_pathNbest = NULL |
| char * | cfg_writeLmscore = NULL |
| char * | cfg_writeLmscoreDebug = NULL |
| char * | cfg_writeTestID = NULL |
| char * | cfg_norm_method = NULL |
| int | cfg_nAIS_chain_num = 0 |
| int | cfg_nAIS_inter_num = 0 |
| int | cfg_norm_lenmin = 1 |
| int | cfg_norm_lenmax = -1 |
| char * | cfg_pathLenFile = NULL |
| Option | opt |
| const char * | cfg_strHelp |
| _wbMain | |
| lout<< "*********************************************"<< endl;lout<< " TRF.exe "<< endl;lout<< "\t"<< __DATE__<< "\t"<< __TIME__<< "\t"<< endl;lout<< "**********************************************"<< endl;omp_set_num_threads(cfg_nThread);lout<< "[OMP] omp_thread = "<< omp_get_max_threads()<< endl;omp_rand(cfg_nThread);Vocab v(cfg_pathVocab);Model m(&v);lout_exe<< "Read model: "<< cfg_pathModelRead<< endl;m.ReadT(cfg_pathModelRead);if(cfg_norm_method) { ModelNorm(m, cfg_norm_method);} if(cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile);} if(cfg_pathTest) { CorpusTxt *p=new CorpusTxt(cfg_pathTest);double dPPL;double dLL=CalculateLL(m, p, p->GetNum(), &dPPL);lout_exe<< "calculate LL of : "<< cfg_pathTest<< endl;lout_exe<< "-LL = "<< -dLL<< endl;lout_exe<< "PPL = "<< dPPL<< endl;SAFE_DELETE(p);} if(cfg_pathNbest) { LMRescore(m, cfg_pathNbest);} if(cfg_pathModelWrite) { lout_exe<< "Write model: "<< cfg_pathModelWrite<< endl;m.WriteT(cfg_pathModelWrite);} return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL){ Array< double > | aLL (omp_get_max_threads()) |
| Array< int > | aWords (omp_get_max_threads()) |
| Array< int > | aSents (omp_get_max_threads()) |
| Array< VocabID > | aSeq |
| double | dLL = aLL.Sum() / nCorpusNum |
| int | nSent = aSents.Sum() |
| int | nWord = aWords.Sum() |
| #define lout_exe lout<<"[TRF] " |
Definition at line 59 of file main-TRF.cpp.
| opt Add | ( | wbOPT_STRING | , |
| "vocab" | , | ||
| & | cfg_pathVocab, | ||
| "The vocabulary" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "read" | , | ||
| & | cfg_pathModelRead, | ||
| "Read the init model to train" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "write" | , | ||
| & | cfg_pathModelWrite, | ||
| "output the normalizaed model" | |||
| ) |
| opt Add | ( | wbOPT_INT | , |
| "thread" | , | ||
| & | cfg_nThread, | ||
| "The thread number" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "test" | , | ||
| & | cfg_pathTest, | ||
| "test corpus (TXT)" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "nbest" | , | ||
| & | cfg_pathNbest, | ||
| "nbest list (kaldi output)" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "lmscore" | , | ||
| & | cfg_writeLmscore, | ||
| "[LMrescore] output the lmsocre" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "lmscore-debug" | , | ||
| & | cfg_writeLmscoreDebug, | ||
| "[LMrescore] output the lmscore of each word for word-level combination" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "lmscore-test-id" | , | ||
| & | cfg_writeTestID, | ||
| "[LMrescore] output the vocab-id of test file" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "norm-method" | , | ||
| & | cfg_norm_method, | ||
| "[Norm] method: Exact or AIS" | |||
| ) |
| opt Add | ( | wbOPT_INT | , |
| "AIS-chain" | , | ||
| & | cfg_nAIS_chain_num, | ||
| "[AIS] the chain number" | |||
| ) |
| opt Add | ( | wbOPT_INT | , |
| "AIS-inter" | , | ||
| & | cfg_nAIS_inter_num, | ||
| "[AIS] the intermediate distribution number" | |||
| ) |
| opt Add | ( | wbOPT_INT | , |
| "norm-len-min" | , | ||
| & | cfg_norm_lenmin, | ||
| "[Norm] min-length" | |||
| ) |
| opt Add | ( | wbOPT_INT | , |
| "norm-len-max" | , | ||
| & | cfg_norm_lenmax, | ||
| "[Norm] max-length" | |||
| ) |
| opt Add | ( | wbOPT_STRING | , |
| "len-file" | , | ||
| & | cfg_pathLenFile, | ||
| "[Revise pi] a txt-id-file used to summary pi" | |||
| ) |
| aSents Fill | ( | 0 | ) |
| for | ( | ) |
Definition at line 156 of file main-TRF.cpp.
| if | ( | pPPL | ) |
| void LMRescore | ( | Model & | m, |
| const char * | pathTest | ||
| ) |
| lout_variable | ( | nSent | ) |
| lout_variable | ( | nWord | ) |
| void ModelNorm | ( | Model & | m, |
| const char * | type | ||
| ) |
Definition at line 245 of file main-TRF.cpp.
| void ModelRevisePi | ( | Model & | m, |
| const char * | pathLenFile | ||
| ) |
Definition at line 285 of file main-TRF.cpp.
| opt Parse | ( | _argc | , |
| _argv | |||
| ) |
| lout Progress | ( | 0 | , |
| true | , | ||
| nCorpusNum - 1 | , | ||
| "omp GetLL" | |||
| ) |
| void WordStr2ID | ( | Array< VocabID > & | aIDs, |
| Array< String > & | aStrs, | ||
| LHash< const char *, VocabID > & | vocabhash | ||
| ) |
Definition at line 180 of file main-TRF.cpp.
| _wbMain |
Definition at line 68 of file main-TRF.cpp.
| lout<< "*********************************************" << endl; lout << " TRF.exe " << endl; lout << "\t" << __DATE__ << "\t" << __TIME__ << "\t" << endl; lout << "**********************************************" << endl; omp_set_num_threads(cfg_nThread); lout << "[OMP] omp_thread = " << omp_get_max_threads() << endl; omp_rand(cfg_nThread); Vocab v(cfg_pathVocab); Model m(&v); lout_exe << "Read model: " << cfg_pathModelRead << endl; m.ReadT(cfg_pathModelRead); if (cfg_norm_method) { ModelNorm(m, cfg_norm_method); } if (cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile); } if (cfg_pathTest) { CorpusTxt *p = new CorpusTxt(cfg_pathTest); double dPPL; double dLL = CalculateLL(m, p, p->GetNum(), &dPPL); lout_exe << "calculate LL of : " << cfg_pathTest << endl; lout_exe << "-LL = " << -dLL << endl; lout_exe << "PPL = " << dPPL << endl; SAFE_DELETE(p); } if (cfg_pathNbest) { LMRescore(m, cfg_pathNbest); } if (cfg_pathModelWrite) { lout_exe << "Write model: " << cfg_pathModelWrite << endl; m.WriteT(cfg_pathModelWrite); } return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL ){ Array<double> aLL(omp_get_max_threads()) |
Definition at line 145 of file main-TRF.cpp.
| Array<int> aSents(omp_get_max_threads()) |
Definition at line 153 of file main-TRF.cpp.
| Array<int> aWords(omp_get_max_threads()) |
| int cfg_nAIS_chain_num = 0 |
Definition at line 39 of file main-TRF.cpp.
| int cfg_nAIS_inter_num = 0 |
Definition at line 40 of file main-TRF.cpp.
| int cfg_norm_lenmax = -1 |
Definition at line 42 of file main-TRF.cpp.
| int cfg_norm_lenmin = 1 |
Definition at line 41 of file main-TRF.cpp.
| char* cfg_norm_method = NULL |
Definition at line 38 of file main-TRF.cpp.
| int cfg_nThread = 1 |
Definition at line 27 of file main-TRF.cpp.
| char* cfg_pathLenFile = NULL |
Definition at line 44 of file main-TRF.cpp.
| char* cfg_pathModelRead = NULL |
Definition at line 24 of file main-TRF.cpp.
| char* cfg_pathModelWrite = NULL |
Definition at line 25 of file main-TRF.cpp.
| char* cfg_pathNbest = NULL |
Definition at line 32 of file main-TRF.cpp.
| char* cfg_pathTest = NULL |
Definition at line 29 of file main-TRF.cpp.
| char* cfg_pathVocab = NULL |
Definition at line 23 of file main-TRF.cpp.
| const char* cfg_strHelp |
Definition at line 48 of file main-TRF.cpp.
| char* cfg_writeLmscore = NULL |
Definition at line 33 of file main-TRF.cpp.
| char* cfg_writeLmscoreDebug = NULL |
Definition at line 34 of file main-TRF.cpp.
| char* cfg_writeTestID = NULL |
Definition at line 35 of file main-TRF.cpp.
| return dLL = aLL.Sum() / nCorpusNum |
Definition at line 171 of file main-TRF.cpp.
| int nSent = aSents.Sum() |
Definition at line 172 of file main-TRF.cpp.
| int nWord = aWords.Sum() |
Definition at line 173 of file main-TRF.cpp.
| Option opt |
Definition at line 46 of file main-TRF.cpp.