TRF Language Model
|
#include "trf-sa-train.h"
Go to the source code of this file.
Macros | |
#define | lout_exe lout<<"[TRF] " |
Functions | |
double | CalculateLL (Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL=NULL) |
void | WordStr2ID (Array< VocabID > &aIDs, Array< String > &aStrs, LHash< const char *, VocabID > &vocabhash) |
void | LMRescore (Model &m, const char *pathTest) |
void | ModelNorm (Model &m, const char *type) |
void | ModelRevisePi (Model &m, const char *pathLenFile) |
opt | Add (wbOPT_STRING, "vocab", &cfg_pathVocab, "The vocabulary") |
opt | Add (wbOPT_STRING, "read", &cfg_pathModelRead, "Read the init model to train") |
opt | Add (wbOPT_STRING, "write", &cfg_pathModelWrite, "output the normalizaed model") |
opt | Add (wbOPT_INT, "thread", &cfg_nThread, "The thread number") |
opt | Add (wbOPT_STRING, "test", &cfg_pathTest, "test corpus (TXT)") |
opt | Add (wbOPT_STRING, "nbest", &cfg_pathNbest, "nbest list (kaldi output)") |
opt | Add (wbOPT_STRING, "lmscore", &cfg_writeLmscore, "[LMrescore] output the lmsocre") |
opt | Add (wbOPT_STRING, "lmscore-debug", &cfg_writeLmscoreDebug, "[LMrescore] output the lmscore of each word for word-level combination") |
opt | Add (wbOPT_STRING, "lmscore-test-id", &cfg_writeTestID, "[LMrescore] output the vocab-id of test file") |
opt | Add (wbOPT_STRING, "norm-method", &cfg_norm_method, "[Norm] method: Exact or AIS") |
opt | Add (wbOPT_INT, "AIS-chain", &cfg_nAIS_chain_num, "[AIS] the chain number") |
opt | Add (wbOPT_INT, "AIS-inter", &cfg_nAIS_inter_num, "[AIS] the intermediate distribution number") |
opt | Add (wbOPT_INT, "norm-len-min", &cfg_norm_lenmin, "[Norm] min-length") |
opt | Add (wbOPT_INT, "norm-len-max", &cfg_norm_lenmax, "[Norm] max-length") |
opt | Add (wbOPT_STRING, "len-file", &cfg_pathLenFile, "[Revise pi] a txt-id-file used to summary pi") |
opt | Parse (_argc, _argv) |
aLL | Fill (0) |
lout | Progress (0, true, nCorpusNum - 1, "omp GetLL") |
for (int i=0;i< nCorpusNum;i++) | |
lout_variable (nSent) | |
lout_variable (nWord) | |
if (pPPL) *pPPL | |
Variables | |
char * | cfg_pathVocab = NULL |
char * | cfg_pathModelRead = NULL |
char * | cfg_pathModelWrite = NULL |
int | cfg_nThread = 1 |
char * | cfg_pathTest = NULL |
char * | cfg_pathNbest = NULL |
char * | cfg_writeLmscore = NULL |
char * | cfg_writeLmscoreDebug = NULL |
char * | cfg_writeTestID = NULL |
char * | cfg_norm_method = NULL |
int | cfg_nAIS_chain_num = 0 |
int | cfg_nAIS_inter_num = 0 |
int | cfg_norm_lenmin = 1 |
int | cfg_norm_lenmax = -1 |
char * | cfg_pathLenFile = NULL |
Option | opt |
const char * | cfg_strHelp |
_wbMain | |
lout<< "*********************************************"<< endl;lout<< " TRF.exe "<< endl;lout<< "\t"<< __DATE__<< "\t"<< __TIME__<< "\t"<< endl;lout<< "**********************************************"<< endl;omp_set_num_threads(cfg_nThread);lout<< "[OMP] omp_thread = "<< omp_get_max_threads()<< endl;omp_rand(cfg_nThread);Vocab v(cfg_pathVocab);Model m(&v);lout_exe<< "Read model: "<< cfg_pathModelRead<< endl;m.ReadT(cfg_pathModelRead);if(cfg_norm_method) { ModelNorm(m, cfg_norm_method);} if(cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile);} if(cfg_pathTest) { CorpusTxt *p=new CorpusTxt(cfg_pathTest);double dPPL;double dLL=CalculateLL(m, p, p->GetNum(), &dPPL);lout_exe<< "calculate LL of : "<< cfg_pathTest<< endl;lout_exe<< "-LL = "<< -dLL<< endl;lout_exe<< "PPL = "<< dPPL<< endl;SAFE_DELETE(p);} if(cfg_pathNbest) { LMRescore(m, cfg_pathNbest);} if(cfg_pathModelWrite) { lout_exe<< "Write model: "<< cfg_pathModelWrite<< endl;m.WriteT(cfg_pathModelWrite);} return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL){ Array< double > | aLL (omp_get_max_threads()) |
Array< int > | aWords (omp_get_max_threads()) |
Array< int > | aSents (omp_get_max_threads()) |
Array< VocabID > | aSeq |
double | dLL = aLL.Sum() / nCorpusNum |
int | nSent = aSents.Sum() |
int | nWord = aWords.Sum() |
#define lout_exe lout<<"[TRF] " |
Definition at line 59 of file main-TRF.cpp.
opt Add | ( | wbOPT_STRING | , |
"vocab" | , | ||
& | cfg_pathVocab, | ||
"The vocabulary" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"read" | , | ||
& | cfg_pathModelRead, | ||
"Read the init model to train" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"write" | , | ||
& | cfg_pathModelWrite, | ||
"output the normalizaed model" | |||
) |
opt Add | ( | wbOPT_INT | , |
"thread" | , | ||
& | cfg_nThread, | ||
"The thread number" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"test" | , | ||
& | cfg_pathTest, | ||
"test corpus (TXT)" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"nbest" | , | ||
& | cfg_pathNbest, | ||
"nbest list (kaldi output)" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"lmscore" | , | ||
& | cfg_writeLmscore, | ||
"[LMrescore] output the lmsocre" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"lmscore-debug" | , | ||
& | cfg_writeLmscoreDebug, | ||
"[LMrescore] output the lmscore of each word for word-level combination" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"lmscore-test-id" | , | ||
& | cfg_writeTestID, | ||
"[LMrescore] output the vocab-id of test file" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"norm-method" | , | ||
& | cfg_norm_method, | ||
"[Norm] method: Exact or AIS" | |||
) |
opt Add | ( | wbOPT_INT | , |
"AIS-chain" | , | ||
& | cfg_nAIS_chain_num, | ||
"[AIS] the chain number" | |||
) |
opt Add | ( | wbOPT_INT | , |
"AIS-inter" | , | ||
& | cfg_nAIS_inter_num, | ||
"[AIS] the intermediate distribution number" | |||
) |
opt Add | ( | wbOPT_INT | , |
"norm-len-min" | , | ||
& | cfg_norm_lenmin, | ||
"[Norm] min-length" | |||
) |
opt Add | ( | wbOPT_INT | , |
"norm-len-max" | , | ||
& | cfg_norm_lenmax, | ||
"[Norm] max-length" | |||
) |
opt Add | ( | wbOPT_STRING | , |
"len-file" | , | ||
& | cfg_pathLenFile, | ||
"[Revise pi] a txt-id-file used to summary pi" | |||
) |
aSents Fill | ( | 0 | ) |
for | ( | ) |
Definition at line 156 of file main-TRF.cpp.
if | ( | pPPL | ) |
void LMRescore | ( | Model & | m, |
const char * | pathTest | ||
) |
lout_variable | ( | nSent | ) |
lout_variable | ( | nWord | ) |
void ModelNorm | ( | Model & | m, |
const char * | type | ||
) |
Definition at line 245 of file main-TRF.cpp.
void ModelRevisePi | ( | Model & | m, |
const char * | pathLenFile | ||
) |
Definition at line 285 of file main-TRF.cpp.
opt Parse | ( | _argc | , |
_argv | |||
) |
lout Progress | ( | 0 | , |
true | , | ||
nCorpusNum - 1 | , | ||
"omp GetLL" | |||
) |
void WordStr2ID | ( | Array< VocabID > & | aIDs, |
Array< String > & | aStrs, | ||
LHash< const char *, VocabID > & | vocabhash | ||
) |
Definition at line 180 of file main-TRF.cpp.
_wbMain |
Definition at line 68 of file main-TRF.cpp.
lout<< "*********************************************" << endl; lout << " TRF.exe " << endl; lout << "\t" << __DATE__ << "\t" << __TIME__ << "\t" << endl; lout << "**********************************************" << endl; omp_set_num_threads(cfg_nThread); lout << "[OMP] omp_thread = " << omp_get_max_threads() << endl; omp_rand(cfg_nThread); Vocab v(cfg_pathVocab); Model m(&v); lout_exe << "Read model: " << cfg_pathModelRead << endl; m.ReadT(cfg_pathModelRead); if (cfg_norm_method) { ModelNorm(m, cfg_norm_method); } if (cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile); } if (cfg_pathTest) { CorpusTxt *p = new CorpusTxt(cfg_pathTest); double dPPL; double dLL = CalculateLL(m, p, p->GetNum(), &dPPL); lout_exe << "calculate LL of : " << cfg_pathTest << endl; lout_exe << "-LL = " << -dLL << endl; lout_exe << "PPL = " << dPPL << endl; SAFE_DELETE(p); } if (cfg_pathNbest) { LMRescore(m, cfg_pathNbest); } if (cfg_pathModelWrite) { lout_exe << "Write model: " << cfg_pathModelWrite << endl; m.WriteT(cfg_pathModelWrite); } return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL ){ Array<double> aLL(omp_get_max_threads()) |
Definition at line 145 of file main-TRF.cpp.
Array<int> aSents(omp_get_max_threads()) |
Definition at line 153 of file main-TRF.cpp.
Array<int> aWords(omp_get_max_threads()) |
int cfg_nAIS_chain_num = 0 |
Definition at line 39 of file main-TRF.cpp.
int cfg_nAIS_inter_num = 0 |
Definition at line 40 of file main-TRF.cpp.
int cfg_norm_lenmax = -1 |
Definition at line 42 of file main-TRF.cpp.
int cfg_norm_lenmin = 1 |
Definition at line 41 of file main-TRF.cpp.
char* cfg_norm_method = NULL |
Definition at line 38 of file main-TRF.cpp.
int cfg_nThread = 1 |
Definition at line 27 of file main-TRF.cpp.
char* cfg_pathLenFile = NULL |
Definition at line 44 of file main-TRF.cpp.
char* cfg_pathModelRead = NULL |
Definition at line 24 of file main-TRF.cpp.
char* cfg_pathModelWrite = NULL |
Definition at line 25 of file main-TRF.cpp.
char* cfg_pathNbest = NULL |
Definition at line 32 of file main-TRF.cpp.
char* cfg_pathTest = NULL |
Definition at line 29 of file main-TRF.cpp.
char* cfg_pathVocab = NULL |
Definition at line 23 of file main-TRF.cpp.
const char* cfg_strHelp |
Definition at line 48 of file main-TRF.cpp.
char* cfg_writeLmscore = NULL |
Definition at line 33 of file main-TRF.cpp.
char* cfg_writeLmscoreDebug = NULL |
Definition at line 34 of file main-TRF.cpp.
char* cfg_writeTestID = NULL |
Definition at line 35 of file main-TRF.cpp.
double dLL = aLL.Sum() / nCorpusNum |
Definition at line 171 of file main-TRF.cpp.
int nSent = aSents.Sum() |
Definition at line 172 of file main-TRF.cpp.
int nWord = aWords.Sum() |
Definition at line 173 of file main-TRF.cpp.
Option opt |
Definition at line 46 of file main-TRF.cpp.