TRF Language Model
main-TRF.cpp File Reference
#include "trf-sa-train.h"

Go to the source code of this file.

Macros

#define lout_exe   lout<<"[TRF] "
 

Functions

double CalculateLL (Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL=NULL)
 
void WordStr2ID (Array< VocabID > &aIDs, Array< String > &aStrs, LHash< const char *, VocabID > &vocabhash)
 
void LMRescore (Model &m, const char *pathTest)
 
void ModelNorm (Model &m, const char *type)
 
void ModelRevisePi (Model &m, const char *pathLenFile)
 
opt Add (wbOPT_STRING, "vocab", &cfg_pathVocab, "The vocabulary")
 
opt Add (wbOPT_STRING, "read", &cfg_pathModelRead, "Read the init model to train")
 
opt Add (wbOPT_STRING, "write", &cfg_pathModelWrite, "output the normalizaed model")
 
opt Add (wbOPT_INT, "thread", &cfg_nThread, "The thread number")
 
opt Add (wbOPT_STRING, "test", &cfg_pathTest, "test corpus (TXT)")
 
opt Add (wbOPT_STRING, "nbest", &cfg_pathNbest, "nbest list (kaldi output)")
 
opt Add (wbOPT_STRING, "lmscore", &cfg_writeLmscore, "[LMrescore] output the lmsocre")
 
opt Add (wbOPT_STRING, "lmscore-debug", &cfg_writeLmscoreDebug, "[LMrescore] output the lmscore of each word for word-level combination")
 
opt Add (wbOPT_STRING, "lmscore-test-id", &cfg_writeTestID, "[LMrescore] output the vocab-id of test file")
 
opt Add (wbOPT_STRING, "norm-method", &cfg_norm_method, "[Norm] method: Exact or AIS")
 
opt Add (wbOPT_INT, "AIS-chain", &cfg_nAIS_chain_num, "[AIS] the chain number")
 
opt Add (wbOPT_INT, "AIS-inter", &cfg_nAIS_inter_num, "[AIS] the intermediate distribution number")
 
opt Add (wbOPT_INT, "norm-len-min", &cfg_norm_lenmin, "[Norm] min-length")
 
opt Add (wbOPT_INT, "norm-len-max", &cfg_norm_lenmax, "[Norm] max-length")
 
opt Add (wbOPT_STRING, "len-file", &cfg_pathLenFile, "[Revise pi] a txt-id-file used to summary pi")
 
opt Parse (_argc, _argv)
 
aLL Fill (0)
 
lout Progress (0, true, nCorpusNum - 1, "omp GetLL")
 
 for (int i=0;i< nCorpusNum;i++)
 
 lout_variable (nSent)
 
 lout_variable (nWord)
 
 if (pPPL) *pPPL
 

Variables

char * cfg_pathVocab = NULL
 
char * cfg_pathModelRead = NULL
 
char * cfg_pathModelWrite = NULL
 
int cfg_nThread = 1
 
char * cfg_pathTest = NULL
 
char * cfg_pathNbest = NULL
 
char * cfg_writeLmscore = NULL
 
char * cfg_writeLmscoreDebug = NULL
 
char * cfg_writeTestID = NULL
 
char * cfg_norm_method = NULL
 
int cfg_nAIS_chain_num = 0
 
int cfg_nAIS_inter_num = 0
 
int cfg_norm_lenmin = 1
 
int cfg_norm_lenmax = -1
 
char * cfg_pathLenFile = NULL
 
Option opt
 
const char * cfg_strHelp
 
 _wbMain
 
lout<< "*********************************************"<< endl;lout<< " TRF.exe "<< endl;lout<< "\t"<< __DATE__<< "\t"<< __TIME__<< "\t"<< endl;lout<< "**********************************************"<< endl;omp_set_num_threads(cfg_nThread);lout<< "[OMP] omp_thread = "<< omp_get_max_threads()<< endl;omp_rand(cfg_nThread);Vocab v(cfg_pathVocab);Model m(&v);lout_exe<< "Read model: "<< cfg_pathModelRead<< endl;m.ReadT(cfg_pathModelRead);if(cfg_norm_method) { ModelNorm(m, cfg_norm_method);} if(cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile);} if(cfg_pathTest) { CorpusTxt *p=new CorpusTxt(cfg_pathTest);double dPPL;double dLL=CalculateLL(m, p, p->GetNum(), &dPPL);lout_exe<< "calculate LL of : "<< cfg_pathTest<< endl;lout_exe<< "-LL = "<< -dLL<< endl;lout_exe<< "PPL = "<< dPPL<< endl;SAFE_DELETE(p);} if(cfg_pathNbest) { LMRescore(m, cfg_pathNbest);} if(cfg_pathModelWrite) { lout_exe<< "Write model: "<< cfg_pathModelWrite<< endl;m.WriteT(cfg_pathModelWrite);} return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL){ Array< double > aLL (omp_get_max_threads())
 
Array< int > aWords (omp_get_max_threads())
 
Array< int > aSents (omp_get_max_threads())
 
Array< VocabID > aSeq
 
double dLL = aLL.Sum() / nCorpusNum
 
int nSent = aSents.Sum()
 
int nWord = aWords.Sum()
 

Macro Definition Documentation

§ lout_exe

#define lout_exe   lout<<"[TRF] "

Definition at line 59 of file main-TRF.cpp.

Function Documentation

§ Add() [1/15]

opt Add ( wbOPT_STRING  ,
"vocab"  ,
& cfg_pathVocab,
"The vocabulary"   
)

§ Add() [2/15]

opt Add ( wbOPT_STRING  ,
"read"  ,
& cfg_pathModelRead,
"Read the init model to train"   
)

§ Add() [3/15]

opt Add ( wbOPT_STRING  ,
"write"  ,
& cfg_pathModelWrite,
"output the normalizaed model"   
)

§ Add() [4/15]

opt Add ( wbOPT_INT  ,
"thread"  ,
& cfg_nThread,
"The thread number"   
)

§ Add() [5/15]

opt Add ( wbOPT_STRING  ,
"test"  ,
& cfg_pathTest,
"test corpus (TXT)"   
)

§ Add() [6/15]

opt Add ( wbOPT_STRING  ,
"nbest"  ,
& cfg_pathNbest,
"nbest list (kaldi output)"   
)

§ Add() [7/15]

opt Add ( wbOPT_STRING  ,
"lmscore"  ,
& cfg_writeLmscore,
"[LMrescore] output the lmsocre"   
)

§ Add() [8/15]

opt Add ( wbOPT_STRING  ,
"lmscore-debug"  ,
& cfg_writeLmscoreDebug,
"[LMrescore] output the lmscore of each word for word-level combination"   
)

§ Add() [9/15]

opt Add ( wbOPT_STRING  ,
"lmscore-test-id"  ,
& cfg_writeTestID,
"[LMrescore] output the vocab-id of test file"   
)

§ Add() [10/15]

opt Add ( wbOPT_STRING  ,
"norm-method"  ,
& cfg_norm_method,
"[Norm] method: Exact or AIS"   
)

§ Add() [11/15]

opt Add ( wbOPT_INT  ,
"AIS-chain"  ,
& cfg_nAIS_chain_num,
"[AIS] the chain number"   
)

§ Add() [12/15]

opt Add ( wbOPT_INT  ,
"AIS-inter"  ,
& cfg_nAIS_inter_num,
"[AIS] the intermediate distribution number"   
)

§ Add() [13/15]

opt Add ( wbOPT_INT  ,
"norm-len-min"  ,
& cfg_norm_lenmin,
"[Norm] min-length"   
)

§ Add() [14/15]

opt Add ( wbOPT_INT  ,
"norm-len-max"  ,
& cfg_norm_lenmax,
"[Norm] max-length"   
)

§ Add() [15/15]

opt Add ( wbOPT_STRING  ,
"len-file"  ,
& cfg_pathLenFile,
"[Revise pi] a txt-id-file used to summary pi"   
)

§ CalculateLL()

double CalculateLL ( Model &  m,
CorpusTxt *  pCorpus,
int  nCorpusNum,
double *  pPPL = NULL 
)

§ Fill()

aSents Fill ( 0 )

§ for()

for ( int i = 0; i < nCorpusNum; i++ )

Definition at line 156 of file main-TRF.cpp.

§ if()

if ( pPPL  )

§ LMRescore()

void LMRescore ( Model &  m,
const char *  pathTest 
)

hash the vocab

rescore

Definition at line 197 of file main-TRF.cpp.

§ lout_variable() [1/2]

lout_variable ( nSent  )

§ lout_variable() [2/2]

lout_variable ( nWord  )

§ ModelNorm()

void ModelNorm ( Model &  m,
const char *  type 
)

Definition at line 245 of file main-TRF.cpp.

§ ModelRevisePi()

void ModelRevisePi ( Model &  m,
const char *  pathLenFile 
)

Definition at line 285 of file main-TRF.cpp.

§ Parse()

opt Parse ( _argc  ,
_argv   
)

§ Progress()

lout Progress ( 0 ,
true  ,
nCorpusNum -  1,
"omp GetLL"   
)

§ WordStr2ID()

void WordStr2ID ( Array< VocabID > &  aIDs,
Array< String > &  aStrs,
LHash< const char *, VocabID > &  vocabhash 
)

Definition at line 180 of file main-TRF.cpp.

Variable Documentation

§ _wbMain

_wbMain
Initial value:
{
    opt.m_strOtherHelp = cfg_strHelp;
const char * cfg_strHelp
Definition: main-TRF.cpp:48
string m_strOtherHelp
extra help information, which will be output in PrintUsage
Definition: wb-option.h:58
Option opt
Definition: main-TRF.cpp:46

Definition at line 68 of file main-TRF.cpp.

§ aLL

lout<< "*********************************************" << endl; lout << " TRF.exe " << endl; lout << "\t" << __DATE__ << "\t" << __TIME__ << "\t" << endl; lout << "**********************************************" << endl; omp_set_num_threads(cfg_nThread); lout << "[OMP] omp_thread = " << omp_get_max_threads() << endl; omp_rand(cfg_nThread); Vocab v(cfg_pathVocab); Model m(&v); lout_exe << "Read model: " << cfg_pathModelRead << endl; m.ReadT(cfg_pathModelRead); if (cfg_norm_method) { ModelNorm(m, cfg_norm_method); } if (cfg_pathLenFile) { ModelRevisePi(m, cfg_pathLenFile); } if (cfg_pathTest) { CorpusTxt *p = new CorpusTxt(cfg_pathTest); double dPPL; double dLL = CalculateLL(m, p, p->GetNum(), &dPPL); lout_exe << "calculate LL of : " << cfg_pathTest << endl; lout_exe << "-LL = " << -dLL << endl; lout_exe << "PPL = " << dPPL << endl; SAFE_DELETE(p); } if (cfg_pathNbest) { LMRescore(m, cfg_pathNbest); } if (cfg_pathModelWrite) { lout_exe << "Write model: " << cfg_pathModelWrite << endl; m.WriteT(cfg_pathModelWrite); } return 1;}double CalculateLL(Model &m, CorpusTxt *pCorpus, int nCorpusNum, double *pPPL ){ Array<double> aLL(omp_get_max_threads())

Definition at line 145 of file main-TRF.cpp.

§ aSents

Array<int> aSents(omp_get_max_threads())

§ aSeq

Array<VocabID> aSeq

Definition at line 153 of file main-TRF.cpp.

§ aWords

Array<int> aWords(omp_get_max_threads())

§ cfg_nAIS_chain_num

int cfg_nAIS_chain_num = 0

Definition at line 39 of file main-TRF.cpp.

§ cfg_nAIS_inter_num

int cfg_nAIS_inter_num = 0

Definition at line 40 of file main-TRF.cpp.

§ cfg_norm_lenmax

int cfg_norm_lenmax = -1

Definition at line 42 of file main-TRF.cpp.

§ cfg_norm_lenmin

int cfg_norm_lenmin = 1

Definition at line 41 of file main-TRF.cpp.

§ cfg_norm_method

char* cfg_norm_method = NULL

Definition at line 38 of file main-TRF.cpp.

§ cfg_nThread

int cfg_nThread = 1

Definition at line 27 of file main-TRF.cpp.

§ cfg_pathLenFile

char* cfg_pathLenFile = NULL

Definition at line 44 of file main-TRF.cpp.

§ cfg_pathModelRead

char* cfg_pathModelRead = NULL

Definition at line 24 of file main-TRF.cpp.

§ cfg_pathModelWrite

char* cfg_pathModelWrite = NULL

Definition at line 25 of file main-TRF.cpp.

§ cfg_pathNbest

char* cfg_pathNbest = NULL

Definition at line 32 of file main-TRF.cpp.

§ cfg_pathTest

char* cfg_pathTest = NULL

Definition at line 29 of file main-TRF.cpp.

§ cfg_pathVocab

char* cfg_pathVocab = NULL

Definition at line 23 of file main-TRF.cpp.

§ cfg_strHelp

const char* cfg_strHelp
Initial value:
= "[Usage] : \n"
"Normalizing: \n"
" trf -vocab [vocab] -read [model] -write [output model] -norm-method [Exact/AIS]\n"
"Calculate log-likelihood:\n"
" trf -vocab [vocab] -read [model] -test [txt-id-file]\n"
"language model rescoring:\n"
" trf -vocab [vocab] -read [model] -nbest [nbest list] -lmscore [output lmscore]\n"
"Revise the length distribution pi:\n"
" trf -vocab [vocab] -read [model] -write [output moddel] -len-file [a txt-id-file used to summary pi]\n"

Definition at line 48 of file main-TRF.cpp.

§ cfg_writeLmscore

char* cfg_writeLmscore = NULL

Definition at line 33 of file main-TRF.cpp.

§ cfg_writeLmscoreDebug

char* cfg_writeLmscoreDebug = NULL

Definition at line 34 of file main-TRF.cpp.

§ cfg_writeTestID

char* cfg_writeTestID = NULL

Definition at line 35 of file main-TRF.cpp.

§ dLL

double dLL = aLL.Sum() / nCorpusNum

Definition at line 171 of file main-TRF.cpp.

§ nSent

int nSent = aSents.Sum()

Definition at line 172 of file main-TRF.cpp.

§ nWord

int nWord = aWords.Sum()

Definition at line 173 of file main-TRF.cpp.

§ opt

Option opt

Definition at line 46 of file main-TRF.cpp.