TRF Language Model
|
#include <trf-sa-train.h>
Public Member Functions | |
SAfunc () | |
SAfunc (Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL, int nMinibatch=100) | |
~SAfunc () | |
virtual void | Reset (Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL, int nMinibatch=100) |
reset More... | |
void | PrintInfo () |
print information More... | |
int | GetFeatNum () const |
get the ngram feature number More... | |
int | GetZetaNum () const |
get the zeta parameter number More... | |
void | RandSeq (Seq &seq, int nLen=-1) |
get a random sequence More... | |
void | GetParam (double *pdParams) |
get the parameters More... | |
void | GetEmpVar (CorpusBase *pCorpus, Vec< double > &vVar) |
calculate the empirical variance More... | |
virtual void | GetSampleExp (VecShell< double > &vExp, VecShell< double > &vExp2, VecShell< double > &vLen) |
calculate the expectation of SA samples More... | |
void | IterEnd (double *pFinalParams) |
do something at the end of the SA iteration More... | |
void | WriteModel (int nEpoch) |
Write Model. More... | |
virtual void | SetParam (double *pdParams) |
set the parameter. More... | |
virtual void | GetGradient (double *pdGradient) |
calculate the gradient g(x) More... | |
virtual double | GetValue () |
calculate the function value f(x) More... | |
virtual int | GetExtraValues (int t, double *pdValues) |
calculate extra values which will be printed at each iteration More... | |
Public Member Functions inherited from trf::MLfunc | |
MLfunc () | |
MLfunc (Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL) | |
void | Reset (Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL) |
void | GetParam (double *pdParams) |
virtual double | GetLL (CorpusBase *pCorpus, int nCalNum=-1, Vec< double > *pLL=NULL) |
calculate the log-likelihood on corpus More... | |
void | GetEmpExp (CorpusBase *pCorpus, Vec< double > &vExp) |
get the empirical expectation More... | |
Public Member Functions inherited from wb::Func | |
Func (int nParamNum=0) | |
void | SetParamNum (int n) |
setting the parameter number More... | |
int | GetParamNum () const |
get the parameter number More... | |
Public Attributes | |
double | m_fRegL2 |
l2 regularization More... | |
double | m_var_gap |
a variance gap used in gradient scaling More... | |
AISConfig | m_AISConfigForZ |
the AIS configuration for normalization More... | |
AISConfig | m_AISConfigForP |
the AIS configuration for calculating the LL. More... | |
int | m_nCDSampleTimes |
the CD-n: the sample number. More... | |
int | m_nSASampleTimes |
the SA sample times More... | |
File | m_fdbg |
output the sample pi/zeta information More... | |
File | m_fparm |
output the parameters of each iteration More... | |
File | m_fgrad |
output the gradient of each iteration More... | |
File | m_fmean |
output the p[f] on training set More... | |
File | m_fvar |
output the variance at each iteration More... | |
File | m_fexp |
output the expectation of each iteration More... | |
File | m_fsamp |
output all the samples More... | |
File | m_ftrain |
output all the training sequences More... | |
File | m_ftrainLL |
output loglikelihood on training set More... | |
File | m_fvallidLL |
output loglikelihood on valid set More... | |
File | m_ftestLL |
output loglikelihood on test set More... | |
Public Attributes inherited from trf::MLfunc | |
const char * | m_pathOutputModel |
Write to model during iteration. More... | |
Protected Attributes | |
int | m_nMiniBatchSample |
mini-batch for samples More... | |
Vec< Prob > | m_samplePi |
the length distribution used for sample More... | |
Vec< double > | m_vAllSampleLenCount |
the count of each length in all samples More... | |
Vec< double > | m_vCurSampleLenCount |
the count of length in samples of current iteration More... | |
int | m_nTotalSample |
the total sample number More... | |
Vec< double > | m_vSampleExp |
the sample expectation More... | |
Vec< double > | m_vSampleExp2 |
the sample expectation^2 More... | |
Vec< double > | m_vSampleLen |
the sample length expectation More... | |
Array< Seq * > | m_threadSeq |
save the last sequence of each thread More... | |
Mat< double > | m_matSampleExp |
the sample expectation of each thread More... | |
Mat< double > | m_matSampleExp2 |
the sample expectation^2 of each thread More... | |
Mat< double > | m_matSampleLen |
the length count of sample of each thread More... | |
Vec< double > | m_vEmpiricalVar |
empirical variance More... | |
Protected Attributes inherited from trf::MLfunc | |
Model * | m_pModel |
HRF model. More... | |
Vec< PValue > | m_value |
save the temp value of type PValue. More... | |
CorpusBase * | m_pCorpusTrain |
training corpus More... | |
CorpusBase * | m_pCorpusValid |
valid corpus More... | |
CorpusBase * | m_pCorpusTest |
test corpus More... | |
Vec< Prob > | m_trainPi |
the length distribution in training corpus More... | |
Vec< double > | m_vEmpiricalExp |
the empirical expectation More... | |
Protected Attributes inherited from wb::Func | |
Solve * | m_pSolve |
Save the solve pointer. More... | |
int | m_nParamNum |
the parameter number More... | |
Friends | |
class | SAtrain |
Additional Inherited Members | |
Static Public Attributes inherited from wb::Func | |
static const int | cn_exvalue_max_num = 100 |
Definition at line 66 of file trf-sa-train.h.
|
inline |
Definition at line 120 of file trf-sa-train.h.
|
inline |
Definition at line 124 of file trf-sa-train.h.
|
inline |
Definition at line 130 of file trf-sa-train.h.
void trf::SAfunc::GetEmpVar | ( | CorpusBase * | pCorpus, |
Vec< double > & | vVar | ||
) |
calculate the empirical variance
Count p[f^2]
Count p_l[f]. Since saving p_l[f] for every length costs too much memory, we calculate each p_l[f] separately.
find all the sequences with length nLen
calculate p[f^2] - pi_l * p_l[f]^2
output the zero number
save
Definition at line 144 of file trf-sa-train.cpp.
|
virtual |
calculate extra values which will be printed at each iteration
[in] | t | iteration number from 1 to ... |
[out] | pdValues | Return the values that need to be output. The memory is allocated outside and the maximum size = cn_exvalue_max_num |
Reimplemented from trf::MLfunc.
Definition at line 460 of file trf-sa-train.cpp.
|
inline |
get the ngram feature number
Definition at line 143 of file trf-sa-train.h.
|
virtual |
calculate the gradient g(x)
( m_fEmpiricalVarGap + m_fRegL2 )
Reimplemented from trf::MLfunc.
Definition at line 400 of file trf-sa-train.cpp.
void trf::SAfunc::GetParam | ( | double * | pdParams | ) |
get the parameters
Definition at line 120 of file trf-sa-train.cpp.
|
virtual |
calculate the expectation of SA samples
save the length count
save current length count
Definition at line 301 of file trf-sa-train.cpp.
|
inlinevirtual |
calculate the function value f(x)
Reimplemented from trf::MLfunc.
Definition at line 163 of file trf-sa-train.h.
|
inline |
get the zeta parameter number
Definition at line 145 of file trf-sa-train.h.
void trf::SAfunc::IterEnd | ( | double * | pFinalParams | ) |
do something at the end of the SA iteration
Definition at line 380 of file trf-sa-train.cpp.
void trf::SAfunc::PrintInfo | ( | ) |
print information
Definition at line 85 of file trf-sa-train.cpp.
void trf::SAfunc::RandSeq | ( | Seq & | seq, |
int | nLen = -1 |
||
) |
|
virtual |
reset
Definition at line 39 of file trf-sa-train.cpp.
|
virtual |
void trf::SAfunc::WriteModel | ( | int | nEpoch | ) |
Write Model.
Definition at line 386 of file trf-sa-train.cpp.
|
friend |
Definition at line 68 of file trf-sa-train.h.
AISConfig trf::SAfunc::m_AISConfigForP |
the AIS configuration for calculating the LL.
Definition at line 101 of file trf-sa-train.h.
AISConfig trf::SAfunc::m_AISConfigForZ |
the AIS configuration for normalization
Definition at line 100 of file trf-sa-train.h.
File trf::SAfunc::m_fdbg |
output the sample pi/zeta information
Definition at line 106 of file trf-sa-train.h.
File trf::SAfunc::m_fexp |
output the expectation of each iteration
Definition at line 111 of file trf-sa-train.h.
File trf::SAfunc::m_fgrad |
output the gradient of each iteration
Definition at line 108 of file trf-sa-train.h.
File trf::SAfunc::m_fmean |
output the p[f] on training set
Definition at line 109 of file trf-sa-train.h.
File trf::SAfunc::m_fparm |
output the parameters of each iteration
Definition at line 107 of file trf-sa-train.h.
double trf::SAfunc::m_fRegL2 |
l2 regularization
Definition at line 97 of file trf-sa-train.h.
File trf::SAfunc::m_fsamp |
output all the samples
Definition at line 112 of file trf-sa-train.h.
File trf::SAfunc::m_ftestLL |
output loglikelihood on test set
Definition at line 116 of file trf-sa-train.h.
File trf::SAfunc::m_ftrain |
output all the training sequences
Definition at line 113 of file trf-sa-train.h.
File trf::SAfunc::m_ftrainLL |
output loglikelihood on training set
Definition at line 114 of file trf-sa-train.h.
File trf::SAfunc::m_fvallidLL |
output loglikelihood on valid set
Definition at line 115 of file trf-sa-train.h.
File trf::SAfunc::m_fvar |
output the variance at each iteration
Definition at line 110 of file trf-sa-train.h.
|
protected |
the sample expectation of each thread
Definition at line 90 of file trf-sa-train.h.
|
protected |
the sample expectation^2 of each thread
Definition at line 91 of file trf-sa-train.h.
|
protected |
the length count of sample of each thread
Definition at line 92 of file trf-sa-train.h.
int trf::SAfunc::m_nCDSampleTimes |
the CD-n: the sample number.
Definition at line 102 of file trf-sa-train.h.
|
protected |
mini-batch for samples
Definition at line 70 of file trf-sa-train.h.
int trf::SAfunc::m_nSASampleTimes |
the SA sample times
Definition at line 103 of file trf-sa-train.h.
|
protected |
the total sample number
Definition at line 76 of file trf-sa-train.h.
the length distribution used for sample
Definition at line 72 of file trf-sa-train.h.
save the last sequence of each thread
Definition at line 86 of file trf-sa-train.h.
|
protected |
the count of each length in all samples
Definition at line 74 of file trf-sa-train.h.
double trf::SAfunc::m_var_gap |
a variance gap used in gradient scaling
Definition at line 98 of file trf-sa-train.h.
|
protected |
the count of length in samples of current iteration
Definition at line 75 of file trf-sa-train.h.
|
protected |
empirical variance
Definition at line 94 of file trf-sa-train.h.
|
protected |
the sample expectation
Definition at line 80 of file trf-sa-train.h.
|
protected |
the sample expectation^2
Definition at line 81 of file trf-sa-train.h.
|
protected |
the sample length expectation
Definition at line 82 of file trf-sa-train.h.