10 Reset(pModel, pTrain, pValid, pTest);
29 nMaxLen = max(nMaxLen, pValid->GetMaxLen());
31 nMaxLen = max(nMaxLen, pTest->GetMaxLen());
33 lout_warning("[MLfunc] Reset: the max-len in training (" << nMaxLen
35 lout_warning("[MLfunc] Reset: Re-set the model with length=" << nMaxLen);
43 for (int i = 0; i < aLenCount.GetNum(); i++) {
80 int nThread = omp_get_max_threads();
98 int nCorpusNum = (nCalNum == -1) ? pCorpus->GetNum() : min(nCalNum, pCorpus->GetNum());
99 Title::Precent(0, true, nCorpusNum-1, "omp GetLL");
100 #pragma omp parallel for firstprivate(aSeq)
101 for (int i = 0; i < nCorpusNum; i++) {
107 vSum[omp_get_thread_num()] += logprob;
108 vNum[omp_get_thread_num()]++;
122 for (int t = 0; t < nThread; t++) {
157 aExpEmpirical.Fill(0);
166 aExpEmpirical += aExpGivenX;
169 aExpEmpirical *= 1.0 / nNum;
174 pdGradient[i] = -(aExpEmpirical[i] - aExpTheoretical[i]);
178 static File fileDbg("GradientML.dbg", "wt");
197 if ( (t - 1) % 10 == 0) {
virtual double GetValue()
calculate the function value f(x)
Vec< Prob > m_pi
the prior length distribution
int GetParamNum() const
Get the total parameter number.
int m_hnode
the number of hidden nodes
virtual void SetParam(double *pdParams)
set the parameter.
void SetPi(Prob *pPi)
Set the pi.
void GetNodeExp(double *pExp, Prob *pLenProb=NULL)
[exact] sum_l { n_l/n * E_{p_l}[f] }: Exactly calculate the expectation over x and h ...
const char * m_pathOutputModel
Write to model during iteration.
virtual void SetParam(PValue *pParam)
Set the parameters.
hidden-random-field model
double ExactNormalize(int nLen)
[exact] Exact Normalization, return the logz of given length
CorpusBase * m_pCorpusTrain
training corpus
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
void BufMap(T *p, VecShell< T > &feat, Mat3dShell< T > &VH, Mat3dShell< T > &CH, Mat3dShell< T > &HH, MatShell< T > &Bias)
Map a parameter vector to each kind of parameters.
T * GetBuffer(int i=0) const
get the buffer pointer
virtual int GetExtraValues(int t, double *pdValues)
calculate extra values which will be print at each iteration
virtual void Print(const char *p_pMessage,...)
print
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL)
virtual bool GetSeq(int nLine, Array< VocabID > &aSeq)=0
get the sequence in nLine
int m_maxlen
the maximum length of model, excluding <s> and </s>. The min-len = 1
Vec< LogP > m_zeta
the estimated normalization constants (fix = 0)
void GetParam(double *pdParams)
Vec< Prob > m_trainPi
the length distribution in training corpus
virtual double GetLL(CorpusBase *pCorpus, int nCalNum=-1)
calculate the log-likelihood on corpus
Vocab * GetVocab() const
Get Vocab.
int GetNum() const
Get Array number.
int m_hlayer
the number of hidden layer
Log lout
the definition is in wb-log.cpp
CorpusBase * m_pCorpusValid
valid corpus
virtual void GetLenCount(Array< int > &aLenCount)=0
get the length count
Model * m_pModel
HRF model.
void Reset(Vocab *pv, int hlayer, int hnode, int maxlen)
reset, the maxlen is the length excluding the beg/end symbols.
void PrintArray(const char *pformat, TYPE *pbuf, int num)
print an array into a file
int m_nParamNum
the parameter number
void GetParam(PValue *pParam)
Get the parameter vector.
virtual void GetGradient(double *pdGradient)
calculate the gradient g(x)
Vec< LogP > m_logz
the normalization constants log Z_l
int GetMaxLen() const
Get max-len.
virtual int GetMaxLen() const
get the max length
void GetHiddenExp(VecShell< int > x, double *pExp)
[exact] E_{p_l(h|x)}[f]: don't clean the pExp and directly add the new exp to pExp.
LogP GetLogProb(Seq &seq, bool bNorm=true)
calculate the probability
void WriteT(const char *pfilename)
Write Model.
virtual int GetNum() const
get the seq number
CorpusBase * m_pCorpusTest
test corpus