27 Reset(pModel, pTrain, pValid, pTest);
46 nMaxLen = max(nMaxLen, pValid->
GetMaxLen());
48 nMaxLen = max(nMaxLen, pTest->
GetMaxLen());
51 lout_warning(
"[MLfunc] Reset: Re-set the model with length=" << nMaxLen);
55 lout_warning(
"[MLfunc] Reset: the max-len in training (" << nMaxLen
64 for (
int i = 1; i < aLenCount.
GetNum(); i++) {
104 int nThread = omp_get_max_threads();
112 int nCorpusNum = (nCalNum == -1) ? pCorpus->
GetNum() : min(nCalNum, pCorpus->
GetNum());
115 pLL->
Reset(nCorpusNum);
118 #pragma omp parallel for firstprivate(aSeq) 119 for (
int i = 0; i < nCorpusNum; i++) {
126 vSum[omp_get_thread_num()] += logprob;
127 vNum[omp_get_thread_num()]++;
140 for (
int t = 0; t < nThread; t++) {
153 #pragma omp parallel for firstprivate(aSeq) 154 for (
int i = 0; i < pCorpus->
GetNum(); i++) {
170 for (
int t = 0; t < omp_get_max_threads(); t++) {
173 vExp /= pCorpus->
GetNum();
196 static File fileDbg(
"GradientML.dbg",
"wt");
204 if ( (t - 1) % 10 == 0) {
CorpusBase * m_pCorpusValid
valid corpus
int GetParamNum() const
Get parameter number.
const char * m_pathOutputModel
Write to model during iteration.
void Reset(Vocab *pv, int maxlen)
reset, the maxlen is the length excluding the beg/end symbols.
void GetNodeExp(int nLen, double *pExp)
[exact] E_{p_l}[f]: Exactly calculate the expectation over x and h for length nLen ...
void SetPi(Prob *pPi)
Set the pi.
LogP GetLogProb(Seq &seq, bool bNorm=true)
calculate the probability
virtual void SetParam(PValue *pValue)
Set the parameters.
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
virtual int GetExtraValues(int t, double *pdValues)
calculate extra values which will be printed at each iteration
Vec< PValue > m_value
save the temp value of type PValue.
CorpusBase * m_pCorpusTrain
training corpus
define a sequence including the word sequence and class sequence
virtual bool GetSeq(int nLine, Array< VocabID > &aSeq)=0
get the sequence in nLine
void FeatCount(Seq &seq, double *pCount, double dadd=1.0)
Count the feature number in a sequence.
virtual double GetLL(CorpusBase *pCorpus, int nCalNum=-1, Vec< double > *pLL=NULL)
calculate the log-likelihood on corpus
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL)
int m_maxlen
the maximum length of model, excluding <s> and </s>. The min-len = 1
Vec< Prob > m_trainPi
the length distribution in training corpus
virtual void GetGradient(double *pdGradient)
calculate the gradient g(x)
Vec< double > m_vEmpiricalExp
the empirical expectation
void GetParam(PValue *pValue)
Get the parameter vector.
void Set(Array< int > &aInt, Vocab *pv)
transform the word sequence (from file) to Seq
void Progress(long long n=-1, bool bInit=false, long long total=100, const char *head="")
progress bar
Vocab * GetVocab() const
Get Vocab.
void GetParam(double *pdParams)
int GetNum() const
Get Array number.
Log lout
the definition is in wb-log.cpp
virtual void SetParam(double *pdParams)
set the parameter.
void GetEmpExp(CorpusBase *pCorpus, Vec< double > &vExp)
get the empirical expectation
virtual void GetLenCount(Array< int > &aLenCount)=0
get the length count
CorpusBase * m_pCorpusTest
test corpus
void PrintArray(const char *pformat, TYPE *pbuf, int num)
print an array into a file
virtual double ExactNormalize(int nLen)
[exact] Exact Normalization, return the logz of given length
int m_nParamNum
the parameter number
void WriteT(const char *pfilename)
Write Model.
Model * m_pModel
HRF model.
int GetMaxLen() const
Get max-len.
virtual int GetMaxLen() const
get the max length
virtual int GetNum() const
get the seq number
virtual double GetValue()
calculate the function value f(x)