for (int i = 0; i < aSeqs.GetNum(); i++) {
    // ...
}
aSeqs.SetNum(maxlen + 1);
for (int i = 1; i < aSeqs.GetNum(); i++) {
    // ...
}
// ...
GetEmpVar(pTrain, m_vEmpiricalVar);
// ...
m_nMiniBatchSample = nMinibatch;
// ...
lout << "Smoothing the pi" << endl;
// ...
// locate the most probable length in the training distribution
for (int i = 1; i < m_trainPi.GetSize(); i++) {
    if (m_trainPi[i] > dMax) {
        // ...
    }
}
// ...
m_samplePi.Copy(m_trainPi);
for (int i = 1; i < iMax; i++) {
    // ...
}
// ...
// floor each length probability, then re-normalize the distribution
for (int i = 1; i < m_samplePi.GetSize(); i++) {
    m_samplePi[i] = max((double)m_samplePi[i], 1e-5);
}
// ...
LineNormalize(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
// ...
lout << "sample-pi = [ ";
lout.output(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
lout << "]" << endl;
m_pModel->SetPi(m_samplePi.GetBuf());
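The LineNormalize call above (declared later in this listing as void LineNormalize(Prob *pdProbs, int nNum)) re-normalizes the floored length distribution. A minimal sketch of the behavior the call relies on, assuming it simply rescales the nNum entries to sum to one; the Prob typedef is likewise an assumption, not the toolkit's implementation:

typedef double Prob;   // assumed stand-in for the toolkit's Prob type

// Rescale pdProbs[0..nNum-1] so that the entries sum to 1.
void LineNormalize(Prob *pdProbs, int nNum)
{
    double dSum = 0;
    for (int i = 0; i < nNum; i++)
        dSum += pdProbs[i];
    for (int i = 0; i < nNum; i++)
        pdProbs[i] /= dSum;   // safe here: the caller floors every entry at 1e-5
}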
m_vAllSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
m_vCurSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
m_vAllSampleLenCount.Fill(0);
// ...
m_nParamNum = m_pModel->GetParamNum() + m_pModel->GetMaxLen() + 1;
lout << "[SAfunc] *** Info: *** " << endl;
// ...
lout << "[SAfunc] *** [End] ***" << endl;
nLen = rand() % m_pModel->GetMaxLen() + 1;
// ...
seq.Random(m_pModel->m_pVocab);
if (pdParams == NULL)
    return;
// ...
m_value.Reset(m_pModel->GetParamNum());
for (int i = 0; i < m_value.GetSize(); i++)
    m_value[i] = (PValue)pdParams[i];
m_pModel->SetParam(m_value.GetBuf());
m_pModel->ExactNormalize(1);
// ...
m_pModel->SetZeta(pdParams + m_pModel->GetParamNum());
// ...
if (m_fparm.Good()) {
    m_fparm.PrintArray("%f ", pdParams, m_nParamNum);
}
if (pdParams == NULL)
    return;
// ...
m_value.Reset(m_pModel->GetParamNum());
m_pModel->GetParam(m_value.GetBuf());
for (int i = 0; i < m_value.GetSize(); i++)
    pdParams[i] = m_value[i];
// ...
// append the zeta parameters after the feature weights
pdParams += m_pModel->GetParamNum();
for (int i = 0; i <= m_pModel->GetMaxLen(); i++) {
    pdParams[i] = m_pModel->m_zeta[i];
}
if (*(double*)a < *(double*)b) return -1;
if (*(double*)a == *(double*)b) return 0;
if (*(double*)a > *(double*)b) return 1;
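A small usage sketch for the comparator above (hypothetical, not from the source): it plugs directly into the C standard qsort routine to sort a buffer of doubles in ascending order.

#include <cstdlib>

void SortExample()
{
    double adVals[] = { 3.0, 1.0, 2.0 };
    // after the call, adVals is { 1.0, 2.0, 3.0 }
    qsort(adVals, sizeof(adVals) / sizeof(adVals[0]), sizeof(double), qsort_compare_double);
}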
int nThread = omp_get_max_threads();
// ...
Prob *pi = m_trainPi.GetBuf();
#pragma omp parallel for firstprivate(aSeq)
for (int l = 0; l < pCorpus->GetNum(); l++) {
    double *pExpf2 = matExpf2[omp_get_thread_num()].GetBuf();
    // ...
    seq.Set(aSeq, m_pModel->m_pVocab);
    // ...
    int nLen = min(m_pModel->GetMaxLen(), seq.GetLen());
    // ...
    // count how often each feature fires in this sequence
    m_pModel->m_pFeat->Find(afeat, seq);
    for (int i = 0; i < afeat.GetNum(); i++) {
        int *p = aFeatNum.Insert(afeat[i], bFound);
        // ...
    }
    // ...
    // accumulate the squared per-sentence feature counts
    while ((pCount = iter.Next(nFeat))) {
        pExpf2[nFeat] += pow((double)(*pCount), 2);
    }
}
for (int t = 0; t < nThread; t++) {
    vExpf2 += matExpf2[t];
}
// ...
vExpf2 /= pCorpus->GetNum();
lout.Progress(0, true, m_pModel->GetMaxLen(), "[SAfunc] E_l[f]:");
for (int nLen = 1; nLen <= m_pModel->GetMaxLen(); nLen++)
{
    // collect the ids of the training sequences of length nLen
    for (int i = 0; i < pCorpus->GetNum(); i++) {
        // ...
        int nSeqLen = aSeq.GetNum();
        if (nLen == m_pModel->GetMaxLen()) {
            // ...
        }
        // ...
    }
    // ...
    #pragma omp parallel for firstprivate(aSeq)
    for (int k = 0; k < aSeqId.GetNum(); k++)
    {
        pCorpus->GetSeq(aSeqId[k], aSeq);
        // ...
        seq.Set(aSeq, m_pModel->m_pVocab);
        m_pModel->FeatCount(seq, matExp_l[omp_get_thread_num()].GetBuf());
    }
    // ...
    if (aSeqId.GetNum() > 0) {
        for (int t = 0; t < nThread; t++) {
            vExp_l += matExp_l[t];
        }
        vExp_l /= aSeqId.GetNum();
    }
    // ...
    // subtract pi_l * (E_l[f])^2 to turn the second moment into a variance
    for (int i = 0; i < m_pModel->GetParamNum(); i++)
        vExpf2[i] -= pi[nLen] * pow(vExp_l[i], 2);
}
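Taken together, the two passes compute a length-decomposed empirical variance for each feature f_i: the first pass accumulates the mean squared per-sentence count, the second subtracts the squared per-length means weighted by the length probabilities pi_l. As a sketch, assuming the elided lines add no further rescaling:

\mathrm{Var}[f_i] = \frac{1}{|\mathcal{D}|} \sum_{x \in \mathcal{D}} f_i(x)^2 - \sum_{l} \pi_l \big( E_l[f_i] \big)^2

where E_l[f_i] is the average count of f_i over the training sentences of length l.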
double dMinVarOverZero = 100;
for (int i = 0; i < m_nParamNum; i++) {
    // ...
    dMinVarOverZero = min(vExpf2[i], dMinVarOverZero);
    // ...
    if (vExpf2[i] < m_var_gap) {
        // ...
        vExpf2[i] = m_var_gap;
    }
}
// ...
lout_warning("[EmpiricalVar] Exist zero expectation (zero-num=" << nZero << ")");
// ...
lout << "[EmpiricalVar] the number of ( var < gap=" << m_var_gap << " ) is " << nDownGap << endl;
lout << "[EmpiricalVar] min variance value (over 0) is " << dMinVarOverZero << endl;
if (m_fmean.Good()) {
    lout << "Write Empirical Mean ..." << endl;
    // ...
    for (int i = 0; i < aLogExp.GetSize(); i++)
        aLogExp[i] = log(m_vEmpiricalExp[i]);
    m_pModel->m_pFeat->WriteT(m_fmean, aLogExp.GetBuf());
}
// ...
lout << "Write Empirical Var ..." << endl;
// ...
for (int i = 0; i < vVar.GetSize(); i++)
    aLogVar[i] = log(vVar[i]);
m_pModel->m_pFeat->WriteT(m_fvar, aLogVar.GetBuf());
int nThread = omp_get_max_threads();
m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
m_matSampleExp2.Reset(nThread, m_pModel->GetParamNum());
m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
// ...
m_matSampleExp.Fill(0);
m_matSampleLen.Fill(0);
// ...
// allocate one scratch sequence per thread on first use
if (m_threadSeq.GetNum() != nThread) {
    for (int i = 0; i < nThread; i++) {
        m_threadSeq[i] = new Seq;
    }
}
#pragma omp parallel for
for (int sample = 0; sample < m_nMiniBatchSample; sample++)
{
    int tid = omp_get_thread_num();
    // ...
    m_pModel->Sample(*m_threadSeq[tid]);
    // ...
    int nLen = min(m_pModel->GetMaxLen(), m_threadSeq[tid]->GetLen());
    // ...
    // count features, re-weighted from the sampling pi to the training pi
    m_pModel->FeatCount(*m_threadSeq[tid], aCurCount.GetBuf(), m_trainPi[nLen] / m_pModel->m_pi[nLen]);
    // ...
    for (int i = 0; i < aCurCount.GetSize(); i++) {
        m_matSampleExp[tid][i] += aCurCount[i];
        m_matSampleExp2[tid][i] += pow(aCurCount[i], 2);
    }
    // ...
    m_matSampleLen[tid][nLen]++;
    // ...
    if (m_fsamp.Good()) {
        m_threadSeq[tid]->Print(m_fsamp);
    }
}
lout << " len-jump acc-rate=";
// ...
m_pModel->m_nLenJumpAccTimes = 0;
m_pModel->m_nLenJumpTotalTime = 0;
lout << " class-propose acc-rate=";
// ...
m_pModel->m_nSampleHAccTimes = 0;
m_pModel->m_nSampleHTotalTimes = 0;
for (int t = 0; t < nThread; t++) {
    vExp += m_matSampleExp[t];
    vExp2 += m_matSampleExp2[t];
    vLen += m_matSampleLen[t];
}
m_vAllSampleLenCount += vLen;
m_vCurSampleLenCount.Copy(vLen);
m_nTotalSample += m_nMiniBatchSample;
// ...
vExp /= m_nMiniBatchSample;
vExp2 /= m_nMiniBatchSample;
vLen /= m_nMiniBatchSample;
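The reduction above follows a common lock-free OpenMP pattern: each thread accumulates into its own row of a matrix, and the rows are summed once the parallel region ends. A self-contained sketch of the pattern (illustrative only; std::vector stands in for the toolkit's Mat/Vec types):

#include <omp.h>
#include <vector>

int main()
{
    const int nThread = omp_get_max_threads();
    const int nDim = 8, nSample = 1000;
    // one accumulation row per thread avoids locking a shared vector
    std::vector< std::vector<double> > matExp(nThread, std::vector<double>(nDim, 0.0));

    #pragma omp parallel for
    for (int s = 0; s < nSample; s++) {
        int tid = omp_get_thread_num();
        for (int i = 0; i < nDim; i++)
            matExp[tid][i] += 1.0;   // stands in for the per-sample feature counts
    }

    // reduce the per-thread rows, then normalize by the sample count
    std::vector<double> vExp(nDim, 0.0);
    for (int t = 0; t < nThread; t++)
        for (int i = 0; i < nDim; i++)
            vExp[i] += matExp[t][i];
    for (int i = 0; i < nDim; i++)
        vExp[i] /= nSample;
    return 0;
}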
SetParam(pFinalParams);
// ...
m_pModel->SetPi(m_trainPi.GetBuf());
// ...
// write the model with the training pi, then restore the sampling pi
m_pModel->SetPi(m_trainPi.GetBuf());
m_pModel->WriteT(strTempModel);
m_pModel->SetPi(m_samplePi.GetBuf());
int nWeightNum = m_pModel->GetParamNum();
m_vSampleExp.Reset(nWeightNum);
m_vSampleExp2.Reset(nWeightNum);
m_vSampleLen.Reset(m_pModel->GetMaxLen() + 1);
// ...
GetSampleExp(m_vSampleExp, m_vSampleExp2, m_vSampleLen);
// (first branch of a preprocessor switch; the #if line is elided in the listing)
for (int i = 0; i < nWeightNum; i++) {
    pdGradient[i] = m_vEmpiricalExp[i] - m_vSampleExp[i]
        /* ... */ ;
}
#elif defined(_Hession)
for (int i = 0; i < nWeightNum; i++) {
    pdGradient[i] = m_vEmpiricalExp[i] - m_vSampleExp[i]
        /* ... */ ;
}
#else
// default: precondition the difference by the empirical variance plus the L2 constant
for (int i = 0; i < nWeightNum; i++) {
    pdGradient[i] = (   // assignment reconstructed; the opening line is elided in the listing
        m_vEmpiricalExp[i] - m_vSampleExp[i]
        /* ... */
        ) / (m_vEmpiricalVar[i] + m_fRegL2);
}
#endif
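In the default branch, then, the gradient for each feature weight is the difference between the empirical and the sampled expectation, preconditioned by the empirical variance plus the L2 constant. As a hedged sketch (the elided lines may add a regularization term to the numerator):

g_i = \frac{ E_{\tilde p}[f_i] - E_{p_\lambda}[f_i] }{ \mathrm{Var}_{\tilde p}[f_i] + \mu }, \qquad \mu = \mathtt{m\_fRegL2}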
// gradient with respect to the zeta (length-normalization) parameters
for (int l = 0; l <= m_pModel->GetMaxLen(); l++) {
    if (m_pModel->m_pi[l] > 0) {
        pdGradient[nWeightNum + l] = m_vSampleLen[l] / m_pModel->m_pi[l];
    }
    else {
        pdGradient[nWeightNum + l] = 0;
    }
}
// ...
if (m_fgrad.Good()) {
    m_fgrad.PrintArray("%f ", pdGradient, m_nParamNum);
}
// ...
m_fexp.PrintArray("%f ", m_vSampleExp.GetBuf(), m_vSampleExp.GetSize());
// evaluate the likelihoods with the training pi
m_pModel->SetPi(m_trainPi.GetBuf());
// ...
Vec<Prob> samsZeta(m_pModel->m_zeta.GetSize());
Vec<Prob> trueZeta(m_pModel->m_zeta.GetSize());
// ...
samsZeta = m_pModel->m_zeta;
// ...
if (m_pCorpusTrain) {
    pdValues[nValue++] = -GetLL(m_pCorpusTrain, -1, &vLL);
    if (m_ftrainLL.Good()) {
        m_ftrainLL.Reopen("wt");
        // ...
    }
}
if (m_pCorpusValid) {
    pdValues[nValue++] = -GetLL(m_pCorpusValid, -1, &vLL);
    if (m_fvallidLL.Good()) {
        m_fvallidLL.Reopen("wt");
        // ...
    }
}
// ...
pdValues[nValue++] = -GetLL(m_pCorpusTest, -1, &vLL);
if (m_ftestLL.Good()) {
    m_ftestLL.Reopen("wt");
    // ...
}
// ...
// for small models, also compute the exact normalization for comparison
if (m_pModel->m_pVocab->GetSize() < 100 && m_pModel->GetMaxOrder() < 4) {
    // ...
    m_pModel->ExactNormalize();
    trueZeta.Copy(m_pModel->m_zeta);
    if (m_pCorpusTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
    if (m_pCorpusValid) pdValues[nValue++] = -GetLL(m_pCorpusValid);
    if (m_pCorpusTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);
    // ...
    m_pModel->SetZeta(samsZeta.GetBuf());
}
// ...
if (!m_fdbg.Good()) {
    m_fdbg.Open("SAfunc.dbg", "wt");
}
// normalize the length counters in place, print them, then undo the scaling
m_vAllSampleLenCount *= 1.0 / m_nTotalSample;
m_vCurSampleLenCount *= 1.0 / m_nMiniBatchSample;
m_fdbg.PrintArray("%f ", m_vCurSampleLenCount.GetBuf() + 1, m_vCurSampleLenCount.GetSize() - 1);
m_fdbg.PrintArray("%f ", m_vAllSampleLenCount.GetBuf() + 1, m_vAllSampleLenCount.GetSize() - 1);
m_fdbg.PrintArray("%f ", m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
m_fdbg.PrintArray("%f ", trueZeta.GetBuf() + 1, trueZeta.GetSize() - 1);
m_fdbg.PrintArray("%f ", samsZeta.GetBuf() + 1, samsZeta.GetSize() - 1);
m_vAllSampleLenCount *= m_nTotalSample;
m_vCurSampleLenCount *= m_nMiniBatchSample;
// ...
// restore the sampling pi
m_pModel->SetPi(m_samplePi.GetBuf());
sscanf(pstr, "%lf,%lf", &tc, &beta);
// ...
// before t0: polynomial decay
gamma = 1.0 / (tc + pow(t, beta));
// ...
// after t0: switch to a 1/t decay
gamma = 1.0 / (tc + pow(t0, beta) + t - t0);
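A self-contained sketch of the two-phase gain schedule these fragments suggest: polynomial decay up to an iteration threshold t0, then a slower 1/t tail. The branch on t0 is elided in the listing and assumed here:

#include <cmath>

// gamma(t) = 1 / (tc + t^beta)               for t <= t0
// gamma(t) = 1 / (tc + t0^beta + (t - t0))   for t >  t0
double GainGet(int t, double tc, double beta, int t0)
{
    if (t0 <= 0 || t <= t0)
        return 1.0 / (tc + std::pow((double)t, beta));
    return 1.0 / (tc + std::pow((double)t0, beta) + (t - t0));
}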
double *pdCurParams = new double[m_pfunc->GetParamNum()];
double *pdCurGradient = new double[m_pfunc->GetParamNum()];
double *pdCurDir = new double[m_pfunc->GetParamNum()];
double dCurValue = 0;
double dExValues[Func::cn_exvalue_max_num];
// ...
double *pdAvgParams = NULL;
// ...
pdAvgParams = new double[m_pfunc->GetParamNum()];
// ...
for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
    pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1;
}
// ...
memset(pdCurGradient, 0, sizeof(double) * m_pfunc->GetParamNum());
memset(pdCurDir, 0, sizeof(double) * m_pfunc->GetParamNum());
// ...
m_pfunc->SetParam(pdCurParams);
lout_Solve << "************* Training Begin *****************" << endl;
// ...
for (m_nIterNum = m_nIterMin; m_nIterNum <= m_nIterMax; m_nIterNum++)
{
    // ...
    m_pfunc->SetParam(pdCurParams);
    // ...
    m_pfunc->GetGradient(pdCurGradient);
    // ...
    dCurValue = m_pfunc->GetValue();
    // ...
    memcpy(pdAvgParams, pdCurParams, sizeof(pdCurParams[0]) * m_pfunc->GetParamNum());
    // ...
    // running average of the parameters after iteration m_nAvgBeg
    for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
        pdAvgParams[i] += (pdCurParams[i] - pdAvgParams[i]) / (m_nIterNum - m_nAvgBeg);
    }
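The increment inside the loop is the standard online-mean recursion; writing t for m_nIterNum and t_0 for m_nAvgBeg:

\bar{\lambda}_t = \bar{\lambda}_{t-1} + \frac{ \lambda_t - \bar{\lambda}_{t-1} }{ t - t_0 }

so pdAvgParams always holds the average of the parameters over iterations t_0 + 1, ..., t.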
    cout << setprecision(4) << setiosflags(ios::fixed);
    lout << " epoch=" << m_fEpochNun;
    cout << setprecision(2) << setiosflags(ios::fixed);
    lout << " time=" << m_dSpendMinute << "m";
    lout << (bAvg ? " [Avg]" : " ");
    // ...
    if (bAvg) pSA->SetParam(pdAvgParams);
    // ...
    lout << "ExValues={ ";
    cout << setprecision(2) << setiosflags(ios::fixed);
    for (int i = 0; i < nExValueNum; i++)
        lout << dExValues[i] << " ";
    // ...
    if (m_aWriteAtIter.Find(m_nIterNum) != -1) {
        // ...
    }
    // ...
    // restore the current (non-averaged) parameters
    if (bAvg) pSA->SetParam(pdCurParams);
    if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) {
        // ...
    }
    // ...
    UpdateGamma(m_nIterNum);
    // ...
    UpdateDir(pdCurDir, pdCurGradient, pdCurParams);
    // ...
    Update(pdCurParams, pdCurDir, 0);
    // ...
}
lout_Solve << "************* Training End *****************" << endl;
lout_Solve << "iter=" << m_nIterNum << " time=" << m_dSpendMinute << "m" << endl;
lout_Solve << "********************************************" << endl;
// ...
if (bAvg) pSA->IterEnd(pdAvgParams);
else pSA->IterEnd(pdCurParams);
m_gamma_lambda = m_gain_lambda.Get(nIterNum);
m_gamma_zeta = m_gain_zeta.Get(nIterNum);
// ...
    << " g_zeta=" << m_gamma_zeta   // (continuation of an elided lout statement)
lout_assert(nWeightNum + nZetaNum == m_pfunc->GetParamNum());
// ...
// (first branch of a preprocessor switch; the #if line is elided — Adam-style update)
for (int i = 0; i < nWeightNum; i++) {
    double g = pGradient[i];
    adam_m[i] = adam_beta1 * adam_m[i] + (1 - adam_beta1) * g;
    adam_v[i] = adam_beta2 * adam_v[i] + (1 - adam_beta2) * g * g;
    // bias-corrected first and second moment estimates
    double m_hat = adam_m[i] / (1 - pow(adam_beta1, m_nIterNum));
    double v_hat = adam_v[i] / (1 - pow(adam_beta2, m_nIterNum));
    pDir[i] = adam_alpha * m_hat / (sqrt(v_hat) + adam_sigma);
}
#elif defined(_Hession)
for (int i = 0; i < nWeightNum; i++) {
    // ...
    // running estimate of the diagonal Hessian, used to scale the gradient
    m_avgHes[i] += m_gamma_lambda * (h - m_avgHes[i]);
    pDir[i] = m_gamma_lambda * pGradient[i] / max(1e-4, m_avgHes[i]);
}
#else
for (int i = 0; i < nWeightNum; i++) {
    // ...
    pDir[i] = m_gamma_lambda * pGradient[i];
}
#endif
// clip the update direction for the feature weights to [-m_dir_gap, m_dir_gap]
int n_dgap_cutnum = 0;
for (int i = 0; i < nWeightNum; i++) {
    if (pDir[i] > m_dir_gap) {
        pDir[i] = m_dir_gap;   // (body elided in the listing; clip assumed by symmetry)
        // ...
    }
    else if (pDir[i] < -m_dir_gap) {
        pDir[i] = -m_dir_gap;
        // ...
    }
}
// ...
// direction for the zeta parameters
for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
    // ...
}
if (m_bUpdate_lambda) {
    for (int i = 0; i < nWeightNum; i++) {
        pdParam[i] += pdDir[i];
    }
}
// ...
if (m_bUpdate_zeta) {
    for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
        pdParam[i] += pdDir[i];
    }
    // shift the zeta vector so the entry for length 1 stays at zero
    double zeta1 = pdParam[nWeightNum + 1];
    for (int i = nWeightNum + 1; i < nWeightNum + nZetaNum; i++) {
        pdParam[i] -= zeta1;   // (body elided in the listing; subtraction assumed)
    }
}
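The trailing loop pins the zeta vector to a reference length. Reading the fragment together with the zeta1 variable (the loop body is elided, so the subtraction is an assumption), the update is

\zeta_l \leftarrow \zeta_l - \zeta_1, \qquad l = 1, \dots, L,

which keeps zeta_1 = 0 and removes the one redundant degree of freedom among the log-normalization constants.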
#define GAIN_INFO(g) lout<<" "#g"\ttc="<<g.tc<<" beta="<<g.beta<<" t0="<<g.t0<<endl;

lout << "[SATrain] *** Info: ***" << endl;
// ...
lout << " " << "m_dir_gap=" << m_dir_gap << endl;
lout << "[SATrain] *** [End] ***" << endl;
Vec< Prob > m_pi
the prior length distribution
Vec< double > m_vSampleExp2
the sample expectation of the squared feature counts (the second moment)
void GetParam(double *pdParams)
get the parameters
#define SAFE_DELETE(p)
memory release
DataT * Next(KeyT &key)
get next value
const char * Format(const char *p_pMessage,...)
printf-style formatting into a string
void UpdateDir(double *pDir, double *pGradient, const double *pParam)
compute the update direction
virtual void Update(double *pdParam, const double *pdDir, double dStep)
Update the parameters.
int GetFeatNum() const
get the ngram feature number
bool & bOutputCmd()
whether to output to the cmd window
clock_t Get()
get the time, but don't stop recording
void Random(Vocab *pv)
randomly generate the sequence over the vocabulary pv
#define lout_variable_precent(x, y)
double m_fRegL2
l2 regularization
void IterEnd(double *pFinalParams)
do something at the end of the SA iteration
virtual void SetParam(double *pdParams)
set the parameter.
void LineNormalize(Prob *pdProbs, int nNum)
void PrintInfo()
Print Information.
virtual int GetExtraValues(int t, double *pdValues)
calculate extra values which will be printed at each iteration
clock - used to record the time
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
void Reset(int p_len)
reset only changes the len variable; it does not change the buffer size.
clock_t Begin()
begin to record
void Reset(const char *pstr, int p_t0)
void RandSeq(Seq &seq, int nLen=-1)
get a random sequence
void Create(int maxlen, Model *pModel)
int GetZetaNum() const
get the zeta parameter number
CorpusBase * m_pCorpusTrain
training corpus
void UpdateGamma(int nIterNum)
Update the learning rate.
static double ToSecond(clock_t t)
convert a clock_t value to seconds
define a sequence including the word sequence and class sequence
virtual bool GetSeq(int nLine, Array< VocabID > &aSeq)=0
get the sequence at line nLine
a definition of the class Log, which can output to the cmd window and the log file simultaneously. In wb-log.cpp there is a Log variable "lout", which can be used directly, just like "cout". For example:
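A usage sketch assembled from the description above (illustrative; the printed values are hypothetical):

lout << "iter=" << 10 << " value=" << 0.5 << endl;   // written to both the cmd window and the log file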
void GetEmpVar(CorpusBase *pCorpus, Vec< double > &vVar)
calculate the empirical variance
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL)
int Find(T t)
Find a value and return the position.
int m_nMiniBatchSample
mini-batch for samples
#define SAFE_DELETE_ARRAY(p)
void Set(Array< int > &aInt, Vocab *pv)
transform the word sequence (from file) to Seq
void Progress(long long n=-1, bool bInit=false, long long total=100, const char *head="")
progress bar
int qsort_compare_double(const void *a, const void *b)
void PrintInfo()
print information
double Get(int t)
input the iteration number, get the learning rate
#define lout_variable_rate(x, y)
int GetNum() const
Get Array number.
void Add(T t)
Add a value to the tail of array.
virtual void GetSampleExp(VecShell< double > &vExp, VecShell< double > &vExp2, VecShell< double > &vLen)
calculate the expectations over the SA samples
Log lout
the definition is in wb-log.cpp
char * GetBuffer() const
get buffer
virtual void GetGradient(double *pdGradient)
calculate the gradient g(x)
virtual void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL, int nMinibatch=100)
reset
void WriteModel(int nEpoch)
Write Model.
String FileName()
if the string is a path, this function returns the file name.
Vec< double > m_vSampleExp
the sample expectation
Model * m_pModel
HRF model.
void Copy(VecShell< T > v)
int GetMaxLen() const
Get max-len.
virtual int GetNum() const
get the seq number
virtual bool Run(const double *pInitParams=NULL)
run the iterations, given the initial parameters
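Taken together, the members documented above suggest a driver of roughly the following shape. This is a hypothetical sketch assembled only from the signatures listed here; the solver's constructor and the setup of Model and CorpusBase are assumptions, not toolkit code:

// Hypothetical driver; types are those documented in this section.
SAfunc func;
func.Reset(pModel, pTrainCorpus, pValidCorpus, pTestCorpus, /*nMinibatch=*/100);

SAtrain solver(&func);   // assumption: the solver is constructed over the function object
solver.Run(NULL);        // NULL: fall back to the default initial parameters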