for (int i = 0; i < aSeqs.GetNum(); i++) {

aSeqs.SetNum(maxlen + 1);

for (int i = 1; i < aSeqs.GetNum(); i++) {
m_nMiniBatchSample = nMinibatch;
m_nMiniBatchTraining = nMinibatch;
m_TrainSelect.Reset(pTrain);
m_TrainCache.Reset(pTrain, pModel);

m_samplePi.Copy(m_trainPi);

lout << "Smoothing the pi" << endl;

for (int i = 1; i < m_trainPi.GetSize(); i++) {
    if (m_trainPi[i] > dMax) {

for (int i = 1; i < iMax; i++) {

for (int i = 1; i < m_samplePi.GetSize(); i++) {
    m_samplePi[i] = max((double)m_samplePi[i], 1e-5);

lout << "sample-pi = [ "; lout.output(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1); lout << "]" << endl;
m_pModel->SetPi(m_samplePi.GetBuf());
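// Note: as the fragments above suggest, m_samplePi is a smoothed copy of the
// empirical length distribution m_trainPi, floored at 1e-5, so that every
// length keeps a nonzero probability when installed as the sampling
// distribution via SetPi.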
m_vAllSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
m_vCurSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
m_vAllSampleLenCount.Fill(0);

m_nParamNum = m_pModel->GetParamNum() + m_pModel->GetMaxLen() + 1;

int nHiddenParamNum = m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum();
m_nParamNum += nHiddenParamNum * 2;
m_vExpValue.Reset(nHiddenParamNum);
m_vExp2Value.Reset(nHiddenParamNum);

m_nTrainHiddenSampleTimes = 1;
m_nSampleHiddenSampleTimes = 1;

GetEmpiricalFeatExp(m_vEmpFeatExp);
GetEmpiricalFeatVar(m_vEmpFeatVar);

lout << "[SAfunc] *** Info: *** " << endl;

lout << "[SAfunc] *** [End] ***" << endl;
m_pModel->RandSeq(seq, nLen);

if (pdParams == NULL)

for (int i = 0; i < m_pModel->GetParamNum(); i++) {
    m_values[i] = (PValue)pdParams[i];

m_pModel->SetParam(m_values.GetBuf());
m_pModel->ExactNormalize(1);

m_pModel->SetZeta(pdParams + m_pModel->GetParamNum());

double *p = pdParams + GetWeightNum() + GetZetaNum();
int nVarNum = m_vExpValue.GetSize();

if (m_fparm.Good()) {
    m_fparm.PrintArray("%f ", pdParams, m_nParamNum);
if (pdParams == NULL)

m_values.Reset(m_pModel->GetParamNum());
m_pModel->GetParam(m_values.GetBuf());
for (int i = 0; i < m_pModel->GetParamNum(); i++) {
    pdParams[i] = m_values[i];

pdParams += m_pModel->GetParamNum();
for (int i = 0; i <= m_pModel->GetMaxLen(); i++) {
    pdParams[i] = m_pModel->m_zeta[i];

pdParams += GetZetaNum();
for (int i = 0; i < m_vExpValue.GetSize(); i++) {
    *pdParams = m_vExpValue[i];

for (int i = 0; i < m_vExp2Value.GetSize(); i++) {
    *pdParams = m_vExp2Value[i];
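// Layout of the flat parameter vector pdParams, as implied by the
// SetParam/GetParam fragments above:
//   [0, GetWeightNum())                 : lambda, the feature and hidden weights
//   [GetWeightNum(), +GetZetaNum())     : zeta, the per-length log normalization constants
//   next m_vExpValue.GetSize() entries  : running estimates of E[f] for the hidden features
//   next m_vExp2Value.GetSize() entries : running estimates of E[f^2] for the hidden features
// which matches m_nParamNum = GetParamNum() + GetMaxLen() + 1 + 2 * nHiddenParamNum above.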
int nFeat = m_pModel->m_pFeat->GetNum();

m_matEmpiricalExp.Reset(omp_get_max_threads(), nFeat);
m_matEmpiricalExp.Fill(0);

lout.Progress(0, true, m_pCorpusTrain->GetNum() - 1, "[SAfunc] E[f] :");
#pragma omp parallel for firstprivate(aSeq)
for (int i = 0; i < m_pCorpusTrain->GetNum(); i++) {
    m_pCorpusTrain->GetSeq(i, aSeq);

    trfseq.Set(aSeq, m_pModel->GetVocab());
    ((trf::Model*)m_pModel)->FeatCount(trfseq, m_matEmpiricalExp[omp_get_thread_num()].GetBuf());

for (int t = 0; t < omp_get_max_threads(); t++) {
    vExp += m_matEmpiricalExp[t];

vExp /= m_pCorpusTrain->GetNum();

if (m_feat_mean.Good()) {
    lout << "Write Empirical Mean ..." << endl;

    for (int i = 0; i < aLogExp.GetSize(); i++) aLogExp[i] = log(vExp[i]);
    m_pModel->m_pFeat->WriteT(m_feat_mean, aLogExp.GetBuf());
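// The loop above forms the empirical feature expectation
//   E[f] = (1/N) * sum_{i=1}^{N} f(x_i),  with N = m_pCorpusTrain->GetNum(),
// by accumulating feature counts per OpenMP thread and averaging at the end.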
int nThread = omp_get_max_threads();
Prob *pi = m_trainPi.GetBuf();

int nFeatNum = m_pModel->m_pFeat->GetNum();

vVar.Reset(nFeatNum);

#pragma omp parallel for firstprivate(aSeq)
for (int l = 0; l < pCorpus->GetNum(); l++) {
    double *pExpf2 = matExpf2[omp_get_thread_num()].GetBuf();

    seq.Set(aSeq, m_pModel->m_pVocab);

    int nLen = min(m_pModel->GetMaxLen(), seq.GetLen());

    m_pModel->m_pFeat->Find(afeat, seq);
    for (int i = 0; i < afeat.GetNum(); i++) {
        int *p = aFeatNum.Insert(afeat[i], bFound);

    while ((pCount = iter.Next(nFeat))) {
        pExpf2[nFeat] += pow((double)(*pCount), 2);

for (int t = 0; t < nThread; t++) {
    vExpf2 += matExpf2[t];

vExpf2 /= pCorpus->GetNum();

lout.Progress(0, true, m_pModel->GetMaxLen(), "[SAfunc] E_l[f]:");
for (int nLen = 1; nLen <= m_pModel->GetMaxLen(); nLen++)

for (int i = 0; i < pCorpus->GetNum(); i++) {
    int nSeqLen = aSeq.GetNum();
    if (nLen == m_pModel->GetMaxLen()) {

#pragma omp parallel for firstprivate(aSeq)
for (int k = 0; k < aSeqId.GetNum(); k++)

    pCorpus->GetSeq(aSeqId[k], aSeq);

    seq.Set(aSeq, m_pModel->m_pVocab);
    ((trf::Model*)m_pModel)->FeatCount(seq, matExp_l[omp_get_thread_num()].GetBuf());

if (aSeqId.GetNum() > 0) {
    for (int t = 0; t < nThread; t++) {
        vExp_l += matExp_l[t];

    vExp_l /= aSeqId.GetNum();

for (int i = 0; i < nFeatNum; i++)
    vExpf2[i] -= pi[nLen] * pow(vExp_l[i], 2);

for (int i = 0; i < nFeatNum; i++) {

lout_warning("[EmpiricalVar] Exist zero expectation (zero-num=" << nZero << ")");
if (m_feat_var.Good()) {
    lout << "Write Empirical Var ..." << endl;

    for (int i = 0; i < aLogVar.GetSize(); i++) aLogVar[i] = log(vVar[i]);
    m_pModel->m_pFeat->WriteT(m_feat_var, aLogVar.GetBuf());
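// Taken together, the two passes above estimate the per-feature variance as
//   Var[f] = E[f^2] - sum_l pi_l * (E_l[f])^2,
// where pi_l is the empirical probability of length l and E_l[f] is the feature
// expectation over the training sentences of length l: the squared counts are
// accumulated first, then the per-length squared means are subtracted.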
int nThread = omp_get_max_threads();

m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp.Fill(0);

m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp2.Fill(0);

#pragma omp parallel for
for (int i = 0; i < aRandIdx.GetNum(); i++) {

    int tnum = omp_get_thread_num();

    Seq *pSeq = m_TrainCache.GetSeq(aRandIdx[i]);
    int nLen = pSeq->GetLen();

    for (int j = 0; j < m_nTrainHiddenSampleTimes; j++) {
        m_pModel->SampleHAndCGivenX(*pSeq);

    m_pModel->FeatCount(*pSeq, vExpGivenX);

    m_matEmpiricalExp[tnum] += vExpGivenX;
    for (int n = 0; n < vExpGivenX.GetSize(); n++) {
        m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);

    vTotalLen[tnum] += nLen;

    if (m_ftrain.Good()) {
        pSeq->Write(m_ftrain);

for (int t = 0; t < nThread; t++) {
    vExp += m_matEmpiricalExp[t];
    vExp2 += m_matEmpiricalExp2[t];
    nTotalLen += vTotalLen[t];

vExp /= m_nMiniBatchTraining;
vExp2 /= m_nMiniBatchTraining;

aRandIdx.SetNum(m_nMiniBatchTraining);

return GetEmpiricalExp(vExp, vExp2, aRandIdx);
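// For each selected training sentence x, the hidden variables are resampled
// m_nTrainHiddenSampleTimes times via SampleHAndCGivenX, so the accumulated
// feature counts approximate the conditional expectation E[f(x,h) | x];
// m_matEmpiricalExp2 collects the squared counts used later for the variance term.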
int nThread = omp_get_max_threads();

m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);

m_matSampleExp.Fill(0);
m_matSampleLen.Fill(0);

if (m_aSeqs.GetNum() != nThread) {
    for (int i = 0; i < nThread; i++) {
        m_aSeqs[i] = new Seq;
        m_pModel->RandSeq(*m_aSeqs[i]);

#pragma omp parallel for
for (int sample = 0; sample < m_nMiniBatchSample; sample++)

    int tid = omp_get_thread_num();
    m_pModel->Sample(*m_aSeqs[tid]);
    int nLen = min(m_pModel->GetMaxLen(), m_aSeqs[tid]->GetLen());

    for (int j = 0; j < m_nSampleHiddenSampleTimes; j++) {
        m_pModel->SampleHAndCGivenX(*m_aSeqs[tid]);

    m_pModel->FeatCount(*m_aSeqs[tid], m_matSampleExp[tid], m_trainPi[nLen] / m_pModel->m_pi[nLen]);
    m_matSampleLen[tid][nLen]++;
    vTotalLen[tid] += m_aSeqs[tid]->GetLen();

    if (m_fsamp.Good()) {
        m_aSeqs[tid]->Write(m_fsamp);

lout << " len-jump acc-rate=";

m_pModel->m_nLenJumpAccTimes = 0;
m_pModel->m_nLenJumpTotalTime = 0;

for (int t = 0; t < nThread; t++) {
    vExp += m_matSampleExp[t];
    vLen += m_matSampleLen[t];
    nTotalLen += vTotalLen[t];

m_vAllSampleLenCount += vLen;
m_vCurSampleLenCount.Copy(vLen);
m_nTotalSample += m_nMiniBatchSample;

vExp /= m_nMiniBatchSample;
vLen /= m_nMiniBatchSample;
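// Each drawn sample is counted with weight m_trainPi[nLen] / m_pModel->m_pi[nLen],
// re-weighting from the smoothed sampling length distribution back to the
// empirical one, so the averaged vExp estimates the model expectation under the
// training length probabilities.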
int nThread = omp_get_max_threads();

m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp.Fill(0);

m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp2.Fill(0);

m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
m_matSampleExp.Fill(0);

m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
m_matSampleLen.Fill(0);

Vec<int> aRanIdx(m_nMiniBatchTraining);
m_TrainSelect.GetIdx(aRanIdx.GetBuf(), m_nMiniBatchTraining);

#pragma omp parallel for firstprivate(aSeq) // make sure aSeq is private to each thread
for (int i = 0; i < m_nMiniBatchTraining; i++) {

    int tnum = omp_get_thread_num();

    m_pCorpusTrain->GetSeq(aRanIdx[i], aSeq);

    m_matEmpiricalExp[tnum] += vExpGivenX;
    for (int n = 0; n < vExpGivenX.GetSize(); n++) {
        m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);

    if (m_ftrain.Good()) {
        m_ftrain.PrintArray("%d ", aSeq.GetBuffer(), nLen);

    m_pModel->RandSeq(seq, nLen);
    seq.x.Set(aSeq, m_pModel->GetVocab());

    for (int j = 0; j < m_nCDSampleTimes; j++) {
        for (int nPos = 0; nPos < nLen; nPos++) {
            m_pModel->SampleC(seq, nPos);
            m_pModel->SampleW(seq, nPos);

        m_pModel->SampleHAndCGivenX(seq);

    m_pModel->FeatCount(seq, m_matSampleExp[tnum]);
    m_matSampleLen[tnum][nLen]++;

    if (m_fsamp.Good()) {

for (int t = 0; t < nThread; t++) {
    vEmpExp += m_matEmpiricalExp[t];
    vEmpExp2 += m_matEmpiricalExp2[t];

vEmpExp /= m_nMiniBatchTraining;
vEmpExp2 /= m_nMiniBatchTraining;

for (int t = 0; t < nThread; t++) {
    vSamExp += m_matSampleExp[t];
    vLen += m_matSampleLen[t];

m_vAllSampleLenCount += vLen;
m_vCurSampleLenCount.Copy(vLen);
m_nTotalSample += m_nMiniBatchTraining;

vSamExp /= m_nMiniBatchTraining;
vLen /= m_nMiniBatchTraining;
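// This follows the usual contrastive-divergence recipe: the chain starts at the
// training sentence (seq.x.Set(aSeq, ...)), and m_nCDSampleTimes sweeps of
// SampleC/SampleW over all positions, plus a hidden resampling step, produce the
// "negative" sample whose features are accumulated in m_matSampleExp.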
lout_assert(m_nMiniBatchSample == m_nMiniBatchTraining);

int nThread = omp_get_max_threads();

m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp.Fill(0);

m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
m_matEmpiricalExp2.Fill(0);

m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
m_matSampleExp.Fill(0);

m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
m_matSampleLen.Fill(0);

Vec<int> aRanIdx(m_nMiniBatchTraining);
Vec<int> aRanLen(m_nMiniBatchTraining);
m_TrainSelect.GetIdx(aRanIdx.GetBuf(), m_nMiniBatchTraining);

#pragma omp parallel for firstprivate(aSeq)
for (int i = 0; i < m_nMiniBatchTraining; i++) {

    int tnum = omp_get_thread_num();

    Seq *pSeq = m_TrainCache.GetSeq(aRanIdx[i]);
    int nLen = pSeq->GetLen();

    for (int j = 0; j < m_nTrainHiddenSampleTimes; j++) {
        m_pModel->SampleHAndCGivenX(*pSeq);

    m_pModel->FeatCount(*pSeq, vExpGivenX);

    m_matEmpiricalExp[tnum] += vExpGivenX;
    for (int n = 0; n < vExpGivenX.GetSize(); n++) {
        m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);

    if (m_ftrain.Good()) {
        pSeq->Write(m_ftrain);

if (m_threadData.GetNum() != nThread) {
    m_threadData.SetNum(nThread);
    for (int i = 0; i < m_threadData.GetNum(); i++) {
        m_threadData[i]->Create(m_pModel->GetMaxLen(), m_pModel);

#pragma omp parallel for
for (int i = 0; i < m_nMiniBatchTraining; i++)

    int threadID = omp_get_thread_num();

    int nLen = aRanLen[i];

    Seq *pSeq = m_threadData[threadID]->aSeqs[nLen];
    for (int j = 0; j < m_nSASampleTimes; j++)
        m_pModel->MarkovMove(*pSeq);

    for (int j = 0; j < m_nSampleHiddenSampleTimes; j++) {
        m_pModel->SampleHAndCGivenX(*pSeq);

    m_pModel->FeatCount(*pSeq, m_matSampleExp[threadID]);

    m_matSampleLen[threadID][nLen]++;

    if (m_fsamp.Good()) {
        pSeq->Write(m_fsamp);

for (int t = 0; t < nThread; t++) {
    vEmpExp += m_matEmpiricalExp[t];
    vEmpExp2 += m_matEmpiricalExp2[t];

vEmpExp /= m_nMiniBatchTraining;
vEmpExp2 /= m_nMiniBatchTraining;

for (int t = 0; t < nThread; t++) {
    vSamExp += m_matSampleExp[t];
    vLen += m_matSampleLen[t];

m_vAllSampleLenCount += vLen;
m_vCurSampleLenCount.Copy(vLen);
m_nTotalSample += m_nMiniBatchTraining;

vSamExp /= m_nMiniBatchTraining;
vLen /= m_nMiniBatchTraining;
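// Unlike the CD variant above, this SA variant keeps one persistent chain per
// length in m_threadData (created once) and advances it with m_nSASampleTimes
// MarkovMove steps per minibatch, so the negative samples come from persistent
// chains rather than from chains restarted at the data.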
int nThread = omp_get_max_threads();

int nCorpusNum = (nCalNum == -1) ? pCorpus->GetNum() : min(nCalNum, pCorpus->GetNum());
Title::Precent(0, true, nCorpusNum, "GetSampleLL");
#pragma omp parallel for firstprivate(aSeq)
for (int i = 0; i < nCorpusNum; i++) {

    if (aSeq.GetNum() > m_pModel->GetMaxLen()) {

    vSum[omp_get_thread_num()] += logprob;
    vNum[omp_get_thread_num()]++;

for (int t = 0; t < nThread; t++) {
SetParam(pFinalParams);

m_pModel->SetPi(m_trainPi.GetBuf());

m_pModel->SetPi(m_trainPi.GetBuf());
m_pModel->WriteT(strTempModel);
m_pModel->SetPi(m_samplePi.GetBuf());

int nWeightNum = m_pModel->GetParamNum();
m_vEmpExp.Reset(nWeightNum);
m_vEmpExp2.Reset(nWeightNum);
m_vSampleExp.Reset(nWeightNum);
m_vSampleLen.Reset(m_pModel->GetMaxLen() + 1);

PerfromCD(m_vEmpExp, m_vSampleExp, m_vEmpExp2, m_vSampleLen);

GetSampleExp(m_vSampleExp, m_vSampleLen);

PerfromSA(m_vEmpExp, m_vSampleExp, m_vEmpExp2, m_vSampleLen);
int nFeatNum = m_pModel->m_pFeat->GetNum();

for (int i = 0; i < nFeatNum; i++) {
    pdGradient[i] = (m_vEmpFeatExp[i] - m_vSampleExp[i]) / m_vEmpFeatVar[i];

for (int i = nFeatNum; i < nWeightNum; i++) {

    double dVar = m_vExp2Value[i - nFeatNum] - pow(m_vExpValue[i - nFeatNum], 2);
    pdGradient[i] = (m_vEmpExp[i] - m_vSampleExp[i]) / max(m_var_gap, dVar);

    pdGradient[i] = m_vEmpExp[i] - m_vSampleExp[i];
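// Gradient scaling above: for the explicit features the mismatch
// (E_emp[f] - E_model[f]) is divided by the empirical variance m_vEmpFeatVar,
// and for the hidden weights by the running variance E[f^2] - E[f]^2 floored at
// m_var_gap; the unscaled difference appears to be an alternative branch in
// this extract.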
for (int l = 0; l <= m_pModel->GetMaxLen(); l++) {
    if (m_pModel->m_pi[l] > 0) {
        pdGradient[nWeightNum + l] = m_vSampleLen[l] / m_pModel->m_pi[l];

        pdGradient[nWeightNum + l] = 0;

double *pgExp = pdGradient + nWeightNum + GetZetaNum();
double *pgExp2 = pgExp + m_vExpValue.GetSize();
for (int i = nFeatNum; i < nWeightNum; i++) {
    pgExp[i - nFeatNum] = m_vEmpExp[i] - m_vExpValue[i - nFeatNum];
    pgExp2[i - nFeatNum] = m_vEmpExp2[i] - m_vExp2Value[i - nFeatNum];

m_fvar.PrintArray("%f ", m_vExpValue.GetBuf(), m_vExpValue.GetSize());
m_fvar.PrintArray("%f ", m_vExp2Value.GetBuf(), m_vExp2Value.GetSize());
for (int i = 0; i < m_vExpValue.GetSize(); i++)
    m_fvar.Print("%f ", m_vExp2Value[i] - pow(m_vExpValue[i], 2));

if (m_fgrad.Good()) {
    m_fgrad.PrintArray("%f ", pdGradient + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());

m_fexp.PrintArray("%f ", m_vEmpExp.GetBuf() + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());
m_fexp.PrintArray("%f ", m_vSampleExp.GetBuf() + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());
m_pModel->SetPi(m_trainPi.GetBuf());

Vec<Prob> samsZeta(m_pModel->m_zeta.GetSize());
Vec<Prob> trueZeta(m_pModel->m_zeta.GetSize());

samsZeta = m_pModel->m_zeta;

if (m_pCorpusTrain && m_bPrintTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
if (m_pCorpusValid && m_bPrintValie) pdValues[nValue++] = -GetLL(m_pCorpusValid);
if (m_pCorpusTest && m_bPrintTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);

if (m_pModel->m_hlayer * m_pModel->m_hnode < 5 && m_pModel->m_pVocab->GetSize() < 100) {
    Vec<LogP> oldZeta(m_pModel->m_zeta.GetSize());
    oldZeta = m_pModel->m_zeta;

    m_pModel->ExactNormalize();
    trueZeta.Copy(m_pModel->m_zeta);
    if (m_pCorpusTrain && m_bPrintTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
    if (m_pCorpusValid && m_bPrintValie) pdValues[nValue++] = -GetLL(m_pCorpusValid);
    if (m_pCorpusTest && m_bPrintTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);

    m_pModel->SetZeta(oldZeta.GetBuf());

if (!m_fdbg.Good()) {
    m_fdbg.Open("SAfunc.dbg", "wt");

m_vAllSampleLenCount *= 1.0 / m_nTotalSample;
m_vCurSampleLenCount *= 1.0 / m_nMiniBatchSample;
m_fdbg.Print("pi_cur_: "); m_fdbg.PrintArray("%f ", m_vCurSampleLenCount.GetBuf() + 1, m_vCurSampleLenCount.GetSize() - 1);
m_fdbg.Print("pi_all_: "); m_fdbg.PrintArray("%f ", m_vAllSampleLenCount.GetBuf() + 1, m_vAllSampleLenCount.GetSize() - 1);
m_fdbg.Print("pi_true: "); m_fdbg.PrintArray("%f ", m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
m_fdbg.Print("z_ais__: "); m_fdbg.PrintArray("%f ", m_pModel->m_zeta.GetBuf() + 1, m_pModel->m_zeta.GetSize() - 1);
m_fdbg.Print("z_sams_: "); m_fdbg.PrintArray("%f ", samsZeta.GetBuf() + 1, samsZeta.GetSize() - 1);
m_fdbg.Print("z_true_: "); m_fdbg.PrintArray("%f ", trueZeta.GetBuf() + 1, trueZeta.GetSize() - 1);

m_vAllSampleLenCount *= m_nTotalSample;
m_vCurSampleLenCount *= m_nMiniBatchSample;

m_pModel->SetPi(m_samplePi.GetBuf());
sscanf(pstr, "%lf,%lf", &tc, &beta);

gamma = 1.0 / (tc + pow(t, beta));

gamma = 1.0 / (tc + pow(t0, beta) + t - t0);
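// The two assignments above correspond to the gain schedule (apparently
// switched on the iteration threshold t0):
//   gamma_t = 1 / (tc + t^beta)                for t <= t0,
//   gamma_t = 1 / (tc + t0^beta + (t - t0))    for t >  t0,
// i.e. a polynomial decay that falls back to a 1/t rate after iteration t0,
// the form required for stochastic-approximation convergence.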
double *pdCurParams = new double[m_pfunc->GetParamNum()];
double *pdCurGradient = new double[m_pfunc->GetParamNum()];
double *pdCurDir = new double[m_pfunc->GetParamNum()];
double dCurValue = 0;
double dExValues[Func::cn_exvalue_max_num];

double *pdAvgParams = NULL;

pdAvgParams = new double[m_pfunc->GetParamNum()];

for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
    pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1;

memset(pdCurGradient, 0, sizeof(double) * m_pfunc->GetParamNum());
memset(pdCurDir, 0, sizeof(double) * m_pfunc->GetParamNum());

m_pfunc->SetParam(pdCurParams);

lout_Solve << "************* Training Begin *****************" << endl;

for (m_nIterNum = m_nIterMin; m_nIterNum <= m_nIterMax; m_nIterNum++)

m_pfunc->SetParam(pdCurParams);

m_pfunc->GetGradient(pdCurGradient);

dCurValue = m_pfunc->GetValue();

memcpy(pdAvgParams, pdCurParams, sizeof(pdCurParams[0]) * m_pfunc->GetParamNum());

for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
    pdAvgParams[i] += (pdCurParams[i] - pdAvgParams[i]) / (m_nIterNum - m_nAvgBeg);
cout << setprecision(4) << setiosflags(ios::fixed);
lout << " epoch=" << m_fEpochNum;
cout << setprecision(2) << setiosflags(ios::fixed);
lout << " time=" << m_dSpendMinute << "m";
lout << (bAvg ? " [Avg]" : " ");

if (bAvg) pSA->SetParam(pdAvgParams);

lout << "ExValues={ ";
cout << setprecision(3) << setiosflags(ios::fixed);
for (int i = 0; i < nExValueNum; i++)
    lout << dExValues[i] << " ";
lout << "}" << endl;

if (m_aWriteAtIter.Find(m_nIterNum) != -1)

for (int i = 1; i < pSA->GetZetaNum(); i++) {

if (bAvg) pSA->SetParam(pdCurParams);

if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) {

UpdateGamma(m_nIterNum);

UpdateDir(pdCurDir, pdCurGradient, pdCurParams);

Update(pdCurParams, pdCurDir, 0);

lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl;

if (bAvg) pSA->IterEnd(pdAvgParams);
else pSA->IterEnd(pdCurParams);
m_gamma_lambda = m_gain_lambda.Get(nIterNum);
m_gamma_hidden = m_gain_hidden.Get(nIterNum);
m_gamma_zeta = m_gain_zeta.Get(nIterNum);

m_gamma_var = m_gain_var.Get(nIterNum);

    << " g_hidden=" << m_gamma_hidden
    << " g_zeta=" << m_gamma_zeta
    << " momentum=" << m_fMomentum

for (int i = 0; i < nNgramFeatNum; i++) {
    pDir[i] = m_gamma_lambda * pGradient[i];

for (int i = nNgramFeatNum; i < nWeightNum; i++) {
    pDir[i] = m_fMomentum * pDir[i] + m_gamma_hidden * pGradient[i];

for (int i = nWeightNum + nZetaNum; i < pSA->GetParamNum(); i++) {
    pDir[i] = m_gamma_var * pGradient[i];

for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
    pDir[i] = m_gamma_zeta * pGradient[i];

int n_dgap_cutnum = CutValue(pDir, nWeightNum, m_dir_gap);
int n_zgap_cutnum = CutValue(pDir + nWeightNum, nZetaNum, m_zeta_gap);

lout << " cut-zeta=";
if (m_bUpdate_lambda) {
    for (int i = 0; i < nWeightNum; i++) {
        pdParam[i] += pdDir[i];

    for (int i = 0; i < nWeightNum; i++) {
        pdParam[i] += m_fMomentum * pdDir[i];

for (int i = nWeightNum + nZetaNum; i < pSA->GetParamNum(); i++) {
    pdParam[i] += pdDir[i];

if (m_bUpdate_zeta) {
    for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
        pdParam[i] += pdDir[i];

    double zeta1 = pdParam[nWeightNum + 1];
    for (int i = nWeightNum + 1; i < nWeightNum + nZetaNum; i++) {
        pdParam[i] -= zeta1;
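// Subtracting zeta1 above anchors zeta_1 = 0 after every update: only the
// relative normalization constants between lengths are identifiable, which
// also matches the note on m_zeta in the member list below.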
#define GAIN_INFO(g) lout << " " #g "\ttc=" << g.tc << " beta=" << g.beta << " t0=" << g.t0 << endl;

lout << "[SATrain] *** Info: ***" << endl;

lout << "[SATrain] *** [End] ***" << endl;

for (int i = 0; i < num; i++) {

    else if (p[i] < -gap) {
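The body of CutValue is only fragmentarily visible above; a minimal sketch consistent with those fragments and with the call sites CutValue(pDir, nWeightNum, m_dir_gap) might look as follows (a reconstruction, not the original implementation):

    int CutValue(double *p, int num, double gap)
    {
        // clip every entry of p[0..num) into [-gap, gap] and report how many
        // entries were cut; gap <= 0 is treated here as "no clipping" (assumption)
        int nCut = 0;
        if (gap <= 0) return 0;
        for (int i = 0; i < num; i++) {
            if (p[i] > gap)       { p[i] = gap;  nCut++; }
            else if (p[i] < -gap) { p[i] = -gap; nCut++; }
        }
        return nCut;
    }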
double Get(int t)
input the iteration number and get the learning rate
#define SAFE_DELETE(p)
memory release
DataT * Next(KeyT &key)
get next value
const char * Format(const char *p_pMessage,...)
format print to string
bool & bOutputCmd()
whether to output to the cmd window
void RandSeq(Seq &seq, int nLen=-1)
get a random sequence
clock_t Get()
get the time, but don't stop recording
virtual void SetParam(double *pdParams)
set the parameter.
void PrintInfo()
Print Information.
void LineNormalize(Prob *pdProbs, int nNum)
void PerfromCD(VecShell< double > &vEmpExp, VecShell< double > &vSamExp, VecShell< double > &vEmpExp2, VecShell< double > &vLen)
perform the CD process and get the expectations
int GetEmpiricalExp(VecShell< double > &vExp, VecShell< double > &vExp2, Array< int > &aRandIdx)
calculate the empirical expectations of the given sequences
clock - used to record the time
int m_nMiniBatchTraining
mini-batch for training set
hidden-random-field model
virtual bool Run(const double *pInitParams=NULL)
Run the iterations. Input the initial parameters.
CorpusBase * m_pCorpusTrain
training corpus
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
int GetWeightNum() const
get the number of weight parameters (lambda)
void PerfromSA(VecShell< double > &vEmpExp, VecShell< double > &vSamExp, VecShell< double > &vEmpExp2, VecShell< double > &vLen)
perform SA process and get the expectation
T * GetBuffer(int i=0) const
get the buffer pointer
clock_t Begin()
begin to record
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL, int nMinibatch=100)
reset
int CutValue(double *p, int num, double gap)
clip the entries of an array to the range [-gap, gap]
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL)
void Create(int maxlen, Model *pModel)
static double ToSecond(clock_t t)
transform a clock_t value to seconds
void GetEmpiricalFeatExp(Vec< double > &vExp)
get the empirical expectation of the features
define a sequence including the word sequence and class sequence
virtual bool GetSeq(int nLine, Array< VocabID > &aSeq)=0
get the sequence at line nLine
a definition of the class Log, which can output to the cmd window and to the log file simultaneously. In wb-log.cpp there is a Log variable "lout", which can be used just like "cout". For example:
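A minimal usage sketch (the header name wb-log.h and the namespace are assumptions; the text above only names wb-log.cpp):

    #include "wb-log.h"   // assumed header name
    using namespace wb;   // assumed namespace of the Log class
    using namespace std;

    int main()
    {
        lout << "Hello Log" << endl;  // printed to the cmd window and written to the log file
        lout.bOutputCmd() = false;    // e.g. switch off the cmd-window output (see bOutputCmd() above)
        lout << "this line goes to the log file only" << endl;
        return 0;
    }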
int GetZetaNum() const
get the zeta parameter number
int Find(T t)
Find a value and return the position.
void WriteModel(int nEpoch)
Write Model.
#define SAFE_DELETE_ARRAY(p)
Vec< LogP > m_zeta
the estimated normalization constants (zeta_1 is fixed to 0)
void Set(Array< int > &aInt, Vocab *pv)
transform the word sequence (from file) to Seq
virtual void GetGradient(double *pdGradient)
calculate the gradient g(x)
void SetNum(int n)
Set the array size, allocating enough memory.
void Progress(long long n=-1, bool bInit=false, long long total=100, const char *head="")
progress bar
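A usage sketch inferred from the calls in the listing above (for instance lout.Progress(0, true, m_pCorpusTrain->GetNum() - 1, "[SAfunc] E[f] :")); the exact semantics of the default n = -1 are not shown in this extract:

    lout.Progress(0, true, nTotal - 1, "work:");  // initialize: position 0, range [0, nTotal-1]
    for (int i = 0; i < nTotal; i++) {
        // ... handle item i ...
        lout.Progress(i);                         // advance the bar to position i
    }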
void GetParam(double *pdParams)
get the parameters
int GetParamNum() const
get the parameter number
#define lout_variable_rate(x, y)
int GetNum() const
Get Array number.
void Add(T t)
Add a value to the tail of array.
void GetEmpiricalFeatVar(Vec< double > &vVar)
calculate the empirical variance of the features
Log lout
the definition is in wb-log.cpp
void IterEnd(double *pFinalParams)
do something at the end of the SA iteration
Model * m_pModel
HRF model.
void Reset(const char *pstr, int p_t0)
double GetSampleLL(CorpusBase *pCorpus, int nCalNum=-1, int method=0)
calculate the log-likelihood on the given corpus (sequences longer than the maximum length are skipped)
char * GetBuffer() const
get buffer
int m_nMiniBatchSample
mini-batch for samples
void UpdateDir(double *pDir, double *pGradient, const double *pParam)
compute the update direction
void UpdateGamma(int nIterNum)
Update the learning rate.
clock_t End()
record end and return the time
virtual int GetExtraValues(int t, double *pdValues)
calculate extra values which will be printed at each iteration
String FileName()
if the string is a path, this function returns the file name.
void RandSeq(Seq &seq, int nLen=-1)
Randomly initialize a sequence; if nLen==-1, the length is randomized too.
int GetSampleExp(VecShell< double > &vExp, VecShell< double > &vLen)
calculate the expectations of the SA samples
void PrintInfo()
print information
void Copy(VecShell< T > v)
int GetNgramFeatNum() const
get the ngram feature number
virtual void Update(double *pdParam, const double *pdDir, double dStep)
Update the parameters.
virtual int GetNum() const
get the seq number