TRF Language Model
hrf-sa-train.cpp
1 #include "hrf-sa-train.h"
2 #include "wb-log.h"
3 
4 namespace hrf
5 {
6  ThreadData::~ThreadData()
7  {
8  for (int i = 0; i < aSeqs.GetNum(); i++) {
9  SAFE_DELETE(aSeqs[i]);
10  }
11  }
12  void ThreadData::Create(int maxlen, Model *pModel)
13  {
14  aSeqs.SetNum(maxlen + 1);
15  aSeqs.Fill(NULL);
16  for (int i = 1; i < aSeqs.GetNum(); i++) {
17  aSeqs[i] = new Seq;
18  pModel->RandSeq(*aSeqs[i], i);
19  }
20  }
21 
22  void SAfunc::Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid /* = NULL */, CorpusBase *pTest /* = NULL */, int nMinibatch /* = 100 */)
23  {
24  MLfunc::Reset(pModel, pTrain, pValid, pTest);
25  m_nMiniBatchSample = nMinibatch;
26  m_nMiniBatchTraining = nMinibatch;
27  m_TrainSelect.Reset(pTrain);
28  m_TrainCache.Reset(pTrain, pModel);
29  /*
30  sampling pi
31  */
32  m_samplePi.Copy(m_trainPi);
33 
34  lout << "Smoothing the pi" << endl;
35  double dMax = 0;
36  int iMax = 0;
37  for (int i = 1; i < m_trainPi.GetSize(); i++) {
38  if (m_trainPi[i] > dMax) {
39  dMax = m_trainPi[i];
40  iMax = i;
41  }
42  }
43  for (int i = 1; i < iMax; i++) {
44  m_samplePi[i] = dMax;
45  }
46  for (int i = 1; i < m_samplePi.GetSize(); i++) {
47  m_samplePi[i] = max((double)m_samplePi[i], 1e-5);
48  }
49  trf::LineNormalize(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
50 
51  lout << "sample-pi = [ "; lout.output(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1); lout << "]" << endl;
52  m_pModel->SetPi(m_samplePi.GetBuf());
53 
54  /* save the sample count */
55  m_vAllSampleLenCount.Reset(m_pModel->GetMaxLen()+1);
56  m_vCurSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
57  m_vAllSampleLenCount.Fill(0);
58  m_nTotalSample = 0;
59 
60  /* for SA estimation, there are two sets of parameters,
61  i.e. the feature weights \lambda and the normalization constants \zeta
62  */
63  m_nParamNum = m_pModel->GetParamNum() + m_pModel->GetMaxLen() + 1;
64 #ifdef _Var
65  /* set the var as part of the parameters */
66  /* only record the var for hidden-dependent parameters */
67  int nHiddenParamNum = m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum();
68  m_nParamNum += nHiddenParamNum * 2;
69  m_vExpValue.Reset(nHiddenParamNum);
70  m_vExp2Value.Reset(nHiddenParamNum);
71  m_vExpValue.Fill(0);
72  m_vExp2Value.Fill(1);
73 // m_vEstimatedVar.Reset(m_pModel->GetParamNum());
74 // m_vEstimatedVar.Fill(1);
75 #endif
76 
77  m_nTrainHiddenSampleTimes = 1;
78  m_nSampleHiddenSampleTimes = 1;
79  m_nCDSampleTimes = 1;
80  m_nSASampleTimes = 1;
81 
82  // count feature expectation and variance
83  GetEmpiricalFeatExp(m_vEmpFeatExp);
84  GetEmpiricalFeatVar(m_vEmpFeatVar);
85 
86  }
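A note on the parameter vector assembled in Reset (the layout below is notation introduced for this note, not taken from the source): the SA estimator treats the feature weights and the per-length normalization constants as one joint parameter vector,

\[ \theta = \big(\,\lambda_1,\dots,\lambda_P,\;\zeta_0,\dots,\zeta_L\,\big), \qquad P = \texttt{GetParamNum()},\quad L = \texttt{GetMaxLen()}, \]

which gives m_nParamNum = P + L + 1. When the _Var build option is defined, two further blocks of size nHiddenParamNum each are appended to hold the running estimates of E[f] and E[f^2] for the hidden-dependent parameters, so m_nParamNum grows by 2 * nHiddenParamNum.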
87  void SAfunc::PrintInfo()
88  {
89  lout << "[SAfunc] *** Info: *** " << endl;
90  lout << " "; lout_variable(m_nMiniBatchTraining);
91  lout << " "; lout_variable(m_nMiniBatchSample);
92  lout << " "; lout_variable(m_nTrainHiddenSampleTimes);
93  lout << " "; lout_variable(m_nSampleHiddenSampleTimes);
94  lout << " "; lout_variable(m_nCDSampleTimes);
95  lout << " "; lout_variable(m_nSASampleTimes);
96 #ifdef _Var
97  lout << " "; lout_variable(m_var_gap);
98 #endif
99  lout << " "; lout_variable(m_bSAMSSample);
100  lout << " [AISConfig for Z] nChain=" << m_AISConfigForZ.nChain << " nIter=" << m_AISConfigForZ.nInter << endl;
101  lout << " [AISConfig for LL] nChain=" << m_AISConfigForP.nChain << " nIter=" << m_AISConfigForP.nInter << endl;
102  lout << "[SAfunc] *** [End] ***" << endl;
103  }
104  void SAfunc::RandSeq(Seq &seq, int nLen /* = -1 */)
105  {
106  m_pModel->RandSeq(seq, nLen);
107  }
108  void SAfunc::SetParam(double *pdParams)
109  {
110  if (pdParams == NULL)
111  return;
112 
113  /* set lambda */
114  for (int i = 0; i < m_pModel->GetParamNum(); i++) {
115  m_values[i] = (PValue)pdParams[i];
116  }
117  m_pModel->SetParam(m_values.GetBuf());
118  m_pModel->ExactNormalize(1); // only calculate Z_1
119 
120  /* set zeta */
121  m_pModel->SetZeta(pdParams + m_pModel->GetParamNum());
122 
123 #ifdef _Var
124  /* set var */
125  double *p = pdParams + GetWeightNum() + GetZetaNum();
126  int nVarNum = m_vExpValue.GetSize();
127  m_vExpValue.Copy( VecShell<double>(p, nVarNum));
128  m_vExp2Value.Copy(VecShell<double>(p + nVarNum, nVarNum));
129 #endif
130  if (m_fparm.Good()) {
131  m_fparm.PrintArray("%f ", pdParams, m_nParamNum);
132  }
133  }
134  void SAfunc::GetParam(double *pdParams)
135  {
136  if (pdParams == NULL)
137  return;
138 
139  /* get lambda */
140  m_values.Reset(m_pModel->GetParamNum());
141  m_pModel->GetParam(m_values.GetBuf());
142  for (int i = 0; i < m_pModel->GetParamNum(); i++) {
143  pdParams[i] = m_values[i];
144  }
145  /* get zeta */
146  pdParams += m_pModel->GetParamNum();
147  for (int i = 0; i <= m_pModel->GetMaxLen(); i++) {
148  pdParams[i] = m_pModel->m_zeta[i];
149  }
150 #ifdef _Var
151  /* get var */
152  pdParams += GetZetaNum();
153  for (int i = 0; i < m_vExpValue.GetSize(); i++) {
154  *pdParams = m_vExpValue[i];
155  pdParams++;
156  }
157  for (int i = 0; i < m_vExp2Value.GetSize(); i++) {
158  *pdParams = m_vExp2Value[i];
159  pdParams++;
160  }
161 #endif
162 
163  }
164 
165  void SAfunc::GetEmpiricalFeatExp(Vec<double> &vExp)
166  {
167  /* for empirical exp */
169  int nFeat = m_pModel->m_pFeat->GetNum();
170  vExp.Reset(nFeat);
171  m_matEmpiricalExp.Reset(omp_get_max_threads(), nFeat);
172  m_matEmpiricalExp.Fill(0);
173 
174  lout.Progress(0, true, m_pCorpusTrain->GetNum()-1, "[SAfunc] E[f] :");
175 #pragma omp parallel for firstprivate(aSeq)
176  for (int i = 0; i < m_pCorpusTrain->GetNum(); i++) {
177  m_pCorpusTrain->GetSeq(i, aSeq);
178  trf::Seq trfseq;
179  trfseq.Set(aSeq, m_pModel->GetVocab());
180  ((trf::Model*)m_pModel)->FeatCount(trfseq, m_matEmpiricalExp[omp_get_thread_num()].GetBuf());
181 #pragma omp critical
182  lout.Progress();
183  //lout.output(m_matEmpiricalExp[omp_get_thread_num()].GetBuf() + m_pModel->m_pFeat->GetNum(), 10);
184  }
185 
186  vExp.Fill(0);
187  for (int t = 0; t < omp_get_max_threads(); t++) {
188  vExp += m_matEmpiricalExp[t]; // E[f]
189  }
190  vExp /= m_pCorpusTrain->GetNum(); // E[f]
191 
192  if (m_feat_mean.Good()) {
193  lout << "Write Empirical Mean ..." << endl;
194  Vec<PValue> aLogExp(vExp.GetSize());
195  for (int i = 0; i < aLogExp.GetSize(); i++) aLogExp[i] = log(vExp[i]);
196  m_pModel->m_pFeat->WriteT(m_feat_mean, aLogExp.GetBuf());
197  }
198  }
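GetEmpiricalFeatExp computes the empirical expectation of the discrete features over the full training corpus; in notation introduced for this note,

\[ \hat E_{\tilde p}[f_k] \;=\; \frac{1}{N}\sum_{i=1}^{N} f_k\big(x^{(i)}\big), \qquad N = \texttt{m\_pCorpusTrain->GetNum()}, \]

where f_k(x) is the count of feature k in sentence x. Each OpenMP thread accumulates counts into its own row of m_matEmpiricalExp, and the rows are summed and divided by N afterwards, so only the progress-bar call needs the critical section.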
199  void SAfunc::GetEmpiricalFeatVar(Vec<double> &vVar)
200  {
201  int nThread = omp_get_max_threads();
202  Prob *pi = m_trainPi.GetBuf();
203  CorpusBase *pCorpus = m_pCorpusTrain;
204  int nFeatNum = m_pModel->m_pFeat->GetNum();
205 
206  vVar.Reset(nFeatNum);
207  vVar.Fill(0);
209  Vec<double> vExpf2(nFeatNum);
210  Vec<double> vExp_l(nFeatNum);
211 
212  Mat<double> matExpf2(nThread, vExpf2.GetSize());
213  Mat<double> matExp_l(nThread, vExp_l.GetSize());
214 
215  vExpf2.Fill(0);
216  vExp_l.Fill(0);
217  matExpf2.Fill(0);
218  matExp_l.Fill(0);
219 
221  lout.Progress(0, true, pCorpus->GetNum() - 1, "[SAfunc] E[f^2]:");
222 #pragma omp parallel for firstprivate(aSeq)
223  for (int l = 0; l < pCorpus->GetNum(); l++) {
224  double *pExpf2 = matExpf2[omp_get_thread_num()].GetBuf();
225  pCorpus->GetSeq(l, aSeq);
226  trf::Seq seq;
227  seq.Set(aSeq, m_pModel->m_pVocab);
228 
229  int nLen = min(m_pModel->GetMaxLen(), seq.GetLen());
230 
231  LHash<int, int> aFeatNum;
232  bool bFound;
233  Array<int> afeat;
234  m_pModel->m_pFeat->Find(afeat, seq);
235  for (int i = 0; i < afeat.GetNum(); i++) {
236  int *p = aFeatNum.Insert(afeat[i], bFound);
237  if (!bFound) *p = 0;
238  (*p) += 1;
239  }
240  LHashIter<int, int> iter(&aFeatNum);
241  int *pCount;
242  int nFeat;
243  while ((pCount = iter.Next(nFeat)) != NULL) {
244  pExpf2[nFeat] += pow((double)(*pCount), 2);
245  }
246 #pragma omp critical
247  lout.Progress();
248  }
249 
250  vExpf2.Fill(0);
251  for (int t = 0; t < nThread; t++) {
252  vExpf2 += matExpf2[t];
253  }
254  vExpf2 /= pCorpus->GetNum();
255 
256 
257  //lout_variable(aExpFeatSqu[38272]);
258 
261  lout.Progress(0, true, m_pModel->GetMaxLen(), "[SAfunc] E_l[f]:");
262  for (int nLen = 1; nLen <= m_pModel->GetMaxLen(); nLen++)
263  {
264  matExp_l.Fill(0);
265 
266  Array<int> aSeqId;
268  for (int i = 0; i < pCorpus->GetNum(); i++) {
269  pCorpus->GetSeq(i, aSeq);
270  int nSeqLen = aSeq.GetNum();
271  if (nLen == m_pModel->GetMaxLen()) {
272  if (nSeqLen < nLen)
273  continue;
274  }
275  else {
276  if (nSeqLen != nLen)
277  continue;
278  }
279  aSeqId.Add(i);
280  }
281 
282 #pragma omp parallel for firstprivate(aSeq)
283  for (int k = 0; k < aSeqId.GetNum(); k++)
284  {
285  pCorpus->GetSeq(aSeqId[k], aSeq);
286 
287  trf::Seq seq;
288  seq.Set(aSeq, m_pModel->m_pVocab);
289  ((trf::Model*)m_pModel)->FeatCount(seq, matExp_l[omp_get_thread_num()].GetBuf());
290  }
291 
292  if (aSeqId.GetNum() > 0) {
293  vExp_l.Fill(0);
294  for (int t = 0; t < nThread; t++) {
295  vExp_l += matExp_l[t];
296  }
297  vExp_l /= aSeqId.GetNum();
298  }
299  else {
300  vExp_l.Fill(0);
301  }
302 
303 
304  for (int i = 0; i < nFeatNum; i++)
305  vExpf2[i] -= pi[nLen] * pow(vExp_l[i], 2);
306 
307  lout.Progress(nLen);
308  }
309 
311  int nZero = 0;
312  for (int i = 0; i < nFeatNum; i++) {
313  if (vExpf2[i] == 0)
314  nZero++;
315  }
316  if (nZero > 0) {
317  lout_warning("[EmpiricalVar] Exist zero expectation (zero-num=" << nZero << ")");
318  }
319 
320 
322  vVar = vExpf2;
323 
324  // Write
325  if (m_feat_var.Good()) {
326  lout << "Write Empirical Var ..." << endl;
327  Vec<PValue> aLogVar(vVar.GetSize());
328  for (int i = 0; i < aLogVar.GetSize(); i++) aLogVar[i] = log(vVar[i]);
329  m_pModel->m_pFeat->WriteT(m_feat_var, aLogVar.GetBuf());
330  }
331  }
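A compact way to read the computation above (notation introduced for this note): with \(\pi_l\) = m_trainPi[l] and \(\hat E_l[\cdot]\) the average over training sentences assigned to length bucket l (sentences longer than L = GetMaxLen() fall into the last bucket),

\[ \widehat{\mathrm{Var}}[f_k] \;=\; \frac{1}{N}\sum_{i=1}^{N} f_k\big(x^{(i)}\big)^2 \;-\; \sum_{l=1}^{L}\pi_l\,\hat E_l[f_k]^2 . \]

If \(\pi\) is the empirical length distribution of the corpus, this equals \(\sum_l \pi_l \mathrm{Var}_l[f_k]\), i.e. the length-averaged within-length variance of the feature counts. Zero entries trigger the warning above because GetGradient later divides by these values.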
332  int SAfunc::GetEmpiricalExp(VecShell<double> &vExp, VecShell<double> &vExp2, Array<int> &aRandIdx)
333  {
334  int nThread = omp_get_max_threads();
335  /* for empirical exp */
336  m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
337  m_matEmpiricalExp.Fill(0);
338 
339  /*for empirical variance estimation */
340  m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
341  m_matEmpiricalExp2.Fill(0);
342 
343  Vec<int> vTotalLen(nThread);
344  vTotalLen.Fill(0);
345 
346  /* count the empirical expectation */
347 #pragma omp parallel for
348  for (int i = 0; i < aRandIdx.GetNum(); i++) {
349 
350  int tnum = omp_get_thread_num();
351  Vec<double> vExpGivenX(m_pModel->GetParamNum());
352  vExpGivenX.Fill(0);
353 
354  Seq *pSeq = m_TrainCache.GetSeq(aRandIdx[i]);
355  int nLen = pSeq->GetLen();
356 
357  /* sample H */
358  for (int j = 0; j < m_nTrainHiddenSampleTimes; j++) {
359  m_pModel->SampleHAndCGivenX(*pSeq);
360  }
361 
362  //m_pModel->GetHiddenExp(VecShell<VocabID>(aSeq.GetBuffer(), nLen), vExpGivenX.GetBuf());
363  m_pModel->FeatCount(*pSeq, vExpGivenX); // count
364 
365  m_matEmpiricalExp[tnum] += vExpGivenX;
366  for (int n = 0; n < vExpGivenX.GetSize(); n++) {
367  m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);
368  }
369  vTotalLen[tnum] += nLen;
370 
371 
372  if (m_ftrain.Good()) {
373 #pragma omp critical
374  {
375  pSeq->Write(m_ftrain);
376  }
377  }
378 
379  }
380 
381 
382  // only change the hidden-dependent values
383  vExp.Fill(0);
384  vExp2.Fill(0);
385  int nTotalLen = 0;
386  for (int t = 0; t < nThread; t++) {
387  vExp += m_matEmpiricalExp[t]; // E[f]
388  vExp2 += m_matEmpiricalExp2[t]; // E[f^2]
389  nTotalLen += vTotalLen[t];
390  }
391  vExp /= m_nMiniBatchTraining;
392  vExp2 /= m_nMiniBatchTraining;
393 
394  return nTotalLen;
395  }
396  int SAfunc::GetEmpiricalExp(VecShell<double> &vExp, VecShell<double> &vExp2)
397  {
398  //Array<VocabID> aSeq;
399  Array<int> aRandIdx;
400  aRandIdx.SetNum(m_nMiniBatchTraining);
401  m_TrainSelect.GetIdx(aRandIdx.GetBuffer(), aRandIdx.GetNum());
402 
403  return GetEmpiricalExp(vExp, vExp2, aRandIdx);
404  }
405  int SAfunc::GetSampleExp(VecShell<double> &vExp, VecShell<double> &vLen)
406  {
407  int nThread = omp_get_max_threads();
408  m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
409  m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
410 
411  m_matSampleExp.Fill(0);
412  m_matSampleLen.Fill(0);
413 
414  Vec<int> vTotalLen(nThread);
415  vTotalLen.Fill(0);
416 
417 
418  // init the sequence
419  if (m_aSeqs.GetNum() != nThread) {
420  for (int i = 0; i < nThread; i++) {
421  m_aSeqs[i] = new Seq;
422  m_pModel->RandSeq(*m_aSeqs[i]);
423  }
424  }
425 
426  /* sampling */
427 #pragma omp parallel for
428  for (int sample = 0; sample < m_nMiniBatchSample; sample++)
429  {
430  Vec<double> vExpGivenX(m_pModel->GetParamNum());
431  vExpGivenX.Fill(0);
432 
433  int tid = omp_get_thread_num();
434  m_pModel->Sample(*m_aSeqs[tid]);
435  int nLen = min(m_pModel->GetMaxLen(), m_aSeqs[tid]->GetLen());
436 
437  /* sample hidden several times */
438  for (int j = 0; j < m_nSampleHiddenSampleTimes; j++) {
439  m_pModel->SampleHAndCGivenX(*m_aSeqs[tid]);
440  }
441 
442 // m_pModel->GetHiddenExp(m_aSeqs[tid]->GetWordSeq(), vExpGivenX.GetBuf());
443 // vExpGivenX *= m_trainPi[nLen] / m_pModel->m_pi[nLen];
444 // m_matSampleExp[tid] += vExpGivenX;
445  m_pModel->FeatCount(*m_aSeqs[tid], m_matSampleExp[tid], m_trainPi[nLen] / m_pModel->m_pi[nLen]);
446  m_matSampleLen[tid][nLen]++;
447  vTotalLen[tid] += m_aSeqs[tid]->GetLen();
448 
449 
450  if (m_fsamp.Good()) {
451 #pragma omp critical
452  {
453  m_aSeqs[tid]->Write(m_fsamp);
454  }
455  }
456 
457  }
458  lout << " len-jump acc-rate=";
459  lout_variable_rate(m_pModel->m_nLenJumpAccTimes, m_pModel->m_nLenJumpTotalTime);
460  m_pModel->m_nLenJumpAccTimes = 0;
461  m_pModel->m_nLenJumpTotalTime = 0;
462  lout << endl;
463 
464 
465 
466  // summarization
467  vExp.Fill(0);
468  vLen.Fill(0);
469  int nTotalLen = 0;
470  for (int t = 0; t < nThread; t++) {
471  vExp += m_matSampleExp[t];
472  vLen += m_matSampleLen[t];
473  nTotalLen += vTotalLen[t];
474  }
475  m_vAllSampleLenCount += vLen;
476  m_vCurSampleLenCount.Copy(vLen);
477  m_nTotalSample += m_nMiniBatchSample;
478 
479  vExp /= m_nMiniBatchSample;
480  vLen /= m_nMiniBatchSample;
481 
482  return nTotalLen;
483  }
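GetSampleExp estimates the model expectation from the current mini-batch of MCMC samples; in notation introduced for this note,

\[ \hat E_{p}[f_k] \;\approx\; \frac{1}{B}\sum_{b=1}^{B} \frac{\pi^{\text{train}}_{l_b}}{\pi^{\text{sample}}_{l_b}}\, f_k\big(x^{(b)},h^{(b)}\big), \qquad B = \texttt{m\_nMiniBatchSample}, \]

where sample b comes from one Sample() step (whose length-jump acceptance rate is printed after the loop) followed by m_nSampleHiddenSampleTimes hidden-variable sweeps, and l_b is its length clipped to GetMaxLen(). The factor m_trainPi[nLen] / m_pModel->m_pi[nLen] passed to FeatCount re-weights the samples because lengths are drawn from the smoothed distribution m_samplePi installed in Reset rather than from the training length distribution. vLen returns the per-length sample frequencies (already divided by B), which GetGradient later uses for the zeta update.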
484 
485  void SAfunc::PerfromCD(VecShell<double> &vEmpExp, VecShell<double> &vSamExp, VecShell<double> &vEmpExp2, VecShell<double> &vLen)
486  {
487  int nThread = omp_get_max_threads();
488  /* for empirical expectation p[f] */
489  m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
490  m_matEmpiricalExp.Fill(0);
491 
492  /*for empirical variance estimation p[f^2] */
493  m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
494  m_matEmpiricalExp2.Fill(0);
495 
496  /* for sample expectation p_n[f] */
497  m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
498  m_matSampleExp.Fill(0);
499 
500  /* for the length */
501  m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
502  m_matSampleLen.Fill(0);
503 
505  Vec<int> aRanIdx(m_nMiniBatchTraining);
506  m_TrainSelect.GetIdx(aRanIdx.GetBuf(), m_nMiniBatchTraining);
507 
508  /* count the empirical variance */
509 #pragma omp parallel for firstprivate(aSeq) // ensure aSeq is a private, per-thread variable
510  for (int i = 0; i < m_nMiniBatchTraining; i++) {
511 
512  int tnum = omp_get_thread_num();
513  Vec<double> vExpGivenX(m_pModel->GetParamNum());
514  vExpGivenX.Fill(0);
515 
516  /* read a sequence*/
518  m_pCorpusTrain->GetSeq(aRanIdx[i], aSeq);
519  int nLen = aSeq.GetNum();
520 
521 
522  /* empirical expectation */
523  m_pModel->GetHiddenExp(VecShell<int>(aSeq, nLen), vExpGivenX.GetBuf());
524  m_matEmpiricalExp[tnum] += vExpGivenX;
525  for (int n = 0; n < vExpGivenX.GetSize(); n++) {
526  m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);
527  }
528 
529  if (m_ftrain.Good()) {
530  m_ftrain.PrintArray("%d ", aSeq.GetBuffer(), nLen);
531  }
532 
533  /* sample X and then sample H again */
534  Seq seq;
535  m_pModel->RandSeq(seq, nLen);
536  seq.x.Set(aSeq, m_pModel->GetVocab());
537  /* perform n times samples */
538  for (int j = 0; j < m_nCDSampleTimes; j++) {
539  for (int nPos = 0; nPos < nLen; nPos++) {
540  m_pModel->SampleC(seq, nPos);
541  m_pModel->SampleW(seq, nPos);
542  }
543  m_pModel->SampleHAndCGivenX(seq);
544  }
545 
546  /* sample expectation */
547  m_pModel->FeatCount(seq, m_matSampleExp[tnum]);
548  m_matSampleLen[tnum][nLen]++;
549 
550  if (m_fsamp.Good()) {
551  seq.Write(m_fsamp);
552  }
553 
554 
555  //Title::Precent();
556  }
557 
558  // summarization
559  vEmpExp.Fill(0);
560  vEmpExp2.Fill(0);
561  for (int t = 0; t < nThread; t++) {
562  vEmpExp += m_matEmpiricalExp[t]; // E[f]
563  vEmpExp2 += m_matEmpiricalExp2[t]; // E[f^2]
564  }
565  vEmpExp /= m_nMiniBatchTraining; // E[f]
566  vEmpExp2 /= m_nMiniBatchTraining; // E[f^2]
567 
568 
569  // summarization
570  vSamExp.Fill(0);
571  vLen.Fill(0);
572  for (int t = 0; t < nThread; t++) {
573  vSamExp += m_matSampleExp[t];
574  vLen += m_matSampleLen[t];
575  }
576  m_vAllSampleLenCount += vLen;
577  m_vCurSampleLenCount.Copy(vLen);
578  m_nTotalSample += m_nMiniBatchTraining;
579 
580  vSamExp /= m_nMiniBatchTraining;
581  vLen /= m_nMiniBatchTraining;
582 
583  }
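PerfromCD is the contrastive-divergence style alternative: the positive statistics are the exact hidden expectations given each training sentence (GetHiddenExp), and the negative statistics come from a chain initialized at that sentence and run for m_nCDSampleTimes sweeps, each sweep re-sampling the class and word at every position (SampleC/SampleW) and then the hidden variables (SampleHAndCGivenX). Roughly, in notation introduced for this note,

\[ g_{\text{CD}} \;\propto\; \frac{1}{B}\sum_{i=1}^{B}\Big( E_{p(h\mid x^{(i)})}[f] \;-\; f\big(\tilde x^{(i)},\tilde h^{(i)}\big) \Big), \qquad B = \texttt{m\_nMiniBatchTraining}, \]

where \((\tilde x^{(i)},\tilde h^{(i)})\) is the chain state after the Gibbs sweeps. The chain keeps the sentence length fixed, so no length re-weighting is applied here.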
584  void SAfunc::PerfromSA(VecShell<double> &vEmpExp, VecShell<double> &vSamExp, VecShell<double> &vEmpExp2, VecShell<double> &vLen)
585  {
586  lout_assert(m_nMiniBatchSample == m_nMiniBatchTraining);
587 
588  int nThread = omp_get_max_threads();
589  /* for empirical expectation p[f] */
590  m_matEmpiricalExp.Reset(nThread, m_pModel->GetParamNum());
591  m_matEmpiricalExp.Fill(0);
592 
593  /*for empirical variance estimation p[f^2] */
594  m_matEmpiricalExp2.Reset(nThread, m_pModel->GetParamNum());
595  m_matEmpiricalExp2.Fill(0);
596 
597  /* for sample expectation p_n[f] */
598  m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
599  m_matSampleExp.Fill(0);
600 
601  /* for the length */
602  m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
603  m_matSampleLen.Fill(0);
604 
605  //Array<VocabID> aSeq;
606  Vec<int> aRanIdx(m_nMiniBatchTraining);
607  Vec<int> aRanLen(m_nMiniBatchTraining);
608  m_TrainSelect.GetIdx(aRanIdx.GetBuf(), m_nMiniBatchTraining);
609 
610  /* count the empirical variance */
611 #pragma omp parallel for firstprivate(aSeq)
612  for (int i = 0; i < m_nMiniBatchTraining; i++) {
613 
614  int tnum = omp_get_thread_num();
615  Vec<double> vExpGivenX(m_pModel->GetParamNum());
616  vExpGivenX.Fill(0);
617 
618  /* read a sequence*/
619  Seq *pSeq = m_TrainCache.GetSeq(aRanIdx[i]);
620  int nLen = pSeq->GetLen();
621  aRanLen[i] = nLen;
622 
623  /* sample H */
624  for (int j = 0; j < m_nTrainHiddenSampleTimes; j++) {
625  m_pModel->SampleHAndCGivenX(*pSeq);
626  }
627 
628  /* empirical expectation */
629  m_pModel->FeatCount(*pSeq, vExpGivenX);
630  //m_pModel->GetHiddenExp(VecShell<int>(aSeq, nLen), vExpGivenX.GetBuf()); /// count
631  m_matEmpiricalExp[tnum] += vExpGivenX;
632  for (int n = 0; n < vExpGivenX.GetSize(); n++) {
633  m_matEmpiricalExp2[tnum][n] += pow(vExpGivenX[n], 2);
634  }
635 
636  if (m_ftrain.Good()) {
637 #pragma omp critical
638  {
639  pSeq->Write(m_ftrain);
640  }
641  }
642  }
643 
644  // init the sequence
645  if (m_threadData.GetNum() != nThread) {
646  m_threadData.SetNum(nThread);
647  for (int i = 0; i < m_threadData.GetNum(); i++) {
648  m_threadData[i] = new ThreadData;
649  m_threadData[i]->Create(m_pModel->GetMaxLen(), m_pModel);
650  }
651  }
652 
653  /* SA sampling */
654 #pragma omp parallel for
655  for (int i = 0; i < m_nMiniBatchTraining; i++)
656  {
657  int threadID = omp_get_thread_num();
658 
659  /* sample a length */
660  int nLen = aRanLen[i];
661  lout_assert(nLen >= 1);
662  lout_assert(nLen <= m_pModel->GetMaxLen());
663 
664  /* perform gibbs */
665  Seq *pSeq = m_threadData[threadID]->aSeqs[nLen];
666  for (int j = 0; j < m_nSASampleTimes; j++)
667  m_pModel->MarkovMove(*pSeq);
668  //m_pModel->Sample(*pSeq);
669 
670  /* sample hidden several times */
671  for (int j = 0; j < m_nSampleHiddenSampleTimes; j++) {
672  m_pModel->SampleHAndCGivenX(*pSeq);
673  }
674 
675  /* expectation */
676  m_pModel->FeatCount(*pSeq, m_matSampleExp[threadID]);
677  //m_pModel->GetHiddenExp(pSeq->GetWordSeq(), m_matSampleExp[threadID].GetBuf());
678  m_matSampleLen[threadID][nLen]++;
679 
680  if (m_fsamp.Good()) {
681 #pragma omp critical
682  {
683  pSeq->Write(m_fsamp);
684  }
685  }
686 
687  }
688 
689  // summarization
690  vEmpExp.Fill(0);
691  vEmpExp2.Fill(0);
692  for (int t = 0; t < nThread; t++) {
693  vEmpExp += m_matEmpiricalExp[t]; // E[f]
694  vEmpExp2 += m_matEmpiricalExp2[t]; // E[f^2]
695  }
696  vEmpExp /= m_nMiniBatchTraining; // E[f]
697  vEmpExp2 /= m_nMiniBatchTraining; // E[f^2]
698 
699  // summarization
700  vSamExp.Fill(0);
701  vLen.Fill(0);
702  for (int t = 0; t < nThread; t++) {
703  vSamExp += m_matSampleExp[t];
704  vLen += m_matSampleLen[t];
705  }
706  m_vAllSampleLenCount += vLen;
707  m_vCurSampleLenCount.Copy(vLen);
708  m_nTotalSample += m_nMiniBatchTraining;
709 
710  vSamExp /= m_nMiniBatchTraining;
711  vLen /= m_nMiniBatchTraining;
712 
713  }
714 // void SAfunc::PerfromSAMS(VecShell<double> &vEmpExp, VecShell<double> &vSamExp, VecShell<double> &vEmpExp2, VecShell<double> &vLen)
715 // {
716 // lout_assert(m_nMiniBatchTraining == m_nMiniBatchSample);
717 // /// perform SAMS
718 // int nTotalSampleLen = GetSampleExp(vSamExp, vLen);
719 //
720 // /// get training set
721 // if (m_trainSelectPerLen.GetNum() == 0) {
722 // // init
723 // m_trainSelectPerLen.SetNum(m_pCorpusTrain->GetMaxLen() + 1);
724 // m_trainSelectPerLen.Fill(NULL);
725 // Array<int> aSeq;
726 // for (int i = 0; i < m_pCorpusTrain->GetNum(); i++) {
727 // m_pCorpusTrain->GetSeq(i, aSeq);
728 // int nLen = aSeq.GetNum();
729 // if (!m_trainSelectPerLen[nLen]) {
730 // m_trainSelectPerLen[nLen] = new trf::RandSeq<int>();
731 // }
732 // m_trainSelectPerLen[nLen]->Add(i);
733 // }
734 // for (int i = 0; i < m_trainSelectPerLen.GetNum(); i++) {
735 // if (m_trainSelectPerLen[i])
736 // m_trainSelectPerLen[i]->Random();
737 // }
738 // }
739 //
740 // Array<int> aTrainIdx;
741 // for (int len = 1; len<=m_pModel->GetMaxLen(); len++) {
742 // if (!m_trainSelectPerLen[len]) {
743 // lout_error("Cannot find the len=" << len << " in training corpus");
744 // }
745 // for (int i = 0; i < (int)round(vLen[len] * m_nMiniBatchSample); i++) {
746 // aTrainIdx.Add() = m_trainSelectPerLen[len]->Get();
747 // }
748 // }
749 // lout_assert(aTrainIdx.GetNum() == m_nMiniBatchTraining);
750 //
751 // calculate empirical expectation
752 // int nTotalEmpLen = GetEmpiricalExp(vEmpExp, vEmpExp2, aTrainIdx);
753 //
754 // lout_assert(nTotalEmpLen == nTotalSampleLen);
755 // }
756  double SAfunc::GetSampleLL(CorpusBase *pCorpus, int nCalNum /* = -1 */, int method /* = 0 */)
757  {
758  int nThread = omp_get_max_threads();
759 
761  Vec<double> vSum(nThread);
762  Vec<int> vNum(nThread);
763  vSum.Fill(0);
764  vNum.Fill(0);
765 
766  int nCorpusNum = (nCalNum == -1) ? pCorpus->GetNum() : min(nCalNum, pCorpus->GetNum());
767  Title::Precent(0, true, nCorpusNum, "GetSampleLL");
768 #pragma omp parallel for firstprivate(aSeq)
769  for (int i = 0; i < nCorpusNum; i++) {
770  pCorpus->GetSeq(i, aSeq);
771 
772  if (aSeq.GetNum() > m_pModel->GetMaxLen()) {
773  continue;
774  }
775 
776  LogP logprob;
777 // if (method == 0)
778 // logprob = m_pModel->GetLogProbX_AIS(VecShell<VocabID>(aSeq.GetBuffer(), aSeq.GetNum()), m_AISConfigForP.nChain, m_AISConfigForP.nInter);
779 // else
780 // logprob = m_pModel->GetLogProbX_Chib(VecShell<VocabID>(aSeq.GetBuffer(), aSeq.GetNum()), 10);
781  logprob = m_pModel->GetLogProb(VecShell<VocabID>(aSeq.GetBuffer(), aSeq.GetNum()));
782 
783  vSum[omp_get_thread_num()] += logprob;
784  vNum[omp_get_thread_num()]++;
785  Title::Precent();
786  }
787 
788  double dsum = 0;
789  int nNum = 0;
790  for (int t = 0; t < nThread; t++) {
791  dsum += vSum[t];
792  nNum += vNum[t];
793  }
794  return dsum / nNum;
795  }
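GetSampleLL simply averages the per-sentence log-probability; in notation introduced for this note it returns

\[ \frac{1}{n}\sum_{i=1}^{n}\log p\big(x^{(i)}\big), \]

where the sum runs over the first nCalNum sentences (or the whole corpus when nCalNum = -1) and sentences longer than GetMaxLen() are skipped and not counted in n. The commented-out lines show AIS- and Chib-based alternatives selected by the method argument.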
796  void SAfunc::IterEnd(double *pFinalParams)
797  {
798  SetParam(pFinalParams);
799  // set pi to the length distribution of the training set
800  m_pModel->SetPi(m_trainPi.GetBuf());
801  }
802  void SAfunc::WriteModel(int nEpoch)
803  {
804  String strTempModel;
805  String strName = String(m_pathOutputModel).FileName();
806 #ifdef __linux
807  strTempModel.Format("%s.n%d.model", strName.GetBuffer(), nEpoch);
808 #else
809  strTempModel.Format("%s.n%d.model", strName.GetBuffer(), nEpoch);
810 #endif
811  // set the pi as the pi of training set
812  m_pModel->SetPi(m_trainPi.GetBuf());
813  m_pModel->WriteT(strTempModel);
814  m_pModel->SetPi(m_samplePi.GetBuf());
815  }
816  void SAfunc::GetGradient(double *pdGradient)
817  {
818  int nWeightNum = m_pModel->GetParamNum();
819  m_vEmpExp.Reset(nWeightNum);
820  m_vEmpExp2.Reset(nWeightNum);
821  m_vSampleExp.Reset(nWeightNum);
822  m_vSampleLen.Reset(m_pModel->GetMaxLen() + 1);
823 
824 #ifdef _CD
825  PerfromCD(m_vEmpExp, m_vSampleExp, m_vEmpExp2, m_vSampleLen);
826 #else
827 
828  if (m_bSAMSSample) {
829  //GetEmpiricalExp(m_vEmpExp, m_vEmpExp2);
830  GetSampleExp(m_vSampleExp, m_vSampleLen);
831  }
832  else {
833  PerfromSA(m_vEmpExp, m_vSampleExp, m_vEmpExp2, m_vSampleLen);
834  }
835 #endif
836 
837  /* Calculate the gradient */
838  int nFeatNum = m_pModel->m_pFeat->GetNum();
839  for (int i = 0; i < nFeatNum; i++) {
840  pdGradient[i] = ( m_vEmpFeatExp[i] - m_vSampleExp[i] ) / m_vEmpFeatVar[i];
841  }
842 
843  for (int i = nFeatNum; i < nWeightNum; i++) {
844 #ifdef _Var
845  double dVar = m_vExp2Value[i - nFeatNum] - pow(m_vExpValue[i - nFeatNum], 2);
846  pdGradient[i] = (m_vEmpExp[i] - m_vSampleExp[i]) / max(m_var_gap, dVar);
847 #else
848  pdGradient[i] = m_vEmpExp[i] - m_vSampleExp[i];
849 #endif
850  }
851 
852 // static bool bUpdateVHmat = false;
853 // static int times = 0;
854 // times++;
855 // if (times % 10 == 0) {
856 // bUpdateVHmat = !bUpdateVHmat;
857 // }
858 // if (bUpdateVHmat) {
859 // for (int i = nFeatNum + m_pModel->m_m3dVH.GetSize() + m_pModel->m_m3dCH.GetSize(); i < nWeightNum; i++) {
860 // pdGradient[i] = 0;
861 // }
862 // }
863 // else {
864 // for (int i = nFeatNum; i < nFeatNum + m_pModel->m_m3dVH.GetSize() + m_pModel->m_m3dCH.GetSize(); i++) {
865 // pdGradient[i] = 0;
866 // }
867 // }
868 
869 
870 
871  /*
872  Zeta update
873  */
874  for (int l = 0; l <= m_pModel->GetMaxLen(); l++) {
875  if (m_pModel->m_pi[l] > 0) {
876  pdGradient[nWeightNum + l] = m_vSampleLen[l] / m_pModel->m_pi[l];
877  }
878  else {
879  pdGradient[nWeightNum + l] = 0;
880  }
881  }
882 
883 #ifdef _Var
884  /* Var update */
885  double *pgExp = pdGradient + nWeightNum + GetZetaNum();
886  double *pgExp2 = pgExp + m_vExpValue.GetSize();
887  for (int i = nFeatNum; i < nWeightNum; i++) {
888  pgExp[i - nFeatNum] = m_vEmpExp[i] - m_vExpValue[i - nFeatNum];
889  pgExp2[i - nFeatNum] = m_vEmpExp2[i] - m_vExp2Value[i - nFeatNum];
890  }
891 
892  if (m_fvar.Good()) {
893  m_fvar.PrintArray("%f ", m_vExpValue.GetBuf(), m_vExpValue.GetSize());
894  m_fvar.PrintArray("%f ", m_vExp2Value.GetBuf(), m_vExp2Value.GetSize());
895  for (int i = 0; i < m_vExpValue.GetSize(); i++)
896  m_fvar.Print("%f ", m_vExp2Value[i] - pow(m_vExpValue[i], 2));
897  m_fvar.Print("\n");
898  m_fvar.Print("\n");
899  }
900 #endif
901 
902 
903  if (m_fgrad.Good()) {
904  m_fgrad.PrintArray("%f ", pdGradient + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());
905 // MLfunc::GetGradient(pdGradient);
906 // m_fgrad.PrintArray("%f ", pdGradient + m_pModel->GetParamNum() - GetHHmatSize(), GetHHmatSize());
907  m_fgrad.Print("\n");
908  }
909  if (m_fexp.Good()) {
910  m_fexp.PrintArray("%f ", m_vEmpExp.GetBuf() + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());
911  m_fexp.PrintArray("%f ", m_vSampleExp.GetBuf() + m_pModel->m_pFeat->GetNum(), m_pModel->GetParamNum() - m_pModel->m_pFeat->GetNum());
912 // m_fexp.PrintArray("%f ", m_vEmpiricalExp.GetBuf(), nLambdaNum);
913 // m_fexp.PrintArray("%f ", m_vSampleExp.GetBuf(), nLambdaNum);
914  m_fexp.Print("\n");
915  }
916 
917 
918 
919 
920 // if (m_vEmpiricalExp[nOffset] == m_vEmpiricalExp[nOffset + 1]) {
921 // //m_pModel->WriteT(m_pathOutputModel);
922 // Pause();
923 // }
924 
925 // VecShell<double> featexp;
926 // MatShell<double> VHexp, HHexp;
927 // m_pModel->BufMap(pdGradient, featexp, VHexp, HHexp);
928 // fileDbg.PrintArray("%f ", featexp.GetBuf(), featexp.GetSize());
929 // fileDbg.PrintArray("%f ", VHexp.GetBuf(), VHexp.GetSize());
930 // fileDbg.PrintArray("%f ", HHexp.GetBuf(), HHexp.GetSize());
931 // fileDbg.Print("\n");
932 // fileDbg.PrintArray("%f ", m_pModel->m_zeta.GetBuf(), m_pModel->GetMaxLen() + 1);
933 // fileDbg.PrintArray("%f ", m_pModel->m_logz.GetBuf(), m_pModel->GetMaxLen() + 1);
934 
935 
936  }
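Collecting the pieces, the gradient assembled in GetGradient is, in notation introduced for this note,

\[ g_{\lambda_i} = \frac{\hat E_{\tilde p}[f_i] - \hat E_{p}[f_i]}{\widehat{\mathrm{Var}}[f_i]} \ \ \text{(discrete features)}, \qquad g_{\lambda_i} = \hat E_{\tilde p}[f_i] - \hat E_{p}[f_i] \ \ \text{(hidden-dependent weights)}, \]

where the discrete-feature term uses the whole-corpus expectations computed in Reset, the hidden-dependent term uses the mini-batch expectations from PerfromSA / GetEmpiricalExp, and the latter is divided by \(\max(\texttt{m\_var\_gap},\,\hat E[f_i^2]-\hat E[f_i]^2)\) when _Var is defined. For the normalization constants,

\[ g_{\zeta_l} = \frac{\delta_l}{\pi_l}, \]

with \(\delta_l\) the fraction of the sampled mini-batch that has length l (m_vSampleLen is already normalized by the mini-batch size) and \(\pi_l\) the current sampling length distribution; lengths that are over-represented relative to \(\pi\) therefore push their \(\zeta_l\) upwards.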
937  int SAfunc::GetExtraValues(int t, double *pdValues)
938  {
939  int nValue = 0;
940 
941  // set the training pi
942  m_pModel->SetPi(m_trainPi.GetBuf());
943 
944  Vec<Prob> samsZeta(m_pModel->m_zeta.GetSize());
945  Vec<Prob> trueZeta(m_pModel->m_zeta.GetSize());
946  //Vec<double> trueLogZ(m_pModel->m_logz.GetSize());
947  samsZeta.Fill(0);
948  trueZeta.Fill(0);
949  samsZeta = m_pModel->m_zeta;
950 
951  //m_pModel->ApproxNormalize_AIS(m_AISConfigForZ.nChain, m_AISConfigForZ.nInter);
952 
953 
954  // calculate LL using exact hidden expectation
955  if (m_pCorpusTrain && m_bPrintTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
956  if (m_pCorpusValid && m_bPrintValie) pdValues[nValue++] = -GetLL(m_pCorpusValid);
957  if (m_pCorpusTest && m_bPrintTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);
958 
959 
960  /* true Z_L to get the LL */
961  if (m_pModel->m_hlayer * m_pModel->m_hnode < 5 && m_pModel->m_pVocab->GetSize() < 100) {
962  Vec<LogP> oldZeta(m_pModel->m_zeta.GetSize());
963  oldZeta = m_pModel->m_zeta;
964 
965  m_pModel->ExactNormalize(); // normalization
966  trueZeta.Copy(m_pModel->m_zeta);
967  if (m_pCorpusTrain && m_bPrintTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
968  if (m_pCorpusValid && m_bPrintValie) pdValues[nValue++] = -GetLL(m_pCorpusValid);
969  if (m_pCorpusTest && m_bPrintTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);
970 
971  m_pModel->SetZeta(oldZeta.GetBuf());
972  }
973 
974 
975  /* output debug */
976  if (!m_fdbg.Good()) {
977  m_fdbg.Open("SAfunc.dbg", "wt");
978  }
979  m_vAllSampleLenCount *= 1.0 / m_nTotalSample;
980  m_vCurSampleLenCount *= 1.0 / m_nMiniBatchSample;
981  m_fdbg.Print("pi_cur_: "); m_fdbg.PrintArray("%f ", m_vCurSampleLenCount.GetBuf() + 1, m_vCurSampleLenCount.GetSize() - 1);
982  m_fdbg.Print("pi_all_: "); m_fdbg.PrintArray("%f ", m_vAllSampleLenCount.GetBuf() + 1, m_vAllSampleLenCount.GetSize() - 1);
983  m_fdbg.Print("pi_true: "); m_fdbg.PrintArray("%f ", m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
984  m_fdbg.Print("z_ais__: "); m_fdbg.PrintArray("%f ", m_pModel->m_zeta.GetBuf() + 1, m_pModel->m_zeta.GetSize() - 1);
985  m_fdbg.Print("z_sams_: "); m_fdbg.PrintArray("%f ", samsZeta.GetBuf() + 1, samsZeta.GetSize() - 1);
986  m_fdbg.Print("z_true_: "); m_fdbg.PrintArray("%f ", trueZeta.GetBuf() + 1, trueZeta.GetSize() - 1);
987  m_fdbg.Print("\n");
988  m_vAllSampleLenCount *= m_nTotalSample;
989  m_vCurSampleLenCount *= m_nMiniBatchSample;
990 
991  m_pModel->SetPi(m_samplePi.GetBuf());
992 
993  return nValue;
994  }
995 
996  void LearningRate::Reset(const char *pstr, int p_t0)
997  {
998  sscanf(pstr, "%lf,%lf", &tc, &beta);
999  t0 = p_t0;
1000  //lout << "[Learning Rate] tc=" << tc << " beta=" << beta << " t0=" << t0 << endl;
1001  }
1002  double LearningRate::Get(int t)
1003  {
1004  double gamma;
1005  if (t <= t0) {
1006  gamma = 1.0 / (tc + pow(t, beta));
1007  }
1008  else {
1009  gamma = 1.0 / (tc + pow(t0, beta) + t - t0);
1010  }
1011  return gamma;
1012  }
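Written out, the learning-rate schedule implemented by LearningRate::Get is

\[ \gamma_t = \begin{cases} \dfrac{1}{t_c + t^{\beta}}, & t \le t_0,\\[1.5ex] \dfrac{1}{t_c + t_0^{\beta} + (t - t_0)}, & t > t_0, \end{cases} \]

with tc and beta parsed from the "tc,beta" configuration string and t0 set by p_t0: the rate decays polynomially with exponent beta up to iteration t0 and then switches to a 1/t style decay, keeping early steps large while still shrinking the step size in the long run.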
1013 
1014 
1015  bool SAtrain::Run(const double *pInitParams /* = NULL */)
1016  {
1017  if (!m_pfunc) {
1018  lout_Solve << "m_pFunc == NULL" << endl;
1019  return false;
1020  }
1021  Clock ck;
1022  m_dSpendMinute = 0;
1023  lout.bOutputCmd() = false;
1024 
1025  SAfunc *pSA = (SAfunc*)m_pfunc;
1026  int nIterPerEpoch = pSA->m_pCorpusTrain->GetNum() / pSA->m_nMiniBatchTraining + 1;
1027  lout_variable(nIterPerEpoch);
1028 
1029  double *pdCurParams = new double[m_pfunc->GetParamNum()]; //current parameters x_k
1030  double *pdCurGradient = new double[m_pfunc->GetParamNum()]; //current gradient df_k
1031  double *pdCurDir = new double[m_pfunc->GetParamNum()]; // current update direction
1032  double dCurValue = 0; // function value f_k
1033  double dExValues[Func::cn_exvalue_max_num]; // save the extra values
1034  int nExValueNum; // number of extra values
1035 
1036  // if average
1037  bool bAvg = (m_nAvgBeg > 0);
1038  double *pdAvgParams = NULL;
1039  if (bAvg) {
1040  pdAvgParams = new double[m_pfunc->GetParamNum()];
1041  }
1042 
1043  //init
1044  for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
1045  pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1;
1046  }
1047  memset(pdCurGradient, 0, sizeof(double)*m_pfunc->GetParamNum());
1048  memset(pdCurDir, 0, sizeof(double)*m_pfunc->GetParamNum());
1049 
1050  IterInit();
1051  m_pfunc->SetParam(pdCurParams);
1052  pSA->WriteModel(0);
1053 
1054  // iteration begin
1055  lout_Solve << "************* Training Begin *****************" << endl;
1056  lout_Solve << "print-per-iter=" << m_nPrintPerIter << endl;
1057  lout.bOutputCmd() = false;
1058  ck.Begin();
1059  for (m_nIterNum = m_nIterMin; m_nIterNum <= m_nIterMax; m_nIterNum++)
1060  {
1061  // epoch number
1062  m_fEpochNum = 1.0 * m_nIterNum * pSA->m_nMiniBatchSample / pSA->m_pCorpusTrain->GetNum();
1063 
1064  // set the parameter
1065  m_pfunc->SetParam(pdCurParams);
1066  // get the gradient
1067  m_pfunc->GetGradient(pdCurGradient);
1068  // get the function value
1069  dCurValue = m_pfunc->GetValue();
1070  // get the averaged parameters
1071  if (bAvg) {
1072  if (m_nIterNum <= m_nAvgBeg) {
1073  memcpy(pdAvgParams, pdCurParams, sizeof(pdCurParams[0])*m_pfunc->GetParamNum());
1074  }
1075  else {
1076  for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
1077  pdAvgParams[i] += (pdCurParams[i] - pdAvgParams[i]) / (m_nIterNum - m_nAvgBeg);
1078  }
1079  }
1080  }
1081 
1082 
1083  /* output the values */
1084  if (m_nIterNum % m_nPrintPerIter == 0 || m_nIterNum == m_nIterMax)
1085  {
1086  m_dSpendMinute = ck.ToSecond(ck.Get()) / 60;
1087  bool bPrintCmd;
1088 
1089  bPrintCmd = lout.bOutputCmd();
1090  lout.bOutputCmd() = true;
1091  lout_Solve << "t=" << m_nIterNum;
1092  cout << setprecision(4) << setiosflags(ios::fixed);
1093  lout << " epoch=" << m_fEpochNum;
1094  cout << setprecision(2) << setiosflags(ios::fixed);
1095  lout << " time=" << m_dSpendMinute << "m";
1096  lout << (bAvg ? " [Avg]" : " ");
1097  lout.bOutputCmd() = bPrintCmd;
1098 
1099 
1100  // get the ex-value
1101  if (bAvg) pSA->SetParam(pdAvgParams);
1102  // This function will use AIS to normalize the model
1103  nExValueNum = pSA->GetExtraValues(m_nIterNum, dExValues);
1104 
1105  bPrintCmd = lout.bOutputCmd();
1106  lout.bOutputCmd() = true;
1107  lout << "ExValues={ ";
1108  cout << setprecision(3) << setiosflags(ios::fixed);
1109  for (int i = 0; i < nExValueNum; i++)
1110  lout << dExValues[i] << " ";
1111  lout << "}" << endl;
1112 
1113  // write model
1114  if (m_aWriteAtIter.Find(m_nIterNum) != -1)
1115  pSA->WriteModel(m_nIterNum);
1116  // revise the zeta
1117  for (int i = 1; i < pSA->GetZetaNum(); i++) {
1118  pdCurParams[i + pSA->GetWeightNum()] = pSA->m_pModel->m_zeta[i];
1119  pdCurGradient[i + pSA->GetWeightNum()] = 0;
1120  }
1121 
1122  if (bAvg) pSA->SetParam(pdCurParams);
1123 
1124  lout.bOutputCmd() = bPrintCmd;
1125  }
1126 
1127  /* Stop Decision */
1128  if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) {
1129  break;
1130  }
1131 
1132 
1133  // update the learning rate gamma
1134  UpdateGamma(m_nIterNum);
1135 
1136  // update the direction
1137  UpdateDir(pdCurDir, pdCurGradient, pdCurParams);
1138 
1139  // Update parameters
1140  Update(pdCurParams, pdCurDir, 0);
1141 
1142  // Add the spend times
1143  // accumulate the elapsed time
1144  }
1145 
1146 
1147  lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl;
1148  lout_Solve << "Iter Finished!" << endl;
1149 
1150  // do something at the end of the iteration
1151  if (bAvg) pSA->IterEnd(pdAvgParams);
1152  else pSA->IterEnd(pdCurParams);
1153 
1154  SAFE_DELETE_ARRAY(pdCurGradient);
1155  SAFE_DELETE_ARRAY(pdCurDir);
1156  SAFE_DELETE_ARRAY(pdCurParams);
1157  SAFE_DELETE_ARRAY(pdAvgParams);
1158  return true;
1159  }
1160 
1161  void SAtrain::UpdateGamma(int nIterNum)
1162  {
1163  m_gamma_lambda = m_gain_lambda.Get(nIterNum);
1164  m_gamma_hidden = m_gain_hidden.Get(nIterNum);
1165  m_gamma_zeta = m_gain_zeta.Get(nIterNum);
1166 
1167 // if (m_fMomentum > 0 && nIterNum > m_gain_lambda.t0) {
1168 // m_fMomentum = 0.99;
1169 // }
1170 
1171 #ifdef _Var
1172  m_gamma_var = m_gain_var.Get(nIterNum);
1173  lout_Solve << "g_var=" << m_gamma_var<<endl;
1174 #endif
1175 
1176  lout_Solve << "g_lambda=" << m_gamma_lambda
1177  << " g_hidden=" << m_gamma_hidden
1178  << " g_zeta=" << m_gamma_zeta
1179  << " momentum=" << m_fMomentum
1180  << endl;
1181  }
1182  void SAtrain::UpdateDir(double *pDir, double *pGradient, const double *pdParam)
1183  {
1184  /* using the momentum */
1185  // pDir holds the update direction computed from the gradient
1186 
1187  SAfunc* pSA = (SAfunc*)m_pfunc;
1188  int nNgramFeatNum = pSA->GetNgramFeatNum();
1189  int nWeightNum = pSA->GetWeightNum();
1190  int nZetaNum = pSA->GetZetaNum();
1191 
1192  // update lambda
1193  for (int i = 0; i < nNgramFeatNum; i++) {
1194  pDir[i] = m_gamma_lambda * pGradient[i];
1195  }
1196  for (int i = nNgramFeatNum; i < nWeightNum; i++) {
1197  pDir[i] = m_fMomentum * pDir[i] + m_gamma_hidden * pGradient[i];
1198  }
1199 
1200 
1201 
1202 #ifdef _Var
1203  /* update exp and exp2 */
1204  for (int i = nWeightNum + nZetaNum; i < pSA->GetParamNum(); i++) {
1205  pDir[i] = m_gamma_var * pGradient[i];
1206  }
1207 
1208 #endif
1209 
1210 
1211  // update zeta
1212  for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
1213  pDir[i] = m_gamma_zeta * pGradient[i];
1214  //pDir[i] = min(m_gamma_zeta, 1.0*pSA->m_pModel->GetMaxLen()*pSA->m_pModel->m_pi[i - nWeightNum]) * pGradient[i];
1215  }
1216 
1217 
1218  // for gap
1219  int n_dgap_cutnum = CutValue(pDir, nWeightNum, m_dir_gap);
1220  int n_zgap_cutnum = CutValue(pDir+nWeightNum, nZetaNum, m_zeta_gap);
1221  lout << "cut-dir=";
1222  lout_variable_rate(n_dgap_cutnum, nWeightNum);
1223  lout << " cut-zeta=";
1224  lout_variable_rate(n_zgap_cutnum, nZetaNum);
1225  lout << endl;
1226  }
1227  void SAtrain::Update(double *pdParam, const double *pdDir, double dStep)
1228  {
1229  // pdDir is actually the gradient
1230 
1231  SAfunc* pSA = (SAfunc*)m_pfunc;
1232  int nWeightNum = pSA->GetWeightNum();
1233  int nZetaNum = pSA->GetZetaNum();
1234 
1235 // lout_assert(nWeightNum == nNgramFeatNum + nVHsize + nCHsize + nHHsize);
1236 
1237  // update lambda
1238  if (m_bUpdate_lambda) {
1239  for (int i = 0; i < nWeightNum; i++) {
1240  pdParam[i] += pdDir[i];
1241  }
1242 
1243  /* using Nesterov’s Accelerated Gradient momentum setting*/
1244  /* See "On the importance of initialization and momentum in deep learning" */
1245  if (m_fMomentum) {
1246  for (int i=0; i<nWeightNum; i++) {
1247  pdParam[i] += m_fMomentum * pdDir[i];
1248  }
1249  }
1250 
1251 #ifdef _Var
1252  /* update var */
1253  for (int i = nWeightNum + nZetaNum; i < pSA->GetParamNum(); i++) {
1254  pdParam[i] += pdDir[i];
1255  }
1256 #endif
1257 
1258  }
1259 
1260 
1261 
1262  // update zeta
1263  if (m_bUpdate_zeta) {
1264  for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
1265  pdParam[i] += pdDir[i];
1266  }
1267  double zeta1 = pdParam[nWeightNum + 1];
1268  for (int i = nWeightNum + 1; i < nWeightNum + nZetaNum; i++) {
1269  pdParam[i] -= zeta1; // minus the zeta[1];
1270  }
1271  }
1272 
1273 
1274  }
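Reading UpdateDir and Update together for the hidden-dependent weights (the only block that accumulates momentum in UpdateDir; notation introduced for this note):

\[ d_t = \mu\, d_{t-1} + \gamma\, g_t, \qquad \theta_{t+1} = \theta_t + d_t + \mu\, d_t, \]

i.e. the realized step is \((1+\mu)d_t\); the extra \(\mu d_t\) term is the look-ahead that the comment above motivates with the Nesterov-accelerated-gradient formulation of Sutskever et al. After the zeta block is updated, zeta[1] is subtracted from entries 1..L so that zeta[1] is pinned to zero and the constants stay defined relative to length 1.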
1275 
1276 #define GAIN_INFO(g) lout<<" "#g"\ttc="<<g.tc<<" beta="<<g.beta<<" t0="<<g.t0<<endl;
1277  void SAtrain::PrintInfo()
1278  {
1279  lout << "[SATrain] *** Info: ***" << endl;
1280  GAIN_INFO(m_gain_lambda);
1281  GAIN_INFO(m_gain_hidden);
1282 #ifdef _Var
1283  GAIN_INFO(m_gain_var);
1284 #endif
1285  GAIN_INFO(m_gain_zeta);
1286  lout << " "; lout_variable(m_dir_gap);
1287  lout << " "; lout_variable(m_zeta_gap);
1288  lout << "[SATrain] *** [End] ***" << endl;
1289  }
1290 
1291  int SAtrain::CutValue(double *p, int num, double gap)
1292  {
1293  int nCutNum = 0;
1294  if (gap <= 0)
1295  return nCutNum;
1296 
1297  for (int i = 0; i < num; i++) {
1298  if (p[i] > gap) {
1299  p[i] = gap;
1300  nCutNum++;
1301  }
1302  else if (p[i] < -gap) {
1303  p[i] = -gap;
1304  nCutNum++;
1305  }
1306  }
1307  return nCutNum;
1308  }
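CutValue implements simple element-wise gradient clipping: every entry of p is clamped to [-gap, gap] and the return value counts how many entries were clamped (a gap of 0 or less disables clipping). UpdateDir applies it with m_dir_gap to the weight directions and with m_zeta_gap to the zeta directions. A minimal, self-contained illustration; the helper below is a stand-alone copy written for this note, not part of the toolkit:

    #include <cstdio>

    // clamp every entry of p to [-gap, gap]; return the number of clamped entries
    static int ClipSketch(double *p, int num, double gap)
    {
        int nCut = 0;
        if (gap <= 0) return nCut;              // gap <= 0 disables clipping
        for (int i = 0; i < num; i++) {
            if (p[i] > gap)       { p[i] = gap;  nCut++; }
            else if (p[i] < -gap) { p[i] = -gap; nCut++; }
        }
        return nCut;
    }

    int main()
    {
        double dir[4] = { 0.3, -2.0, 5.0, 0.1 };
        int n = ClipSketch(dir, 4, 1.0);        // dir becomes { 0.3, -1.0, 1.0, 0.1 }
        printf("clipped %d of 4 entries\n", n); // prints: clipped 2 of 4 entries
        return 0;
    }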
1309 }