TRF Language Model
hrf-model.cpp
#include "hrf-model.h"

namespace hrf
{
    void Seq::Reset(int len, int hlayer, int hnode)
    {
        if (m_nLen != len || GetHlayer() != hlayer || GetHnode() != hnode) {
            m_nLen = len;
            m_hlayer = hlayer;
            m_hnode = hnode;
            x.Reset(len);
            h.Reset(len, hlayer * hnode);
        }
    }
    void Seq::Copy(Seq &seq)
    {
        x.Copy(seq.x);
        h.Copy(seq.h);
        m_nLen = seq.m_nLen;
        m_hlayer = seq.m_hlayer; // also copy the hidden dimensions, so they stay consistent with h
        m_hnode = seq.m_hnode;
    }
    Seq Seq::GetSubSeq(int nPos, int nOrder)
    {
        if (nPos + nOrder > m_nLen) {
            lout_error("[Seq] GetSubSeq: nPos+nOrder > nLen!!");
        }
        Seq sub(nOrder, GetHlayer(), GetHnode());
        for (int i = nPos; i < nPos + nOrder; i++) {
            sub.x.GetWordSeq()[i - nPos] = x.GetWordSeq()[i];
            sub.x.GetClassSeq()[i - nPos] = x.GetClassSeq()[i];
            sub.h[i - nPos] = h[i];
        }
        return sub;
    }
    bool Seq::operator==(Seq &s)
    {
        if (GetLen() != s.GetLen())
            return false;

        if (x.x == s.x.x && h == s.h)
            return true;
        return false;
    }
    void Seq::Print()
    {
        for (int i = 0; i < h.GetCol(); i++) {
            for (int j = 0; j < m_nLen; j++) {
                lout << h[j][i] << "\t";
            }
            lout << endl;
        }
        x.Print();
    }
    void Seq::Write(File &file)
    {
        ofstream ofile(file.fp);
        for (int i = 0; i < h.GetCol(); i++) {
            for (int j = 0; j < m_nLen; j++) {
                ofile << h[j][i] << "\t";
            }
            ofile << endl;
        }
        x.Print(file);
    }

    void Model::Reset(Vocab *pv, int hlayer, int hnode, int maxlen)
    {
        trf::Model::Reset(pv, maxlen);

        m_hlayer = hlayer;
        m_hnode = hnode;
        m_m3dVH.Reset(m_pVocab->GetSize(), m_hlayer * m_hnode, 2); // 0 and 1
        m_m3dCH.Reset(m_pVocab->GetClassNum(), m_hlayer * m_hnode, 2); // 0 and 1
        m_m3dHH.Reset(m_hlayer * m_hnode, m_hnode, 4); // 0-0, 0-1, 1-0, 1-1
        m_matBias.Reset(m_hlayer * m_hnode, 2); // 0 and 1
    }
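
    /* A summary of the hidden-parameter tensors allocated above:
         m_m3dVH   : vocab-hidden weights,  |V| x (hlayer*hnode) x 2  (one value per hidden state 0/1);
         m_m3dCH   : class-hidden weights,  |C| x (hlayer*hnode) x 2;
         m_m3dHH   : hidden-hidden weights between adjacent positions within a layer,
                     (hlayer*hnode) x hnode x 4  (one value per state pair 0-0, 0-1, 1-0, 1-1);
         m_matBias : per-node bias, (hlayer*hnode) x 2. */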


    void Model::SetParam(PValue *pParam)
    {
        if (m_pFeat) {
            trf::Model::SetParam(pParam);
            pParam += m_pFeat->GetNum();
        }

        HRF_VALUE_SET(pParam, m_m3dVH);
        HRF_VALUE_SET(pParam, m_m3dCH);
        HRF_VALUE_SET(pParam, m_m3dHH);
        HRF_VALUE_SET(pParam, m_matBias);
//      memcpy(m_m3dVH.GetBuf(), pParam, sizeof(PValue)*m_m3dVH.GetSize());
//      pParam += m_m3dVH.GetSize();
//      memcpy(m_m3dCH.GetBuf(), pParam, sizeof(PValue)*m_m3dCH.GetSize());
//      pParam += m_m3dCH.GetSize();
//      memcpy(m_m3dHH.GetBuf(), pParam, sizeof(PValue)*m_m3dHH.GetSize());
    }
    void Model::GetParam(PValue *pParam)
    {
        if (m_pFeat) {
            trf::Model::GetParam(pParam);
            pParam += m_pFeat->GetNum();
        }
        HRF_VALUE_GET(pParam, m_m3dVH);
        HRF_VALUE_GET(pParam, m_m3dCH);
        HRF_VALUE_GET(pParam, m_m3dHH);
        HRF_VALUE_GET(pParam, m_matBias);

//      memcpy(pParam, m_m3dVH.GetBuf(), sizeof(PValue)*m_m3dVH.GetSize());
//      pParam += m_m3dVH.GetSize();
//      memcpy(pParam, m_m3dCH.GetBuf(), sizeof(PValue)*m_m3dCH.GetSize());
//      pParam += m_m3dCH.GetSize();
//      memcpy(pParam, m_m3dHH.GetBuf(), sizeof(PValue)*m_m3dHH.GetSize());
    }
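
    /* Layout of the flat parameter vector used by SetParam/GetParam (and by BufMap):
       [ ngram feature values | m_m3dVH | m_m3dCH | m_m3dHH | m_matBias ],
       so GetParamNum() = m_pFeat->GetNum() + the sizes of the four hidden tensors. */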
    LogP Model::GetLogProb(Seq &seq, bool bNorm /* = true */)
    {
        LogP logSum = trf::Model::GetLogProb(seq.x, false);

        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;

        // Vocab * Hidden
        for (int i = 0; i < seq.GetLen(); i++) {
            logSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[i]], seq.h[i]);
        }

        // Class * Hidden
        if (m_m3dCH.GetSize() > 0) {
            for (int i = 0; i < seq.GetLen(); i++) {
                logSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[i]], seq.h[i]);
            }
        }

        // Hidden * Hidden
        for (int i = 0; i < seq.GetLen() - 1; i++) {
            logSum += dfactor * SumHHWeight(m_m3dHH, seq.h[i], seq.h[i + 1]);
        }

        // Bias
        for (int i = 0; i < seq.GetLen(); i++) {
            logSum += dfactor * SumVHWeight(m_matBias, seq.h[i]);
        }

        // normalization
        if (bNorm) {
            int nLen = min(m_maxlen, seq.GetLen());
            logSum = logSum - m_logz[nLen] + trf::Prob2LogP(m_pi[nLen]);
        }
        return logSum;
    }
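
    /* A sketch of the unnormalized log-weight accumulated above:
         log p(x,h) ~ phi(x)                                  (trf ngram features)
                    + sum_i sum_k VH[w_i][k][h_i(k)]          (word-hidden)
                    + sum_i sum_k CH[c_i][k][h_i(k)]          (class-hidden)
                    + sum_i HH-terms(h_i, h_{i+1})            (adjacent hidden, per layer)
                    + sum_i sum_k Bias[k][h_i(k)],
       normalized by subtracting logZ_l and adding the length prior log pi_l. */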

    void Model::ReadT(const char *pfilename)
    {
        File fout(pfilename, "rt");

        lout << "[Model]: Read(txt) from " << pfilename << endl;

        int nVocabSize = 0;
        fout.Scanf("m_vocabsize=%d\n", &nVocabSize);
        fout.Scanf("m_maxlen=%d\n", &m_maxlen);
        fout.Scanf("m_hlayer=%d\n", &m_hlayer);
        fout.Scanf("m_hnode=%d\n", &m_hnode);
        // Reset
        Reset(m_pVocab, m_hlayer, m_hnode, m_maxlen);
        if (m_pVocab->GetSize() != nVocabSize) {
            lout_error("[Model] ReadT: the input nVocabSize(" << nVocabSize << ") != m_pVocab->GetSize(" << m_pVocab->GetSize() << ")");
        }

        double dValue;
        fout.Scanf("m_pi=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Scanf("%lf ", &dValue);
            m_pi[i] = dValue;
        }
        fout.Scanf("]\n");
        fout.Scanf("m_logz=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Scanf("%lf ", &dValue);
            m_logz[i] = dValue;
        }
        fout.Scanf("]\n");
        fout.Scanf("m_zeta=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Scanf("%lf ", &dValue);
            m_zeta[i] = dValue;
        }
        fout.Scanf("]\n");

        int nValue = 0;
        fout.Scanf("featnum=%d\n", &nValue);
        m_value.Reset(nValue);
        SAFE_DELETE(m_pFeat);
        m_pFeat = new trf::Feat;
        m_pFeat->m_nTotalNum = nValue;
        m_pFeat->ReadT(fout, m_value.GetBuf());

        /* Init all the values */
        m_m3dVH.Reset();
        m_m3dCH.Reset();
        m_m3dHH.Reset();

        char *pLine = NULL;
        while ((pLine = fout.GetLine()) != NULL) {
            int nFeatNum = 0;
            int nRow, nCol;
            String strLabel = strtok(pLine, ": \t");
            pLine = strtok(NULL, ": \t");
            if (strLabel == "m_matVH")
            {
                // VH
                sscanf(pLine, "(num=%d*%d)", &nRow, &nCol);
                m_m3dVH.Reset(nRow, nCol, 2);
                m_m3dVH.Read(fout);
            }
            else if (strLabel == "m_matCH")
            {
                // CH
                sscanf(pLine, "(num=%d*%d)", &nRow, &nCol);
                m_m3dCH.Reset(nRow, nCol, 2);
                m_m3dCH.Read(fout);
            }
            else if (strLabel == "m_matHH")
            {
                sscanf(pLine, "(num=%d*%d)", &nRow, &nCol);
                m_m3dHH.Reset(nRow, nCol, 4);
                m_m3dHH.Read(fout);
            }
            else if (strLabel == "m_matBias")
            {
                sscanf(pLine, "(num=%d)", &nRow);
                m_matBias.Reset(nRow, 2);
                m_matBias.Read(fout);
            }
        }
    }
    void Model::WriteT(const char *pfilename)
    {
        File fout(pfilename, "wt");
        lout << "[Model] Write(txt) to " << pfilename << endl;

        fout.Print("m_vocabsize=%d\n", m_pVocab->GetSize());
        fout.Print("m_maxlen=%d\n", m_maxlen);
        fout.Print("m_hlayer=%d\n", m_hlayer);
        fout.Print("m_hnode=%d\n", m_hnode);
        fout.Print("m_pi=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_pi[i]);
        }
        fout.Print("]\n");
        fout.Print("m_logz=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_logz[i]);
        }
        fout.Print("]\n");
        fout.Print("m_zeta=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_zeta[i]);
        }
        fout.Print("]\n");

        fout.Print("featnum=%d\n", m_pFeat->GetNum());
        m_pFeat->WriteT(fout, m_value.GetBuf());

        // VH
        fout.Print("m_matVH: (num=%d*%d)\n", m_m3dVH.GetXDim(), m_m3dVH.GetYDim());
        m_m3dVH.Write(fout);

        // CH
        fout.Print("m_matCH: (num=%d*%d)\n", m_m3dCH.GetXDim(), m_m3dCH.GetYDim());
        m_m3dCH.Write(fout);

        // HH
        fout.Print("m_matHH: (num=%d*%d)\n", m_m3dHH.GetXDim(), m_m3dHH.GetYDim());
        m_m3dHH.Write(fout);

        // Bias
        fout.Print("m_matBias: (num=%d)\n", m_matBias.GetRow());
        m_matBias.Write(fout);
    }
    LogP Model::GetLogProb(VecShell<VocabID> &x, bool bNorm /* = true */)
    {
        LogP logProb = 0;
        for (int layer = 0; layer < m_hlayer; layer++) {
            AlgLayer alg(this, x, layer);
            alg.ForwardBackward(x.GetSize(), GetHiddenOrder(), GetEncodeLayerLimit());
            logProb += alg.GetLogSummation();
        }

        trf::Seq trfseq;
        trfseq.Set(x.GetBuf(), x.GetSize(), m_pVocab);
        logProb += FeatClusterSum(trfseq, 0, x.GetSize());

        if (bNorm) {
            int nLen = min(m_maxlen, x.GetSize());
            logProb = logProb - m_logz[nLen] + trf::Prob2LogP(m_pi[nLen]);
        }

        return logProb;
    }
    LogP Model::ClusterSum(Seq &seq, int nPos, int nOrder)
    {
        return FeatClusterSum(seq.x, nPos, nOrder) + HiddenClusterSum(seq, nPos, nOrder);
    }
    LogP Model::FeatClusterSum(trf::Seq &x, int nPos, int nOrder)
    {
        return trf::Model::ClusterSum(x, nPos, nOrder);
    }
    LogP Model::HiddenClusterSum(Seq &seq, int nPos, int nOrder)
    {
        LogP LogSum = 0;

        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;

        // Word * hidden
        LogSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[nPos]], seq.h[nPos]);

        if (nPos == seq.GetLen() - nOrder) { // the last cluster
            for (int i = nPos + 1; i < seq.GetLen(); i++) {
                LogSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[i]], seq.h[i]);
            }
        }

        // Class * hidden
        if (m_m3dCH.GetSize() > 0) {
            LogSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[nPos]], seq.h[nPos]);

            if (nPos == seq.GetLen() - nOrder) { // the last cluster
                for (int i = nPos + 1; i < seq.GetLen(); i++) {
                    LogSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[i]], seq.h[i]);
                }
            }
        }

        // Hidden * Hidden
        if (nOrder > 1) { // if order = 1, then there is no HH matrix
            LogSum += dfactor * SumHHWeight(m_m3dHH, seq.h[nPos], seq.h[nPos + 1]);

            if (nPos == seq.GetLen() - nOrder) { // the last cluster
                for (int i = nPos + 1; i < seq.GetLen() - 1; i++)
                    LogSum += dfactor * SumHHWeight(m_m3dHH, seq.h[i], seq.h[i + 1]);
            }
        }

        // bias
        LogSum += dfactor * SumVHWeight(m_matBias, seq.h[nPos]);

        if (nPos == seq.GetLen() - nOrder) { // the last cluster
            for (int i = nPos + 1; i < seq.GetLen(); i++) {
                LogSum += dfactor * SumVHWeight(m_matBias, seq.h[i]);
            }
        }


        return LogSum;
    }
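
    /* Cluster decomposition: a cluster {nPos, ..., nPos+nOrder-1} owns the VH/CH/bias
       terms of position nPos and the HH term between nPos and nPos+1; the last cluster
       (nPos == len - nOrder) also absorbs the terms of all remaining positions, so that
       summing ClusterSum over all clusters reproduces GetLogProb(seq, false). */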
    LogP Model::LayerClusterSum(Seq &seq, int nlayer, int nPos, int nOrder)
    {
        LogP LogSum = 0;
        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;

        // Word * hidden
        LogSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[nPos]], seq.h[nPos], nlayer);

        if (nPos == seq.GetLen() - nOrder) { // the last cluster
            for (int i = nPos + 1; i < seq.GetLen(); i++) {
                LogSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[i]], seq.h[i], nlayer);
            }
        }

        // Class * hidden
        if (m_m3dCH.GetSize() > 0) {
            LogSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[nPos]], seq.h[nPos], nlayer);

            if (nPos == seq.GetLen() - nOrder) { // the last cluster
                for (int i = nPos + 1; i < seq.GetLen(); i++) {
                    LogSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[i]], seq.h[i], nlayer);
                }
            }
        }

        // Hidden * Hidden
        if (nOrder > 1) { // if order = 1, then there is no HH matrix
            LogSum += dfactor * SumHHWeight(m_m3dHH, seq.h[nPos], seq.h[nPos + 1], nlayer);

            if (nPos == seq.GetLen() - nOrder) { // the last cluster
                for (int i = nPos + 1; i < seq.GetLen() - 1; i++)
                    LogSum += dfactor * SumHHWeight(m_m3dHH, seq.h[i], seq.h[i + 1], nlayer);
            }
        }

        // bias for hidden
        LogSum += dfactor * SumVHWeight(m_matBias, seq.h[nPos], nlayer);

        if (nPos == seq.GetLen() - nOrder) { // the last cluster
            for (int i = nPos + 1; i < seq.GetLen(); i++) {
                LogSum += dfactor * SumVHWeight(m_matBias, seq.h[i], nlayer);
            }
        }


        return LogSum;
    }
    double Model::ExactNormalize(int nLen)
    {
        int nMaxOrder = max(GetMaxOrder(), GetHiddenOrder());
        int nIterDim = min(nMaxOrder, nLen);


        /* The exact Z_1 is needed in the joint SA algorithm,
           so calculate Z_1 in a different way.
        */
        if (nLen == 1) {
            double dLogSum = trf::LogP_zero;
            for (VocabID x = m_pVocab->IterBeg(); x <= m_pVocab->IterEnd(); x++) {
                trf::Seq xseq;
                VocabID cid = m_pVocab->GetClass(x);
                xseq.Set(&x, 1, m_pVocab);
                double d1 = FeatClusterSum(xseq, 0, 1);
                double d2 = 0;
                for (int k = 0; k < m_hlayer * m_hnode; k++) {
                    /* After introducing the CH matrix, revise the equation !!! */
                    if (cid != trf::VocabID_none && m_m3dCH.GetSize() > 0) {
                        d2 += trf::Log_Sum(m_matBias[k][0] + m_m3dVH[x][k][0] + m_m3dCH[cid][k][0],
                            m_matBias[k][1] + m_m3dVH[x][k][1] + m_m3dCH[cid][k][1]);
                    }
                    else { // if cid == VocabID_none, there is no class information
                        d2 += trf::Log_Sum(m_matBias[k][0] + m_m3dVH[x][k][0], m_matBias[k][1] + m_m3dVH[x][k][1]);
                    }
                }
                dLogSum = trf::Log_Sum(dLogSum, d1 + d2);
            }
            m_logz[nLen] = dLogSum;
        }
        else {
            int nEncoderLimit = GetEncodeNodeLimit();
            // forward-backward
            m_nodeCal.ForwardBackward(nLen, nMaxOrder, nEncoderLimit);

            m_logz[nLen] = m_nodeCal.GetLogSummation();
        }

        return m_logz[nLen];

    }
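
    /* For nLen == 1 the loop above computes, in the log domain,
         Z_1 = sum_x exp(phi(x)) * prod_k ( e^{b_k(0)+VH(0)(+CH(0))} + e^{b_k(1)+VH(1)(+CH(1))} ),
       i.e. with a single position the hidden nodes decouple and can be summed out
       node by node, avoiding the forward-backward pass. */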
    void Model::ExactNormalize()
    {
        for (int len = 1; len <= m_maxlen; len++) {
            ExactNormalize(len);
            m_zeta[len] = m_logz[len] - m_logz[1];
            //lout << " logZ[" << len << "] = " << m_logz[len] << endl;
        }
    }
    LogP Model::GetMarginalLogProb(int nLen, int nPos, Seq &sub, bool bNorm /* = true */)
    {
        // The forward-backward algorithm must have been performed beforehand.

        if (nPos + sub.GetLen() > nLen) {
            lout_error("[Model] GetMarginalLogProb: nPos(" << nPos << ")+nOrder(" << sub.GetLen() << ") > seq.len(" << nLen << ")!!");
        }

        // encode the sub-sequence
        Vec<int> nsub(sub.GetLen());
        EncodeNode(nsub, sub);

        LogP dSum = m_nodeCal.GetMarginalLogProb(nPos, nsub.GetBuf(), nsub.GetSize());

        return (bNorm) ? dSum - m_logz[nLen] : dSum;
    }

    void Model::GetNodeExp(double *pExp, Prob *pLenProb /* = NULL */)
    {
        if (pLenProb == NULL)
            pLenProb = m_pi.GetBuf();
        VecShell<double> exp(pExp, GetParamNum());
        Vec<double> expTemp(GetParamNum());

        double *p = expTemp.GetBuf();
        VecShell<double> featexp;
        Mat3dShell<double> VHexp, CHexp, HHexp;
        MatShell<double> Bexp;
        BufMap(p, featexp, VHexp, CHexp, HHexp, Bexp);

        exp.Fill(0);
        for (int len = 1; len <= m_maxlen; len++) {

            int nMaxOrder = max(GetMaxOrder(), GetHiddenOrder());
            m_nodeCal.ForwardBackward(len, nMaxOrder, GetEncodeNodeLimit());

            GetNodeExp(len, featexp, VHexp, CHexp, HHexp, Bexp);
            // GetNodeExp_feat(len, featexp);
            // GetNodeExp_VH(len, VHexp);
            // GetNodeExp_HH(len, HHexp);

            for (int i = 0; i < exp.GetSize(); i++) {
                exp[i] += pLenProb[len] * expTemp[i];
            }
        }
    }
    void Model::GetNodeExp(int nLen, double *pExp)
    {
        VecShell<double> featexp;
        Mat3dShell<double> VHexp, CHexp, HHexp;
        MatShell<double> Bexp;
        BufMap(pExp, featexp, VHexp, CHexp, HHexp, Bexp);
        GetNodeExp(nLen, featexp, VHexp, CHexp, HHexp, Bexp);
    }
    void Model::GetNodeExp(int nLen, VecShell<double> featexp,
        Mat3dShell<double> VHexp, Mat3dShell<double> CHexp, Mat3dShell<double> HHexp,
        MatShell<double> Bexp)
    {
        // make sure the forward-backward has been performed.
        featexp.Fill(0);
        VHexp.Fill(0);
        CHexp.Fill(0);
        HHexp.Fill(0);
        Bexp.Fill(0);

        //double dfactor = 1.0 / nLen;
        double dfactor = 1.0;

        //int nMaxOrder = m_nodeCal.m_nOrder;
        int nClusterNum = nLen - m_nodeCal.m_nOrder + 1;
        int nClusterDim = m_nodeCal.m_nOrder;
        if (nClusterNum < 1) {
            nClusterNum = 1;
            nClusterDim = nLen;
        }

        Vec<int> nseq(nLen);
        Seq seq(nLen, m_hlayer, m_hnode);

        // loop over the positions pos
        for (int pos = 0; pos < nClusterNum; pos++) {
            // enumerate all the value configurations of the cluster
            trf::VecIter iter(nseq.GetBuf() + pos, nClusterDim, 0, GetEncodeNodeLimit() - 1);
            while (iter.Next()) {
                DecodeNode(nseq, seq, pos, nClusterDim);
                Prob prob = trf::LogP2Prob(m_nodeCal.GetMarginalLogProb(pos, nseq.GetBuf() + pos, nClusterDim, m_logz[nLen]));

                // the clusters before the last one
                Array<int> afeat;
                for (int n = 1; n <= nClusterDim; n++) {
                    m_pFeat->Find(afeat, seq.x, pos, n);
                }
                for (int i = 0; i < afeat.GetNum(); i++) {
                    featexp[afeat[i]] += prob;
                }

                VocabID x = seq.wseq()[pos];
                for (int k = 0; k < m_hlayer*m_hnode; k++) {
                    VHexp[x][k][(int)(seq.h[pos][k])] += dfactor * prob;
                }
                if (m_pVocab->GetClassNum() > 0) {
                    VocabID c = seq.cseq()[pos];
                    for (int k = 0; k < m_hlayer*m_hnode; k++) {
                        CHexp[c][k][(int)(seq.h[pos][k])] += dfactor * prob;
                    }
                }
                if (nClusterDim > 1) {
                    for (int l = 0; l < m_hlayer; l++) {
                        for (int a = 0; a < m_hnode; a++) {
                            for (int b = 0; b < m_hnode; b++) {
                                HHexp[l*m_hnode + a][b][HHMap(seq.h[pos][l*m_hnode + a], seq.h[pos + 1][l*m_hnode + b])] += dfactor * prob;
                            }
                        }
                    }
                }
                for (int k = 0; k < m_hlayer*m_hnode; k++) {
                    Bexp[k][(int)(seq.h[pos][k])] += dfactor * prob;
                }


                // the last cluster
                if (pos == nClusterNum - 1) {
                    afeat.Clean();
                    for (int ii = 1; ii < nClusterDim; ii++) { // position ii
                        for (int n = 1; n <= nClusterDim - ii; n++) { // order n
                            m_pFeat->Find(afeat, seq.x, pos + ii, n);
                        }
                    }
                    for (int i = 0; i < afeat.GetNum(); i++) {
                        featexp[afeat[i]] += prob;
                    }

                    for (int ii = 1; ii < nClusterDim; ii++) {
                        VocabID x = seq.wseq()[pos + ii];
                        for (int k = 0; k < m_hlayer*m_hnode; k++) {
                            VHexp[x][k][seq.h[pos + ii][k]] += dfactor * prob;
                        }
                    }
                    if (m_pVocab->GetClassNum() > 0) {
                        for (int ii = 1; ii < nClusterDim; ii++) {
                            VocabID c = seq.cseq()[pos + ii];
                            for (int k = 0; k < m_hlayer*m_hnode; k++) {
                                CHexp[c][k][seq.h[pos + ii][k]] += dfactor * prob;
                            }
                        }
                    }
                    for (int ii = 1; ii < nClusterDim - 1; ii++) {
                        for (int l = 0; l < m_hlayer; l++) {
                            for (int a = 0; a < m_hnode; a++) {
                                for (int b = 0; b < m_hnode; b++) {
                                    HHexp[l*m_hnode + a][b][HHMap(seq.h[pos + ii][l*m_hnode + a], seq.h[pos + ii + 1][l*m_hnode + b])] += dfactor * prob;
                                }
                            }
                        }
                    }
                    for (int ii = 1; ii < nClusterDim; ii++) {
                        for (int k = 0; k < m_hlayer*m_hnode; k++) {
                            Bexp[k][seq.h[pos + ii][k]] += dfactor * prob;
                        }
                    }
                }
            }
        }
    }
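
    /* GetNodeExp enumerates every value configuration of each cluster, decodes it back
       to (w, c, h), weights it by its exact marginal probability under the current
       forward-backward pass, and accumulates the weighted counts; the result is the
       model expectation E_p[f] for the features and all VH/CH/HH/bias entries. */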

    void Model::GetHiddenExp(VecShell<int> x, double *pExp)
    {

        VecShell<double> featexp;
        Mat3dShell<double> VHexp, CHexp, HHexp;
        MatShell<double> Bexp;
        BufMap(pExp, featexp, VHexp, CHexp, HHexp, Bexp);


        int nLen = x.GetSize();
        int nMaxOrder = GetHiddenOrder();

        for (int layer = 0; layer < m_hlayer; layer++) {
            AlgLayer fb(this, x, layer);
            // forward-backward
            fb.ForwardBackward(nLen, nMaxOrder, GetEncodeLayerLimit());
            // get the normalization constant
            LogP logz = fb.GetLogSummation();
            // get the expectation
            GetLayerExp(fb, layer, VHexp, CHexp, HHexp, Bexp, logz);
        }

        // get the feature expectation
        trf::Seq trfseq(nLen);
        trfseq.Set(x.GetBuf(), nLen, m_pVocab);
        trf::Model::FeatCount(trfseq, featexp.GetBuf());
    }

    void Model::GetLayerExp(AlgLayer &fb, int nLayer,
        Mat3dShell<double> &VHexp, Mat3dShell<double> &CHexp, Mat3dShell<double> &HHexp, MatShell<double> &Bexp,
        LogP logz /* = 0 */)
    {
        /* Don't clean the buffer!!!! */
        //int nMaxOrder = GetHiddenOrder();
        int nLen = fb.m_nLen;
        int nClusterNum = nLen - fb.m_nOrder + 1;
        int nClusterDim = fb.m_nOrder;
        if (nClusterNum < 1) {
            nClusterNum = 1;
            nClusterDim = nLen;
        }
        //double dfactor = 1.0 / nLen;
        double dfactor = 1.0;
        Vec<int> hseq(nLen);
        Mat<HValue> h(nLen, m_hlayer * m_hnode);
        for (int pos = 0; pos < nClusterNum; pos++) {
            // enumerate all the value configurations of the cluster
            trf::VecIter iter(hseq.GetBuf() + pos, nClusterDim, 0, GetEncodeLayerLimit() - 1);
            while (iter.Next()) {
                DecodeLayer(hseq, h, nLayer, pos, nClusterDim);
                Prob prob = trf::LogP2Prob(fb.GetMarginalLogProb(pos, hseq.GetBuf() + pos, nClusterDim, logz)); // the prob of the current cluster

                // the clusters before the last one
                VocabID x = fb.m_seq.wseq()[pos];
                for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                    VHexp[x][k][h[pos][k]] += dfactor * prob;
                }
                if (m_pVocab->GetClassNum() > 0) {
                    VocabID c = fb.m_seq.cseq()[pos];
                    for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                        CHexp[c][k][h[pos][k]] += dfactor * prob;
                    }
                }
                if (nClusterDim > 1) {
                    for (int a = 0; a < m_hnode; a++) {
                        for (int b = 0; b < m_hnode; b++) {
                            HHexp[nLayer*m_hnode + a][b][HHMap(h[pos][nLayer*m_hnode + a], h[pos + 1][nLayer*m_hnode + b])] += dfactor * prob;
                        }
                    }
                }
                for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                    Bexp[k][h[pos][k]] += dfactor * prob;
                }

                // the last cluster
                if (pos == nClusterNum - 1) {
                    for (int ii = 1; ii < nClusterDim; ii++) {
                        VocabID x = fb.m_seq.wseq()[pos + ii];
                        for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                            VHexp[x][k][h[pos + ii][k]] += dfactor * prob;
                        }
                        if (m_pVocab->GetClassNum() > 0) {
                            VocabID c = fb.m_seq.cseq()[pos + ii];
                            for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                                CHexp[c][k][h[pos + ii][k]] += dfactor * prob;
                            }
                        }
                        for (int k = nLayer*m_hnode; k < nLayer*m_hnode + m_hnode; k++) {
                            Bexp[k][h[pos + ii][k]] += dfactor * prob;
                        }
                    }
                    for (int ii = 1; ii < nClusterDim - 1; ii++) {
                        for (int a = 0; a < m_hnode; a++) {
                            for (int b = 0; b < m_hnode; b++) {
                                HHexp[nLayer*m_hnode + a][b][HHMap(h[pos + ii][nLayer*m_hnode + a], h[pos + ii + 1][nLayer*m_hnode + b])] += dfactor * prob;
                            }
                        }
                    }
                }
            }
        }
    }

    void Model::Sample(Seq &seq)
    {
        LocalJump(seq);
        MarkovMove(seq);
    }
    void Model::LocalJump(Seq &seq)
    {
        int nOldLen = seq.GetLen();
        int nNewLen = 0;
        LogP j1 = ProposeLength(nOldLen, nNewLen, true);
        LogP j2 = ProposeLength(nNewLen, nOldLen, false);

        if (nNewLen == nOldLen)
            return;

        LogP logpAcc = 0;
        if (nNewLen == nOldLen + 1) {
            LogP logpold = GetLogProb(seq);
            seq.Reset(nNewLen, seq.GetHlayer(), seq.GetHnode());
            LogP Q = ProposeH0(seq.h[nNewLen - 1], seq, nNewLen - 1, true);
            LogP R = ProposeC0(seq.cseq()[nNewLen - 1], seq, nNewLen - 1, true);
            LogP G = SampleW(seq, nNewLen - 1);
            LogP logpnew = GetLogProb(seq);

            logpAcc = (j2 - j1) + logpnew - (logpold + Q + R + G);
        }
        else if (nNewLen == nOldLen - 1) {
            LogP logpold = GetLogProb(seq);
            LogP Q = ProposeH0(seq.h[nOldLen - 1], seq, nOldLen - 1, false);
            LogP R = ProposeC0(seq.cseq()[nOldLen - 1], seq, nOldLen - 1, false);
            LogP G = SampleW(seq, nOldLen - 1, false);

            seq.Reset(nNewLen, seq.GetHlayer(), seq.GetHnode());
            LogP logpnew = GetLogProb(seq);

            logpAcc = (j2 - j1) + logpnew + Q + R + G - logpold;
        }
        else if (nNewLen != nOldLen) {
            lout_error("[Model] Sample: nNewLen(" << nNewLen << ") and nOldLen(" << nOldLen << ")");
        }


        if (trf::Acceptable(trf::LogP2Prob(logpAcc))) {
            seq.Reset(nNewLen, seq.GetHlayer(), seq.GetHnode());
            m_nLenJumpAccTimes++;
        }
        else {
            seq.Reset(nOldLen, seq.GetHlayer(), seq.GetHnode());
        }
        m_nLenJumpTotalTime++;

    }
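
    /* LocalJump is a Metropolis-Hastings move in the sequence length: j1/j2 are the
       forward/backward length-proposal log-probabilities, Q, R and G the proposal
       log-probabilities of the new hidden vector, class and word, and the jump is
       accepted with probability min(1, exp(logpAcc)). */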
    void Model::MarkovMove(Seq &seq)
    {
        /* Gibbs sampling */
        SampleHAndCGivenX(seq);
        for (int nPos = 0; nPos < seq.GetLen(); nPos++) {
            SampleC(seq, nPos);
            SampleW(seq, nPos);
        }
        //SampleHAndCGivenX(seq);
    }

    LogP Model::ProposeLength(int nOld, int &nNew, bool bSample)
    {
        if (bSample) {
            nNew = trf::LineSampling(m_matLenJump[nOld].GetBuf(), m_maxlen + 1);
        }

        return trf::Prob2LogP(m_matLenJump[nOld][nNew]);
    }
    LogP Model::ProposeH0(VecShell<HValue> &hi, Seq &seq, int nPos, bool bSample)
    {
        /* Note:
           nPos may be beyond the current length of seq, i.e. nPos >= seq.GetLen(),
           as we may propose a new position past the end of the sequence.
        */

        if (nPos + 1 > seq.GetLen()) {
            seq.Reset(nPos + 1, seq.GetHlayer(), seq.GetHnode());
        }

        Vec<LogP> logps(m_hlayer*m_hnode);
        ProposeHProbs(logps, seq, nPos);


        /* Sample */
        if (bSample) {
            for (int i = 0; i < logps.GetSize(); i++) {
                hi[i] = trf::Acceptable(trf::LogP2Prob(logps[i])) ? 1.0f : 0.0f;
            }
        }

        /* Get the probs */
        LogP resLogp = GetConditionalProbForH(hi, logps);


        return resLogp;
    }
    LogP Model::ProposeC0(VocabID &ci, Seq &seq, int nPos, bool bSample)
    {
        /* if there are no classes, then return 0 */
        if (m_pVocab->GetClassNum() == 0) {
            ci = trf::VocabID_none;
            return 0;
        }

        Vec<LogP> vlogps(m_pVocab->GetClassNum());
        ProposeCProbs(vlogps, seq, nPos);

        if (bSample) {
            ci = trf::LogLineSampling(vlogps.GetBuf(), vlogps.GetSize());
        }

        return vlogps[ci];
    }
    void Model::ProposeHProbs(VecShell<LogP> &logps, Seq &seq, int nPos, bool bConsiderXandC /* = false */)
    {
        logps.Fill(0);
        Mat<LogP> matLogp(m_hlayer*m_hnode, 2);
        matLogp.Fill(0);

        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;

        // HH connection
        if (nPos - 1 >= 0 && nPos - 1 <= seq.GetLen() - 1) {
            for (int l = 0; l < m_hlayer; l++) {
                for (int i = 0; i < m_hnode; i++) {
                    HValue curh = seq.h[nPos - 1][l*m_hnode + i];
                    for (int j = 0; j < m_hnode; j++) {
                        matLogp.Get(l*m_hnode + j, 0) += dfactor * m_m3dHH.Get(l*m_hnode + i, j, HHMap(curh, 0));
                        matLogp.Get(l*m_hnode + j, 1) += dfactor * m_m3dHH.Get(l*m_hnode + i, j, HHMap(curh, 1));
                    }
                }
            }
        }
        if (nPos + 1 <= seq.GetLen() - 1) {
            for (int l = 0; l < m_hlayer; l++) {
                for (int i = 0; i < m_hnode; i++) {
                    HValue curh = seq.h[nPos + 1][l*m_hnode + i];
                    for (int j = 0; j < m_hnode; j++) {
                        matLogp.Get(l*m_hnode + j, 0) += dfactor * m_m3dHH.Get(l*m_hnode + j, i, HHMap(0, curh));
                        matLogp.Get(l*m_hnode + j, 1) += dfactor * m_m3dHH.Get(l*m_hnode + j, i, HHMap(1, curh));
                    }
                }
            }
        }

        /* bias for H */
        for (int i = 0; i < m_hlayer*m_hnode; i++) {
            matLogp[i][0] += dfactor * m_matBias[i][0];
            matLogp[i][1] += dfactor * m_matBias[i][1];
        }

        if (bConsiderXandC) {
            /* Consider the VH matrix */
            for (int i = 0; i < m_hlayer*m_hnode; i++) {
                matLogp[i][0] += dfactor * m_m3dVH[seq.wseq()[nPos]][i][0];
                matLogp[i][1] += dfactor * m_m3dVH[seq.wseq()[nPos]][i][1];
            }
            if (m_m3dCH.GetSize() > 0) {
                /* Consider the CH matrix */
                for (int i = 0; i < m_hlayer*m_hnode; i++) {
                    matLogp[i][0] += dfactor * m_m3dCH[seq.cseq()[nPos]][i][0];
                    matLogp[i][1] += dfactor * m_m3dCH[seq.cseq()[nPos]][i][1];
                }
            }
        }

        /*
           Get the probs
        */
        for (int i = 0; i < m_hlayer*m_hnode; i++) {
            //logps[i] = logps[i] - Log_Sum(logps[i], 0);
            logps[i] = matLogp[i][1] - trf::Log_Sum(matLogp[i][1], matLogp[i][0]);
        }
    }
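
    /* For each hidden node i, matLogp[i][v] accumulates the log-potentials of setting
       h_i = v given its neighbours (and optionally the current x and c), so the final
         logps[i] = matLogp[i][1] - log( e^{matLogp[i][0]} + e^{matLogp[i][1]} )
       is log P(h_i = 1 | rest), a sigmoid evaluated in the log domain. */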
    void Model::ProposeCProbs(VecShell<LogP> &logps, Seq &seq, int nPos)
    {
        VocabID savecid = seq.cseq()[nPos];
        for (int cid = 0; cid < m_pVocab->GetClassNum(); cid++) {
            seq.cseq()[nPos] = cid;
            logps[cid] = GetReducedModelForC(seq, nPos);
        }
        seq.cseq()[nPos] = savecid;
        trf::LogLineNormalize(logps.GetBuf(), m_pVocab->GetClassNum());
    }
    LogP Model::GetReducedModelForH(Seq &seq, int nPos)
    {
        // Only consider the HH matrix, as the VH matrix has been considered in GetLogWeightSumForW
        LogP logSum = 0;
        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;
        // Hidden * Hidden
        for (int i = max(0, nPos - 1); i <= min(seq.GetLen() - 2, nPos); i++) {
            logSum += dfactor * SumHHWeight(m_m3dHH, seq.h[i], seq.h[i + 1]);
        }
        // consider the bias for H
        logSum += dfactor * SumVHWeight(m_matBias, seq.h[nPos]);
        return logSum;
    }
    LogP Model::GetReducedModelForC(Seq &seq, int nPos)
    {
        // class features
        LogP logSum = trf::Model::GetReducedModelForC(seq.x, nPos);

        // CH
        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;
        if (m_m3dCH.GetSize() > 0) {
            logSum += dfactor * SumVHWeight(m_m3dCH[seq.cseq()[nPos]], seq.h[nPos]);
        }

        return logSum;
    }
    LogP Model::GetReducedModelForW(Seq &seq, int nPos)
    {
        // word features
        LogP logSum = trf::Model::GetReducedModelForW(seq.x, nPos);
        // VH
        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;
        logSum += dfactor * SumVHWeight(m_m3dVH[seq.wseq()[nPos]], seq.h[nPos]);
        return logSum;
    }
    LogP Model::GetConditionalProbForH(VecShell<HValue> &hi, VecShell<Prob> &logps)
    {
        /* Get the probs */
        LogP resLogp = 0;
        for (int i = 0; i < hi.GetSize(); i++) {
            resLogp += (hi[i] == 0) ? trf::Log_Sub(0, logps[i]) : logps[i];
        }

        return resLogp;
    }
    LogP Model::GetMarginalProbOfC(Seq &seq, int nPos)
    {
        LogP resLogp = trf::LogP_zero;

        Array<VocabID> *pXs = m_pVocab->GetWord(seq.cseq()[nPos]);

        VocabID saveX = seq.wseq()[nPos];
        for (int i = 0; i < pXs->GetNum(); i++) {
            seq.wseq()[nPos] = pXs->Get(i);
            /* Only need to calculate the summation of the weights depending on x[nPos], c[nPos] */
            /* used to sample the c_i, with h fixed */
            resLogp = trf::Log_Sum(resLogp, GetReducedModelForW(seq, nPos) + GetReducedModelForC(seq, nPos));
            //resLogp = Log_Sum(resLogp, GetLogProb(seq, false));
        }
        seq.wseq()[nPos] = saveX;

        return resLogp;
    }
    void Model::SampleC(Seq &seq, int nPos)
    {
        if (m_pVocab->GetClassNum() == 0) {
            seq.cseq()[nPos] = trf::VocabID_none;
            return;
        }

        /* Sample C0 */
        Vec<LogP> vlogps_c(m_pVocab->GetClassNum());
        ProposeCProbs(vlogps_c, seq, nPos);
        VocabID ci = seq.cseq()[nPos];
        VocabID C0 = trf::LogLineSampling(vlogps_c.GetBuf(), vlogps_c.GetSize());
        LogP logpRi = vlogps_c[ci];
        LogP logpR0 = vlogps_c[C0];


        /* Calculate the probability p_t(h, c) */
        seq.cseq()[nPos] = ci;
        LogP Logp_ci = GetMarginalProbOfC(seq, nPos);
        seq.cseq()[nPos] = C0;
        LogP Logp_C0 = GetMarginalProbOfC(seq, nPos);

        LogP acclogp = logpRi + Logp_C0 - (logpR0 + Logp_ci);

        m_nSampleHTotalTimes++;
        if (trf::Acceptable(trf::LogP2Prob(acclogp))) {
            m_nSampleHAccTimes++;
            seq.cseq()[nPos] = C0;
        }
        else {
            seq.cseq()[nPos] = ci;
        }
    }
    LogP Model::SampleW(Seq &seq, int nPos, bool bSample /* = true */)
    {
        /*
           This function calculates G(w_i | w_{other}, c, h).
           If bSample is true, draw a sample for w_i;
           otherwise, only calculate the conditional probability.
        */
        if (nPos >= seq.GetLen()) {
            lout_error("[Model] SampleW: the nPos(" << nPos << ") >= the length of the sequence(" << seq.GetLen() << ")");
        }

        Array<VocabID> *pXs = m_pVocab->GetWord(seq.cseq()[nPos]);
        Array<LogP> aLogps;

        VocabID nSaveX = seq.wseq()[nPos]; // save x[nPos]
        for (int i = 0; i < pXs->GetNum(); i++) {
            seq.wseq()[nPos] = pXs->Get(i);
            /* To reduce the computational cost, instead of GetLogProb,
               we just need to calculate the summation of the weights depending on w[nPos]
            */
            aLogps[i] = GetReducedModelForW(seq, nPos);
        }
        trf::LogLineNormalize(aLogps, pXs->GetNum());

        int idx;
        if (bSample) {
            /* sample a value for x[nPos] */
            idx = trf::LogLineSampling(aLogps, pXs->GetNum());
            seq.wseq()[nPos] = pXs->Get(idx);
        }
        else {
            idx = pXs->Find(nSaveX); // find nSaveX in the array.
            seq.wseq()[nPos] = nSaveX;
            if (idx == -1) {
                lout_error("Can't find the VocabID(" << nSaveX << ") in the array.\n"
                    << "This may be because word(" << nSaveX << ") does not belong to class(" << seq.cseq()[nPos] << ")");
            }
        }

        return aLogps[idx];
    }
    LogP Model::SampleHAndCGivenX(Seq &seq, MatShell<HValue> *tagH /* = NULL */)
    {
        LogP totallogProb = 0;

        /* set class */
        m_pVocab->GetClass(seq.cseq(), seq.wseq(), seq.GetLen());

        /* sample h */
        for (int nPos = 0; nPos < seq.GetLen(); nPos++) {
            Vec<LogP> vlogps_h(m_hlayer * m_hnode);
            ProposeHProbs(vlogps_h, seq, nPos, true);

            Vec<HValue> hsample(m_hlayer * m_hnode);
            if (tagH) {
                hsample.Copy((*tagH)[nPos]);
            }
            else { /* sampling */
                for (int i = 0; i < hsample.GetSize(); i++) {
                    hsample[i] = trf::Acceptable(trf::LogP2Prob(vlogps_h[i])) ? 1.0f : 0.0f;
                }

            }
            seq.h[nPos] = hsample;

            LogP logprob = GetConditionalProbForH(hsample, vlogps_h);
            totallogProb += logprob;
        }
        return totallogProb;
    }

    void Model::RandSeq(Seq &seq, int nLen /* = -1 */)
    {
        if (nLen == -1) {
            seq.Reset(rand() % GetMaxLen() + 1, m_hlayer, m_hnode);
        }
        else {
            seq.Reset(nLen, m_hlayer, m_hnode);
        }

        /* randomly set h */
        for (int i = 0; i < seq.h.GetRow(); i++) {
            for (int k = 0; k < seq.h.GetCol(); k++) {
                seq.h[i][k] = rand() % 2;
            }
        }

        seq.x.Random(m_pVocab);
    }
    void Model::RandHidden(Seq &seq)
    {
        /* randomly set h */
        for (int i = 0; i < seq.h.GetRow(); i++) {
            for (int k = 0; k < seq.h.GetCol(); k++) {
                seq.h[i][k] = rand() % 2;
            }
        }
    }

    int Model::EncodeNode(VocabID xi, VocabID ci, VecShell<HValue> &hi)
    {
        int hnum = EncodeHidden(hi);

        return hnum * m_pVocab->GetSize() + xi;
    }
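
    /* The node code packs (x_i, h_i) into a single integer n = EncodeHidden(h_i) * |V| + x_i.
       For example, assuming |V| = 10000, x_i = 42 with hidden code 5 gives n = 50042;
       DecodeNode recovers x_i = n % |V| and the hidden code n / |V|, and looks up c_i from x_i. */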
    void Model::EncodeNode(VecShell<int> &vn, Seq &seq, int nPos /* = 0 */, int nDim /* = -1 */)
    {
        nDim = (nDim == -1) ? seq.GetLen() - nPos : nDim;
        for (int i = nPos; i < nPos + nDim; i++) {
            vn[i] = EncodeNode(seq.wseq()[i], seq.cseq()[i], seq.h[i]);
        }
    }
    void Model::DecodeNode(int n, VocabID &xi, VocabID &ci, VecShell<HValue> &hi)
    {
        int hnum = n / m_pVocab->GetSize();

        xi = n % m_pVocab->GetSize();
        ci = m_pVocab->GetClass(xi);
        DecodeHidden(hnum, hi);
    }
    void Model::DecodeNode(VecShell<int> &vn, Seq &seq, int nPos /* = 0 */, int nDim /* = -1 */)
    {
        nDim = (nDim == -1) ? vn.GetSize() - nPos : nDim;
        for (int i = nPos; i < nPos + nDim; i++) {
            DecodeNode(vn[i], seq.wseq()[i], seq.cseq()[i], seq.h[i]);
        }
    }
    int Model::GetEncodeNodeLimit() const
    {
        return GetEncodeHiddenLimit() * m_pVocab->GetSize();
    }
    int Model::EncodeHidden(VecShell<HValue> hi)
    {
        int hnum = 0;
        for (int i = 0; i < hi.GetSize(); i++) {
            hnum += (int)hi[i] * (1 << i);
        }

        return hnum;
    }
    void Model::DecodeHidden(int n, VecShell<HValue> hi)
    {
        for (int i = 0; i < hi.GetSize(); i++) {
            hi[i] = n % 2;
            n >>= 1;
        }
    }
    void Model::DecodeHidden(VecShell<int> &vn, Mat<HValue> &h, int nPos /* = 0 */, int nDim /* = -1 */)
    {
        nDim = (nDim == -1) ? vn.GetSize() - nPos : nDim;
        for (int i = nPos; i < nPos + nDim; i++) {
            DecodeHidden(vn[i], h[i]);
        }
    }
    int Model::GetEncodeHiddenLimit() const
    {
        /* if m_hnode is too large, the value overflows the maximum int */
        if (m_hnode >= 30) {
            lout_error("[Model] GetEncodeHiddenLimit: overflow! m_hnode = " << m_hnode);
        }
        return 1 << (m_hlayer * m_hnode);
    }
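
    /* EncodeHidden reads the hidden vector as a little-endian bit string:
       h = (1,0,1) gives hnum = 1*1 + 0*2 + 1*4 = 5, and DecodeHidden(5, h) restores (1,0,1).
       There are therefore 2^(hlayer*hnode) hidden codes in total (GetEncodeHiddenLimit)
       and 2^hnode codes per layer (GetEncodeLayerLimit below). */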
    void Model::DecodeLayer(VecShell<int> &vn, Mat<HValue> &h, int layer, int nPos /* = 0 */, int nDim /* = -1 */)
    {
        nDim = (nDim == -1) ? vn.GetSize() - nPos : nDim;
        for (int i = nPos; i < nPos + nDim; i++) {
            DecodeHidden(vn[i], h[i].GetSub(layer*m_hnode, m_hnode));
        }
    }
    int Model::GetEncodeLayerLimit() const
    {
        return 1 << m_hnode;
    }

    void Model::FeatCount(Seq &seq, VecShell<double> featcount,
        Mat3dShell<double> VHcount, Mat3dShell<double> CHcount, Mat3dShell<double> HHcount,
        MatShell<double> Bcount, double dadd /* = 1 */)
    {
        trf::Model::FeatCount(seq.x, featcount.GetBuf(), dadd);

        HiddenFeatCount(seq, VHcount, CHcount, HHcount, Bcount, dadd);
    }
    void Model::HiddenFeatCount(Seq &seq,
        Mat3dShell<double> VHcount, Mat3dShell<double> CHcount, Mat3dShell<double> HHcount,
        MatShell<double> Bcount, double dadd /* = 1 */)
    {
        //double dfactor = 1.0 / seq.GetLen();
        double dfactor = 1.0;
        /* VH count */
        for (int i = 0; i < seq.GetLen(); i++) {
            for (int k = 0; k < m_hlayer*m_hnode; k++) {
                VHcount[seq.wseq()[i]][k][seq.h[i][k]] += dfactor * dadd;
            }
        }

        /* CH count */
        if (m_pVocab->GetClassNum() > 0) {
            for (int i = 0; i < seq.GetLen(); i++) {
                for (int k = 0; k < m_hlayer*m_hnode; k++) {
                    CHcount[seq.cseq()[i]][k][seq.h[i][k]] += dfactor * dadd;
                }
            }
        }

        /* HH count */
        for (int i = 0; i < seq.GetLen() - 1; i++) {
            for (int l = 0; l < m_hlayer; l++) {
                for (int a = 0; a < m_hnode; a++) {
                    for (int b = 0; b < m_hnode; b++) {
                        HHcount.Get(l * m_hnode + a, b, HHMap(seq.h.Get(i, l*m_hnode + a), seq.h.Get(i + 1, l*m_hnode + b))) += dfactor * dadd;
                    }
                }
            }
        }

        /* Bias count */
        for (int i = 0; i < seq.GetLen(); i++) {
            for (int k = 0; k < m_hlayer*m_hnode; k++) {
                Bcount[k][seq.h[i][k]] += dfactor * dadd;
            }
        }
    }
    void Model::FeatCount(Seq &seq, VecShell<double> count, double dadd /* = 1 */)
    {
        VecShell<double> featcount;
        Mat3dShell<double> VHcount, CHcount, HHcount;
        MatShell<double> Bcount;
        BufMap(count.GetBuf(), featcount, VHcount, CHcount, HHcount, Bcount);
        FeatCount(seq, featcount, VHcount, CHcount, HHcount, Bcount, dadd);
    }

    PValue Model::SumVHWeight(MatShell<PValue> m, VecShell<HValue> h)
    {
        PValue dsum = 0;
        for (int i = 0; i < h.GetSize(); i++) {
            dsum += m[i][(int)h[i]];
        }
        return dsum;
    }
    PValue Model::SumHHWeight(Mat3dShell<PValue> m, VecShell<HValue> h1, VecShell<HValue> h2)
    {
        PValue dsum = 0;

        for (int k = 0; k < m_hlayer; k++) {
            for (int i = 0; i < m_hnode; i++) {
                for (int j = 0; j < m_hnode; j++)
                {
                    dsum += m.Get(k*m_hnode + i, j, HHMap(h1[k*m_hnode + i], h2[k*m_hnode + j]));
                }
            }
        }
        return dsum;
    }
    PValue Model::SumVHWeight(MatShell<PValue> m, VecShell<HValue> h, int layer)
    {
        PValue dsum = 0;
        for (int i = layer * m_hnode; i < layer * m_hnode + m_hnode; i++) {
            dsum += m[i][(int)h[i]];
        }
        return dsum;
    }
    PValue Model::SumHHWeight(Mat3dShell<PValue> m, VecShell<HValue> h1, VecShell<HValue> h2, int layer)
    {
        PValue dsum = 0;

        int k = layer;
        for (int i = 0; i < m_hnode; i++) {
            for (int j = 0; j < m_hnode; j++)
            {
                dsum += m.Get(k*m_hnode + i, j, HHMap(h1[k*m_hnode + i], h2[k*m_hnode + j]));
            }
        }

        return dsum;
    }

    void Model::PerformSAMS(int nMinibatch, int tmax, int t0, int beta, double zgap)
    {
        int nThread = omp_get_max_threads();
        Mat<double> m_matSampleLen(nThread, GetMaxLen() + 1);
        Vec<double> m_vecSampleLen(GetMaxLen() + 1);

        // one sequence for each thread
        Array<Seq*> aSeqs;
        for (int i = 0; i < nThread; i++) {
            aSeqs[i] = new Seq;
            RandSeq(*aSeqs[i]);
        }


        Vec<LogP> zeta;
        zeta.Copy(m_zeta);

        for (int t = 1; t <= tmax; t++) {
            m_matSampleLen.Fill(0);
            m_vecSampleLen.Fill(0);
            // sampling
#pragma omp parallel for
            for (int m = 0; m < nMinibatch; m++) {
                int tid = omp_get_thread_num();
                this->Sample(*aSeqs[tid]);
                int nLen = min(GetMaxLen(), aSeqs[tid]->GetLen());
                m_matSampleLen[tid][nLen]++;
            }

            // count
            for (int i = 0; i < nThread; i++) {
                m_vecSampleLen += m_matSampleLen[i];
            }
            m_vecSampleLen /= nMinibatch;

            // learning rate
            double gamma = 0;
            if (t <= t0) {
                gamma = 1.0 / pow(t, beta);
            }
            else {
                gamma = 1.0 / (pow(t0, beta) + t - t0);
            }

            // update
            for (int i = 1; i <= GetMaxLen(); i++) {
                zeta[i] += min(zgap, gamma * m_vecSampleLen[i] / m_pi[i]);
            }
            this->SetZeta(zeta.GetBuf());
        }

        // release the thread-local sequences
        for (int i = 0; i < nThread; i++) {
            SAFE_DELETE(aSeqs[i]);
        }
    }
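
    /* The SAMS update performed above: with minibatch length frequencies n_l / m and
       length prior pi_l,
         zeta_l <- zeta_l + min(zgap, gamma_t * (n_l/m) / pi_l),
       where the learning rate is gamma_t = 1/t^beta for t <= t0 and
       gamma_t = 1/(t0^beta + t - t0) afterwards. */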

    LogP Model::GetLogProb_AIS(VecShell<VocabID> &x, int nChain /* = 100 */, int nIntermediate /* = 10000 */)
    {
        int nLen = x.GetSize();
        int nParamsNumOfIntermediateModel = GetParamNum() - m_pFeat->GetNum();

        Vec<PValue> vParamsPn(nParamsNumOfIntermediateModel);
        Vec<PValue> vParamsP0(nParamsNumOfIntermediateModel);
        Vec<PValue> vParamsCur(nParamsNumOfIntermediateModel);

        /* get the parameters of the current model */
        PValue *p = vParamsP0.GetBuf();
        HRF_VALUE_GET(p, m_m3dVH);
        HRF_VALUE_GET(p, m_m3dCH);
        HRF_VALUE_GET(p, m_m3dHH);
        HRF_VALUE_GET(p, m_matBias);


        /* get the parameters of the distribution P_n */
        /* Set with all the unigram values, i.e. all the VH and CH and bias */
        vParamsPn.Copy(vParamsP0);
        p = vParamsPn.GetBuf() + m_m3dVH.GetSize() + m_m3dCH.GetSize();
        memset(p, 0, sizeof(PValue)*m_m3dHH.GetSize());

        /* calculate the normalization constants of P_n */
        LogP logz_pn = 0;
        for (int nPos = 0; nPos < nLen; nPos++) {
            VocabID xid = x[nPos];
            VocabID cid = m_pVocab->GetClass(xid);
            /* sum_{hi} Q(hi) */
            double d2 = 0;
            for (int k = 0; k < m_hlayer * m_hnode; k++) {
                if (cid != trf::VocabID_none && m_m3dCH.GetSize() > 0) {
                    d2 += trf::Log_Sum(m_m3dVH[xid][k][0] + m_m3dCH[cid][k][0] + m_matBias[k][0], m_m3dVH[xid][k][1] + m_m3dCH[cid][k][1] + m_matBias[k][1]);
                }
                else { // if cid == VocabID_none, there is no class information
                    d2 += trf::Log_Sum(m_m3dVH[xid][k][0] + m_matBias[k][0], m_m3dVH[xid][k][1] + m_matBias[k][1]);
                }
            }
            logz_pn += d2;
        }

        // set the intermediate models
        Array<Model*> aInterModel;
        aInterModel.SetNum(1);
        for (int i = 0; i < aInterModel.GetNum(); i++) {
            /* In the newly created model:
               there are no word/class ngram features;
               all the word/class ngram parameters are not used, as we just sample h
            */
            aInterModel[i] = new Model(m_pVocab, m_hlayer, m_hnode, m_maxlen);
            aInterModel[i]->SetPi(m_pi.GetBuf());
            aInterModel[i]->m_zeta.Copy(m_zeta);
            aInterModel[i]->m_logz.Copy(m_logz);
            lout_assert(aInterModel[i]->GetParamNum() == nParamsNumOfIntermediateModel);
        }
        Array<LogP> aLogWeight;
        aLogWeight.SetNum(nChain);
        aLogWeight.Fill(0);

        //Title::Precent(0, true, nChain, "AIS");
        //#pragma omp parallel for firstprivate(vParamsCur)
        for (int k = 0; k < nChain; k++) {
            int tid = 0; //omp_get_thread_num();
            Model *pInterModel = aInterModel[tid];
            Seq seq(nLen, m_hlayer, m_hnode);
            seq.x.Set(x.GetBuf(), x.GetSize(), m_pVocab);

            /* sample the initial sequence */
            for (int nPos = 0; nPos < seq.GetLen(); nPos++) {
                VocabID xid = x[nPos];
                VocabID cid = m_pVocab->GetClass(xid);
                for (int k = 0; k < m_hlayer * m_hnode; k++) {
                    LogP curP[2];
                    if (cid != trf::VocabID_none && m_m3dCH.GetSize() > 0) {
                        curP[0] = m_m3dVH[xid][k][0] + m_m3dCH[cid][k][0] + m_matBias[k][0];
                        curP[1] = m_m3dVH[xid][k][1] + m_m3dCH[cid][k][1] + m_matBias[k][1];
                    }
                    else { // if cid == VocabID_none, there is no class information
                        curP[0] = m_m3dVH[xid][k][0] + m_matBias[k][0];
                        curP[1] = m_m3dVH[xid][k][1] + m_matBias[k][1];
                    }
                    trf::LogLineNormalize(curP, 2);
                    seq.h[nPos][k] = trf::LogLineSampling(curP, 2);
                }
            }


            pInterModel->SetParam(vParamsPn.GetBuf());
            //LogP logp_old = - GetHNode() * nLen * log(2);
            LogP logp_old = pInterModel->GetLogProb(seq, false) - logz_pn;

            double log_w = 0;
            for (int t = nIntermediate - 1; t >= 0; t--) {
                /* set the intermediate parameters */
                //double beta = 1.0 / nIntermediate * t;
                double beta = trf::GetAISFactor(t, nIntermediate);
                for (int i = 0; i < vParamsCur.GetSize(); i++)
                    vParamsCur[i] = vParamsP0[i] * (1 - beta) + vParamsPn[i] * beta;
                pInterModel->SetParam(vParamsCur.GetBuf());

                /* compute the weight */
                LogP rate = pInterModel->GetLogProb(seq) - logp_old;
                log_w += rate;

                /* sample h */
                pInterModel->SampleHAndCGivenX(seq);
                logp_old = pInterModel->GetLogProb(seq);
            }

            aLogWeight[k] = log_w; // record the log-weight
            //Title::Precent();
        }

        for (int i = 0; i < aInterModel.GetNum(); i++) {
            SAFE_DELETE(aInterModel[i]);
        }


        LogP logprob = trf::Log_Sum(aLogWeight.GetBuffer(), aLogWeight.GetNum()) - trf::Prob2LogP(nChain);

        /* the logprob of all the x and c */
        Seq seq(nLen, m_hlayer, m_hnode);
        seq.x.Set(x.GetBuf(), x.GetSize(), m_pVocab);
        LogP logpx = FeatClusterSum(seq.x, 0, nLen);

        return logprob + logpx;
    }
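
    /* A sketch of the AIS estimate assembled above: each chain accumulates
         log w = sum_t [ log p_{beta_t}(x, h) - log p_{beta_{t+1}}(x, h) ],
       annealing from the tractable distribution P_n (HH weights zeroed) to the
       target P_0, and the marginal is then estimated as
         log p(x) ~ log( (1/nChain) * sum_k exp(log w_k) ) + log p_feat(x). */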

    LogP Model::GetLogProb_Gibbs(VecShell<VocabID> &x, int num /* = 100 */)
    {
        int nLen = x.GetSize();
        Seq seq(nLen, m_hlayer, m_hnode);
        seq.x.Set(x.GetBuf(), x.GetSize(), m_pVocab);
        RandHidden(seq);

        LogP maxLogp = trf::LogP_zero;
        for (int i = 0; i < num; i++) {
            SampleHAndCGivenX(seq);
            LogP curlogp = GetLogProb(seq, true);
            if (curlogp > maxLogp) {
                maxLogp = curlogp;
            }
        }
        return maxLogp;
    }

    /************************************************************************/
    /* Forward-backward class                                               */
    /************************************************************************/
    AlgNode::AlgNode(Model *p)
    {
        m_pModel = p;
    }
    LogP AlgNode::ClusterSum(int *pSeq, int nLen, int nPos, int nOrder)
    {
        m_seq.Reset(nLen, m_pModel->m_hlayer, m_pModel->m_hnode);
        m_pModel->DecodeNode(VecShell<int>(pSeq, nLen), m_seq, nPos, nOrder);
        return m_pModel->ClusterSum(m_seq, nPos, nOrder);
    }

    AlgLayer::AlgLayer(Model *p, VecShell<VocabID> x, int nlayer)
    {
        m_pModel = p;
        m_nlayer = nlayer;
        m_seq.Reset(x.GetSize(), p->m_hlayer, p->m_hnode);
        m_seq.x.Set(x.GetBuf(), x.GetSize(), p->GetVocab());
    }
    LogP AlgLayer::ClusterSum(int *pSeq, int nLen, int nPos, int nOrder)
    {
        m_pModel->DecodeLayer(VecShell<int>(pSeq, nLen), m_seq.h, m_nlayer, nPos, nOrder);
        return m_pModel->LayerClusterSum(m_seq, m_nlayer, nPos, nOrder);
    }
}