TRF Language Model
trf-model.cpp
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2014-2015 Tsinghua University
// Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
//
// All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
// license declaration. Different coding languages may use different comment styles.


#include "trf-model.h"

namespace trf
{
    LogP AlgNode::ClusterSum(int *pSeq, int nLen, int nPos, int nOrder)
    {
        m_seq.Set(pSeq, nLen, m_pModel->m_pVocab);
        return m_pModel->ClusterSum(m_seq, nPos, nOrder);
    }

    void Model::Reset(Vocab *pv, int maxlen)
    {
        m_pVocab = pv;
        m_maxlen = maxlen;
        m_maxSampleLen = (int)(1.02 * maxlen);

        if (maxlen <= 0)
            return;

        // SAFE_DELETE(m_pFeat);

        m_pi.Reset(m_maxlen + 1);
        m_logz.Reset(m_maxlen + 1);
        m_zeta.Reset(m_maxlen + 1);
        m_pi.Fill(1);
        m_logz.Fill(0);
        m_zeta.Fill(0);

        // length jump probability
        m_matLenJump.Reset(m_maxSampleLen + 1, m_maxSampleLen + 1);
        m_matLenJump.Fill(0);
        for (int i = 1; i < m_matLenJump.GetRow(); i++) {
            for (int j = max(1, i - 1); j <= min(m_matLenJump.GetCol() - 1, i + 1); j++) {
                m_matLenJump[i][j] = 1;
            }
            m_matLenJump[i][i] = 0; // avoid the self-jump
            LineNormalize(m_matLenJump[i].GetBuf(), m_matLenJump.GetCol());
        }
    }
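Model::Reset() above builds the length-jump proposal used by the trans-dimensional sampler: a tri-diagonal matrix in which each length can only move to a neighboring length, self-jumps are zeroed, and every row is normalized. Note m_maxSampleLen is set 2% above m_maxlen, so the sampler can propose lengths slightly beyond the training maximum. A standalone sketch of the same construction (not part of the toolkit; maxSampleLen fixed to 5 for illustration):

    // Standalone sketch: the tri-diagonal length-jump proposal built by Reset().
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const int n = 5 + 1; // lengths 0..5; row 0 is unused
        std::vector<std::vector<double>> jump(n, std::vector<double>(n, 0.0));
        for (int i = 1; i < n; i++) {
            for (int j = std::max(1, i - 1); j <= std::min(n - 1, i + 1); j++)
                jump[i][j] = 1.0;
            jump[i][i] = 0.0; // no self-jump
            double sum = 0;
            for (double v : jump[i]) sum += v;
            for (double &v : jump[i]) v /= sum; // LineNormalize
        }
        // row 3 proposes lengths 2 and 4 with probability 0.5 each,
        // while boundary row 1 proposes length 2 with probability 1
        for (int j = 1; j < n; j++) printf("%.2f ", jump[3][j]);
        printf("\n");
        return 0;
    }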
    void Model::SetParam(PValue *pValue)
    {
        if (pValue) {
            memcpy(m_value.GetBuf(), pValue, sizeof(pValue[0]) * GetParamNum());
        }
    }
    void Model::GetParam(PValue *pValue)
    {
        if (pValue) {
            memcpy(pValue, m_value.GetBuf(), sizeof(pValue[0]) * GetParamNum());
        }
    }
    void Model::SetPi(Prob *pPi)
    {
        m_pi.Copy(VecShell<Prob>(pPi, m_pi.GetSize()));
    }
    LogP Model::GetLogProb(Seq &seq, bool bNorm /* = true */)
    {
        if (!m_pFeat)
            return 0;
        if (seq.GetLen() <= 0)
            return 0;

        Array<int> afeat;
        m_pFeat->Find(afeat, seq);

        LogP logSum = 0;
        for (int i = 0; i < afeat.GetNum(); i++) {
            logSum += m_value[afeat[i]];
        }

        if (bNorm) {
            int nLen = min(m_maxlen, seq.GetLen());
            logSum = logSum - m_logz[nLen] + Prob2LogP(m_pi[nLen]);
        }
        return logSum;
    }
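In formula form, GetLogProb realizes the mixture-of-lengths TRF distribution. A sketch of the returned quantity when bNorm is true, where f_i are the feature counts collected by m_pFeat->Find, lambda_i = m_value[i], Z_l = exp(m_logz[l]) and pi_l = m_pi[l]:

    \log p(x^l; \lambda) = \log \pi_l + \sum_i \lambda_i f_i(x^l) - \log Z_l

With bNorm false, only the unnormalized score \sum_i \lambda_i f_i(x^l) is returned.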
    void Model::LoadFromCorpus(const char *pcorpus, const char *pfeatstyle, int nOrder)
    {
        if (pcorpus) {
            m_pFeat = new Feat(nOrder, m_pVocab->GetClassNum() > 0);
            if (pfeatstyle)
                m_pFeat->Reset(pfeatstyle);
            m_pFeat->LoadFeatFromCorpus(pcorpus, m_pVocab);
            m_value.Reset(m_pFeat->GetNum());
            m_value.Fill(0);
        }
    }
    void Model::FeatCount(Seq &seq, double *pCount, double dadd /* = 1.0 */)
    {
        Array<int> afeat;
        m_pFeat->Find(afeat, seq);
        for (int i = 0; i < afeat.GetNum(); i++) {
            pCount[afeat[i]] += dadd;
        }
    }
    void Model::ReadT(const char *pfilename)
    {
        File fin(pfilename, "rt");

        lout << "[Model]: Read(txt) from " << pfilename << endl;

        int nVocabSize = 0;
        fin.Scanf("m_vocabsize=%d\n", &nVocabSize);
        fin.Scanf("m_maxlen=%d\n", &m_maxlen);

        // Reset
        Reset(m_pVocab, m_maxlen);
        if (m_pVocab->GetSize() != nVocabSize) {
            lout_error("[Model] ReadT: the input nVocabSize(" << nVocabSize << ") != m_pVocab->GetSize(" << m_pVocab->GetSize() << ")");
        }

        double dValue;
        fin.Scanf("m_pi=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fin.Scanf("%lf ", &dValue);
            m_pi[i] = dValue;
        }
        fin.Scanf("]\n");
        fin.Scanf("m_logz=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fin.Scanf("%lf ", &dValue);
            m_logz[i] = dValue;
        }
        fin.Scanf("]\n");
        fin.Scanf("m_zeta=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fin.Scanf("%lf ", &dValue);
            m_zeta[i] = dValue;
        }
        fin.Scanf("]\n");

        int nValue = 0;
        fin.Scanf("featnum=%d\n", &nValue);
        m_value.Reset(nValue);
        SAFE_DELETE(m_pFeat);
        m_pFeat = new Feat;
        m_pFeat->m_nTotalNum = nValue;
        m_pFeat->ReadT(fin, m_value.GetBuf());
    }
    void Model::WriteT(const char *pfilename)
    {
        File fout(pfilename, "wt");
        lout << "[Model] Write(txt) to " << pfilename << endl;

        fout.Print("m_vocabsize=%d\n", m_pVocab->GetSize());
        fout.Print("m_maxlen=%d\n", m_maxlen);
        fout.Print("m_pi=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_pi[i]);
        }
        fout.Print("]\n");
        fout.Print("m_logz=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_logz[i]);
        }
        fout.Print("]\n");
        fout.Print("m_zeta=[ ");
        for (int i = 1; i <= m_maxlen; i++) {
            fout.Print("%f ", m_zeta[i]);
        }
        fout.Print("]\n");

        fout.Print("featnum=%d\n", m_pFeat->GetNum());
        m_pFeat->WriteT(fout, m_value.GetBuf());
    }
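For reference, the text layout that WriteT produces (and ReadT consumes) looks like the following; all numbers are illustrative, and the feature table at the end is written by Feat::WriteT in trf-feature:

    m_vocabsize=10000
    m_maxlen=3
    m_pi=[ 0.210000 0.350000 0.440000 ]
    m_logz=[ 9.210340 18.420681 27.631021 ]
    m_zeta=[ 0.000000 9.210340 18.420681 ]
    featnum=123456
    ...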

    LogP Model::ClusterSum(Seq &seq, int nPos, int nOrder)
    {
        LogP LogSum = 0;
        Array<int> afeat;

        int nLen = seq.GetLen();
        // input nOrder can be larger than the max-order of features
        int nWordFeatOrder = min(nOrder, GetMaxOrder());

        for (int n = 1; n <= nWordFeatOrder; n++) {
            m_pFeat->Find(afeat, seq, nPos, n);
        }

        // the last cluster
        if (nPos == nLen - nOrder) {
            for (int i = nPos + 1; i < nLen; i++) {
                nWordFeatOrder = min(nLen - i, GetMaxOrder());
                for (int n = 1; n <= nWordFeatOrder; n++) {
                    m_pFeat->Find(afeat, seq, i, n);
                }
            }
        }

        for (int i = 0; i < afeat.GetNum(); i++)
            LogSum += m_value[afeat[i]];

        return LogSum;
    }
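ClusterSum attributes each feature to the cluster (window of nOrder consecutive positions) at which it starts, with the final cluster also absorbing the features of the trailing positions. The total log-weight therefore decomposes additively over clusters; a sketch, writing J for the cluster order and \phi_k for the value returned at start position k:

    \sum_i \lambda_i f_i(x^l) = \sum_{k=0}^{l-J} \phi_k(x_k, \dots, x_{k+J-1})

This additive decomposition over overlapping windows is what allows the forward-backward routine in AlgNode to compute Z_l and the cluster marginals exactly.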
    double Model::ExactNormalize(int nLen)
    {
        int nMaxOrder = GetMaxOrder();
        LogP logZ = LogP_zero;

        /* if the length is not larger than the order, enumerate all sequences of that length */
        if (nLen <= nMaxOrder) {
            Seq seq(nLen);
            vIter<VocabID> SeqIter(seq.GetWordSeq(), nLen);
            SeqIter.AddAllLine(0, m_pVocab->GetSize() - 1);
            while (SeqIter.Next()) {
                seq.SetClass(m_pVocab);
                double d = GetLogProb(seq, false);
                logZ = Log_Sum(logZ, d);
            }
        }
        else {
            m_AlgNode.ForwardBackward(nLen, nMaxOrder, m_pVocab->GetSize());
            logZ = m_AlgNode.GetLogSummation();
        }

        m_logz[nLen] = logZ;
        return logZ;
    }
    void Model::ExactNormalize()
    {
        for (int len = 1; len <= m_maxlen; len++) {
            ExactNormalize(len);
            m_zeta[len] = m_logz[len] - m_logz[1];
            //lout << " logZ[" << len << "] = " << m_logz[len] << endl;
        }
    }
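ExactNormalize accumulates logZ = log \sum_x exp(\log \tilde p(x)) one term at a time via Log_Sum. A standalone sketch of the numerically stable log-sum-exp this relies on (the toolkit's own implementation lives in trf-def):

    // Standalone sketch, not the toolkit's implementation: stable log-sum-exp,
    // i.e. the Log_Sum operation used to accumulate logZ over enumerated sequences.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <limits>

    double log_sum(double x, double y)
    {
        const double neg_inf = -std::numeric_limits<double>::infinity();
        if (x == neg_inf) return y; // LogP_zero acts as the identity
        if (y == neg_inf) return x;
        double m = std::max(x, y);
        return m + std::log(std::exp(x - m) + std::exp(y - m));
    }

    int main()
    {
        double scores[] = { -1.0, -2.0, -0.5 }; // unnormalized log-scores
        double logZ = -std::numeric_limits<double>::infinity();
        for (double s : scores)
            logZ = log_sum(logZ, s); // logZ = log(e^-1 + e^-2 + e^-0.5)
        printf("logZ = %f\n", logZ);
        return 0;
    }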
    void Model::GetNodeExp(int nLen, double *pExp)
    {
        memset(pExp, 0, sizeof(pExp[0]) * GetParamNum());

        int nMaxOrder = GetMaxOrder();
        /* if the length is not larger than the order, enumerate all sequences of that length */
        if (nLen <= nMaxOrder) {
            Seq seq(nLen);
            vIter<VocabID> SeqIter(seq.GetWordSeq(), nLen);
            SeqIter.AddAllLine(0, m_pVocab->GetSize() - 1);
            while (SeqIter.Next()) {
                seq.SetClass(m_pVocab);
                Prob prob = LogP2Prob(GetLogProb(seq));
                Array<int> afeat;
                m_pFeat->Find(afeat, seq);
                for (int i = 0; i < afeat.GetNum(); i++) {
                    pExp[afeat[i]] += prob;
                }
            }
        }
        else {
            int nClusterNum = nLen - nMaxOrder + 1;
            // loop over the cluster position pos
            for (int pos = 0; pos < nClusterNum; pos++) {
                // enumerate all configurations of the cluster
                Seq seq(nLen);
                vIter<VocabID> SeqIter(seq.GetWordSeq() + pos, nMaxOrder);
                SeqIter.AddAllLine(0, m_pVocab->GetSize() - 1);
                while (SeqIter.Next()) {
                    seq.SetClass(m_pVocab);
                    Prob prob = LogP2Prob(m_AlgNode.GetMarginalLogProb(pos, seq.GetWordSeq() + pos, nMaxOrder, m_logz[nLen]));
                    Array<int> afeat;
                    for (int n = 1; n <= nMaxOrder; n++)
                        m_pFeat->Find(afeat, seq, pos, n);
                    for (int i = 0; i < afeat.GetNum(); i++) {
                        pExp[afeat[i]] += prob;
                    }

                    // the last cluster
                    if (pos == nClusterNum - 1) {
                        afeat.Clean();
                        for (int ii = 1; ii < nMaxOrder; ii++) { // position ii
                            for (int n = 1; n <= nMaxOrder - ii; n++) { // order n
                                m_pFeat->Find(afeat, seq, pos + ii, n);
                            }
                        }
                        for (int i = 0; i < afeat.GetNum(); i++) {
                            pExp[afeat[i]] += prob;
                        }
                    }
                }
            }
        }
    }
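In formula form, GetNodeExp computes the per-length feature expectation, the model-side term of the maximum-likelihood gradient. For nLen > nMaxOrder it is assembled from cluster marginals instead of full enumeration; a sketch (my reading of GetMarginalLogProb), with J = nMaxOrder and u ranging over cluster configurations:

    E_{p_l}[f_i] = \sum_{x^l} p_l(x^l) f_i(x^l) = \sum_k \sum_u p_l(x_{k:k+J-1} = u) \, f_i^{(k)}(u)

where f_i^{(k)}(u) counts the occurrences of feature i starting at cluster position k under configuration u.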
    void Model::GetNodeExp(double *pExp, Prob *pLenProb /* = NULL */)
    {
        if (pLenProb == NULL)
            pLenProb = m_pi.GetBuf();
        VecShell<double> exp(pExp, GetParamNum());
        Vec<double> expTemp(GetParamNum());

        exp.Fill(0);
        for (int len = 1; len <= m_maxlen; len++) {

            int nMaxOrder = GetMaxOrder();
            m_AlgNode.ForwardBackward(len, nMaxOrder, m_pVocab->GetSize());

            GetNodeExp(len, expTemp.GetBuf());

            for (int i = 0; i < exp.GetSize(); i++) {
                exp[i] += pLenProb[len] * expTemp[i];
            }
        }
    }

    void Model::Sample(Seq &seq)
    {
        LocalJump(seq);
        MarkovMove(seq);
    }
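A minimal usage sketch of the trans-dimensional sampler (a hypothetical driver, not part of this file; it assumes a Vocab* already constructed elsewhere via trf-vocab):

    // Hypothetical driver: draws nSteps MCMC samples from the model.
    #include "trf-model.h"

    void DrawSamples(trf::Model &model, trf::Vocab *pVocab, int nSteps)
    {
        trf::Seq seq(5);        // start from some length
        seq.Random(pVocab);     // random initial word/class sequence
        for (int t = 0; t < nSteps; t++) {
            model.Sample(seq);  // one local-jump + Markov-move step
            // seq now holds the current sample; its length may have changed
        }
    }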
    void Model::LocalJump(Seq &seq)
    {
        int nOldLen = seq.GetLen();
        int nNewLen = 0;
        LogP j1 = ProposeLength(nOldLen, nNewLen, true);
        LogP j2 = ProposeLength(nNewLen, nOldLen, false);

        if (nNewLen == nOldLen)
            return;

        LogP logpAcc = 0;
        if (nNewLen == nOldLen + 1) {
            LogP logpold = GetLogProb(seq);
            seq.Reset(nNewLen);
            LogP R = ProposeC0(seq.x[class_layer][nNewLen - 1], seq, nNewLen - 1, true);
            LogP G = SampleX(seq, nNewLen - 1);
            LogP logpnew = GetLogProb(seq);

            logpAcc = (j2 - j1) + logpnew - (logpold + R + G);
        }
        else if (nNewLen == nOldLen - 1) {
            LogP logpold = GetLogProb(seq);
            LogP R = ProposeC0(seq.x[class_layer][nOldLen - 1], seq, nOldLen - 1, false);
            LogP G = SampleX(seq, nOldLen - 1, false);

            seq.Reset(nNewLen);
            LogP logpnew = GetLogProb(seq);

            logpAcc = (j2 - j1) + logpnew + R + G - logpold;
        }
        else {
            lout_error("[Model] Sample: nNewLen(" << nNewLen << ") and nOldLen(" << nOldLen << ")");
        }

        if (Acceptable(LogP2Prob(logpAcc))) {
            seq.Reset(nNewLen);
            m_nLenJumpAccTimes++;
        }
        else {
            seq.Reset(nOldLen);
        }
        m_nLenJumpTotalTime++;
    }
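The acceptance log-ratio assembled above is the standard Metropolis-Hastings rule for a trans-dimensional move. A sketch for the growth case l' = l + 1, with Gamma the length proposal (j1, j2), and R, G the log-probabilities of proposing the new class and word:

    \log A = \big(\log \Gamma(l' \to l) - \log \Gamma(l \to l')\big) + \log p(x'^{l'}) - \big(\log p(x^l) + R + G\big)

For the shrink case l' = l - 1 the roles reverse and R + G move to the other side, exactly as in the two branches; the move is accepted with probability min(1, A) via Acceptable(LogP2Prob(logpAcc)).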
    void Model::MarkovMove(Seq &seq)
    {
        /* Gibbs sampling */
        for (int nPos = 0; nPos < seq.GetLen(); nPos++) {
            SampleC(seq, nPos);
            SampleX(seq, nPos);
        }
    }

    LogP Model::ProposeLength(int nOld, int &nNew, bool bSample)
    {
        if (bSample) {
            nNew = LineSampling(m_matLenJump[nOld].GetBuf(), m_matLenJump[nOld].GetSize());
        }

        return Prob2LogP(m_matLenJump[nOld][nNew]);
    }
    LogP Model::ProposeC0(VocabID &ci, Seq &seq, int nPos, bool bSample)
    {
        /* if there are no classes, return 0 */
        if (m_pVocab->GetClassNum() == 0) {
            ci = VocabID_none;
            return 0;
        }

        Vec<LogP> vlogps(m_pVocab->GetClassNum());
        ProposeCProbs(vlogps, seq, nPos);

        if (bSample) {
            ci = LogLineSampling(vlogps.GetBuf(), vlogps.GetSize());
        }

        return vlogps[ci];
    }
    void Model::ProposeCProbs(VecShell<LogP> &logps, Seq &seq, int nPos)
    {
        // logps.Fill(Prob2LogP(1.0 / m_pVocab->GetClassNum()));
        // return;

        VocabID savecid = seq.x[class_layer][nPos];
        for (int cid = 0; cid < m_pVocab->GetClassNum(); cid++) {
            seq.x[class_layer][nPos] = cid;
            logps[cid] = GetReducedModelForC(seq, nPos);
        }
        seq.x[class_layer][nPos] = savecid;
        LogLineNormalize(logps.GetBuf(), m_pVocab->GetClassNum());
    }
    LogP Model::GetReducedModelForC(Seq &seq, int nPos)
    {
        if (seq.x[class_layer][nPos] == VocabID_none)
            return 0;

        LogP logSum = 0;
        // class ngram features
        Array<int> afeat;
        m_pFeat->FindPosDep(afeat, seq, nPos, 1);
        for (int i = 0; i < afeat.GetNum(); i++) {
            logSum += m_value[afeat[i]];
        }

        return logSum;
    }
    LogP Model::GetReducedModelForW(Seq &seq, int nPos)
    {
        LogP logSum = 0;
        Array<int> afeat;
        m_pFeat->FindPosDep(afeat, seq, nPos, 2);
        for (int i = 0; i < afeat.GetNum(); i++) {
            logSum += m_value[afeat[i]];
        }

        return logSum;
    }
    LogP Model::GetReducedModel(Seq &seq, int nPos)
    {
        LogP logSum = 0;
        Array<int> afeat;
        m_pFeat->FindPosDep(afeat, seq, nPos, 0); // all the features
        for (int i = 0; i < afeat.GetNum(); i++) {
            logSum += m_value[afeat[i]];
        }

        return logSum;
    }
    LogP Model::GetMarginalProbOfC(Seq &seq, int nPos)
    {
        LogP resLogp = LogP_zero;

        Array<VocabID> *pXs = m_pVocab->GetWord(seq.x[class_layer][nPos]);

        VocabID saveX = seq.x[word_layer][nPos];
        for (int i = 0; i < pXs->GetNum(); i++) {
            seq.x[word_layer][nPos] = pXs->Get(i);
            /* only the summation of weights depending on x[nPos], c[nPos] is needed */
            /* used to sample the c_i */
            resLogp = Log_Sum(resLogp, GetReducedModel(seq, nPos));
            //resLogp = Log_Sum(resLogp, GetLogProb(seq, false));
        }
        seq.x[word_layer][nPos] = saveX;

        return resLogp;
    }
    void Model::SampleC(Seq &seq, int nPos)
    {
        if (m_pVocab->GetClassNum() == 0) {
            seq.x[class_layer][nPos] = VocabID_none;
            return;
        }

        /* sample C0 */
        Vec<LogP> vlogps_c(m_pVocab->GetClassNum());
        ProposeCProbs(vlogps_c, seq, nPos);
        VocabID ci = seq.x[class_layer][nPos];
        VocabID C0 = LogLineSampling(vlogps_c.GetBuf(), vlogps_c.GetSize());
        LogP logpRi = vlogps_c[ci];
        LogP logpR0 = vlogps_c[C0];

        /* calculate the marginal probability p(c), summing over the words of the class */
        seq.x[class_layer][nPos] = ci;
        LogP Logp_ci = GetMarginalProbOfC(seq, nPos);
        seq.x[class_layer][nPos] = C0;
        LogP Logp_C0 = GetMarginalProbOfC(seq, nPos);

        LogP acclogp = logpRi + Logp_C0 - (logpR0 + Logp_ci);

        m_nSampleHTotalTimes++;
        if (Acceptable(LogP2Prob(acclogp))) {
            m_nSampleHAccTimes++;
            seq.x[class_layer][nPos] = C0;
        }
        else {
            seq.x[class_layer][nPos] = ci;
        }
    }
    LogP Model::SampleX(Seq &seq, int nPos, bool bSample /* = true */)
    {
        /*
            This function calculates G(x_i | x_{other}, h).
            If bSample is true, draw a sample for x_i;
            otherwise, only calculate the conditional probability.
        */
        if (nPos >= seq.GetLen()) {
            lout_error("[Model] SampleX: the nPos(" << nPos << ") >= the length of the sequence(" << seq.GetLen() << ")");
        }

        Array<VocabID> *pXs = m_pVocab->GetWord(seq.x[class_layer][nPos]);
        Array<LogP> aLogps;

        VocabID nSaveX = seq.x[word_layer][nPos]; // save w[nPos]
        for (int i = 0; i < pXs->GetNum(); i++) {
            seq.x[word_layer][nPos] = pXs->Get(i);
            /* to reduce the computational cost, instead of GetLogProb,
               only the summation of weights depending on x[nPos] is calculated
            */
            aLogps[i] = GetReducedModelForW(seq, nPos);
        }
        LogLineNormalize(aLogps, pXs->GetNum());

        int idx;
        if (bSample) {
            /* sample a value for x[nPos] */
            idx = LogLineSampling(aLogps, pXs->GetNum());
            seq.x[word_layer][nPos] = pXs->Get(idx);
        }
        else {
            idx = pXs->Find(nSaveX); // find nSaveX in the array
            seq.x[word_layer][nPos] = nSaveX;
            if (idx == -1) {
                lout_error("Can't find the VocabID(" << nSaveX << ") in the array.\n"
                    << "This may be because word(" << nSaveX << ") does not belong to class("
                    << seq.x[class_layer][nPos] << ")");
            }
        }

        return aLogps[idx];
    }
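The conditional realized by SampleX restricts x_i to the words of the current class and only needs the weights that touch position i (hence GetReducedModelForW instead of GetLogProb). A sketch, writing W(c_i) for m_pVocab->GetWord(c_i) and the sums running over features k whose span covers position i:

    G(x_i = w | x_{\setminus i}, c) = \frac{\exp\big(\sum_{k \ni i} \lambda_k f_k(x)\big)\big|_{x_i = w}}{\sum_{w' \in W(c_i)} \exp\big(\sum_{k \ni i} \lambda_k f_k(x)\big)\big|_{x_i = w'}}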

    LogP Model::AISNormalize(int nLen, int nChain, int nInter)
    {
        int nParamsNum = GetParamNum();

        Vec<PValue> vParamsPn(nParamsNum);
        Vec<PValue> vParamsP0(nParamsNum);
        Vec<PValue> vParamsCur(nParamsNum);
        this->GetParam(vParamsP0.GetBuf());

        /* set the P_n */
        /* set with all the unigram values, i.e. all the VH and CH */
        vParamsPn.Fill(0);
        /* calculate the normalization constant of P_n */
        LogP logz_pn = nLen * log((double)m_pVocab->GetSize());

        /* the intermediate models share the word/class ngram features
           to save memory */
        Model *pInterModel = new Model(m_pVocab, m_maxlen);
        pInterModel->m_pFeat = m_pFeat;
        pInterModel->m_value.Reset(GetParamNum());

        // weight for each chain
        Array<LogP> aLogWeight;
        aLogWeight.SetNum(nChain);

        LogP localLogSum = LogP_zero; // save the current results
        int localChainNum = 0;

        for (int k = 0; k < nChain; k++) {
            PValue *pParamsCur = vParamsCur.GetBuf();
            PValue *pP0 = vParamsP0.GetBuf();
            PValue *pPn = vParamsPn.GetBuf();

            Seq seq(nLen);
            /* here P_n is the uniform distribution */
            seq.Random(m_pVocab);
            pInterModel->SetParam(vParamsPn.GetBuf());
            //LogP logp_old = -(log(2) * GetHNode() + log(m_pVocab->GetSize())) * nLen;
            LogP logp_old = pInterModel->GetLogProb(seq, false) - logz_pn;

            double log_w = 0;
            for (int t = nInter - 1; t >= 0; t--) {
                /* set the intermediate parameters */
                double beta = GetAISFactor(t, nInter);
                for (int i = 0; i < nParamsNum; i++)
                    pParamsCur[i] = pP0[i] * (1 - beta) + pPn[i] * beta;
                pInterModel->SetParam(pParamsCur);

                /* compute the weight */
                LogP rate = pInterModel->GetLogProb(seq, false) - logp_old;
                log_w += rate;

                /* sample a sequence */
                pInterModel->MarkovMove(seq);
                logp_old = pInterModel->GetLogProb(seq, false);
            }

            aLogWeight[k] = log_w; // record the log-weight

            localLogSum = Log_Sum(localLogSum, log_w);
            localChainNum = localChainNum + 1;
            LogP localLogz = localLogSum - log(localChainNum);
            lout << localLogz << "(" << localChainNum << ") ";
        }

        pInterModel->m_pFeat = NULL; // avoid releasing the shared feature buffer
        SAFE_DELETE(pInterModel);

        LogP logz = Log_Sum(aLogWeight.GetBuffer(), aLogWeight.GetNum()) - Prob2LogP(nChain);

        m_logz[nLen] = logz;
        return logz;
    }
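A sketch of the AIS estimator realized above: the parameters are interpolated as lambda_t = (1 - beta_t) lambda_0 + beta_t lambda_n, bridging from the tractable reference P_n to the target P_0, and each chain accumulates a log-importance-weight log w_k by summing the successive log-ratios. The returned estimate matches Log_Sum(aLogWeight) - Prob2LogP(nChain):

    \hat Z_0 = \frac{1}{N} \sum_{k=1}^{N} w_k, \qquad \log \hat Z_0 = \mathrm{LogSumExp}_k(\log w_k) - \log N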
    void Model::AISNormalize(int nLenMin, int nLenMax, int nChain, int nInter)
    {
        lout << "AIS norm len from " << nLenMin << " to " << nLenMax << endl;
        int nParamsNum = GetParamNum();

        Vec<PValue> vParamsPn(nParamsNum);
        Vec<PValue> vParamsP0(nParamsNum);
        Vec<PValue> vParamsCur(nParamsNum);
        this->GetParam(vParamsP0.GetBuf());

        /* set the P_n */
        /* set all the unigram values */
        lout << "AISNorm: Using all the unigrams." << endl;
        vParamsPn.Fill(0);
        Seq seq(10);
        seq.x.Fill(0);
        Vec<LogP> aWordLogp(m_pVocab->GetSize());
        for (int w = 0; w < m_pVocab->GetSize(); w++) {
            seq.x[word_layer][1] = w;
            seq.x[class_layer][1] = m_pVocab->GetClass(w);
            Array<int> afind;
            m_pFeat->Find(afind, seq, 1, 1);
            lout_assert(afind.GetNum() <= 2);
            double dvalue = 0;
            for (int i = 0; i < afind.GetNum(); i++) {
                vParamsPn[afind[i]] = vParamsP0[afind[i]];
                dvalue += vParamsP0[afind[i]];
            }
            aWordLogp[w] = dvalue;
        }
        /* calculate the normalization constant of P_n for each length */
        Vec<LogP> alogz_pn(m_maxlen + 1);
        LogP logsum = Log_Sum(aWordLogp.GetBuf(), aWordLogp.GetSize());
        LogLineNormalize(aWordLogp.GetBuf(), aWordLogp.GetSize());
        for (int i = 0; i <= m_maxlen; i++) {
            alogz_pn[i] = i * logsum;
            //alogz_pn[i] = i * log((double)m_pVocab->GetSize());
        }

        /* the intermediate models share the features to save memory */
        Model *pInterModel = new Model(m_pVocab, m_maxlen);
        pInterModel->m_pFeat = m_pFeat;
        pInterModel->m_value.Reset(GetParamNum());
        pInterModel->SetParam(vParamsPn.GetBuf());

        // weight for each length, for each chain
        Mat<LogP> matLogWeight(m_maxlen + 1, nChain);
        Mat<LogP> matLogPOld(m_maxlen + 1, nChain);
        Mat<Seq*> matSeq(m_maxlen + 1, nChain);
        matLogWeight.Fill(0);
        matLogPOld.Fill(0);
        matSeq.Fill(0);
        for (int i = 1; i <= m_maxlen; i++) {
            for (int j = 0; j < nChain; j++) {
                matSeq[i][j] = new Seq(i);
                /* sample the initial sequence */
                //matSeq[i][j]->Random(m_pVocab);
                Seq *pSeq = matSeq[i][j];
                for (int nPos = 0; nPos < pSeq->GetLen(); nPos++) {
                    pSeq->GetWordSeq()[nPos] = LogLineSampling(aWordLogp.GetBuf(), aWordLogp.GetSize());
                    pSeq->GetClassSeq()[nPos] = m_pVocab->GetClass(pSeq->GetWordSeq()[nPos]);
                }

                matLogPOld[i][j] = pInterModel->GetLogProb(*matSeq[i][j], false) - alogz_pn[i];
            }
        }

        // for each intermediate distribution
        lout.Progress(0, true, nInter, "AIS");
        for (int t = nInter - 1; t >= 0; t--) {
            PValue *pParamsCur = vParamsCur.GetBuf();
            PValue *pP0 = vParamsP0.GetBuf();
            PValue *pPn = vParamsPn.GetBuf();

            // get the intermediate parameters
            double beta = GetAISFactor(t, nInter);
            for (int i = 0; i < nParamsNum; i++)
                pParamsCur[i] = pP0[i] * (1 - beta) + pPn[i] * beta;
            pInterModel->SetParam(pParamsCur);

#pragma omp parallel for
            for (int nLen = nLenMin; nLen <= nLenMax; nLen++) { // for each length
                for (int k = 0; k < nChain; k++) { // for each chain
                    /* compute the weight */
                    LogP rate = pInterModel->GetLogProb(*matSeq[nLen][k], false) - matLogPOld[nLen][k];
                    matLogWeight[nLen][k] += rate;

                    /* sample a sequence */
                    pInterModel->MarkovMove(*matSeq[nLen][k]);
                    matLogPOld[nLen][k] = pInterModel->GetLogProb(*matSeq[nLen][k], false);
                }
            }

            lout.Progress(nInter - t);
        }

        pInterModel->m_pFeat = NULL; // avoid releasing the shared feature buffer
        SAFE_DELETE(pInterModel);
        for (int i = 1; i <= m_maxlen; i++) {
            for (int j = 0; j < nChain; j++) {
                SAFE_DELETE(matSeq[i][j]);
            }
        }

        for (int nLen = nLenMin; nLen <= nLenMax; nLen++) {
            LogP logz = Log_Sum(matLogWeight[nLen].GetBuf(), matLogWeight[nLen].GetSize()) - Prob2LogP(nChain);
            m_logz[nLen] = logz;
            lout << "logz[" << nLen << "] = " << logz << " logw= ";
            for (int i = 0; i < matLogWeight[nLen].GetSize(); i++) {
                lout << matLogWeight[nLen][i] << " ";
            }
            lout << endl;
        }
    }
}
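Finally, a minimal end-to-end sketch of the public API defined in this file (a hypothetical driver, not part of the toolkit; the vocabulary is assumed to be constructed elsewhere via trf-vocab, and the file name and word IDs are illustrative):

    // Hypothetical driver: load a text model, refresh its normalization
    // constants with AIS, and score a word-ID sequence.
    #include "trf-model.h"

    trf::LogP ScoreWithModel(trf::Vocab *pVocab)  // vocabulary built elsewhere
    {
        trf::Model model(pVocab, 30);             // maxlen = 30
        model.ReadT("model.txt");                 // parameters, pi, logz, zeta
        model.AISNormalize(1, 30, 16, 1000);      // 16 chains, 1000 intermediate steps

        int aWords[] = { 12, 7, 256 };            // illustrative word IDs
        trf::Seq seq(3);
        seq.Set(aWords, 3, pVocab);               // fills the word and class layers
        return model.GetLogProb(seq);             // normalized log-probability
    }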