TRF Language Model
trf-model.h
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding language may use different comment styles.
16 
17 
18 #pragma once
19 #include "trf-feature.h"
20 #include "trf-alg.h"
21 #include <omp.h>
22 
23 namespace trf
24 {
25 
26 
32  class Model;
33 
// Forward-backward algorithm node for the TRF model (base class Algfb is in trf-alg.h).
// Holds a back-pointer to the Model so the virtual ClusterSum callback can evaluate
// the model's per-cluster log potentials during exact normalization.
 38  class AlgNode : public Algfb
 39  {
 40  private:
 41  Model *m_pModel; // non-owning back-pointer to the model being normalized
 42  Seq m_seq; // NOTE(review): presumably a scratch sequence for ClusterSum — body is in trf-model.cpp, not visible here
 43  public:
 44  AlgNode(Model *p) :m_pModel(p){};
 45  virtual LogP ClusterSum(int *pSeq, int nLen, int nPos, int nOrder); // log-sum over one cluster; defined in trf-model.cpp
 46  };
// TRF (trans-dimensional random field) language model.
// NOTE(review): this listing is a lossy Doxygen extraction — the member declarations at
// Doxygen lines 54-70 (m_pFeat, m_value, m_pi, m_logz, m_zeta, m_pVocab, m_matLenJump,
// m_maxSampleLen, m_AlgNode, per the cross-reference index) are missing from this view.
 51  class Model
 52  {
 53  public:
 56 
 57  int m_maxlen; // the maximum length of the model, excluding <s> and </s>; the min-len = 1
 61 
 63 
 66 
 67  protected:
 69 
 70  public:
// Constructor from a vocabulary only; m_maxlen stays 0 until Reset() is called.
 72  Model(Vocab *pv) :
 73  m_pFeat(NULL),
 74  m_maxlen(0),
 75  m_pVocab(pv),
 76  m_AlgNode(this){
 77  m_nLenJumpAccTimes = 0; // MCMC length-jump acceptance counter
 78  m_nLenJumpTotalTime = 0; // MCMC length-jump total counter
 79  m_nSampleHAccTimes = 0; // sample-H acceptance counter
 80  m_nSampleHTotalTimes = 0; // sample-H total counter
 81  };
// Constructor from a vocabulary and a maximum sequence length.
 83  Model(Vocab *pv, int maxlen) :
 84  m_pFeat(NULL),
 85  m_maxlen(0),
 86  m_pVocab(pv),
 87  m_AlgNode(this) {
 88  Reset(pv, maxlen); // NOTE(review): presumably sizes the length-dependent buffers — body not visible here
 89  m_nLenJumpAccTimes = 0;
 90  m_nLenJumpTotalTime = 0;
 91  m_nSampleHAccTimes = 0;
 92  m_nSampleHTotalTimes = 0;
 93  }
// NOTE(review): the destructor signature "~Model()" (Doxygen line 95) is missing from this extraction.
 96  {
 97  SAFE_DELETE(m_pFeat); // Model owns the feature table
 98  }
 100  int GetMaxLen() const { return m_maxlen; }
 102  Vocab *GetVocab() const { return m_pVocab; }
// NOTE(review): dereferences m_pFeat without a NULL check, unlike GetParamNum() below —
// calling this before features are loaded would crash.
 104  int GetMaxOrder() const { return m_pFeat->GetMaxOrder(); }
 106  int GetParamNum() const { return (m_pFeat) ? m_pFeat->GetNum() : 0; }
 108  void Reset(Vocab *pv, int maxlen);
 110  virtual void SetParam(PValue *pValue);
 112  void GetParam(PValue *pValue);
 114  void SetPi(Prob *pPi);
// Set the estimated log normalization constants zeta, anchored at length 1.
 116  template <typename T>
 117  void SetZeta(T *pzeta)
 118  {
 119  ExactNormalize(1); // make m_logz[1] exact so it can serve as the anchor below
 120  for (int i = 1; i <= m_maxlen; i++) {
 121  m_zeta[i] = (LogP)( pzeta[i] - pzeta[1] ); // zeta is stored relative to length 1
 122  m_logz[i] = (LogP)( m_zeta[i] + m_logz[1] );
 123  }
 124  }
// Copy the current zeta estimates (indices 1..m_maxlen) into pzeta.
 125  template <typename T>
 126  void GetZeta(T *pzeta)
 127  {
 128  for (int i = 1; i <= m_maxlen; i++) {
 129  pzeta[i] = (T) m_zeta[i];
 130  }
 131  }
 133  LogP GetLogProb(Seq &seq, bool bNorm = true);
 135  void LoadFromCorpus(const char *pcorpus, const char *pfeatstyle, int nOrder);
 137  void FeatCount(Seq &seq, double *pCount, double dadd = 1.0);
 138 
 140  void ReadT(const char *pfilename);
 142  void WriteT(const char *pfilename);
 144 // void ReadB(const char *pfilename);
 145 // /// Write Binary
 146 // void WriteB(const char *pfilename);
 147 
 148  /************************************************************************/
 149  /* exact calculation functions                                          */
 150  /************************************************************************/
 151  public:
 153  LogP ClusterSum(Seq &seq, int nPos, int nOrder);
 155  virtual double ExactNormalize(int nLen);
 157  virtual void ExactNormalize();
 159  void GetNodeExp(int nLen, double *pExp);
 161  void GetNodeExp(double *pExp, Prob *pLenProb = NULL);
 162 
 163  /************************************************************************/
 164  /* sampling functions                                                   */
 165  /************************************************************************/
 166  public:
 171  void Sample(Seq &seq);
 174  void LocalJump(Seq &seq);
 176  virtual void MarkovMove(Seq &seq);
 178  LogP ProposeLength(int nOld, int &nNew, bool bSample);
 180  LogP ProposeC0(VocabID &ci, Seq &seq, int nPos, bool bSample);
 182  void ProposeCProbs(VecShell<LogP> &logps, Seq &seq, int nPos);
 184  /* To reduce the computation cost, we use the following functions to replace GetLogProb
 185  when sampling c_i at position i in functions ProposeCProbs and ProposeC0.
 186  There we only consider the features depending on c_i and independent of w_i,
 187  i.e. calculating the proposal prob without knowing the w_i at position i.
 188  */
 189  LogP GetReducedModelForC(Seq &seq, int nPos);
 191  LogP GetReducedModelForW(Seq &seq, int nPos);
 193  LogP GetReducedModel(Seq &seq, int nPos);
 195  LogP GetMarginalProbOfC(Seq &seq, int nPos);
 197  /* The only difference with ProposeC0 is that
 198  SampleC will accept the current class after proposing it,
 199  while ProposeC0 does not.
 200  ProposeC0 is used in local jump. It cannot accept the proposed c0 as there is no initial value of c_i.
 201  SampleC is used in Markov move.
 202  */
 203  void SampleC(Seq &seq, int nPos);
 205  /* if bSample=true, then sample x[nPos]. Otherwise only calculate the conditional probabilities of the current x[nPos]. */
 206  LogP SampleX(Seq &seq, int nPos, bool bSample = true);
 207  public:
 209  LogP AISNormalize(int nLen, int nChain, int nInter);
 210  void AISNormalize(int nLenMin, int nLenMax, int nChain, int nInter);
 211  };
212 
// TRF model variant that revises the sampling method to speed up the MCMC:
// MarkovMove resamples every position with a simple uniform proposal plus a
// Metropolis-Hastings accept/reject, instead of the exact conditional sampling
// used by the base Model.
 217  class Model_FastSample : public Model
 218  {
 219  public:
 221  public:
// NOTE(review): the constructor signature "Model_FastSample(Vocab *pv)" (Doxygen line 222)
// and the "int m_nMHtimes" member declaration (number of MH sweeps per position)
// are missing from this extraction.
 223  m_nMHtimes = 1;
 224  }
 225  Model_FastSample(Vocab *pv, int maxlen) :Model(pv, maxlen) {
 226  m_nMHtimes = 1; // one Metropolis-Hastings sweep per position by default
 227  }
// Propose a word w0 at position nPos given the class seq.x[class_layer][nPos].
// Returns the (log) proposal probability; if bSample, also draws wi from the proposal.
 228  LogP ProposeW0(VocabID &wi, Seq &seq, int nPos, bool bSample = true)
 229  {
 230  Array<VocabID> *pXs = m_pVocab->GetWord(seq.x[class_layer][nPos]); // all words of the current class
 231  Array<LogP> aLogps;
 232 
 233  VocabID nSaveX = seq.x[word_layer][nPos]; // save w[nPos]
 234  for (int i = 0; i < pXs->GetNum(); i++) {
 235  seq.x[word_layer][nPos] = pXs->Get(i);
 236 
 237 // LogP d = 0;
 238 // Array<int> afeat;
 239 // m_pFeat->Find(afeat, seq, nPos, 1);
 240 // for (int i = 0; i < afeat.GetNum(); i++)
 241 // d += m_value[afeat[i]];
 242  aLogps[i] = 1; //GetReducedModelForW(seq, nPos); -- constant weight = uniform proposal over the class's words
 243  }
 244  seq.x[word_layer][nPos] = nSaveX; // restore w[nPos]
 245  LogLineNormalize(aLogps, pXs->GetNum()); // normalize in the log domain
 246 
 247  int idx;
 248  if (bSample) {
 249  /* sample a value for x[nPos] */
 250  idx = LogLineSampling(aLogps, pXs->GetNum());
 251  wi = pXs->Get(idx);
 252  }
 253  else {
 254  idx = pXs->Find(nSaveX); // find nSave in the array.
 255  if (idx == -1) {
 256  lout_error("Can't find the VocabID(" << nSaveX << ") in the array.\n"
 257  << "This may beacuse word(" << nSaveX << ") doesnot belongs to class("
 258  << seq.x[class_layer][nPos] << ")");
 259  }
 260  }
 261 
 262  return aLogps[idx];
 263  }
 264 
// Fill logps with a proposal distribution over all classes for position nPos.
// Constant weight per class = uniform proposal (class value is restored before returning).
 265  void ProposeCProbs(VecShell<LogP> &logps, Seq &seq, int nPos)
 266  {
 267  VocabID savecid = seq.x[class_layer][nPos];
 268  for (int cid = 0; cid < m_pVocab->GetClassNum(); cid++) {
 269  seq.x[class_layer][nPos] = cid;
 270  logps[cid] = 1;
 271  }
 272  seq.x[class_layer][nPos] = savecid;
 273  LogLineNormalize(logps.GetBuf(), m_pVocab->GetClassNum());
 274  }
 275 
// [sample] Markov move - one MH resampling pass over every position of the sequence.
 276  void MarkovMove(Seq &seq)
 277  {
 278  for (int i = 0; i < seq.GetLen(); i++)
 279  SamplePos(seq, i);
 280  }
// Resample (class, word) at nPos via Metropolis-Hastings with a uniform
// class + uniform word-within-class proposal, repeated m_nMHtimes times.
 281  void SamplePos(Seq &seq, int nPos)
 282  {
 283  for (int times = 0; times < m_nMHtimes; times++)
 284  {
 285 
 286  VocabID old_c = seq.x[class_layer][nPos];
 287  VocabID old_w = seq.x[word_layer][nPos];
 288  LogP pold = GetReducedModel(seq, nPos); // model score of the current state
 289 
 290 
 291  VocabID prop_c = omp_nrand(0, m_pVocab->GetClassNum()); // uniform class proposal
 292  Array<VocabID> *pWords = m_pVocab->GetWord(prop_c);
 293  int prop_w_id = omp_nrand(0, pWords->GetNum()); // uniform word within the proposed class
 294  VocabID prop_w = pWords->Get(prop_w_id);
 295 
 296  seq.x[class_layer][nPos] = prop_c;
 297  seq.x[word_layer][nPos] = prop_w;
 298  LogP pnew = GetReducedModel(seq, nPos); // model score of the proposed state
 299 
// Proposal probs g(.) = 1/#classes * 1/#words-in-class; needed because class sizes differ.
 300  LogP g_old = Prob2LogP(1.0 / m_pVocab->GetClassNum()) + Prob2LogP(1.0 / m_pVocab->GetWord(old_c)->GetNum());
 301  LogP g_new = Prob2LogP(1.0 / m_pVocab->GetClassNum()) + Prob2LogP(1.0 / m_pVocab->GetWord(prop_c)->GetNum());
 302  LogP acclogp = pnew + g_old - (pold + g_new); // MH log acceptance ratio
 303 
 304  if (Acceptable(LogP2Prob(acclogp))) {
 305  m_nSampleHAccTimes++;
 306  seq.x[class_layer][nPos] = prop_c; // already set above; harmless re-assignment
 307  seq.x[word_layer][nPos] = prop_w;
 308  }
 309  else {
 310  seq.x[class_layer][nPos] = old_c; // reject: restore the previous state
 311  seq.x[word_layer][nPos] = old_w;
 312  }
 313  m_nSampleHTotalTimes++;
 314 
// Invariant: the class layer must stay consistent with the word's true class.
 315  lout_assert(seq.x[class_layer][nPos] == m_pVocab->GetClass(seq.x[word_layer][nPos]));
 316  }
 317 
// The block below is a retained earlier variant of SamplePos that proposed the class
// from ProposeCProbs and the word from ProposeW0, with the corresponding MH correction.
 318 // Vec<LogP> vlogps_c(m_pVocab->GetClassNum());
 319 // ProposeCProbs(vlogps_c, seq, nPos);
 320 // VocabID ci = seq.x[class_layer][nPos];
 321 // VocabID C0 = LogLineSampling(vlogps_c.GetBuf(), vlogps_c.GetSize());
 322 // LogP gci = vlogps_c[ci];
 323 // LogP gc0 = vlogps_c[C0];
 324 //
 325 // VocabID wi = seq.x[word_layer][nPos];
 326 // VocabID w0;
 327 // seq.x[class_layer][nPos] = ci;
 328 // LogP gwi_ci = ProposeW0(wi, seq, nPos, false);
 329 // seq.x[class_layer][nPos] = C0;
 330 // LogP gw0_c0 = ProposeW0(w0, seq, nPos, true);
 331 //
 332 // seq.x[class_layer][nPos] = ci;
 333 // seq.x[word_layer][nPos] = wi;
 334 // LogP pold = GetReducedModel(seq, nPos);
 335 // seq.x[class_layer][nPos] = C0;
 336 // seq.x[word_layer][nPos] = w0;
 337 // LogP pnew = GetReducedModel(seq, nPos);
 338 //
 339 // LogP acclogp = pnew + gci + gwi_ci - (pold + gc0 + gw0_c0);
 340 // if (Acceptable(LogP2Prob(acclogp))) {
 341 // m_nSampleHAccTimes++;
 342 // seq.x[class_layer][nPos] = C0;
 343 // seq.x[word_layer][nPos] = w0;
 344 // }
 345 // else {
 346 // seq.x[class_layer][nPos] = ci;
 347 // seq.x[word_layer][nPos] = wi;
 348 // }
 349 // m_nSampleHTotalTimes++;
 350  }
 351  };
353 }
Vec< Prob > m_pi
the prior length distribution
Definition: trf-model.h:58
int GetParamNum() const
Get parameter number.
Definition: trf-model.h:106
double Prob
Definition: trf-def.h:28
#define SAFE_DELETE(p)
memory release
Definition: wb-vector.h:49
T & Get(int i)
get the value at position i
Definition: wb-vector.h:99
Model(Vocab *pv)
constructor
Definition: trf-model.h:72
m WriteT(cfg_pathModelWrite)
int LogLineSampling(const LogP *pdProbs, int nNum)
Definition: trf-def.cpp:62
Model(Vocab *pv, int maxlen)
constructor
Definition: trf-model.h:83
void MarkovMove(Seq &seq)
[sample] Markov Move - perform the gibbs sampling
Definition: trf-model.h:276
int GetMaxOrder()
Get maximum order.
int m_maxSampleLen
[sample] the maximum sample length, default = m_maxlen + 2
Definition: trf-model.h:65
int VocabID
Definition: trf-vocab.h:23
#define lout_error(x)
Definition: wb-log.h:183
#define lout_assert(p)
Definition: wb-log.h:185
TRF model, revise the sample method to speedup the MCMC.
Definition: trf-model.h:217
void SetZeta(T *pzeta)
Set updated zeta.
Definition: trf-model.h:117
LogP Prob2LogP(Prob x)
Definition: trf-def.h:36
double PValue
Definition: trf-def.h:26
int m_nSampleHAccTimes
sample H the acceptance times
Definition: trf-model.h:169
void GetZeta(T *pzeta)
Definition: trf-model.h:126
double LogP
Definition: trf-def.h:27
Model_FastSample(Vocab *pv)
Definition: trf-model.h:222
int GetLen() const
Definition: trf-feature.h:71
void SamplePos(Seq &seq, int nPos)
Definition: trf-model.h:281
int m_nLenJumpAccTimes
length jump acceptance times
Definition: trf-model.h:167
virtual LogP ClusterSum(int *pSeq, int nLen, int nPos, int nOrder)
This function needs to be overridden in a derived class. Calculates the log probability of each cluster.
Definition: trf-model.cpp:22
define a sequence including the word sequence and class sequence
Definition: trf-feature.h:41
LogP ProposeW0(VocabID &wi, Seq &seq, int nPos, bool bSample=true)
Definition: trf-model.h:228
int GetNum() const
Get number.
Definition: trf-feature.h:200
int GetMaxOrder() const
Get maximum order.
Definition: trf-model.h:104
int m_maxlen
the maximum length of model, excluding <s> and </s>. The min-len = 1
Definition: trf-model.h:57
TRF model.
Definition: trf-model.h:51
int Find(T t)
Find a value and return the position.
Definition: wb-vector.h:248
Vec< LogP > m_zeta
the estimated normalization constants (fix = 0)
Definition: trf-model.h:60
T * GetBuf() const
Definition: wb-mat.h:68
#define word_layer
Definition: trf-feature.h:31
Mat< VocabID > x
Definition: trf-feature.h:44
int m_nLenJumpTotalTime
total times of length jump
Definition: trf-model.h:168
bool Acceptable(Prob prob)
Definition: trf-def.cpp:127
Feat * m_pFeat
hash all the features
Definition: trf-model.h:54
Vocab * m_pVocab
Definition: trf-model.h:62
Vocab * GetVocab() const
Get Vocab.
Definition: trf-model.h:102
int m_nSampleHTotalTimes
Definition: trf-model.h:170
int GetNum() const
Get Array number.
Definition: wb-vector.h:240
LogP LogLineNormalize(LogP *pdProbs, int nNum)
Definition: trf-def.cpp:53
AlgNode(Model *p)
Definition: trf-model.h:44
void ProposeCProbs(VecShell< LogP > &logps, Seq &seq, int nPos)
Definition: trf-model.h:265
Model_FastSample(Vocab *pv, int maxlen)
Definition: trf-model.h:225
Prob LogP2Prob(LogP x)
Definition: trf-def.h:33
~Model()
destructor
Definition: trf-model.h:95
include all the feature table
Definition: trf-feature.h:179
int omp_nrand(int nMin, int nMax)
get a random integer int [nMin, nMax-1]
Definition: trf-def.cpp:152
the forward-backward algorithms for TRF model
Definition: trf-model.h:38
Definition: trf-alg.cpp:20
Vec< LogP > m_logz
the normalization constants log Z_l
Definition: trf-model.h:59
int GetMaxLen() const
Get max-len.
Definition: trf-model.h:100
AlgNode m_AlgNode
the forward-backward calculation each node
Definition: trf-model.h:68
#define class_layer
Definition: trf-feature.h:32
Vec< PValue > m_value
the value for each features
Definition: trf-model.h:55
Mat< Prob > m_matLenJump
[sample] used to propose a new length
Definition: trf-model.h:64