TRF Language Model
hrf-ml-train.cpp
Go to the documentation of this file.
1 #include "hrf-ml-train.h"
2 #include <omp.h>
3 
4 namespace hrf
5 {
// Constructor: clears the output-model path and then hands the model and the
// corpus pointers to Reset(), which performs all remaining setup.
// The inline comments in the signature indicate that pValid and pTest default
// to NULL in the declaration.
6  MLfunc::MLfunc(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid /* = NULL */, CorpusBase *pTest /* = NULL */)
7  {
    // No model file is written until a caller assigns a path.
8  m_pathOutputModel = NULL;
9 
10  Reset(pModel, pTrain, pValid, pTest);
11  }
// Re-binds this objective to a model and to the train / valid / test corpora.
// Visible steps: store the four pointers, resize m_values to m_nParamNum,
// compute the largest sequence length over the supplied corpora, warn when it
// differs from m_pModel->m_maxlen, build the training length distribution
// m_trainPi, and log the result.
//
// NOTE(review): this listing skips original lines 19-21, 23, 26, 36, 39 and 46,
// so some statements are not visible here. In particular no assignment to
// m_nParamNum and no call that actually re-sets the model appears in this view;
// confirm against the real source file.
// NOTE(review): pTrain is dereferenced unconditionally, whereas pValid and
// pTest are checked before use; a NULL pTrain would crash here.
12  void MLfunc::Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid /* = NULL */, CorpusBase *pTest /* = NULL */)
13  {
14  m_pModel = pModel;
15  m_pCorpusTrain = pTrain;
16  m_pCorpusValid = pValid;
17  m_pCorpusTest = pTest;
18 
22 
    // Size the local parameter copy; m_nParamNum must already hold the
    // model's parameter count at this point (its assignment is not visible).
24  m_values.Reset(m_nParamNum);
25 
    // Largest sequence length over every corpus that was supplied.
27  int nMaxLen = m_pCorpusTrain->GetMaxLen();
28  if (pValid)
29  nMaxLen = max(nMaxLen, pValid->GetMaxLen());
30  if (pTest)
31  nMaxLen = max(nMaxLen, pTest->GetMaxLen());
    // Only warnings are visible in this branch; the second message announces
    // a model re-set, but the corresponding call is not shown in the listing.
32  if (nMaxLen != m_pModel->m_maxlen) {
33  lout_warning("[MLfunc] Reset: the max-len in training (" << nMaxLen
34  << ") is not equal to m_pModel->m_maxlen (" << m_pModel->m_maxlen<<")");
35  lout_warning("[MLfunc] Reset: Re-set the model with length=" << nMaxLen);
37  }
38 
    // m_trainPi[i] = (count of training sequences reported for index i) /
    // (number of training sequences). The 1.0* factor forces floating-point
    // division.
    // NOTE(review): m_trainPi holds nMaxLen+1 entries but the loop is bounded
    // by aLenCount.GetNum(). If GetLenCount returns more entries than that,
    // the write is out of range; if it returns fewer, the remaining entries
    // keep whatever Reset() leaves in them - confirm both cases.
    // NOTE(review): an empty training corpus makes GetNum() return 0 and the
    // quotient non-finite - presumably never expected; verify.
40  Array<int> aLenCount;
41  m_trainPi.Reset(nMaxLen+1);
42  m_pCorpusTrain->GetLenCount(aLenCount);
43  for (int i = 0; i < aLenCount.GetNum(); i++) {
44  m_trainPi[i] = 1.0* aLenCount[i] / m_pCorpusTrain->GetNum();
45  }
47 
    // Log the distribution starting from index 1 (entry 0 is skipped).
48  lout_variable(nMaxLen);
49  lout << "train-pi = [ "; lout.output(m_trainPi.GetBuf() + 1, m_trainPi.GetSize() - 1); lout << "]"<< endl;
50 
51 
52 
    // Disabled mini-batch selection state, kept as commented-out code.
53 // m_TrainSelect.Reset(m_pCorpusTrain);
54 // m_nMiniBatch = 10;
55 // m_nScanSeq = 0;
56  }
// Copies m_nParamNum values from pdParams into m_values, converting each
// double to PValue. A NULL pdParams makes the call a no-op.
//
// NOTE(review): the listing skips original lines 65-66 after the copy loop, so
// any follow-up step (for example pushing the values into the model) is not
// visible here - confirm against the real source file.
// NOTE(review): pdParams is assumed to hold at least m_nParamNum elements;
// nothing in this function checks that.
57  void MLfunc::SetParam(double *pdParams)
58  {
59  if (pdParams == NULL)
60  return;
61 
62  for (int i = 0; i < m_nParamNum; i++) {
63  m_values[i] = (PValue) pdParams[i];
64  }
67  }
// Copies the m_nParamNum entries of m_values into the caller's buffer
// pdParams. A NULL pdParams makes the call a no-op.
//
// NOTE(review): the listing skips original line 73 just before the loop, so a
// step that refreshes m_values first may exist but is not visible here.
// NOTE(review): pdParams is assumed to have room for m_nParamNum elements;
// nothing in this function checks that.
68  void MLfunc::GetParam(double *pdParams)
69  {
70  if (pdParams == NULL)
71  return;
72 
74  for (int i = 0; i < m_nParamNum; i++) {
75  pdParams[i] = m_values[i];
76  }
77  }
// Returns the mean of m_pModel->GetLogProb(x) over the first nCorpusNum
// sequences of pCorpus, where nCorpusNum is the whole corpus when
// nCalNum == -1 and min(nCalNum, corpus size) otherwise.
// The loop runs under "#pragma omp parallel for"; every thread adds into its
// own slot (indexed by omp_get_thread_num()) of the accumulator vectors, and
// the slots are summed after the loop.
// Besides the returned average, several per-sequence diagnostics are
// accumulated and logged through lout_variable.
//
// NOTE(review): aSeq is used (and named in firstprivate) but no declaration is
// visible; the listing skips original line 82, which may have held it.
// NOTE(review): when no sequence is processed (empty corpus, or a nCalNum
// that is 0 or negative other than -1) nNum stays 0. The returned dsum / nNum
// is then a floating-point division by zero, and vNum.Sum() / nNum is an
// int / int division by zero.
// NOTE(review): pCorpus is dereferenced without a NULL check.
78  double MLfunc::GetLL(CorpusBase *pCorpus, int nCalNum /* = -1 */)
79  {
80  int nThread = omp_get_max_threads();
81 
    // Per-thread sum of normalized log-probabilities and sequence counts.
83  Vec<double> vSum(nThread);
84  Vec<int> vNum(nThread);
85  vSum.Fill(0);
86  vNum.Fill(0);
87 
    // Per-thread diagnostic sums: unnormalized log-probability, m_logz,
    // m_zeta and log of m_pi, each looked up at the sequence's length.
88  Vec<double> vWeight(nThread);
89  Vec<double> vLogz(nThread);
90  Vec<double> vZeta(nThread);
91  Vec<double> vPi(nThread);
92  vWeight.Fill(0);
93  vLogz.Fill(0);
94  vZeta.Fill(0);
95  vPi.Fill(0);
96 
97 
98  int nCorpusNum = (nCalNum == -1) ? pCorpus->GetNum() : min(nCalNum, pCorpus->GetNum());
99  Title::Precent(0, true, nCorpusNum-1, "omp GetLL");
    // firstprivate gives each thread its own copy of aSeq.
100 #pragma omp parallel for firstprivate(aSeq)
101  for (int i = 0; i < nCorpusNum; i++) {
102  pCorpus->GetSeq(i, aSeq);
103 
104  VecShell<VocabID> x(aSeq.GetBuffer(), aSeq.GetNum());
105  LogP logprob = m_pModel->GetLogProb(x);
106 
107  vSum[omp_get_thread_num()] += logprob;
108  vNum[omp_get_thread_num()]++;
109 
    // The length index is clamped to the model's maximum length.
    // NOTE(review): GetLogProb is evaluated a second time here with
    // bNorm=false purely for the diagnostics logged below.
110  int nLen = min(aSeq.GetNum(), m_pModel->GetMaxLen());
111  vWeight[omp_get_thread_num()] += m_pModel->GetLogProb(x, false);
112  vLogz[omp_get_thread_num()] += m_pModel->m_logz[nLen];
113  vZeta[omp_get_thread_num()] += m_pModel->m_zeta[nLen];
114  vPi[omp_get_thread_num()] += trf::Prob2LogP(m_pModel->m_pi[nLen]);
115 
116 
    // NOTE(review): called from inside the parallel loop; whether
    // Title::Precent is safe to call concurrently is not visible here.
117  Title::Precent();
118  }
119 
    // Reduce the per-thread slots into a single sum and count.
120  double dsum = 0;
121  int nNum = 0;
122  for (int t = 0; t < nThread; t++) {
123  dsum += vSum[t];
124  nNum += vNum[t];
125 
126  }
127 
    // Diagnostic averages. vNum.Sum() / nNum is an integer quotient of two
    // equal totals, so it reads 1 whenever nNum is non-zero.
128  lout_variable(vSum.Sum() / nNum);
129  lout_variable(vNum.Sum() / nNum);
130  lout_variable(vLogz.Sum() / nNum);
131  lout_variable(vZeta.Sum()/nNum);
132  lout_variable(vPi.Sum()/nNum);
133 
    // The first line repeats the average already logged above; the next two
    // recombine weight, normalizer (zeta or logz) and log-pi for comparison.
135  lout_variable(vSum.Sum() / nNum);
136  lout_variable((vWeight.Sum() - vZeta.Sum() + vPi.Sum()) / nNum);
137  lout_variable((vWeight.Sum() - vLogz.Sum() + vPi.Sum()) / nNum);
138 
139  return dsum / nNum;
140  }
// Function body whose signature line (original line 141) is absent from this
// listing. It is presumably MLfunc::GetValue(), which the cross-reference
// index describes as "calculate the function value f(x)" - confirm against
// the real source file.
// Returns the negated log-likelihood of the training corpus as computed by
// GetLL(m_pCorpusTrain).
142  {
143  //SetParam(pdParams);
144 
145  return -GetLL(m_pCorpusTrain);
146 
    // NOTE(review): unreachable - the function has already returned above.
147  return 0;
148  }
// Writes the gradient into pdGradient (m_nParamNum entries):
//   pdGradient[i] = -(aExpEmpirical[i] - aExpTheoretical[i])
// aExpEmpirical is the average, over all training sequences, of the vector
// filled by m_pModel->GetHiddenExp(x, ...). aExpTheoretical is filled by
// m_pModel->GetNodeExp(..., m_trainPi). After computing the gradient the
// function appends debug output to the file "GradientML.dbg".
//
// NOTE(review): aSeq is used but no declaration is visible; the listing skips
// original line 158, which may have held it (or a Fill of another buffer).
// NOTE(review): pdGradient is written without a NULL check, unlike SetParam
// and GetParam.
// NOTE(review): with an empty training corpus nNum stays 0 and 1.0 / nNum is a
// floating-point division by zero, so the averages would not be finite.
149  void MLfunc::GetGradient(double *pdGradient)
150  {
151  //SetParam(pdParams);
152 
153  Vec<double> aExpEmpirical(m_nParamNum);
154  Vec<double> aExpGivenX(m_nParamNum);
155  Vec<double> aExpTheoretical(m_nParamNum);
156 
    // NOTE(review): only aExpEmpirical is visibly zero-filled here;
    // aExpTheoretical is passed to GetNodeExp without a visible Fill -
    // confirm whether GetNodeExp overwrites or accumulates.
157  aExpEmpirical.Fill(0);
159  int nNum = 0;
160  for (int i = 0; i < m_pCorpusTrain->GetNum(); i++) {
161  m_pCorpusTrain->GetSeq(i, aSeq);
162 
163  VecShell<VocabID> x(aSeq.GetBuffer(), aSeq.GetNum());
    // The scratch buffer is cleared before every call and then added
    // into the running total.
164  aExpGivenX.Fill(0);
165  m_pModel->GetHiddenExp(x, aExpGivenX.GetBuf());
166  aExpEmpirical += aExpGivenX;
167  nNum++;
168  }
    // Turn the accumulated sum into a per-sequence average.
169  aExpEmpirical *= 1.0 / nNum;
170 
171  m_pModel->GetNodeExp(aExpTheoretical.GetBuf(), m_trainPi.GetBuf());
172 
173  for (int i = 0; i < m_nParamNum; i++) {
174  pdGradient[i] = -(aExpEmpirical[i] - aExpTheoretical[i]);
175  }
176 
177 
    // Debug dump. The File object is a function-local static, so it is
    // constructed once (mode "wt") and reused by every later call.
    // BufMap maps pdGradient onto the per-kind shells; only featexp, VHexp
    // and HHexp are printed - CHexp and Bexp are mapped but not written.
    // NOTE(review): this dump runs on every gradient evaluation; presumably
    // leftover debugging - confirm it is still wanted.
178  static File fileDbg("GradientML.dbg", "wt");
179  VecShell<double> featexp;
180  Mat3dShell<double> VHexp, CHexp, HHexp;
181  MatShell<double> Bexp;
182  m_pModel->BufMap(pdGradient, featexp, VHexp, CHexp, HHexp, Bexp);
183  fileDbg.PrintArray("%f ", featexp.GetBuf(), featexp.GetSize());
184  fileDbg.PrintArray("%f ", VHexp.GetBuf(), VHexp.GetSize());
185  fileDbg.PrintArray("%f ", HHexp.GetBuf(), HHexp.GetSize());
186  fileDbg.Print("\n");
187  fileDbg.PrintArray("%f ", aExpEmpirical.GetBuf(), m_nParamNum);
188  fileDbg.PrintArray("%f ", aExpTheoretical.GetBuf(), m_nParamNum);
189 /* fileDbg.PrintArray("%f ", pdGradient, m_nParamNum);*/
190  //Pause();
191  /*return false;*/
192  }
// Fills pdValues with the negated log-likelihood (-GetLL) of the training
// corpus, followed by the validation corpus and the test corpus when those
// pointers are non-NULL, and returns how many values were written (1 to 3).
// t is the iteration index supplied by the caller.
//
// NOTE(review): the body of the "every 10th iteration" branch is empty in this
// listing because original line 198 is skipped; the real source presumably
// does something there (e.g. writes the model) - confirm.
// NOTE(review): pdValues is assumed to have room for up to three doubles;
// nothing in this function checks that.
193  int MLfunc::GetExtraValues(int t/*, double *pdParams*/, double *pdValues)
194  {
195  //SetParam(pdParams);
196 
    // True for t = 1, 11, 21, ...
197  if ( (t - 1) % 10 == 0) {
199  }
200 
    // Each GetLL call below makes a full pass over the respective corpus.
201  int nValue = 0;
202  pdValues[nValue++] = -GetLL(m_pCorpusTrain);
203  if (m_pCorpusValid) {
204  pdValues[nValue++] = -GetLL(m_pCorpusValid);
205  }
206  if (m_pCorpusTest) {
207  pdValues[nValue++] = -GetLL(m_pCorpusTest);
208  }
209 
210  return nValue;
211 
212  }
213 }
virtual double GetValue()
calculate the function value f(x)
Vec< Prob > m_pi
the prior length distribution
Definition: trf-model.h:58
int GetParamNum() const
Get the total parameter number.
Definition: hrf-model.h:130
T * GetBuf() const
Definition: wb-mat.h:171
int m_hnode
the number of hidden nodes
Definition: hrf-model.h:102
virtual void SetParam(double *pdParams)
set the parameter.
trf::PValue PValue
Definition: hrf-model.h:24
LogP Prob2LogP(Prob x)
Definition: trf-def.h:36
void SetPi(Prob *pPi)
Set the pi.
Definition: trf-model.cpp:70
void GetNodeExp(double *pExp, Prob *pLenProb=NULL)
[exact] sum_l { n_l/n * E_{p_l}[f] }: Exactly calculate the expectation over x and h ...
Definition: hrf-model.cpp:470
const char * m_pathOutputModel
Write to model during iteration.
Definition: hrf-ml-train.h:28
virtual void SetParam(PValue *pParam)
Set the parameters.
Definition: hrf-model.cpp:78
hidden-random-field model
Definition: hrf-model.h:98
double ExactNormalize(int nLen)
[exact] Exact Normalization, return the logz of given length
Definition: hrf-model.cpp:403
CorpusBase * m_pCorpusTrain
training corpus
Definition: hrf-ml-train.h:20
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
Definition: wb-log.h:170
void BufMap(T *p, VecShell< T > &feat, Mat3dShell< T > &VH, Mat3dShell< T > &CH, Mat3dShell< T > &HH, MatShell< T > &Bias)
Map a parameter vector onto each kind of parameter.
Definition: hrf-model.h:256
T * GetBuffer(int i=0) const
get the buffer pointer
Definition: wb-vector.h:97
int GetSize() const
Definition: wb-mat.h:173
virtual int GetExtraValues(int t, double *pdValues)
calculate extra values which will be printed at each iteration
#define lout_variable(x)
Definition: wb-log.h:179
virtual void Print(const char *p_pMessage,...)
print
Definition: wb-file.cpp:115
void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid=NULL, CorpusBase *pTest=NULL)
virtual bool GetSeq(int nLine, Array< VocabID > &aSeq)=0
get the sequence in nLine
int m_maxlen
the maximum length of model, excluding <s> and </s>. The min-len = 1
Definition: trf-model.h:57
void Fill(T v)
Definition: wb-mat.h:279
file class.
Definition: wb-file.h:94
int GetSize() const
Definition: wb-mat.h:69
Vec< LogP > m_zeta
the estimated normalization constants (fix = 0)
Definition: trf-model.h:60
Vec< PValue > m_values
Definition: hrf-ml-train.h:24
void GetParam(double *pdParams)
T * GetBuf() const
Definition: wb-mat.h:68
Vec< Prob > m_trainPi
the length distribution in training corpus
Definition: hrf-ml-train.h:25
Array< VocabID > aSeq
Definition: main-TRF.cpp:153
virtual double GetLL(CorpusBase *pCorpus, int nCalNum=-1)
calculate the log-likelihood on corpus
#define lout_warning(x)
Definition: wb-log.h:184
Vocab * GetVocab() const
Get Vocab.
Definition: trf-model.h:102
int GetNum() const
Get Array number.
Definition: wb-vector.h:240
int m_hlayer
the number of hidden layer
Definition: hrf-model.h:101
void Reset(int size=0)
Definition: wb-mat.h:360
Log lout
the definition is in wb-log.cpp
Definition: wb-log.cpp:22
CorpusBase * m_pCorpusValid
valid corpus
Definition: hrf-ml-train.h:21
virtual void GetLenCount(Array< int > &aLenCount)=0
get the length count
Model * m_pModel
HRF model.
Definition: hrf-ml-train.h:18
void Reset(Vocab *pv, int hlayer, int hnode, int maxlen)
reset, the maxlen is the length excluding the beg/end symbols.
Definition: hrf-model.cpp:65
void PrintArray(const char *pformat, TYPE *pbuf, int num)
print an array into a file
Definition: wb-file.h:148
int m_nParamNum
the parameter number
Definition: wb-solve.h:45
void GetParam(PValue *pParam)
Get the parameter vector.
Definition: hrf-model.cpp:95
trf::LogP LogP
Definition: hrf-model.h:27
virtual void GetGradient(double *pdGradient)
calculate the gradient g(x)
Vec< LogP > m_logz
the normalization constants log Z_l
Definition: trf-model.h:59
int GetMaxLen() const
Get max-len.
Definition: trf-model.h:100
T Sum()
Definition: wb-mat.h:81
virtual int GetMaxLen() const
get the max length
Definition: trf-corpus.h:51
void GetHiddenExp(VecShell< int > x, double *pExp)
[exact] E_{p_l(h|x)}[f]: does not clear pExp; the new expectation is added directly to pExp.
Definition: hrf-model.cpp:622
LogP GetLogProb(Seq &seq, bool bNorm=true)
calculate the probability
Definition: hrf-model.cpp:112
void WriteT(const char *pfilename)
Write Model.
Definition: hrf-model.cpp:233
virtual int GetNum() const
get the seq number
Definition: trf-corpus.h:47
CorpusBase * m_pCorpusTest
test corpus
Definition: hrf-ml-train.h:22