TRF Language Model
trf-ml-train.h
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding languages may use different comment styles.
16 
17 
18 #pragma once
19 #include "trf-model.h"
20 #include "trf-corpus.h"
21 #include "wb-solve.h"
22 using namespace wb;
23 
24 namespace trf
25 {
30  class MLfunc : public Func // objective function constructed from a Model and up to three corpora (train / valid / test)
31  {
32  protected: // NOTE(review): listing lines 33-42 are not shown in this rendering; m_pModel, m_pCorpusTrain, m_pCorpusValid and m_pCorpusTest (initialized by the constructor below) are presumably declared there
35 
39 
41 
43  public:
44  const char *m_pathOutputModel; // output model path, NULL by default; presumably the model is written there during iteration - confirm in the .cpp
45 
46  public:
47  MLfunc() :m_pModel(NULL), m_pCorpusTrain(NULL), m_pCorpusValid(NULL), m_pCorpusTest(NULL) { // default state: no model and no corpora attached
48  m_pathOutputModel = NULL; // no output path until the caller assigns one
49  };
50  MLfunc(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid = NULL, CorpusBase *pTest = NULL); // pValid and pTest are optional (default NULL)
51  void Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid = NULL, CorpusBase *pTest = NULL); // takes the same arguments as the constructor above
52  virtual void SetParam(double *pdParams); // pdParams: raw parameter array; presumably copied into the model - confirm in the .cpp
53  void GetParam(double *pdParams); // counterpart of SetParam; presumably fills pdParams with the current parameters - confirm in the .cpp
55  /* - if nCalNum = -1, calculate all the sequences in the corpus;
56  - if nCalNum != -1, calculate the first min(nCalNum, number of sequences in the corpus) sequences.
57  */
58  virtual double GetLL(CorpusBase *pCorpus, int nCalNum = -1, Vec<double> *pLL = NULL); // pLL is optional (default NULL); presumably receives per-sequence values when supplied - confirm in the .cpp
60  void GetEmpExp(CorpusBase *pCorpus, Vec<double> &vExp); // result is returned through vExp; presumably the empirical expectation over pCorpus - confirm in the .cpp
61 
62  virtual double GetValue(); // NOTE(review): GetValue / GetGradient / GetExtraValues look like the interface inherited from Func - confirm against wb-solve.h
63  virtual void GetGradient(double *pdGradient); // result is returned through pdGradient; the caller supplies the buffer
64  virtual int GetExtraValues(int t, double *pdValues); // result is returned through pdValues; meaning of t and of the int return is not visible here - confirm in the .cpp
65  };
66 }
CorpusBase * m_pCorpusValid
valid corpus
Definition: trf-ml-train.h:37
const char * m_pathOutputModel
Path to which the model is written during iteration.
Definition: trf-ml-train.h:44
Vec< PValue > m_value
Stores temporary values of type PValue.
Definition: trf-ml-train.h:34
CorpusBase * m_pCorpusTrain
training corpus
Definition: trf-ml-train.h:36
Vec< Prob > m_trainPi
the length distribution in training corpus
Definition: trf-ml-train.h:40
TRF model.
Definition: trf-model.h:51
Vec< double > m_vEmpiricalExp
the empirical expectation
Definition: trf-ml-train.h:42
Defines the framework for iterative algorithms, such as gradient descent or L-BFGS.
CorpusBase * m_pCorpusTest
test corpus
Definition: trf-ml-train.h:38
The objective function base class, from which concrete objective functions are derived.
Definition: wb-solve.h:39
Definition: trf-alg.cpp:20
Model * m_pModel
TRF model.
Definition: trf-ml-train.h:33
define all the code written by Bin Wang.
Definition: wb-file.cpp:21