45 virtual LogP ClusterSum(
int *pSeq,
int nLen,
int nPos,
int nOrder);
77 m_nLenJumpAccTimes = 0;
78 m_nLenJumpTotalTime = 0;
79 m_nSampleHAccTimes = 0;
80 m_nSampleHTotalTimes = 0;
89 m_nLenJumpAccTimes = 0;
90 m_nLenJumpTotalTime = 0;
91 m_nSampleHAccTimes = 0;
92 m_nSampleHTotalTimes = 0;
108 void Reset(
Vocab *pv,
int maxlen);
110 virtual void SetParam(
PValue *pValue);
112 void GetParam(
PValue *pValue);
114 void SetPi(
Prob *pPi);
116 template <
typename T>
120 for (
int i = 1; i <= m_maxlen; i++) {
121 m_zeta[i] = (
LogP)( pzeta[i] - pzeta[1] );
122 m_logz[i] = (
LogP)( m_zeta[i] + m_logz[1] );
125 template <
typename T>
128 for (
int i = 1; i <= m_maxlen; i++) {
129 pzeta[i] = (T) m_zeta[i];
133 LogP GetLogProb(
Seq &seq,
bool bNorm =
true);
135 void LoadFromCorpus(
const char *pcorpus,
const char *pfeatstyle,
int nOrder);
137 void FeatCount(
Seq &seq,
double *pCount,
double dadd = 1.0);
140 void ReadT(
const char *pfilename);
142 void WriteT(
const char *pfilename);
155 virtual double ExactNormalize(
int nLen);
157 virtual void ExactNormalize();
159 void GetNodeExp(
int nLen,
double *pExp);
161 void GetNodeExp(
double *pExp,
Prob *pLenProb = NULL);
171 void Sample(
Seq &seq);
174 void LocalJump(
Seq &seq);
176 virtual void MarkovMove(
Seq &seq);
178 LogP ProposeLength(
int nOld,
int &nNew,
bool bSample);
189 LogP GetReducedModelForC(
Seq &seq,
int nPos);
191 LogP GetReducedModelForW(
Seq &seq,
int nPos);
193 LogP GetReducedModel(
Seq &seq,
int nPos);
195 LogP GetMarginalProbOfC(
Seq &seq,
int nPos);
203 void SampleC(
Seq &seq,
int nPos);
206 LogP SampleX(
Seq &seq,
int nPos,
bool bSample =
true);
209 LogP AISNormalize(
int nLen,
int nChain,
int nInter);
210 void AISNormalize(
int nLenMin,
int nLenMax,
int nChain,
int nInter);
234 for (
int i = 0; i < pXs->
GetNum(); i++) {
254 idx = pXs->
Find(nSaveX);
256 lout_error(
"Can't find the VocabID(" << nSaveX <<
") in the array.\n" 257 <<
"This may beacuse word(" << nSaveX <<
") doesnot belongs to class(" 268 for (
int cid = 0; cid < m_pVocab->GetClassNum(); cid++) {
278 for (
int i = 0; i < seq.
GetLen(); i++)
283 for (
int times = 0; times < m_nMHtimes; times++)
288 LogP pold = GetReducedModel(seq, nPos);
293 int prop_w_id =
omp_nrand(0, pWords->GetNum());
294 VocabID prop_w = pWords->Get(prop_w_id);
298 LogP pnew = GetReducedModel(seq, nPos);
300 LogP g_old =
Prob2LogP(1.0 / m_pVocab->GetClassNum()) +
Prob2LogP(1.0 / m_pVocab->GetWord(old_c)->GetNum());
301 LogP g_new =
Prob2LogP(1.0 / m_pVocab->GetClassNum()) +
Prob2LogP(1.0 / m_pVocab->GetWord(prop_c)->GetNum());
302 LogP acclogp = pnew + g_old - (pold + g_new);
305 m_nSampleHAccTimes++;
313 m_nSampleHTotalTimes++;
Vec< Prob > m_pi
the prior length distribution
int GetParamNum() const
Get parameter number.
#define SAFE_DELETE(p)
memory release
T & Get(int i)
get the value at position i
Model(Vocab *pv)
constructor
m WriteT(cfg_pathModelWrite)
int LogLineSampling(const LogP *pdProbs, int nNum)
Model(Vocab *pv, int maxlen)
constructor
void MarkovMove(Seq &seq)
[sample] Markov Move - perform the Gibbs sampling
int GetMaxOrder()
Get maximum order.
int m_maxSampleLen
[sample] the maximum sample length, default = m_maxlen + 2
TRF model that revises the sampling method to speed up the MCMC.
void SetZeta(T *pzeta)
Set updated zeta.
int m_nSampleHAccTimes
sample H: the number of acceptances
Model_FastSample(Vocab *pv)
void SamplePos(Seq &seq, int nPos)
int m_nLenJumpAccTimes
length jump: the number of acceptances
virtual LogP ClusterSum(int *pSeq, int nLen, int nPos, int nOrder)
This function needs to be overridden in derived classes. Calculates the log probability of each cluster.
define a sequence including the word sequence and class sequence
LogP ProposeW0(VocabID &wi, Seq &seq, int nPos, bool bSample=true)
int GetNum() const
Get number.
int GetMaxOrder() const
Get maximum order.
int m_maxlen
the maximum length of the model, excluding <s> and </s>. The minimum length is 1.
int Find(T t)
Find a value and return the position.
Vec< LogP > m_zeta
the estimated normalization constants (fix = 0)
int m_nLenJumpTotalTime
total number of length jumps
bool Acceptable(Prob prob)
Feat * m_pFeat
hash all the features
Vocab * GetVocab() const
Get Vocab.
int GetNum() const
Get Array number.
LogP LogLineNormalize(LogP *pdProbs, int nNum)
void ProposeCProbs(VecShell< LogP > &logps, Seq &seq, int nPos)
Model_FastSample(Vocab *pv, int maxlen)
include all the feature table
int omp_nrand(int nMin, int nMax)
get a random integer in [nMin, nMax-1]
the forward-backward algorithms for TRF model
Vec< LogP > m_logz
the normalization constants log Z_l
int GetMaxLen() const
Get max-len.
AlgNode m_AlgNode
the forward-backward calculation for each node
Vec< PValue > m_value
the value for each feature
Mat< Prob > m_matLenJump
[sample] used to propose a new length