TRF Language Model
wb::WordCluster_t Class Reference

#include <wb-word-cluster.h>

Public Member Functions

 WordCluster_t (int nClass, char *pathRes=NULL)
 
 ~WordCluster_t (void)
 
void WriteRes (const char *path)
 
void ReadRes (const char *path)
 
void Reverse (int *pGram)
 
void InitCount (const char *path, const char *path_init_res=NULL)
 
void UpdateCount (Array< int > &aCountBuf)
 
void CountAdd (Array< int > &aCountBuf, LHash< int, int > &hash, int key, int count)
 
void CountAdd (Array< int > &aCountBuf, Trie< int, int > &hash, int *pKey, int nLen, int count)
 
void CountAdd (VecShell< int > &aCountBuf, LHash< int, int > &hash, int key, int count)
 
void CountAdd (VecShell< int > &aCountBuf, Trie< int, int > &hash, int *pKey, int nLen, int count)
 
void CopyCountToThreads (Array< int > &aCountBuf)
 
void MoveWord (VecShell< int > vCountBuf, VecShell< int > vMap, int nWord, bool bOut=true)
 move word in/out of a class and update the counts More...
 
void ExchangeWord (VecShell< int > vCountBuf, VecShell< int > vMap, int nWord, int nToClass)
 exchange the nWord from m_aClass[nWord] to nToClass More...
 
void Cluster (int nMaxTime=-1)
 cluster More...
 
double LogLikelihood (VecShell< int > vCountBuf)
 calculate the log-likelihood More...
 
void SimpleCluster ()
 使用出现频率进行简单的分类,不需要迭代 (simple clustering by word occurrence frequency; no iteration required) More...
 

Public Attributes

LHash< int, int > m_word_count
 N(w) index the word unigram count. More...
 
Trie< int, int > m_wgram_count
 N(w,v) word bigram count. More...
 
Trie< int, int > m_inv_wgram_count
 N(v,w) inverse word bigram count. More...
 
LHash< int, int > m_class
 index the class More...
 
Trie< int, int > m_class_gram
 (g(w), g(v)) the index of the class ngram More...
 
Trie< int, int > m_word_class_gram
 (w,g) the word-class ngram More...
 
Trie< int, int > m_class_word_gram
 (g,w) the class-word ngram More...
 
Mat< int > m_tCountBuf
 the count buffer for each thread More...
 
Mat< int > m_tMap
 map each word to its corresponding class in each thread More...
 
Array< int > m_mCountBuf
 the count buffer in the main thread More...
 
Array< int > m_mMap
 the final g(w) More...
 
int m_nClassNum
 the maximum class number More...
 
int m_nVocabSize
 word number, i.e. the maximum word-id + 1 More...
 
int m_nSentNum
 total sentence number More...
 
int m_nUnigramNum
 
int m_nBigramNum
 
double m_dWordLogSum
 
String m_pathRes
 the result file, [ w g(w) ] More...
 

Detailed Description

Definition at line 111 of file wb-word-cluster.h.

Constructor & Destructor Documentation

§ WordCluster_t()

wb::WordCluster_t::WordCluster_t ( int  nClass,
char *  pathRes = NULL 
)
inline

Definition at line 147 of file wb-word-cluster.h.

§ ~WordCluster_t()

wb::WordCluster_t::~WordCluster_t ( void  )
inline

Definition at line 160 of file wb-word-cluster.h.

Member Function Documentation

§ Cluster()

void wb::WordCluster_t::Cluster ( int  nMaxTime = -1)

cluster

Definition at line 865 of file wb-word-cluster.cpp.

§ CopyCountToThreads()

void wb::WordCluster_t::CopyCountToThreads ( Array< int > &  aCountBuf)

Definition at line 763 of file wb-word-cluster.cpp.

§ CountAdd() [1/4]

void wb::WordCluster_t::CountAdd ( Array< int > &  aCountBuf,
LHash< int, int > &  hash,
int  key,
int  count 
)

Definition at line 725 of file wb-word-cluster.cpp.

§ CountAdd() [2/4]

void wb::WordCluster_t::CountAdd ( Array< int > &  aCountBuf,
Trie< int, int > &  hash,
int *  pKey,
int  nLen,
int  count 
)

Definition at line 735 of file wb-word-cluster.cpp.

§ CountAdd() [3/4]

void wb::WordCluster_t::CountAdd ( VecShell< int > &  aCountBuf,
LHash< int, int > &  hash,
int  key,
int  count 
)

Definition at line 745 of file wb-word-cluster.cpp.

§ CountAdd() [4/4]

void wb::WordCluster_t::CountAdd ( VecShell< int > &  aCountBuf,
Trie< int, int > &  hash,
int *  pKey,
int  nLen,
int  count 
)

Definition at line 754 of file wb-word-cluster.cpp.

§ ExchangeWord()

void wb::WordCluster_t::ExchangeWord ( VecShell< int >  vCountBuf,
VecShell< int >  vMap,
int  nWord,
int  nToClass 
)

exchange the nWord from m_aClass[nWord] to nToClass

Definition at line 856 of file wb-word-cluster.cpp.

§ InitCount()

void wb::WordCluster_t::InitCount ( const char *  path,
const char *  path_init_res = NULL 
)

Definition at line 562 of file wb-word-cluster.cpp.

§ LogLikelihood()

double wb::WordCluster_t::LogLikelihood ( VecShell< int >  vCountBuf)

calculate the log-likelihood

Definition at line 981 of file wb-word-cluster.cpp.

§ MoveWord()

void wb::WordCluster_t::MoveWord ( VecShell< int >  vCountBuf,
VecShell< int >  vMap,
int  nWord,
bool  bOut = true 
)

move word in/out of a class and update the counts

class unigram

class bigram

the inverse of class-word pairs

Definition at line 778 of file wb-word-cluster.cpp.

§ ReadRes()

void wb::WordCluster_t::ReadRes ( const char *  path)

Definition at line 551 of file wb-word-cluster.cpp.

§ Reverse()

void wb::WordCluster_t::Reverse ( int *  pGram)
inline

Definition at line 164 of file wb-word-cluster.h.

§ SimpleCluster()

void wb::WordCluster_t::SimpleCluster ( )

使用出现频率进行简单的分类,不需要迭代 (simple clustering by word occurrence frequency; no iteration required)

对词频计算平方根 (take the square root of the word frequency)

Definition at line 1024 of file wb-word-cluster.cpp.

§ UpdateCount()

void wb::WordCluster_t::UpdateCount ( Array< int > &  aCountBuf)

Definition at line 662 of file wb-word-cluster.cpp.

§ WriteRes()

void wb::WordCluster_t::WriteRes ( const char *  path)

Definition at line 541 of file wb-word-cluster.cpp.

Member Data Documentation

§ m_class

LHash<int, int> wb::WordCluster_t::m_class

index the class

Definition at line 118 of file wb-word-cluster.h.

§ m_class_gram

Trie<int, int> wb::WordCluster_t::m_class_gram

(g(w), g(v)) the index of the class ngram

Definition at line 119 of file wb-word-cluster.h.

§ m_class_word_gram

Trie<int, int> wb::WordCluster_t::m_class_word_gram

(g,w) the class-word ngram

Definition at line 121 of file wb-word-cluster.h.

§ m_dWordLogSum

double wb::WordCluster_t::m_dWordLogSum

Definition at line 136 of file wb-word-cluster.h.

§ m_inv_wgram_count

Trie<int, int> wb::WordCluster_t::m_inv_wgram_count

N(v,w) inverse word bigram count.

Definition at line 116 of file wb-word-cluster.h.

§ m_mCountBuf

Array<int> wb::WordCluster_t::m_mCountBuf

the count buffer in the main thread

Definition at line 126 of file wb-word-cluster.h.

§ m_mMap

Array<int> wb::WordCluster_t::m_mMap

the final g(w)

Definition at line 127 of file wb-word-cluster.h.

§ m_nBigramNum

int wb::WordCluster_t::m_nBigramNum

Definition at line 135 of file wb-word-cluster.h.

§ m_nClassNum

int wb::WordCluster_t::m_nClassNum

the maximum class number

Definition at line 130 of file wb-word-cluster.h.

§ m_nSentNum

int wb::WordCluster_t::m_nSentNum

total sentence number

Definition at line 132 of file wb-word-cluster.h.

§ m_nUnigramNum

int wb::WordCluster_t::m_nUnigramNum

Definition at line 134 of file wb-word-cluster.h.

§ m_nVocabSize

int wb::WordCluster_t::m_nVocabSize

word number, i.e. the maximum word-id + 1

Definition at line 131 of file wb-word-cluster.h.

§ m_pathRes

String wb::WordCluster_t::m_pathRes

the result file, [ w g(w) ]

Definition at line 138 of file wb-word-cluster.h.

§ m_tCountBuf

Mat<int> wb::WordCluster_t::m_tCountBuf

the count buffer for each thread

Definition at line 123 of file wb-word-cluster.h.

§ m_tMap

Mat<int> wb::WordCluster_t::m_tMap

map each word to its corresponding class in each thread

Definition at line 124 of file wb-word-cluster.h.

§ m_wgram_count

Trie<int, int> wb::WordCluster_t::m_wgram_count

N(w,v) word bigram count.

Definition at line 115 of file wb-word-cluster.h.

§ m_word_class_gram

Trie<int, int> wb::WordCluster_t::m_word_class_gram

(w,g) the word-class ngram

Definition at line 120 of file wb-word-cluster.h.

§ m_word_count

LHash<int, int> wb::WordCluster_t::m_word_count

N(w) index the word unigram count.

Definition at line 114 of file wb-word-cluster.h.


The documentation for this class was generated from the following files: