TRF Language Model
|
#include <wb-word-cluster.h>
Public Member Functions | |
WordCluster_t (int nClass, char *pathRes=NULL) | |
~WordCluster_t (void) | |
void | WriteRes (const char *path) |
void | ReadRes (const char *path) |
void | Reverse (int *pGram) |
void | InitCount (const char *path, const char *path_init_res=NULL) |
void | UpdateCount (Array< int > &aCountBuf) |
void | CountAdd (Array< int > &aCountBuf, LHash< int, int > &hash, int key, int count) |
void | CountAdd (Array< int > &aCountBuf, Trie< int, int > &hash, int *pKey, int nLen, int count) |
void | CountAdd (VecShell< int > &aCountBuf, LHash< int, int > &hash, int key, int count) |
void | CountAdd (VecShell< int > &aCountBuf, Trie< int, int > &hash, int *pKey, int nLen, int count) |
void | CopyCountToThreads (Array< int > &aCountBuf) |
void | MoveWord (VecShell< int > vCountBuf, VecShell< int > vMap, int nWord, bool bOut=true) |
move word in/out of a class and update the counts More... | |
void | ExchangeWord (VecShell< int > vCountBuf, VecShell< int > vMap, int nWord, int nToClass) |
exchange the word nWord from m_aClass[nWord] to nToClass More... | |
void | Cluster (int nMaxTime=-1) |
cluster More... | |
double | LogLikelihood (VecShell< int > vCountBuf) |
calculate the log-likelihood More... | |
void | SimpleCluster () |
simple clustering based on word occurrence frequency; no iteration needed More... | |
Public Attributes | |
LHash< int, int > | m_word_count |
N(w) index the word unigram count. More... | |
Trie< int, int > | m_wgram_count |
N(w,v) word bigram count. More... | |
Trie< int, int > | m_inv_wgram_count |
N(v,w) inverse word bigram count. More... | |
LHash< int, int > | m_class |
index the class More... | |
Trie< int, int > | m_class_gram |
(g(w), g(v)) the index of the class ngram More... | |
Trie< int, int > | m_word_class_gram |
(w,g) the word-class ngram More... | |
Trie< int, int > | m_class_word_gram |
(g,w) the class-word ngram More... | |
Mat< int > | m_tCountBuf |
the count buffer for each thread More... | |
Mat< int > | m_tMap |
maps each word to its corresponding class, for each thread More... | |
Array< int > | m_mCountBuf |
the count buffer in the main thread More... | |
Array< int > | m_mMap |
the final g(w) More... | |
int | m_nClassNum |
the maximum class number More... | |
int | m_nVocabSize |
word number, i.e. the maximum word-id + 1 More... | |
int | m_nSentNum |
total sentence number More... | |
int | m_nUnigramNum |
int | m_nBigramNum |
double | m_dWordLogSum |
String | m_pathRes |
the result file, [ w g(w) ] More... | |
Definition at line 111 of file wb-word-cluster.h.
|
inline |
Definition at line 147 of file wb-word-cluster.h.
|
inline |
Definition at line 160 of file wb-word-cluster.h.
void wb::WordCluster_t::Cluster | ( | int | nMaxTime = -1 | ) |
cluster
Definition at line 865 of file wb-word-cluster.cpp.
void wb::WordCluster_t::CopyCountToThreads | ( | Array< int > & | aCountBuf | ) |
Definition at line 763 of file wb-word-cluster.cpp.
void wb::WordCluster_t::CountAdd | ( | Array< int > & | aCountBuf, |
LHash< int, int > & | hash, | ||
int | key, | ||
int | count | ||
) |
Definition at line 725 of file wb-word-cluster.cpp.
void wb::WordCluster_t::CountAdd | ( | Array< int > & | aCountBuf, |
Trie< int, int > & | hash, | ||
int * | pKey, | ||
int | nLen, | ||
int | count | ||
) |
Definition at line 735 of file wb-word-cluster.cpp.
void wb::WordCluster_t::CountAdd | ( | VecShell< int > & | aCountBuf, |
LHash< int, int > & | hash, | ||
int | key, | ||
int | count | ||
) |
Definition at line 745 of file wb-word-cluster.cpp.
void wb::WordCluster_t::CountAdd | ( | VecShell< int > & | aCountBuf, |
Trie< int, int > & | hash, | ||
int * | pKey, | ||
int | nLen, | ||
int | count | ||
) |
Definition at line 754 of file wb-word-cluster.cpp.
void wb::WordCluster_t::ExchangeWord | ( | VecShell< int > | vCountBuf, |
VecShell< int > | vMap, | ||
int | nWord, | ||
int | nToClass | ||
) |
exchange the word nWord from m_aClass[nWord] to nToClass
Definition at line 856 of file wb-word-cluster.cpp.
void wb::WordCluster_t::InitCount | ( | const char * | path, |
const char * | path_init_res = NULL |
||
) |
Definition at line 562 of file wb-word-cluster.cpp.
double wb::WordCluster_t::LogLikelihood | ( | VecShell< int > | vCountBuf | ) |
calculate the log-likelihood
Definition at line 981 of file wb-word-cluster.cpp.
void wb::WordCluster_t::MoveWord | ( | VecShell< int > | vCountBuf, |
VecShell< int > | vMap, | ||
int | nWord, | ||
bool | bOut = true |
||
) |
move word in/out of a class and update the counts
class unigram
class bigram
the inverse of class-word pairs
Definition at line 778 of file wb-word-cluster.cpp.
void wb::WordCluster_t::ReadRes | ( | const char * | path | ) |
Definition at line 551 of file wb-word-cluster.cpp.
|
inline |
Definition at line 164 of file wb-word-cluster.h.
void wb::WordCluster_t::SimpleCluster | ( | ) |
void wb::WordCluster_t::UpdateCount | ( | Array< int > & | aCountBuf | ) |
Definition at line 662 of file wb-word-cluster.cpp.
void wb::WordCluster_t::WriteRes | ( | const char * | path | ) |
Definition at line 541 of file wb-word-cluster.cpp.
LHash<int, int> wb::WordCluster_t::m_class |
index the class
Definition at line 118 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster_t::m_class_gram |
(g(w), g(v)) the index of the class ngram
Definition at line 119 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster_t::m_class_word_gram |
(g,w) the class-word ngram
Definition at line 121 of file wb-word-cluster.h.
double wb::WordCluster_t::m_dWordLogSum |
Definition at line 136 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster_t::m_inv_wgram_count |
N(v,w) inverse word bigram count.
Definition at line 116 of file wb-word-cluster.h.
Array<int> wb::WordCluster_t::m_mCountBuf |
the count buffer in the main thread
Definition at line 126 of file wb-word-cluster.h.
Array<int> wb::WordCluster_t::m_mMap |
the final g(w)
Definition at line 127 of file wb-word-cluster.h.
int wb::WordCluster_t::m_nBigramNum |
Definition at line 135 of file wb-word-cluster.h.
int wb::WordCluster_t::m_nClassNum |
the maximum class number
Definition at line 130 of file wb-word-cluster.h.
int wb::WordCluster_t::m_nSentNum |
total sentence number
Definition at line 132 of file wb-word-cluster.h.
int wb::WordCluster_t::m_nUnigramNum |
Definition at line 134 of file wb-word-cluster.h.
int wb::WordCluster_t::m_nVocabSize |
word number, i.e. the maximum word-id + 1
Definition at line 131 of file wb-word-cluster.h.
String wb::WordCluster_t::m_pathRes |
the result file, [ w g(w) ]
Definition at line 138 of file wb-word-cluster.h.
Mat<int> wb::WordCluster_t::m_tCountBuf |
the count buffer for each thread
Definition at line 123 of file wb-word-cluster.h.
Mat<int> wb::WordCluster_t::m_tMap |
maps each word to its corresponding class, for each thread
Definition at line 124 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster_t::m_wgram_count |
N(w,v) word bigram count.
Definition at line 115 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster_t::m_word_class_gram |
(w,g) the word-class ngram
Definition at line 120 of file wb-word-cluster.h.
LHash<int, int> wb::WordCluster_t::m_word_count |
N(w) index the word unigram count.
Definition at line 114 of file wb-word-cluster.h.