TRF Language Model
|
#include <wb-word-cluster.h>
Public Member Functions | |
WordCluster (int nClass) | |
~WordCluster (void) | |
void | Reverse (int *pGram) |
void | InitCount (const char *path, const char *pTagVocab=NULL) |
void | UpdataCount () |
void | CountAdd (LHash< int, int > &count, int nWord, int nAdd) |
void | CountAdd (Trie< int, int > &count, int *pWord, int nLen, int nAdd) |
void | CountAdd (int **pCount, int *pWord, int nLen, int nAdd) |
void | WriteCount (LHash< int, int > &count, File &file) |
void | WriteCount (Trie< int, int > &count, File &file, bool bReverse=false) |
void | WriteRes_WordClass (const char *path) |
void | WriteRes_ClassWord (const char *path) |
void | WriteRes_TagVocab (const char *path) |
void | Read_TagVocab (const char *path) |
double | LogLikelihood () |
void | MoveWord (int nWord, bool bOut=true) |
void | ExchangeWord (int nWord, int nToClass) |
Exchange the word nWord from m_aClass[nWord] to nToClass More... | |
void | Cluster (int nMaxTime=-1) |
void | SimpleCluster () |
使用出现频率进行简单的分类,不需要迭代 More... | |
Public Attributes | |
LHash< int, int > | m_wordCount |
N(w) More... | |
LHash< int, int > | m_classCount |
N(g) More... | |
Trie< int, int > | m_wordGramCount |
N(w,v) More... | |
Trie< int, int > | m_invWordGram |
储存每个w的前继,不计数,仅用于索引每个w的前继v More... | |
int ** | m_pClassGramCount |
N(g_w,g_v) More... | |
Trie< int, int > | m_wordClassCount |
N(w,g), 储存时,w在前,g在后 More... | |
Trie< int, int > | m_classWordCount |
N(g,w), 储存时,w在前,g在后 More... | |
double | m_dWordLogSum |
记录sum{N(w)logN(w)} ,因为仅仅需要计算一次 More... | |
Array< int > | m_aClass |
记录每个词w所在的类g More... | |
int | m_nClassNum |
int | m_nVocabSize |
word-id的个数 More... | |
int | m_nSentNum |
文本中的词总数 More... | |
int | m_nUnigramNum |
int | m_nBigramNum |
char * | m_pathWordClass |
char * | m_pathClassWord |
char * | m_pathTagVocab |
Definition at line 32 of file wb-word-cluster.h.
|
inline |
Definition at line 60 of file wb-word-cluster.h.
|
inline |
Definition at line 67 of file wb-word-cluster.h.
void wb::WordCluster::Cluster | ( | int | nMaxTime = -1 | ) |
赋予最后一个类
Definition at line 410 of file wb-word-cluster.cpp.
|
inline |
Definition at line 74 of file wb-word-cluster.h.
|
inline |
Definition at line 80 of file wb-word-cluster.h.
|
inline |
Definition at line 86 of file wb-word-cluster.h.
void wb::WordCluster::ExchangeWord | ( | int | nWord, |
int | nToClass | ||
) |
Exchange the word nWord from m_aClass[nWord] to nToClass
Definition at line 398 of file wb-word-cluster.cpp.
void wb::WordCluster::InitCount | ( | const char * | path, |
const char * | pTagVocab = NULL |
||
) |
由于存在没有count的word,因此需要为没有count的词分配一个class
Definition at line 6 of file wb-word-cluster.cpp.
double wb::WordCluster::LogLikelihood | ( | ) |
Definition at line 230 of file wb-word-cluster.cpp.
void wb::WordCluster::MoveWord | ( | int | nWord, |
bool | bOut = true |
||
) |
Definition at line 284 of file wb-word-cluster.cpp.
void wb::WordCluster::Read_TagVocab | ( | const char * | path | ) |
Definition at line 217 of file wb-word-cluster.cpp.
|
inline |
Definition at line 71 of file wb-word-cluster.h.
void wb::WordCluster::SimpleCluster | ( | ) |
void wb::WordCluster::UpdataCount | ( | ) |
Definition at line 88 of file wb-word-cluster.cpp.
Definition at line 149 of file wb-word-cluster.cpp.
Definition at line 158 of file wb-word-cluster.cpp.
void wb::WordCluster::WriteRes_ClassWord | ( | const char * | path | ) |
Definition at line 179 of file wb-word-cluster.cpp.
void wb::WordCluster::WriteRes_TagVocab | ( | const char * | path | ) |
Definition at line 210 of file wb-word-cluster.cpp.
void wb::WordCluster::WriteRes_WordClass | ( | const char * | path | ) |
Definition at line 172 of file wb-word-cluster.cpp.
Array<int> wb::WordCluster::m_aClass |
记录每个词w所在的类g
Definition at line 46 of file wb-word-cluster.h.
LHash<int, int> wb::WordCluster::m_classCount |
N(g)
Definition at line 36 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster::m_classWordCount |
N(g,w), 储存时,w在前,g在后
Definition at line 42 of file wb-word-cluster.h.
double wb::WordCluster::m_dWordLogSum |
记录sum{N(w)logN(w)} ,因为仅仅需要计算一次
Definition at line 44 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster::m_invWordGram |
储存每个w的前继,不计数,仅用于索引每个w的前继v
Definition at line 38 of file wb-word-cluster.h.
int wb::WordCluster::m_nBigramNum |
Definition at line 52 of file wb-word-cluster.h.
int wb::WordCluster::m_nClassNum |
Definition at line 47 of file wb-word-cluster.h.
int wb::WordCluster::m_nSentNum |
文本中的词总数
Definition at line 49 of file wb-word-cluster.h.
int wb::WordCluster::m_nUnigramNum |
Definition at line 51 of file wb-word-cluster.h.
int wb::WordCluster::m_nVocabSize |
word-id的个数
Definition at line 48 of file wb-word-cluster.h.
char* wb::WordCluster::m_pathClassWord |
Definition at line 55 of file wb-word-cluster.h.
char* wb::WordCluster::m_pathTagVocab |
Definition at line 56 of file wb-word-cluster.h.
char* wb::WordCluster::m_pathWordClass |
Definition at line 54 of file wb-word-cluster.h.
int** wb::WordCluster::m_pClassGramCount |
N(g_w,g_v)
Definition at line 40 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster::m_wordClassCount |
N(w,g), 储存时,w在前,g在后
Definition at line 41 of file wb-word-cluster.h.
LHash<int, int> wb::WordCluster::m_wordCount |
N(w)
Definition at line 35 of file wb-word-cluster.h.
Trie<int, int> wb::WordCluster::m_wordGramCount |
N(w,v)
Definition at line 37 of file wb-word-cluster.h.