|
TRF Language Model
|
#include <wb-word-cluster.h>
Public Member Functions | |
| WordCluster (int nClass) | |
| ~WordCluster (void) | |
| void | Reverse (int *pGram) |
| void | InitCount (const char *path, const char *pTagVocab=NULL) |
| void | UpdataCount () |
| void | CountAdd (LHash< int, int > &count, int nWord, int nAdd) |
| void | CountAdd (Trie< int, int > &count, int *pWord, int nLen, int nAdd) |
| void | CountAdd (int **pCount, int *pWord, int nLen, int nAdd) |
| void | WriteCount (LHash< int, int > &count, File &file) |
| void | WriteCount (Trie< int, int > &count, File &file, bool bReverse=false) |
| void | WriteRes_WordClass (const char *path) |
| void | WriteRes_ClassWord (const char *path) |
| void | WriteRes_TagVocab (const char *path) |
| void | Read_TagVocab (const char *path) |
| double | LogLikelihood () |
| void | MoveWord (int nWord, bool bOut=true) |
| void | ExchangeWord (int nWord, int nToClass) |
| exchange the nWord from m_aClass[nWord] to nToClass More... | |
| void | Cluster (int nMaxTime=-1) |
| void | SimpleCluster () |
| 使用出现频率进行简单的分类,不需要迭代 More... | |
Public Attributes | |
| LHash< int, int > | m_wordCount |
| N(w) More... | |
| LHash< int, int > | m_classCount |
| N(g) More... | |
| Trie< int, int > | m_wordGramCount |
| N(w,v) More... | |
| Trie< int, int > | m_invWordGram |
| 储存每个w的前继,不计数,仅用于索引每个w的前继v More... | |
| int ** | m_pClassGramCount |
| N(g_w,g_v) More... | |
| Trie< int, int > | m_wordClassCount |
| N(w,g), 储存时,w在前,g在后 More... | |
| Trie< int, int > | m_classWordCount |
| N(g,w), 储存时,w在前,g在后 More... | |
| double | m_dWordLogSum |
| 记录sum{N(w)logN(w)} ,因为仅仅需要计算一次 More... | |
| Array< int > | m_aClass |
| 记录每个词w所在的类g More... | |
| int | m_nClassNum |
| int | m_nVocabSize |
| word-id的个数 More... | |
| int | m_nSentNum |
| 文本中的词总数 More... | |
| int | m_nUnigramNum |
| int | m_nBigramNum |
| char * | m_pathWordClass |
| char * | m_pathClassWord |
| char * | m_pathTagVocab |
Definition at line 32 of file wb-word-cluster.h.
|
inline |
Definition at line 60 of file wb-word-cluster.h.
|
inline |
Definition at line 67 of file wb-word-cluster.h.
| void wb::WordCluster::Cluster | ( | int | nMaxTime = -1 | ) |
赋予最后一个类
Definition at line 410 of file wb-word-cluster.cpp.
|
inline |
Definition at line 74 of file wb-word-cluster.h.
|
inline |
Definition at line 80 of file wb-word-cluster.h.
|
inline |
Definition at line 86 of file wb-word-cluster.h.
| void wb::WordCluster::ExchangeWord | ( | int | nWord, |
| int | nToClass | ||
| ) |
exchange the nWord from m_aClass[nWord] to nToClass
Definition at line 398 of file wb-word-cluster.cpp.
| void wb::WordCluster::InitCount | ( | const char * | path, |
| const char * | pTagVocab = NULL |
||
| ) |
由于存在没有count的word,因此需要为没有count的词分配一个class
Definition at line 6 of file wb-word-cluster.cpp.
| double wb::WordCluster::LogLikelihood | ( | ) |
Definition at line 230 of file wb-word-cluster.cpp.
| void wb::WordCluster::MoveWord | ( | int | nWord, |
| bool | bOut = true |
||
| ) |
Definition at line 284 of file wb-word-cluster.cpp.
| void wb::WordCluster::Read_TagVocab | ( | const char * | path | ) |
Definition at line 217 of file wb-word-cluster.cpp.
|
inline |
Definition at line 71 of file wb-word-cluster.h.
| void wb::WordCluster::SimpleCluster | ( | ) |
| void wb::WordCluster::UpdataCount | ( | ) |
Definition at line 88 of file wb-word-cluster.cpp.
Definition at line 149 of file wb-word-cluster.cpp.
Definition at line 158 of file wb-word-cluster.cpp.
| void wb::WordCluster::WriteRes_ClassWord | ( | const char * | path | ) |
Definition at line 179 of file wb-word-cluster.cpp.
| void wb::WordCluster::WriteRes_TagVocab | ( | const char * | path | ) |
Definition at line 210 of file wb-word-cluster.cpp.
| void wb::WordCluster::WriteRes_WordClass | ( | const char * | path | ) |
Definition at line 172 of file wb-word-cluster.cpp.
| Array<int> wb::WordCluster::m_aClass |
记录每个词w所在的类g
Definition at line 46 of file wb-word-cluster.h.
| LHash<int, int> wb::WordCluster::m_classCount |
N(g)
Definition at line 36 of file wb-word-cluster.h.
| Trie<int, int> wb::WordCluster::m_classWordCount |
N(g,w), 储存时,w在前,g在后
Definition at line 42 of file wb-word-cluster.h.
| double wb::WordCluster::m_dWordLogSum |
记录sum{N(w)logN(w)} ,因为仅仅需要计算一次
Definition at line 44 of file wb-word-cluster.h.
| Trie<int, int> wb::WordCluster::m_invWordGram |
储存每个w的前继,不计数,仅用于索引每个w的前继v
Definition at line 38 of file wb-word-cluster.h.
| int wb::WordCluster::m_nBigramNum |
Definition at line 52 of file wb-word-cluster.h.
| int wb::WordCluster::m_nClassNum |
Definition at line 47 of file wb-word-cluster.h.
| int wb::WordCluster::m_nSentNum |
文本中的词总数
Definition at line 49 of file wb-word-cluster.h.
| int wb::WordCluster::m_nUnigramNum |
Definition at line 51 of file wb-word-cluster.h.
| int wb::WordCluster::m_nVocabSize |
word-id的个数
Definition at line 48 of file wb-word-cluster.h.
| char* wb::WordCluster::m_pathClassWord |
Definition at line 55 of file wb-word-cluster.h.
| char* wb::WordCluster::m_pathTagVocab |
Definition at line 56 of file wb-word-cluster.h.
| char* wb::WordCluster::m_pathWordClass |
Definition at line 54 of file wb-word-cluster.h.
| int** wb::WordCluster::m_pClassGramCount |
N(g_w,g_v)
Definition at line 40 of file wb-word-cluster.h.
| Trie<int, int> wb::WordCluster::m_wordClassCount |
N(w,g), 储存时,w在前,g在后
Definition at line 41 of file wb-word-cluster.h.
| LHash<int, int> wb::WordCluster::m_wordCount |
N(w)
Definition at line 35 of file wb-word-cluster.h.
| Trie<int, int> wb::WordCluster::m_wordGramCount |
N(w,v)
Definition at line 37 of file wb-word-cluster.h.