TRF Language Model
wb::WordCluster Class Reference

#include <wb-word-cluster.h>

Public Member Functions

 WordCluster (int nClass)
 
 ~WordCluster (void)
 
void Reverse (int *pGram)
 
void InitCount (const char *path, const char *pTagVocab=NULL)
 
void UpdataCount ()
 
void CountAdd (LHash< int, int > &count, int nWord, int nAdd)
 
void CountAdd (Trie< int, int > &count, int *pWord, int nLen, int nAdd)
 
void CountAdd (int **pCount, int *pWord, int nLen, int nAdd)
 
void WriteCount (LHash< int, int > &count, File &file)
 
void WriteCount (Trie< int, int > &count, File &file, bool bReverse=false)
 
void WriteRes_WordClass (const char *path)
 
void WriteRes_ClassWord (const char *path)
 
void WriteRes_TagVocab (const char *path)
 
void Read_TagVocab (const char *path)
 
double LogLikelihood ()
 
void MoveWord (int nWord, bool bOut=true)
 
void ExchangeWord (int nWord, int nToClass)
 exchange the nWord from m_aClass[nWord] to nToClass More...
 
void Cluster (int nMaxTime=-1)
 
void SimpleCluster ()
 使用出现频率进行简单的分类,不需要迭代 More...
 

Public Attributes

LHash< int, int > m_wordCount
 N(w) More...
 
LHash< int, int > m_classCount
 N(g) More...
 
Trie< int, int > m_wordGramCount
 N(w,v) More...
 
Trie< int, int > m_invWordGram
 储存每个w的前继,不计数,仅用于索引每个w的前继v More...
 
int ** m_pClassGramCount
 N(g_w,g_v) More...
 
Trie< int, int > m_wordClassCount
 N(w,g), 储存时,w在前,g在后 More...
 
Trie< int, int > m_classWordCount
 N(g,w), 储存时,w在前,g在后 More...
 
double m_dWordLogSum
 记录sum{N(w)logN(w)} ,因为仅仅需要计算一次 More...
 
Array< int > m_aClass
 记录每个词w所在的类g More...
 
int m_nClassNum
 
int m_nVocabSize
 word-id的个数 More...
 
int m_nSentNum
 文本中的词总数 More...
 
int m_nUnigramNum
 
int m_nBigramNum
 
char * m_pathWordClass
 
char * m_pathClassWord
 
char * m_pathTagVocab
 

Detailed Description

Definition at line 32 of file wb-word-cluster.h.

Constructor & Destructor Documentation

§ WordCluster()

wb::WordCluster::WordCluster ( int  nClass)
inline

Definition at line 60 of file wb-word-cluster.h.

§ ~WordCluster()

wb::WordCluster::~WordCluster ( void  )
inline

Definition at line 67 of file wb-word-cluster.h.

Member Function Documentation

§ Cluster()

void wb::WordCluster::Cluster ( int  nMaxTime = -1)

赋予最后一个类

Definition at line 410 of file wb-word-cluster.cpp.

§ CountAdd() [1/3]

void wb::WordCluster::CountAdd ( LHash< int, int > &  count,
int  nWord,
int  nAdd 
)
inline

Definition at line 74 of file wb-word-cluster.h.

§ CountAdd() [2/3]

void wb::WordCluster::CountAdd ( Trie< int, int > &  count,
int *  pWord,
int  nLen,
int  nAdd 
)
inline

Definition at line 80 of file wb-word-cluster.h.

§ CountAdd() [3/3]

void wb::WordCluster::CountAdd ( int **  pCount,
int *  pWord,
int  nLen,
int  nAdd 
)
inline

Definition at line 86 of file wb-word-cluster.h.

§ ExchangeWord()

void wb::WordCluster::ExchangeWord ( int  nWord,
int  nToClass 
)

exchange the nWord from m_aClass[nWord] to nToClass

Definition at line 398 of file wb-word-cluster.cpp.

§ InitCount()

void wb::WordCluster::InitCount ( const char *  path,
const char *  pTagVocab = NULL 
)

由于存在没有count的word,因此需要为没有count的词分配一个class

Definition at line 6 of file wb-word-cluster.cpp.

§ LogLikelihood()

double wb::WordCluster::LogLikelihood ( )

Definition at line 230 of file wb-word-cluster.cpp.

§ MoveWord()

void wb::WordCluster::MoveWord ( int  nWord,
bool  bOut = true 
)

Definition at line 284 of file wb-word-cluster.cpp.

§ Read_TagVocab()

void wb::WordCluster::Read_TagVocab ( const char *  path)

Definition at line 217 of file wb-word-cluster.cpp.

§ Reverse()

void wb::WordCluster::Reverse ( int *  pGram)
inline

Definition at line 71 of file wb-word-cluster.h.

§ SimpleCluster()

void wb::WordCluster::SimpleCluster ( )

使用出现频率进行简单的分类,不需要迭代

对词频计算平方根

Definition at line 505 of file wb-word-cluster.cpp.

§ UpdataCount()

void wb::WordCluster::UpdataCount ( )

Definition at line 88 of file wb-word-cluster.cpp.

§ WriteCount() [1/2]

void wb::WordCluster::WriteCount ( LHash< int, int > &  count,
File &  file 
)

Definition at line 149 of file wb-word-cluster.cpp.

§ WriteCount() [2/2]

void wb::WordCluster::WriteCount ( Trie< int, int > &  count,
File &  file,
bool  bReverse = false 
)

Definition at line 158 of file wb-word-cluster.cpp.

§ WriteRes_ClassWord()

void wb::WordCluster::WriteRes_ClassWord ( const char *  path)

Definition at line 179 of file wb-word-cluster.cpp.

§ WriteRes_TagVocab()

void wb::WordCluster::WriteRes_TagVocab ( const char *  path)

Definition at line 210 of file wb-word-cluster.cpp.

§ WriteRes_WordClass()

void wb::WordCluster::WriteRes_WordClass ( const char *  path)

Definition at line 172 of file wb-word-cluster.cpp.

Member Data Documentation

§ m_aClass

Array<int> wb::WordCluster::m_aClass

记录每个词w所在的类g

Definition at line 46 of file wb-word-cluster.h.

§ m_classCount

LHash<int, int> wb::WordCluster::m_classCount

N(g)

Definition at line 36 of file wb-word-cluster.h.

§ m_classWordCount

Trie<int, int> wb::WordCluster::m_classWordCount

N(g,w), 储存时,w在前,g在后

Definition at line 42 of file wb-word-cluster.h.

§ m_dWordLogSum

double wb::WordCluster::m_dWordLogSum

记录sum{N(w)logN(w)} ,因为仅仅需要计算一次

Definition at line 44 of file wb-word-cluster.h.

§ m_invWordGram

Trie<int, int> wb::WordCluster::m_invWordGram

储存每个w的前继,不计数,仅用于索引每个w的前继v

Definition at line 38 of file wb-word-cluster.h.

§ m_nBigramNum

int wb::WordCluster::m_nBigramNum

Definition at line 52 of file wb-word-cluster.h.

§ m_nClassNum

int wb::WordCluster::m_nClassNum

Definition at line 47 of file wb-word-cluster.h.

§ m_nSentNum

int wb::WordCluster::m_nSentNum

文本中的词总数

Definition at line 49 of file wb-word-cluster.h.

§ m_nUnigramNum

int wb::WordCluster::m_nUnigramNum

Definition at line 51 of file wb-word-cluster.h.

§ m_nVocabSize

int wb::WordCluster::m_nVocabSize

word-id的个数

Definition at line 48 of file wb-word-cluster.h.

§ m_pathClassWord

char* wb::WordCluster::m_pathClassWord

Definition at line 55 of file wb-word-cluster.h.

§ m_pathTagVocab

char* wb::WordCluster::m_pathTagVocab

Definition at line 56 of file wb-word-cluster.h.

§ m_pathWordClass

char* wb::WordCluster::m_pathWordClass

Definition at line 54 of file wb-word-cluster.h.

§ m_pClassGramCount

int** wb::WordCluster::m_pClassGramCount

N(g_w,g_v)

Definition at line 40 of file wb-word-cluster.h.

§ m_wordClassCount

Trie<int, int> wb::WordCluster::m_wordClassCount

N(w,g), 储存时,w在前,g在后

Definition at line 41 of file wb-word-cluster.h.

§ m_wordCount

LHash<int, int> wb::WordCluster::m_wordCount

N(w)

Definition at line 35 of file wb-word-cluster.h.

§ m_wordGramCount

Trie<int, int> wb::WordCluster::m_wordGramCount

N(w,v)

Definition at line 37 of file wb-word-cluster.h.


The documentation for this class was generated from the following files: