27 static const char* Word_beg =
"<s>";
28 static const char* Word_end =
"</s>";
44 Vocab(
const char* pathVocab);
53 case VocabID_seqbeg:
return Word_beg;
break;
54 case VocabID_seqend:
return Word_end;
break;
63 if (wid >= m_aClass.
GetNum())
68 void GetClass(VocabID *pcid,
const VocabID *pwid,
int nlen);
77 if (cid == VocabID_none)
79 return m_aClass2Word[cid];
int GetClassNum()
Get the total number of classes.
Array< VocabID > m_aClass
Stores the classes of each word. Supports both soft and hard classes.
Array< String > m_aWords
The word string for each vocabulary ID.
VocabID GetClass(VocabID wid)
get class
VocabID * GetClassMap()
get class map
T * GetBuffer(int i=0) const
get the buffer pointer
VocabID RandClass()
Pick a class at random.
Array< int > * GetWord(VocabID cid)
Get the words belonging to a class.
int GetNum() const
Get the number of elements in the array.
int IterEnd() const
Iterate over all the words, regardless of the begin/end symbols.
int GetSize()
Get the vocabulary size, i.e., the number of words.
const char * GetWordStr(int id)
get word string
int IterBeg() const
Iterate over all the words, regardless of the begin/end symbols.
bool IsLegalWord(VocabID id) const
Check if the VocabID is a legal word.
Array< Array< VocabID > * > m_aClass2Word
Stores the words belonging to each class.
Array< VocabID > m_aWordID
The word IDs, i.e., 0, 1, 2, 3, ...