33 File file(pathVocab,
"rt");
41 char *p = strtok(pLine,
" \t\n");
47 if (strcmp(p, Word_beg) == 0) {
48 lout_error(
"[Vocab] the input vocab exists <s>! path=" << pathVocab);
50 else if (strcmp(p, Word_end) == 0) {
51 lout_error(
"[Vocab] the input vocab exists </s>! path=" << pathVocab);
55 pStr = strtok(NULL,
" \t\n");
56 if (
String(pStr, strlen(
"class=")) ==
"class=") {
61 pClass = strtok(NULL,
" \t\n");
66 lout_error(
"[Vocab] The id is not continuous (id=" <<
id <<
")(nNum=" << nNum <<
")!");
68 m_aWords[id] = (pStr) ? pStr :
"NAN";
74 pClass += strlen(
"class=");
78 nClassNum = max(nClassNum,
m_aClass[
id] + 1);
96 lout_error(
"[Vocab] class " << cid <<
" is empty!");
101 lout <<
"[Vocab] Read from " << pathVocab << endl;
102 lout <<
"[Vocab] Read " << nNum <<
" words" << endl;
120 for (
int i = 0; i < nlen; i++) {
#define SAFE_DELETE(p)
memory release
Array< VocabID > m_aClass
store the classes of each word. Support soft and hard class
Array< String > m_aWords
the string of each vocabulary id
VocabID GetClass(VocabID wid)
get class
void SetNum(int n)
Set the Array number, to allocate enough memory.
virtual char * GetLine(bool bPrecent=false)
Read a line into the buffer.
void Clean()
Clean the array. Just set the top of the array to -1 and do not release the memory.
int GetNum() const
Get Array number.
void Add(T t)
Add a value to the tail of array.
Log lout
the definition is in wb-log.cpp
void Fill(T m)
set all the values to m
int nLine
the number of lines read from the file
Array< Array< VocabID > * > m_aClass2Word
store the words belonging to each class.
Array< VocabID > m_aWordID
the word id, i.e. 0,1,2,3,...