TRF Language Model
trf-feature.cpp
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding language may use different comment styles.
16 
17 
18 #include "trf-feature.h"
19 
20 namespace trf
21 {
22  void Seq::Set(Array<int> &aInt, Vocab *pv)
23  {
24  Set(aInt.GetBuffer(), aInt.GetNum(), pv);
25  }
26  void Seq::Set(int *pInt, int nLen, Vocab *pv)
27  {
28  Reset(nLen);
29  for (int i = 0; i < nLen; i++) {
30  x[word_layer][i] = pInt[i];
32  x[class_layer][i] = (pInt[i] < 0) ? pInt[i] : pv->GetClass(pInt[i]);
33  }
34  }
35  void Seq::SetClass(Vocab *pv)
36  {
37  pv->GetClass(x[class_layer].GetBuf(), x[word_layer].GetBuf(), nLen);
38  }
39  void Seq::Random(Vocab *pv)
40  {
41  /* randomly set c */
42  for (int i = 0; i < nLen; i++) {
43  x[class_layer][i] = pv->RandClass();
44  }
45 
46  /* randomly set a legal word*/
47  for (int i = 0; i < nLen; i++) {
48  Array<VocabID> *pXs = pv->GetWord(x[class_layer][i]);
49  x[word_layer][i] = pXs->Get(rand() % pXs->GetNum());
50  }
51  }
52  void Seq::Print()
53  {
54  for (int i = 0; i < x.GetRow(); i++) {
55  lout.output(x[i].GetBuf(), nLen);
56  lout << endl;
57  }
58  }
59  void Seq::Print(File &file)
60  {
61  for (int i = 0; i < x.GetRow(); i++) {
62  file.PrintArray("%d ", x[i].GetBuf(), nLen);
63  }
64  file.Print("\n");
65  }
66 
67  void FeatStyle::Set(const char *pstyle)
68  {
69  m_aFields.Clean();
70  m_nOrder = 0;
71 
72  Field fd;
73  int nLen = strlen(pstyle);
74  for (int i = 0; i < nLen;) {
75  fd.c = pstyle[i];
76  fd.i = (fd.c == '-') ? -1 : ((fd.c == 'w') ? word_layer : class_layer);
77  i += 2;
78  fd.n = atoi(pstyle + i);
79  while (pstyle[i] != ']' && i < nLen) {
80  i++;
81  }
82  i++;
83 
84  if (fd.n == 0) {
85  lout_error("[FeatStyle] Set: illegal style : " << pstyle);
86  }
87 
88  m_aFields.Add(fd);
89  m_nOrder += fd.n;
90  }
91  }
92  bool FeatStyle::GetKey(Seq &seq, int nPos, int nOrder, Array<int> &key)
93  {
94  key.Clean();
95  if (m_nOrder > nOrder) {
96  return false;
97  }
98 
99  int nCur = nPos;
100  for (int nfield = 0; nfield < m_aFields.GetNum(); nfield++) {
101  Field info = m_aFields[nfield];
102  for (int i = 0; i < info.n; i++) {
103  if (info.i >= 0) {
104  key.Add(seq.x[info.i][nCur]);
105  }
106  nCur++;
107  }
108  }
109  return true;
110  }
111 
112  FeatTable::FeatTable(const char *pstyle /* = "" */)
113  {
114  m_ptrie = NULL;
115  Reset(pstyle);
116 
117  m_aKey.SetNum(omp_get_max_threads());
118  for (int i = 0; i < m_aKey.GetNum(); i++) {
119  m_aKey[i] = new Array<int>(10);
120  }
121  }
123  {
124  SAFE_DELETE(m_ptrie);
125  SAFE_DEL_POINTER_ARRAY(m_aStyleInfo);
126  SAFE_DEL_POINTER_ARRAY(m_aKey);
127  }
128  void FeatTable::Reset(const char *pstyle)
129  {
130  m_nMinOrder = 0;
131  m_nMaxOrder = 0;
132  m_nNum = 0;
133  m_style = pstyle;
134  SAFE_DELETE(m_ptrie);
135  m_ptrie = new Trie<VocabID, int>;
136 
137  m_aStyleInfo.Clean();
138  // analyse the style
139  int nLen = strlen(pstyle);
140  const char *pColon = strchr(pstyle, ':');
141  if (!pColon) {
142  m_aStyleInfo.Add() = new FeatStyle(pstyle);
143  }
144  else {
145  int nBeg = pColon - pstyle;
146  while (pstyle[nBeg] != '[') {
147  nBeg--;
148  }
149  int nEnd = pColon - pstyle;
150  while (pstyle[nEnd] != ']') {
151  nEnd++;
152  }
153 
154  String sub(pstyle + nBeg, nEnd - nBeg + 1);
155  int iBeg = atoi(pstyle + nBeg + 1);
156  int iEnd = atoi(pColon + 1);
157  for (int i = iBeg; i <= iEnd; i++) {
158  String subnew;
159  subnew.Format("[%d]", i);
160  String s = String(pstyle).Replace(sub, subnew);
161  m_aStyleInfo.Add() = new FeatStyle(s);
162  }
163  }
164  // max order
165  m_nMinOrder = c_nMaxOrder;
166  m_nMaxOrder = 0;
167  for (int i = 0; i < m_aStyleInfo.GetNum(); i++) {
168  m_nMinOrder = min(m_nMinOrder, m_aStyleInfo[i]->m_nOrder);
169  m_nMaxOrder = max(m_nMaxOrder, m_aStyleInfo[i]->m_nOrder);
170  }
171  }
173  {
174  for (int pos = 0; pos < seq.GetLen(); pos++) {
175  Array<int> key;
176  for (int i = 0; i < m_aStyleInfo.GetNum(); i++) {
177  if ( false == m_aStyleInfo[i]->GetKey(seq, pos, seq.GetLen()-pos, key) )
178  continue;
179 
180  bool bFound;
181 
182  // no position constraint
183  int *pInt = m_ptrie->Insert(key.GetBuffer(), key.GetNum(), bFound);
184  if (!bFound) *pInt = 0;
185  *pInt += 1; // value is the count of each features
186 
187  if (m_aStyleInfo[i]->m_nOrder == m_nMaxOrder)
188  continue;
189 
190  // at the head position
191  if (pos == 0) {
192  int begid = VocabID_seqbeg;
193  Trie<VocabID, int> *sub = m_ptrie->InsertTrie(&begid, 1);
194  int *pInt = sub->Insert(key.GetBuffer(), key.GetNum(), bFound);
195  if (!bFound) *pInt = 0;
196  *pInt += 1;
197  }
198  // at the tail position
199  if (pos + m_aStyleInfo[i]->m_nOrder == seq.GetLen()) {
200  int endid = VocabID_seqend;
201  Trie<VocabID, int> *sub = m_ptrie->InsertTrie(key.GetBuffer(), key.GetNum());
202  int *pInt = sub->Insert(&endid, 1, bFound);
203  if (!bFound) *pInt = 0;
204  *pInt += 1;
205  }
206  }
207  }
208  }
210  {
211  // cutoff the feature first.
212  bool bNeedCutoff = false;
213  if (m_aCutoff.GetNum() > 0) {
214  for (int i = 0; i < m_aCutoff.GetNum(); i++) {
215  if (m_aCutoff[i] > 1) {
216  bNeedCutoff = true;
217  break;
218  }
219  }
220  }
221 
222  // cutoff the features
223  if (!bNeedCutoff)
224  return 0;
225 
226  Trie<VocabID, int> *pNewTrie = new Trie<VocabID, int>;
227 
228  // circle all the features
229  int nCutNum = 0;
230  for (int n = 1; n <= m_nMaxOrder + 1; n++) {
231  int nCut = m_aCutoff[min(n - 1, m_aCutoff.GetNum() - 1)];
232  /* as we add the <s> </s> into the trie, then the maximum order should be larger than m_nMaxOrder*/
233  VocabID ngram[c_nMaxOrder];
234  Trie<VocabID, int> *psub = NULL;
235  TrieIter2<VocabID, int> iter(m_ptrie, ngram, n);
236  while (psub = iter.Next()) {
237  int nCount = *psub->GetData();
238  if (nCount >= nCut) {
239  *pNewTrie->Insert(ngram, n) = nCount;
240  }
241  else {
242  nCutNum++;
243  }
244  }
245  }
246 
247  SAFE_DELETE(m_ptrie);
248  m_ptrie = pNewTrie;
249  return nCutNum;
250 
251  }
252  void FeatTable::IndexFeat(int begIdx)
253  {
254  // set the number(index) of the features
255  int nid = begIdx;
256  for (int n = 1; n <= m_nMaxOrder+1; n++) {
257  /* as we add the <s> </s> into the trie, then the maximum order should be larger than m_nMaxOrder*/
258  VocabID ngram[c_nMaxOrder];
259  Trie<VocabID, int> *psub = NULL;
260  TrieIter2<VocabID, int> iter(m_ptrie, ngram, n/*, LHash_IncSort*/);
261  while (psub = iter.Next()) {
262  *psub->GetData() = nid++;
263  }
264  }
265 
266  m_nNum = nid - begIdx;
267  }
268  void FeatTable::Find(Array<int> &afeat, Array<int> &key, bool bBeg, bool bEnd)
269  {
270  int *pValue = m_ptrie->Find(key.GetBuffer(), key.GetNum());
271  if (pValue) afeat.Add(*pValue);
272 
273  if (bBeg) { // at the begining
274  int begid = VocabID_seqbeg;
275  Trie<VocabID, int> *psub = m_ptrie->FindTrie(&begid, 1);
276  if (psub) {
277  pValue = psub->Find(key.GetBuffer(), key.GetNum());
278  if (pValue)
279  afeat.Add(*pValue);
280  }
281  }
282 
283  if (bEnd) { // at the end
284  Trie<VocabID, int> *psub = m_ptrie->FindTrie(key.GetBuffer(), key.GetNum());
285  int endid = VocabID_seqend;
286  if (psub) {
287  pValue = psub->Find(&endid, 1);
288  if (pValue)
289  afeat.Add(*pValue);
290  }
291  }
292  }
293  void FeatTable::Find(Array<int> &afeat, Seq &seq, int pos, int order)
294  {
295  Array<int> *pKey = m_aKey[omp_get_thread_num()];
296  for (int i = 0; i < m_aStyleInfo.GetNum(); i++) {
297  if (m_aStyleInfo[i]->m_nOrder == order) {
298 
299  m_aStyleInfo[i]->GetKey(seq, pos, order, *pKey);
300 
301  Find(afeat, *pKey, pos == 0, pos + order == seq.GetLen());
302  }
303  }
304  }
305 
306  void FeatTable::Find(Array<int> &afeat, Seq &seq)
307  {
308  Array<int> key;
309  for (int pos = 0; pos < seq.GetLen(); pos++)
310  {
311  for (int i = 0; i < m_aStyleInfo.GetNum(); i++)
312  {
313  if (false == m_aStyleInfo[i]->GetKey(seq, pos, seq.GetLen() - pos, key) )
314  continue;
315 
316  Find(afeat, key, pos == 0, pos + m_aStyleInfo[i]->m_nOrder == seq.GetLen());
317  }
318  }
319  }
320  void FeatTable::FindPosDep(Array<int> &afeat, Seq &seq, int pos)
321  {
322  int nLen = seq.GetLen();
323 
324  Array<int> *pkey = m_aKey[omp_get_thread_num()];
325  for (int i = 0; i < m_aStyleInfo.GetNum(); i++) {
326  int order = m_aStyleInfo[i]->m_nOrder;
327  for (int n = max(0, pos - order + 1); n <= min(nLen - order, pos); n++) {
328  lout_assert(true == m_aStyleInfo[i]->GetKey(seq, n, order, *pkey));
329  Find(afeat, *pkey, n == 0, n + order == nLen);
330  }
331  }
332  }
333  void FeatTable::ReadT(File &file, PValue *pValue /* = NULL */)
334  {
335  char *pLine = file.GetLine();
336  Reset(strtok(pLine, " \t\n"));
337  sscanf(strtok(NULL, " \t\n"), "order=[%d,%d]", &m_nMinOrder, &m_nMaxOrder);
338  sscanf(strtok(NULL, " \t\n"), "num=%d", &m_nNum);
339 
340  for (int i = 0; i < m_nNum; i++) {
341  pLine = file.GetLine();
342  strtok(pLine, " \t\n");
343 
344  int nid = atoi(strchr(strtok(NULL, " \t\n"), '=') + 1);
345  PValue v = atof(strchr(strtok(NULL, " \t\n"), '=') + 1);
346  char *p = strchr(strtok(NULL, ""), '=') + 1;
347 
348  Array<int> key;
349  p = strtok(p, " \t\n");
350  while (p) {
351  key.Add(atoi(p));
352  p = strtok(NULL, " \t\n");
353  }
354  *m_ptrie->Insert(key, key.GetNum()) = nid;
355  if (pValue) {
356  pValue[nid] = v;
357  }
358  }
359  }
360  void FeatTable::WriteT(File &file, PValue *pValue /* = NULL */)
361  {
362  file.Print("%s order=[%d,%d] num=%d\n", m_style.GetBuffer(), m_nMinOrder, m_nMaxOrder, m_nNum);
363 
364  int outnum = 0;
365  for (int n = 1; n <= m_nMaxOrder+1; n++) {
366  VocabID ngram[c_nMaxOrder];
367  Trie<VocabID, int> *psub = NULL;
368  TrieIter2<VocabID, int> iter(m_ptrie, ngram, n/*, LHash_IncSort*/);
369  while (psub = iter.Next()) {
370  int nid = *psub->GetData();
371 
372  file.Print("%s\t id=%d value=%f \t key=", m_style.GetBuffer(), nid, (pValue)? pValue[nid]:0);
373  for (int i = 0; i < n; i++) {
374  file.Print("%d ", ngram[i]);
375  }
376  file.Print("\n");
377 
378  outnum++;
379  }
380  }
381  lout_assert(outnum == m_nNum);
382  }
383 
384  void Feat::Reset(int nOrder, bool bClass)
385  {
387  SAFE_DEL_POINTER_ARRAY(m_aTable);
388  String str;
389  /* ngram */
390  m_aTable.Add() = new FeatTable(str.Format("w[1:%d]", nOrder));
391  if (bClass)
392  m_aTable.Add() = new FeatTable(str.Format("c[1:%d]", nOrder));
393  /* skip ngram */
394  for (int nSkipLen = 1; nSkipLen <= nOrder - 2; nSkipLen++) {
395  for (int nSkipBeg = 1; nSkipBeg <= nOrder - nSkipLen - 1; nSkipBeg++) {
396  m_aTable.Add() = new FeatTable(str.Format("w[%d]-[%d]w[%d]", nSkipBeg, nSkipLen, nOrder - nSkipLen - nSkipBeg));
397  if (bClass)
398  m_aTable.Add() = new FeatTable(str.Format("c[%d]-[%d]c[%d]", nSkipBeg, nSkipLen, nOrder - nSkipLen - nSkipBeg));
399  }
400  }
401 
402  //m_aTable.Add() = new FeatTable("w[1]-[1:2]w[1]");
403 
404  }
405  void Feat::Reset(const char *pfeatType)
406  {
407  lout << "[Feat] Reset: Load feat style form file = " << pfeatType << endl;
408 
409  SAFE_DEL_POINTER_ARRAY(m_aTable);
410  File file(pfeatType, "rt");
411  char *pLine;
412  while (pLine = file.GetLine()) {
413  // remove the comments
414  char *p = strstr(pLine, "//");
415  if (p) *p = '\0';
416 
417  p = strtok(pLine, " \t\n");
418  if (!p)
419  continue;
420  FeatTable *pFeatTable = new FeatTable(p);
421 
422  // cutoff setting
423  p = strtok(NULL, " \t\n");
424  if (p) {
425  pFeatTable->m_aCutoff.Clean();
426  while (*p != '\0') {
427  pFeatTable->m_aCutoff.Add(*p - '0');
428  p++;
429  }
430  }
431 
432  m_aTable.Add(pFeatTable);
433  }
434  }
436  {
437  int nMaxOrder = 0;
438  for (int i = 0; i < m_aTable.GetNum(); i++) {
439  nMaxOrder = max(nMaxOrder, m_aTable[i]->GetMaxOrder());
440  }
441  return nMaxOrder;
442  }
443  void Feat::Find(Array<int> &afeat, Seq &seq, int pos, int order)
444  {
445  for (int i = 0; i < m_aTable.GetNum(); i++) {
446  m_aTable[i]->Find(afeat, seq, pos, order);
447  }
448  }
449  void Feat::Find(Array<int> &afeat, Seq &seq)
450  {
451  for (int i = 0; i < m_aTable.GetNum(); i++) {
452  m_aTable[i]->Find(afeat, seq);
453  }
454  }
455  void Feat::FindClass(Array<int> &afeat, Seq &seq, int pos, int order)
456  {
457  for (int i = 0; i < m_aTable.GetNum(); i++) {
458  if (NULL == strchr(m_aTable[i]->GetStyle(), 'w')) {
459  /* no word, which means containing only class */
460  m_aTable[i]->Find(afeat, seq, pos, order);
461  }
462  }
463  }
464  void Feat::FindWord(Array<int> &afeat, Seq &seq, int pos, int order)
465  {
466  for (int i = 0; i < m_aTable.GetNum(); i++) {
467  if (strchr(m_aTable[i]->GetStyle(), 'w')) {
468  /* containing word */
469  m_aTable[i]->Find(afeat, seq, pos, order);
470  }
471  }
472  }
473  void Feat::FindPosDep(Array<int> &afeat, Seq &seq, int pos, int type /* = 0 */)
474  {
475  /*
476  type = 0: all features
477  type = 1: only class
478  type = 2: expect class
479  */
480  switch (type) {
481  case 0:
482  for (int i = 0; i < m_aTable.GetNum(); i++) {
483  m_aTable[i]->FindPosDep(afeat, seq, pos);
484  }
485  break;
486  case 1:
487  for (int i = 0; i < m_aTable.GetNum(); i++) {
488  if (NULL == strchr(m_aTable[i]->GetStyle(), 'w')) {
489  m_aTable[i]->FindPosDep(afeat, seq, pos);
490  }
491  }
492  break;
493  case 2:
494  for (int i = 0; i < m_aTable.GetNum(); i++) {
495  if (strchr(m_aTable[i]->GetStyle(), 'w')) {
496  m_aTable[i]->FindPosDep(afeat, seq, pos);
497  }
498  }
499  break;
500  default:
501  lout_error("[Feat] FindPosDep: unknown type = " << type);
502  }
503 
504  }
505  void Feat::LoadFeatFromCorpus(const char *path, Vocab *pv)
506  {
507  File file(path, "rt");
508 
509 
510  lout.Progress(file.fp, true, "[Feat] Load:");
511  char *pLine;
512  while (pLine = file.GetLine()) {
513  Array<VocabID> aInt;
514 
515  //aInt.Add(VocabID_seqbeg);
516  char *p = strtok(pLine, " \t\n");
517  while (p) {
518  aInt.Add(atoi(p));
519  p = strtok(NULL, " \t\n");
520  }
521  //aInt.Add(VocabID_seqend);
522 
523  Seq seq;
524  seq.Set(aInt, pv);
525 
526  for (int i = 0; i < m_aTable.GetNum(); i++) {
527  m_aTable[i]->LoadFeat(seq);
528  }
529  lout.Progress(file);
530  }
531 
532  // cutoff all features
533  lout << "[Feat] Feat cutoff..." << endl;
534  int nTotalCutNum = 0;
535  for (int i = 0; i < m_aTable.GetNum(); i++) {
536  nTotalCutNum += m_aTable[i]->CutoffFeat();
537  }
538  lout << "[Feat] Feat cutoff num = " << nTotalCutNum << endl;
539  // index all the features
540  lout << "[Feat] Feat index..." << endl;
541  m_nTotalNum = 0;
542  for (int i = 0; i < m_aTable.GetNum(); i++) {
543  m_aTable[i]->IndexFeat(m_nTotalNum);
544  m_nTotalNum += m_aTable[i]->GetNum();
545  }
546 
547  // output
548  lout << "[Feat] = {" << endl;
549  for (int i = 0; i < m_aTable.GetNum(); i++) {
550  lout << " " << m_aTable[i]->GetStyle() << ": " << m_aTable[i]->GetNum();
551  lout << " order=[" << m_aTable[i]->GetMinOrder() << "," << m_aTable[i]->GetMaxOrder() << "]";
552  lout << " cut=";
553  lout.output(m_aTable[i]->m_aCutoff.GetBuffer(), m_aTable[i]->m_aCutoff.GetNum(), "") << endl;
554  }
555  lout << " total = " << m_nTotalNum << "\n}" << endl;
556  }
557 
558  void Feat::WriteT(File &file, PValue *pValue /* = NULL */)
559  {
560  file.Print("feat-type=%d\n", m_aTable.GetNum());
561  file.Print("feat={ ");
562  for (int i = 0; i < m_aTable.GetNum(); i++) {
563  file.Print("%s ", m_aTable[i]->GetStyle());
564  }
565  file.Print(" }\n");
566 
567  for (int i = 0; i < m_aTable.GetNum(); i++) {
568  m_aTable[i]->WriteT(file, pValue);
569  }
570  }
571  void Feat::ReadT(File &file, PValue *pValue /* = NULL */)
572  {
573  SAFE_DEL_POINTER_ARRAY(m_aTable);
574 
575  int nNum;
576  fscanf(file, "feat-type=%d\n", &nNum);
577  file.GetLine();
578  m_aTable.SetNum(nNum);
579  for (int i = 0; i < m_aTable.GetNum(); i++) {
580  m_aTable[i] = new FeatTable();
581  m_aTable[i]->ReadT(file, pValue);
582  }
583 
584  // output
585  lout << "[Feat] = {" << endl;
586  for (int i = 0; i < m_aTable.GetNum(); i++) {
587  lout << " " << m_aTable[i]->GetStyle() << ": " << m_aTable[i]->GetNum();
588  lout << " order=[" << m_aTable[i]->GetMinOrder() << "," << m_aTable[i]->GetMaxOrder() << "]" << endl;
589  }
590  lout << " total = " << m_nTotalNum << "\n}" << endl;
591  }
592 }
char c
the character "w" or "c"
Definition: trf-feature.h:95
void Reset(const char *pstyle)
Reset.
_wb_TRIE * Next()
Get next trie.
Definition: wb-trie.h:268
void FindWord(Array< int > &afeat, Seq &seq, int pos, int order)
Find the ngram feature depending on word[pos].
#define SAFE_DELETE(p)
memory release
Definition: wb-vector.h:49
const char * Format(const char *p_pMessage,...)
format print to string
Definition: wb-string.cpp:69
a dynamic string class
Definition: wb-string.h:53
T & Get(int i)
get the value at position i
Definition: wb-vector.h:99
int GetMaxOrder()
Get maximum order.
void Clean()
Clean.
Definition: wb-trie.h:113
void SetClass(Vocab *pv)
set the class based on the word sequence
Definition: trf-feature.cpp:35
void Random(Vocab *pv)
Random.
Definition: trf-feature.cpp:39
int VocabID
Definition: trf-vocab.h:23
#define lout_error(x)
Definition: wb-log.h:183
#define lout_assert(p)
Definition: wb-log.h:185
DataT * Find(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Find a value.
Definition: wb-trie.h:132
Analyse a determinate feat style (without ":")
Definition: trf-feature.h:91
_wb_TRIE * InsertTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Insert a sub-trie.
Definition: wb-trie.h:169
short n
number, "w[2]" then n=2
Definition: trf-feature.h:97
Get all the values whose indexes are of a fixed length. The returned tries may not contain a legal va...
Definition: wb-trie.h:42
void LoadFeatFromCorpus(const char *path, Vocab *pv)
Load Features from corpus.
const int VocabID_seqbeg
Definition: trf-vocab.h:25
double PValue
Definition: trf-def.h:26
define the feature style. such as "w3"(word-3gram); "c2"(class-2gram);
Definition: trf-feature.h:124
VocabID GetClass(VocabID wid)
get class
Definition: trf-vocab.h:62
int nLen
Definition: trf-feature.h:45
Log & output(T *pArray, int n, const char *pgap=" ")
output an array
Definition: wb-log.h:170
void ReadT(File &file, PValue *pValue=NULL)
Read from file.
void Reset(int p_len)
reset only change the len variable, does not change the buffer size.
Definition: trf-feature.h:51
void WriteT(File &file, PValue *pValue=NULL)
Write the features.
T * GetBuffer(int i=0) const
get the buffer pointer
Definition: wb-vector.h:97
FILE * fp
file pointer
Definition: wb-file.h:97
String Replace(const char *src, const char *rpl)
replace
Definition: wb-string.cpp:108
int GetLen() const
Definition: trf-feature.h:71
VocabID RandClass()
random a class
Definition: trf-vocab.h:70
virtual void Print(const char *p_pMessage,...)
print
Definition: wb-file.cpp:115
void Find(Array< int > &afeat, Array< int > &key, bool bBeg, bool bEnd)
Find the corresponding feature using a key. This will return the beg/end ngram.
Array< int > m_aCutoff
cutoff setting for different order
Definition: trf-feature.h:131
define a sequence including the word sequence and class sequence
Definition: trf-feature.h:41
DataT * Insert(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Insert a value.
Definition: wb-trie.h:142
void Print()
Definition: trf-feature.cpp:52
short i
if "w" then i=0, if "c" then i=1, used to index the value in Seq
Definition: trf-feature.h:96
_wb_TRIE * FindTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Find a sub-trie.
Definition: wb-trie.h:148
void Insert(T t)
insert a value. Avoid repeating
Definition: wb-vector.h:298
const int VocabID_seqend
Definition: trf-vocab.h:26
void Set(const char *pstyle)
set and analyze the style
Definition: trf-feature.cpp:67
file class.
Definition: wb-file.h:94
void ReadT(File &file, PValue *pValue=NULL)
Read the features.
void Set(Array< int > &aInt, Vocab *pv)
transform the word sequence (from file) to Seq
Definition: trf-feature.cpp:22
#define word_layer
Definition: trf-feature.h:31
Mat< VocabID > x
Definition: trf-feature.h:44
virtual char * GetLine(bool bPrecent=false)
Read a line into the buffer.
Definition: wb-file.cpp:47
int CutoffFeat()
cutoff the features
void Progress(long long n=-1, bool bInit=false, long long total=100, const char *head="")
progress bar
Definition: wb-log.cpp:146
void FindPosDep(Array< int > &afeat, Seq &seq, int pos, int type=0)
Find the class ngram depending on the nPos.
const int c_nMaxOrder
Definition: trf-feature.h:24
void WriteT(File &file, PValue *pValue=NULL)
Write to file.
void Clean()
Clean the array. Just set the top of the array to -1 and do not release the memory.
Definition: wb-vector.h:258
void IndexFeat(int begIdx)
Set the number of each feature. We should index the features in different tables. ...
void Find(Array< int > &afeat, Seq &seq, int pos, int order)
Find the ngram feature with a fixed order.
Array< int > * GetWord(VocabID cid)
get word belonging to a class
Definition: trf-vocab.h:76
void Reset(int nOrder, bool bClass)
Reset, set the order. Note: the maximum order (including the skip) may be larger than nOrder...
int GetNum() const
Get Array number.
Definition: wb-vector.h:240
void Add(T t)
Add a value to the tail of array.
Definition: wb-vector.h:242
void FindClass(Array< int > &afeat, Seq &seq, int pos, int order)
Find the class ngram feature with a fixed order.
FeatTable(const char *pstyle="")
constructor
Log lout
the definition is in wb-log.cpp
Definition: wb-log.cpp:22
DataT * GetData()
Get value.
Definition: wb-trie.h:126
void PrintArray(const char *pformat, TYPE *pbuf, int num)
print an array into file
Definition: wb-file.h:148
bool GetKey(Seq &seq, int nPos, int nOrder, Array< int > &key)
map a ngram to the index key, return if get a correct key
Definition: trf-feature.cpp:92
void FindPosDep(Array< int > &afeat, Seq &seq, int pos)
Find all the feature depending on position.
void LoadFeat(Seq &seq)
Extract a feature from a sequence.
int GetRow() const
Definition: wb-mat.h:128
Definition: trf-alg.cpp:20
#define SAFE_DEL_POINTER_ARRAY(a)
Definition: wb-vector.h:52
#define class_layer
Definition: trf-feature.h:32
~FeatTable()
destructor