TRF Language Model
wb-trie.h
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding language may use different comment styles.
16 
17 
18 
19 
28 #ifndef _WB_TRIE_H_
29 #define _WB_TRIE_H_
30 #include "wb-lhash.h"
31 
32 namespace wb
33 {
34 
35 #define _wb_TRIE wb::Trie<KeyT, DataT>
36 #define _wb_LHASH_FOR_TRIE wb::LHash<KeyT, wb::Trie<KeyT, DataT>*>
37 #define _wb_LHASHITER_FOR_TRIE wb::LHashIter<KeyT, wb::Trie<KeyT, DataT>*>
38 #define _wb_UNIT_FOR_TRIE wb::LHash::Unit<KeyT, wb::Trie<KeyT, DataT>*>
39 
40  template <class KeyT, class DataT> class Trie;
41  template <class KeyT, class DataT> class TrieIter;
42  template <class KeyT, class DataT> class TrieIter2;
43 
66  template <class KeyT, class DataT>
67  class Trie
68  {
69  friend class TrieIter<KeyT, DataT>;
70  friend class TrieIter2<KeyT, DataT>;
71  private:
72  DataT m_value;
73  _wb_LHASH_FOR_TRIE *m_phash;
74  public:
75  Trie() :m_phash(NULL){ Map_noKey(m_value); }
76  ~Trie() { Release(); }
78  void Release()
79  {
80  if (m_phash)
81  {
82  KeyT key;
83  _wb_TRIE **ppsub;
84  _wb_LHASHITER_FOR_TRIE iter(m_phash);
85  while (ppsub = iter.Next(key)) {
86  (*ppsub)->Release();
87  }
88  }
89 
90  Map_noKey(m_value);
91  if (m_phash) {
92  delete m_phash;
93  m_phash = NULL;
94  }
95  }
97  size_t TotalMemCost()
98  {
99  size_t nSize = sizeof(*this);
100  if (m_phash) {
101  KeyT key;
102  nSize += m_phash->TotalMemCost();
103  _wb_TRIE **ppsub;
104  _wb_LHASHITER_FOR_TRIE iter(m_phash);
105  while (ppsub = iter.Next(key)) {
106  nSize += (*ppsub)->TotalMemCost();
107  }
108  }
109  return nSize;
110  }
112  /* For Trie, clean is equal to release */
113  void Clean() { Release(); }
115  void Fill(DataT d)
116  {
117  KeyT key;
118  _wb_TRIE **ppsub;
119  _wb_LHASHITER_FOR_TRIE iter(m_phash);
120  while (ppsub = iter.Next(key)) {
121  (*ppsub)->Fill(d);
122  }
123  m_value = d;
124  }
126  DataT* GetData() { return &m_value; }
128  _wb_LHASH_FOR_TRIE *GetHash() const { return m_phash; }
130  bool IsDataLegal() { return !Map_noKeyP(m_value); }
132  DataT* Find(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
133  {
134  _wb_TRIE *psub = FindTrie(p_pIndex, nIndexLen, bFound);
135  if (psub && psub->IsDataLegal()) {
136  return psub->GetData();
137  }
138  bFound = false;
139  return NULL;
140  }
142  DataT* Insert(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
143  {
144  _wb_TRIE *psub = InsertTrie(p_pIndex, nIndexLen, bFound);
145  return psub->GetData();
146  }
148  _wb_TRIE *FindTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
149  {
150  if (nIndexLen == 0) {
151  bFound = true;
152  return this;
153  }
154 
155  if (!m_phash) {
156  bFound = false;
157  return NULL;
158  }
159 
160  _wb_TRIE **ppsub = m_phash->Find(p_pIndex[0], bFound);
161  if (!bFound || !ppsub) {
162  bFound = false;
163  return NULL;
164  }
165 
166  return (*ppsub)->FindTrie(p_pIndex + 1, nIndexLen - 1, bFound);
167  }
169  _wb_TRIE *InsertTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
170  {
171  if (nIndexLen == 0) {
172  return this;
173  }
174 
175  if (!m_phash) {
176  m_phash = new _wb_LHASH_FOR_TRIE;
177  }
178  _wb_TRIE **ppsub = m_phash->Insert(p_pIndex[0], bFound);
179  if (!bFound) {
180  *ppsub = new _wb_TRIE;
181  }
182  return (*ppsub)->InsertTrie(p_pIndex + 1, nIndexLen - 1, bFound);
183  }
185  DataT* Find(const KeyT *p_pIndex, int nIndexLen)
186  {
187  bool bFound;
188  return Find(p_pIndex, nIndexLen, bFound);
189  }
191  DataT* Insert(const KeyT *p_pIndex, int nIndexLen)
192  {
193  bool bFound;
194  return Insert(p_pIndex, nIndexLen, bFound);
195  }
197  _wb_TRIE *FindTrie(const KeyT *p_pIndex, int nIndexLen)
198  {
199  bool bFound;
200  return FindTrie(p_pIndex, nIndexLen, bFound);
201  }
203  _wb_TRIE *InsertTrie(const KeyT *p_pIndex, int nIndexLen)
204  {
205  bool bFound;
206  return InsertTrie(p_pIndex, nIndexLen, bFound);
207  }
208  };
209 
210 
214  template <class KeyT, class DataT>
215  class TrieIter
216  {
217  friend class TrieIter2<KeyT, DataT>;
218  public:
219  TrieIter(_wb_TRIE *ptrie, bool(*sort)(KeyT, KeyT) = 0)
220  : m_Iter(ptrie->m_phash, sort) {};
222  void Init() { m_Iter.Init(); };
224  /* the returned trie must contain a legal value*/
225  _wb_TRIE *Next(KeyT &key) {
226  _wb_TRIE *p;
227  while (p = Next_1(key)) {
228  if (p->IsDataLegal())
229  break;
230  }
231  return p;
232  }
233 
234  private:
236  /* the returned trie may not contain a legal value */
237  _wb_TRIE *Next_1(KeyT &key) {
238  _wb_TRIE **pp = m_Iter.Next(key);
239  return (pp) ? *pp : NULL;
240  };
241  private:
242  _wb_LHASHITER_FOR_TRIE m_Iter;
243  };
244 
245 
249  template <class KeyT, class DataT>
250  class TrieIter2
251  {
252  public:
253  TrieIter2(_wb_TRIE *ptrie, KeyT *pIndex, int level, bool(*sort)(KeyT, KeyT) = 0)
254  : m_ptrie(ptrie), m_pIndex(pIndex), m_nLevel(level),
255  m_iter(ptrie, sort), m_sort(sort){
256  m_pSubIter2 = NULL;
257  };
259  void Init()
260  {
261  m_iter.Init();
262  if (m_pSubIter2)
263  delete m_pSubIter2;
264  m_pSubIter2 = NULL;
265  }
267  /* The returned trie may not contain a legal value*/
269  {
270  if (m_nLevel == 0) {
271  return m_ptrie;
272  }
273  else if (m_nLevel == 1) {
274  return m_iter.Next(*m_pIndex);
275  }
276 
277  while (1)
278  {
279  if (m_pSubIter2 == NULL) {
280  _wb_TRIE *pSub = m_iter.Next_1(*m_pIndex);
281  if (pSub == NULL)
282  return NULL;
283 
284  m_pSubIter2 = new TrieIter2<KeyT, DataT>(pSub, m_pIndex + 1, m_nLevel - 1, m_sort);
285  }
286 
287  _wb_TRIE *pNext = m_pSubIter2->Next();
288  if (pNext == NULL) {
289  delete m_pSubIter2;
290  m_pSubIter2 = NULL;
291  }
292  else {
293  return pNext;
294  }
295  }
296 
297  }
298 
299  private:
300  TrieIter<KeyT, DataT> m_iter;
301  TrieIter2<KeyT, DataT> *m_pSubIter2;
302  _wb_TRIE *m_ptrie;
303  KeyT *m_pIndex;
304  int m_nLevel;
305  bool(*m_sort)(KeyT, KeyT);
306  };
309 }
310 
311 #endif
~Trie()
Definition: wb-trie.h:76
_wb_TRIE * Next()
Get next trie.
Definition: wb-trie.h:268
Trie()
Definition: wb-trie.h:75
void Release()
Release all the trie.
Definition: wb-trie.h:78
void Clean()
Clean.
Definition: wb-trie.h:113
bool IsDataLegal()
detect if current trie have legal value
Definition: wb-trie.h:130
_wb_LHASH_FOR_TRIE * GetHash() const
Get hash pointer.
Definition: wb-trie.h:128
DataT * Insert(const KeyT *p_pIndex, int nIndexLen)
Insert a value.
Definition: wb-trie.h:191
DataT * Find(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Find a value.
Definition: wb-trie.h:132
_wb_TRIE * InsertTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Insert a sub-trie.
Definition: wb-trie.h:169
Get all the values whose indexes are of a fixed length. The returned tries may not contain a legal va...
Definition: wb-trie.h:42
void Map_noKey(KeyT *&key)
Definition: wb-lhash.h:81
TrieIter(_wb_TRIE *ptrie, bool(*sort)(KeyT, KeyT)=0)
Definition: wb-trie.h:219
_wb_TRIE * FindTrie(const KeyT *p_pIndex, int nIndexLen)
Find a sub-trie.
Definition: wb-trie.h:197
iter all the sub-tries
Definition: wb-trie.h:41
DataT * Insert(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Insert a value.
Definition: wb-trie.h:142
#define _wb_LHASH_FOR_TRIE
Definition: wb-trie.h:36
_wb_TRIE * FindTrie(const KeyT *p_pIndex, int nIndexLen, bool &bFound)
Find a sub-trie.
Definition: wb-trie.h:148
#define _wb_LHASHITER_FOR_TRIE
Definition: wb-trie.h:37
bool Map_noKeyP(KeyT *key)
Definition: wb-lhash.h:98
_wb_TRIE * Next(KeyT &key)
Get next sub-trie.
Definition: wb-trie.h:225
DataT * Find(const KeyT *p_pIndex, int nIndexLen)
Find a value.
Definition: wb-trie.h:185
_wb_TRIE * InsertTrie(const KeyT *p_pIndex, int nIndexLen)
Insert a sub-trie.
Definition: wb-trie.h:203
void Init()
Initialization.
Definition: wb-trie.h:222
#define _wb_TRIE
Definition: wb-trie.h:35
void Init()
Initialization.
Definition: wb-trie.h:259
DataT * GetData()
Get value.
Definition: wb-trie.h:126
size_t TotalMemCost()
Compute the total memory cost of the trie structure.
Definition: wb-trie.h:97
linear hash, using &#39;open address&#39; to handle collision
void Fill(DataT d)
set all the values to d
Definition: wb-trie.h:115
define all the code written by Bin Wang.
Definition: wb-file.cpp:21
trie structure
Definition: wb-trie.h:40
TrieIter2(_wb_TRIE *ptrie, KeyT *pIndex, int level, bool(*sort)(KeyT, KeyT)=0)
Definition: wb-trie.h:253