TRF Language Model
trf-def.cpp
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding language may use different comment styles.
16 
17 
18 #include "trf-def.h"
19 
20 namespace trf
21 {
22 
23  int omp_rand(int thread_num)
24  {
25 #ifdef __linux
26  static unsigned int s_states[128];
27  if (thread_num == -1) {
28 
29  return rand_r(&s_states[omp_get_thread_num()]);
30  }
31  else {
32  if (thread_num > 128) {
33  lout_error("[Rand] Can not support to many thread (Over 128)");
34  }
35  lout << "[Rand] Initial State" << endl;
36  srand(time(NULL));
37  for (int i = 0; i < thread_num; i++) {
38  s_states[i] = rand();
39  }
40  }
41 #else
42  if (thread_num != -1) {
43  srand(time(NULL));
44  }
45  return rand();
46 #endif
47  }
48 
49  /************************************************************************/
50  /* The definition of sampling function */
51  /************************************************************************/
52 
53  LogP LogLineNormalize(LogP* pdProbs, int nNum)
54  {
55  LogP dSum = LogP_zero;
56  for (int i = 0; i < nNum; i++)
57  dSum = Log_Sum(dSum, pdProbs[i]);
58  for (int i = 0; i < nNum; i++)
59  pdProbs[i] -= dSum;
60  return dSum;
61  }
62  int LogLineSampling(const LogP* pdProbs, int nNum)
63  {
64  double d = 1.0 * omp_rand() / RAND_MAX;
65  int sX = 0;
66  double dSum = 0;
67 
68  for (sX = 0; sX<nNum; sX++) {
69  dSum += LogP2Prob(pdProbs[sX]);
70 
71  if (fabs(dSum - 1) < 1e-5)
72  dSum = 1; //确保精度 if (dSum == 0) continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { // cout << "[LogLineSampling] " << sX << "\t" << dSum << endl; // lout_assert(sX<nNum); sX = nNum - 1; } return sX; } void LineNormalize(Prob* pdProbs, int nNum) { Prob dSum = 0; for (int i = 0; i<nNum; i++) dSum += pdProbs[i]; if (dSum > 0) { for (int i = 0; i<nNum; i++) pdProbs[i] /= dSum; } else { for (int i = 0; i<nNum; i++) pdProbs[i] = 1.0 / nNum; } } int LineSampling(const Prob* pdProbs, int nNum) { double d = 1.0 * omp_rand() / RAND_MAX; int sX = 0; double dSum = 0; for (sX = 0; sX<nNum; sX++) { dSum += pdProbs[sX]; if (fabs(dSum - 1) < 1e-5) dSum = 1; //确保精度 if (dSum == 0) continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { cout << "[LineSampling] " << sX << "\t" << dSum << endl; lout_assert(sX<nNum); } return sX; } bool Acceptable(Prob prob) { double d = 1.0 * omp_rand() / RAND_MAX; return d <= prob; } void RandomPos(int *a, int len, int n) { if (n> len) { lout_error("[RandomPos] n(" << n << ") > len(" << len << ") !!"); } for (int i = 0; i<n; i++) { int s = rand() % (len - i); //将a中第i+s个位置与第i位置交换 int temp = a[i + s]; a[i + s] = a[i]; a[i] = temp; } } double dRand(double dmin, double dmax) { double d = 1.0 * rand() / RAND_MAX; return d*(dmax - dmin) + dmin; } int omp_nrand(int nMin, int nMax) { return omp_rand() % (nMax - nMin) + nMin; } void EasySmooth(Prob *p, int num, Prob threshold /*= 1e-5*/) { for (int i = 0; i < num; i++) { p[i] = max(p[i], threshold); } LineNormalize(p, num); } double GetAISFactor(int t, int T) { t = t + 1; /* make sure that t is from 1 to T */ double delta = 4; double b_t = SigmFunc( delta * (2.0 * t / T - 1) ); double b_T = SigmFunc( delta * (2.0 * T / T - 1) ); double b_1 = SigmFunc( delta * (2.0 * 1 / T - 1) ); return (b_t - b_1) / (b_T - b_1); } double SigmFunc(double x) { return 1.0 / (1 + exp(-x)); } }
73  if (dSum == 0)
74  continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { // cout << "[LogLineSampling] " << sX << "\t" << dSum << endl; // lout_assert(sX<nNum); sX = nNum - 1; } return sX; } void LineNormalize(Prob* pdProbs, int nNum) { Prob dSum = 0; for (int i = 0; i<nNum; i++) dSum += pdProbs[i]; if (dSum > 0) { for (int i = 0; i<nNum; i++) pdProbs[i] /= dSum; } else { for (int i = 0; i<nNum; i++) pdProbs[i] = 1.0 / nNum; } } int LineSampling(const Prob* pdProbs, int nNum) { double d = 1.0 * omp_rand() / RAND_MAX; int sX = 0; double dSum = 0; for (sX = 0; sX<nNum; sX++) { dSum += pdProbs[sX]; if (fabs(dSum - 1) < 1e-5) dSum = 1; //确保精度 if (dSum == 0) continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { cout << "[LineSampling] " << sX << "\t" << dSum << endl; lout_assert(sX<nNum); } return sX; } bool Acceptable(Prob prob) { double d = 1.0 * omp_rand() / RAND_MAX; return d <= prob; } void RandomPos(int *a, int len, int n) { if (n> len) { lout_error("[RandomPos] n(" << n << ") > len(" << len << ") !!"); } for (int i = 0; i<n; i++) { int s = rand() % (len - i); //将a中第i+s个位置与第i位置交换 int temp = a[i + s]; a[i + s] = a[i]; a[i] = temp; } } double dRand(double dmin, double dmax) { double d = 1.0 * rand() / RAND_MAX; return d*(dmax - dmin) + dmin; } int omp_nrand(int nMin, int nMax) { return omp_rand() % (nMax - nMin) + nMin; } void EasySmooth(Prob *p, int num, Prob threshold /*= 1e-5*/) { for (int i = 0; i < num; i++) { p[i] = max(p[i], threshold); } LineNormalize(p, num); } double GetAISFactor(int t, int T) { t = t + 1; /* make sure that t is from 1 to T */ double delta = 4; double b_t = SigmFunc( delta * (2.0 * t / T - 1) ); double b_T = SigmFunc( delta * (2.0 * T / T - 1) ); double b_1 = SigmFunc( delta * (2.0 * 1 / T - 1) ); return (b_t - b_1) / (b_T - b_1); } double SigmFunc(double x) { return 1.0 / (1 + exp(-x)); } }
75 
76  if (d <= dSum)
77  break;
78  }
79  if (sX >= nNum) {
80 // cout << "[LogLineSampling] " << sX << "\t" << dSum << endl;
81 // lout_assert(sX<nNum);
82  sX = nNum - 1;
83  }
84 
85  return sX;
86  }
87  void LineNormalize(Prob* pdProbs, int nNum)
88  {
89  Prob dSum = 0;
90  for (int i = 0; i<nNum; i++)
91  dSum += pdProbs[i];
92 
93  if (dSum > 0) {
94  for (int i = 0; i<nNum; i++)
95  pdProbs[i] /= dSum;
96  }
97  else {
98  for (int i = 0; i<nNum; i++)
99  pdProbs[i] = 1.0 / nNum;
100  }
101 
102  }
103  int LineSampling(const Prob* pdProbs, int nNum)
104  {
105  double d = 1.0 * omp_rand() / RAND_MAX;
106  int sX = 0;
107  double dSum = 0;
108 
109  for (sX = 0; sX<nNum; sX++) {
110  dSum += pdProbs[sX];
111 
112  if (fabs(dSum - 1) < 1e-5)
113  dSum = 1; //确保精度 if (dSum == 0) continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { cout << "[LineSampling] " << sX << "\t" << dSum << endl; lout_assert(sX<nNum); } return sX; } bool Acceptable(Prob prob) { double d = 1.0 * omp_rand() / RAND_MAX; return d <= prob; } void RandomPos(int *a, int len, int n) { if (n> len) { lout_error("[RandomPos] n(" << n << ") > len(" << len << ") !!"); } for (int i = 0; i<n; i++) { int s = rand() % (len - i); //将a中第i+s个位置与第i位置交换 int temp = a[i + s]; a[i + s] = a[i]; a[i] = temp; } } double dRand(double dmin, double dmax) { double d = 1.0 * rand() / RAND_MAX; return d*(dmax - dmin) + dmin; } int omp_nrand(int nMin, int nMax) { return omp_rand() % (nMax - nMin) + nMin; } void EasySmooth(Prob *p, int num, Prob threshold /*= 1e-5*/) { for (int i = 0; i < num; i++) { p[i] = max(p[i], threshold); } LineNormalize(p, num); } double GetAISFactor(int t, int T) { t = t + 1; /* make sure that t is from 1 to T */ double delta = 4; double b_t = SigmFunc( delta * (2.0 * t / T - 1) ); double b_T = SigmFunc( delta * (2.0 * T / T - 1) ); double b_1 = SigmFunc( delta * (2.0 * 1 / T - 1) ); return (b_t - b_1) / (b_T - b_1); } double SigmFunc(double x) { return 1.0 / (1 + exp(-x)); } }
114  if (dSum == 0)
115  continue; //0概率 if (d <= dSum) break; } if (sX >= nNum) { cout << "[LineSampling] " << sX << "\t" << dSum << endl; lout_assert(sX<nNum); } return sX; } bool Acceptable(Prob prob) { double d = 1.0 * omp_rand() / RAND_MAX; return d <= prob; } void RandomPos(int *a, int len, int n) { if (n> len) { lout_error("[RandomPos] n(" << n << ") > len(" << len << ") !!"); } for (int i = 0; i<n; i++) { int s = rand() % (len - i); //将a中第i+s个位置与第i位置交换 int temp = a[i + s]; a[i + s] = a[i]; a[i] = temp; } } double dRand(double dmin, double dmax) { double d = 1.0 * rand() / RAND_MAX; return d*(dmax - dmin) + dmin; } int omp_nrand(int nMin, int nMax) { return omp_rand() % (nMax - nMin) + nMin; } void EasySmooth(Prob *p, int num, Prob threshold /*= 1e-5*/) { for (int i = 0; i < num; i++) { p[i] = max(p[i], threshold); } LineNormalize(p, num); } double GetAISFactor(int t, int T) { t = t + 1; /* make sure that t is from 1 to T */ double delta = 4; double b_t = SigmFunc( delta * (2.0 * t / T - 1) ); double b_T = SigmFunc( delta * (2.0 * T / T - 1) ); double b_1 = SigmFunc( delta * (2.0 * 1 / T - 1) ); return (b_t - b_1) / (b_T - b_1); } double SigmFunc(double x) { return 1.0 / (1 + exp(-x)); } }
116 
117  if (d <= dSum)
118  break;
119  }
120  if (sX >= nNum) {
121  cout << "[LineSampling] " << sX << "\t" << dSum << endl;
122  lout_assert(sX<nNum);
123  }
124 
125  return sX;
126  }
127  bool Acceptable(Prob prob)
128  {
129  double d = 1.0 * omp_rand() / RAND_MAX;
130  return d <= prob;
131  }
132  void RandomPos(int *a, int len, int n)
133  {
134  if (n> len) {
135  lout_error("[RandomPos] n(" << n << ") > len(" << len << ") !!");
136  }
137 
138  for (int i = 0; i<n; i++) {
139  int s = rand() % (len - i);
140  //将a中第i+s个位置与第i位置交换
141  int temp = a[i + s];
142  a[i + s] = a[i];
143  a[i] = temp;
144  }
145  }
146 
/*
 * Get a random floating-point value between dmin and dmax (both inclusive).
 *
 * Uses rand(), scaled by RAND_MAX, then mapped linearly onto [dmin, dmax].
 */
double dRand(double dmin, double dmax)
{
    const double unit = 1.0 * rand() / RAND_MAX; /* uniform in [0, 1] */
    const double span = dmax - dmin;
    return unit * span + dmin;
}
152  int omp_nrand(int nMin, int nMax)
153  {
154  return omp_rand() % (nMax - nMin) + nMin;
155  }
156 
157  void EasySmooth(Prob *p, int num, Prob threshold /*= 1e-5*/)
158  {
159  for (int i = 0; i < num; i++) {
160  p[i] = max(p[i], threshold);
161  }
162  LineNormalize(p, num);
163  }
164 
165  double GetAISFactor(int t, int T)
166  {
167  t = t + 1; /* make sure that t is from 1 to T */
168 
169  double delta = 4;
170  double b_t = SigmFunc( delta * (2.0 * t / T - 1) );
171  double b_T = SigmFunc( delta * (2.0 * T / T - 1) );
172  double b_1 = SigmFunc( delta * (2.0 * 1 / T - 1) );
173 
174  return (b_t - b_1) / (b_T - b_1);
175  }
176 
/*
 * Calculate the sigmoid function f(x) = 1/(1+exp(-x)).
 */
double SigmFunc(double x)
{
    const double decay = exp(-x);
    return 1.0 / (1 + decay);
}
181 
182 }
const float LogP_zero
Definition: trf-def.h:30
double Prob
Definition: trf-def.h:28
int LogLineSampling(const LogP *pdProbs, int nNum)
Definition: trf-def.cpp:62
void RandomPos(int *a, int len, int n)
Definition: trf-def.cpp:132
double SigmFunc(double x)
calculate the sigmoid function f(x) = 1/(1+exp(-x))
Definition: trf-def.cpp:177
#define lout_error(x)
Definition: wb-log.h:183
#define lout_assert(p)
Definition: wb-log.h:185
void LineNormalize(Prob *pdProbs, int nNum)
Definition: trf-def.cpp:87
LogP Log_Sum(LogP x, LogP y)
Definition: trf-def.h:40
double LogP
Definition: trf-def.h:27
double dRand(double dmin, double dmax)
get a random float between dmin and dmax
Definition: trf-def.cpp:147
int LineSampling(const Prob *pdProbs, int nNum)
Definition: trf-def.cpp:103
bool Acceptable(Prob prob)
Definition: trf-def.cpp:127
int omp_rand(int thread_num)
Definition: trf-def.cpp:23
void EasySmooth(Prob *p, int num, Prob threshold)
smooth a distribution
Definition: trf-def.cpp:157
LogP LogLineNormalize(LogP *pdProbs, int nNum)
Definition: trf-def.cpp:53
double GetAISFactor(int t, int T)
Get the AIS intermediate factor beta_t.
Definition: trf-def.cpp:165
Log lout
the definition is in wb-log.cpp
Definition: wb-log.cpp:22
Prob LogP2Prob(LogP x)
Definition: trf-def.h:33
int omp_nrand(int nMin, int nMax)
get a random integer in [nMin, nMax-1]
Definition: trf-def.cpp:152
Definition: trf-alg.cpp:20