TRF Language Model
wb-solve.cpp
Go to the documentation of this file.
1 // You may obtain a copy of the License at
2 //
3 // http://www.apache.org/licenses/LICENSE-2.0
4 //
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 //
11 // Copyright 2014-2015 Tsinghua University
12 // Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
13 //
14 // All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
15 // license declaration. Different coding language may use different comment styles.
16 
17 
18 #include "wb-solve.h"
19 
20 namespace wb
21 {
22  bool Solve::Run(const double *pInitParams /* = NULL */)
23  {
24  if (!m_pfunc) {
25  lout_Solve << "m_pFunc == NULL" << endl;
26  return false;
27  }
28 
29  Clock ck;
31  //lout_variable(m_pfunc->GetParamNum());
32  m_pdRoot = new double[m_pfunc->GetParamNum()];
33 
34  double *pdCurParams = new double[m_pfunc->m_nParamNum]; //当前参数x_k
35  double *pdCurGradient = new double[m_pfunc->m_nParamNum]; //当前的梯度 df_k
36  double dCurValue = 0; // 函数值 f_k
37  double *pdDir = new double[m_pfunc->m_nParamNum]; //方向,p_k
38  double nExValueNum; // 额外数据的大小
39  double dExValues[Func::cn_exvalue_max_num]; // 保存额外数据 double dStep = 0; ///< 保存迭代步长 double dGradNorm = 0; //梯度的模 m_dSpendMinute = 0; //初始化 for (int i = 0; i < m_pfunc->m_nParamNum; i++) { pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1; } memset(pdDir, 0, sizeof(double)*m_pfunc->m_nParamNum); memset(pdCurGradient, 0, sizeof(double)*m_pfunc->m_nParamNum); IterInit(); ///init // iteration begin for (m_nIterNum = m_nIterMin; m_nIterNum <= m_nIterMax; m_nIterNum++) { lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl; ck.Begin(); // set the parameter m_pfunc->SetParam(pdCurParams); // get the gradient m_pfunc->GetGradient(pdCurGradient); // get the function value dCurValue = m_pfunc->GetValue(); // get the ex-value nExValueNum = m_pfunc->GetExtraValues(m_nIterNum, dExValues); /* output the values */ { lout_Solve << "dir_k={ "; for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++) lout << pdDir[i] << " "; lout << "... }" << endl; lout_Solve << "x_k={ "; for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++) lout << pdCurParams[i] << " "; lout << "... }" << endl; lout_Solve << "g_k={ "; for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++) lout << pdCurGradient[i] << " "; lout << "... 
}" << endl; double dNorm = VecNorm(pdCurGradient, m_pfunc->m_nParamNum); lout_Solve << "a=" << dStep << " |g_k|=" << dNorm << " f_k=" << dCurValue << endl; lout_Solve << "ExValues={ "; for (int i = 0; i < nExValueNum; i++) lout << dExValues[i] << " "; lout << "}" << endl; // #ifdef _DEBUG // Pause(); // #endif } /* Stop Decision */ if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) { break; } // get the update direction ComputeDir(m_nIterNum, pdCurParams, pdCurGradient, pdDir); // Line search if (m_dGain > 0) dStep = m_dGain; else dStep = LineSearch(pdDir, dCurValue, pdCurParams, pdCurGradient); // Update parameters Update(pdCurParams, pdDir, dStep); // Add the spend times m_dSpendMinute += ck.ToSecond(ck.End()) / 60; } lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl; lout_Solve << "Iter Finished!" << endl; // Save the result memcpy(m_pdRoot, pdCurParams, sizeof(m_pdRoot[0])*m_pfunc->m_nParamNum); SAFE_DELETE_ARRAY(pdDir); SAFE_DELETE_ARRAY(pdCurGradient); SAFE_DELETE_ARRAY(pdCurParams); return true; } void Solve::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir) { /* gradient descent */ for (int i = 0; i < m_pfunc->m_nParamNum; i++) pdDir[i] = -pdGradient[i]; } double Solve::LineSearch(double *pdDir, double dValue, const double *pdCurParam, const double *pdGradient) { /* 需要额外的SetParam的代价 */ double *pdNextParam = new double[m_pfunc->m_nParamNum]; /// 算法参加Numerical Optimization,P57,介绍的interpolation算法 double a0 = 0, a1 = 0, a2 = 0; //步长 double phi0 = 0, phi1 = 0, phi2 = 0; //phi(ai) double c = 1e-4; double phi_t = VecProduct(pdGradient, pdDir, m_pfunc->m_nParamNum); // phi'(0) a2 = 1.0; //初始步长设为1 for (int k = 1; a2 > 0; k++) { // x = x + a * p for (int n = 0; n < m_pfunc->m_nParamNum; n++) pdNextParam[n] = pdCurParam[n] + a2 * pdDir[n]; m_pfunc->SetParam(pdNextParam); phi2 = m_pfunc->GetValue(); // phi(a2) if (phi2 <= dValue + c * a2 * phi_t) break; //保存前两次的结果 a0 = a1; a1 = a2; phi0 
= phi1; phi1 = phi2; if (k == 1) { a2 = -phi_t*a1*a1 / 2 / (phi1 - dValue - phi_t * a1); } else { double v1 = phi1 - dValue - phi_t * a1; double v2 = phi0 - dValue - phi_t * a0; double a = a0*a0*v1 - a1*a1*v2; double b = -a0*a0*a0*v1 + a1*a1*a1*v2; double t = a0*a0*a1*a1*(a1 - a0); a /= t; b /= t; a2 = (-b + sqrt(b*b - 3 * a*phi_t)) / (3 * a); } //如果a2与a1太接近或差的太远,则取a2=a1/2 if (fabs(a2 - a1) < 1e-5 || a1 / a2 > 10) a2 = a1 / 2; //如果a2过小,则取一个不太小的值 if (a2 < 1e-10) { lout_warning("[Solve] LineSearch: a2 is too small < 1e-10, break") a2 = 1e-5; break; } } SAFE_DELETE_ARRAY(pdNextParam); return a2; } void Solve::Update(double *pdParam, const double *pdDir, double dStep) { for (int i = 0; i < m_pfunc->m_nParamNum; i++) { pdParam[i] += pdDir[i] * dStep; } } bool Solve::StopDecision(int k, double dValue, const double *pdGradient) { if (VecNorm(pdGradient, m_pfunc->m_nParamNum) < m_dStop) { return true; } if (k == m_nIterMax) { //防止因为迭代次数终止后,又进行一次额外的迭代 return true; } return false; } double Solve::VecProduct(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pdVec1[i] * pdVec2[i]; return d; } double Solve::VecNorm(const double *pdVec, int nSize) { return sqrt(VecProduct(pdVec, pdVec, nSize)); } double Solve::VecDist(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pow(pdVec1[i] - pdVec2[i], 2); return sqrt(d); } void LBFGS::IterInit() { SAFE_DELETE_ARRAY(m_pdPrevParam); SAFE_DELETE_ARRAY(m_pdPrevGradient); SAFE_DELETE_ARRAY(m_pdAlpha); m_pdPrevParam = new double[m_pfunc->GetParamNum()]; m_pdPrevGradient = new double[m_pfunc->GetParamNum()]; m_pdAlpha = new double[m_nLimitiedNum]; CirQueueBuf_Release(); CirQueueBuf_Init(); } void LBFGS::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir) { if (k > 1) { // 保存用于LBFGS的vector CirQueueBuf_In(m_pd_s, m_pd_y); for (int n = 0; n < m_pfunc->m_nParamNum; n++) { m_pd_s[n] = pdParam[n] - 
m_pdPrevParam[n]; m_pd_y[n] = pdGradient[n] - m_pdPrevGradient[n]; } } /* 计算LBFGS direction */ double *pd_s = NULL; double *pd_y = NULL; int nVecLen = m_pfunc->m_nParamNum; lout_Solve << "LBFGS dir computer" << endl; int nBound = min(m_nLimitiedNum, k-1); //最多只计算前m个结果 //将梯度赋给q memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen); //确保新来的向量不能为0向量 if (nBound >= 1) { CirQueueBuf_Prev(1, pd_s, pd_y); } //第一个循环 for (int i = 1; i <= nBound; i++) { CirQueueBuf_Prev(i, pd_s, pd_y); double dProd = VecProduct(pd_s, pd_y, nVecLen); m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd; for (int n = 0; n < nVecLen; n++) pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n]; } //计算gamma,即初始的H^0 double dGamma = 1; if (k > 1) { CirQueueBuf_Prev(1, pd_s, pd_y); dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen); } // r = H^0 * q for (int n = 0; n < nVecLen; n++) pdDir[n] *= dGamma; //第二个循环 for (int i = nBound; i >= 1; i--) { CirQueueBuf_Prev(i, pd_s, pd_y); double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen); for (int n = 0; n < nVecLen; n++) pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta); } //方向需要取反 for (int n = 0; n < nVecLen; n++) pdDir[n] = -pdDir[n]; /* Save the previsou parameter and gradient */ memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum); memcpy(m_pdPrevGradient, pdGradient, sizeof(pdGradient[0])*m_pfunc->m_nParamNum); } void LBFGS::CirQueueBuf_Init() { m_pCirQueueBuf = new sy[m_nLimitiedNum]; for (int i = 0; i < m_nLimitiedNum; i++) { m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum]; m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum]; } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Release() { if (m_pCirQueueBuf) { for (int i = 0; i < m_nLimitiedNum; i++) { SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s); SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y); } SAFE_DELETE_ARRAY(m_pCirQueueBuf); } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y) { i = (m_nLimitiedNum 
+ m_nCirQueueBufTail - i) % m_nLimitiedNum; pd_s = m_pCirQueueBuf[i].s; pd_y = m_pCirQueueBuf[i].y; } void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y) { pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s; pd_y = m_pCirQueueBuf[m_nCirQueueBufTail].y; m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum; } }
40 
41  double dStep = 0;
42  double dGradNorm = 0; //梯度的模
43 
44  m_dSpendMinute = 0;
45 
46  //初始化
47  for (int i = 0; i < m_pfunc->m_nParamNum; i++) {
48  pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1;
49  }
50  memset(pdDir, 0, sizeof(double)*m_pfunc->m_nParamNum);
51  memset(pdCurGradient, 0, sizeof(double)*m_pfunc->m_nParamNum);
52 
53  IterInit();
54 
55  // iteration begin
57  {
58  lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl;
59  ck.Begin();
60 
61  // set the parameter
62  m_pfunc->SetParam(pdCurParams);
63 
64  // get the gradient
65  m_pfunc->GetGradient(pdCurGradient);
66  // get the function value
67  dCurValue = m_pfunc->GetValue();
68  // get the ex-value
69  nExValueNum = m_pfunc->GetExtraValues(m_nIterNum, dExValues);
70 
71  /* output the values */
72  {
73  lout_Solve << "dir_k={ ";
74  for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++)
75  lout << pdDir[i] << " ";
76  lout << "... }" << endl;
77 
78  lout_Solve << "x_k={ ";
79  for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++)
80  lout << pdCurParams[i] << " ";
81  lout << "... }" << endl;
82 
83  lout_Solve << "g_k={ ";
84  for (int i = 0; i < min(4, m_pfunc->m_nParamNum); i++)
85  lout << pdCurGradient[i] << " ";
86  lout << "... }" << endl;
87 
88  double dNorm = VecNorm(pdCurGradient, m_pfunc->m_nParamNum);
89  lout_Solve << "a=" << dStep << " |g_k|=" << dNorm << " f_k=" << dCurValue << endl;
90 
91  lout_Solve << "ExValues={ ";
92  for (int i = 0; i < nExValueNum; i++)
93  lout << dExValues[i] << " ";
94  lout << "}" << endl;
95 
96 // #ifdef _DEBUG
97 // Pause();
98 // #endif
99  }
100 
101  /* Stop Decision */
102  if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) {
103  break;
104  }
105 
106 
107  // get the update direction
108  ComputeDir(m_nIterNum, pdCurParams, pdCurGradient, pdDir);
109 
110  // Line search
111  if (m_dGain > 0)
112  dStep = m_dGain;
113  else
114  dStep = LineSearch(pdDir, dCurValue, pdCurParams, pdCurGradient);
115 
116  // Update parameters
117  Update(pdCurParams, pdDir, dStep);
118 
119  // Add the spend times
120  m_dSpendMinute += ck.ToSecond(ck.End()) / 60;
121  }
122 
123 
124  lout_Solve << "======== iter:" << m_nIterNum << " ===(" << m_dSpendMinute << "m)=======" << endl;
125  lout_Solve << "Iter Finished!" << endl;
126 
127  // Save the result
128  memcpy(m_pdRoot, pdCurParams, sizeof(m_pdRoot[0])*m_pfunc->m_nParamNum);
129 
130 
131  SAFE_DELETE_ARRAY(pdDir);
132  SAFE_DELETE_ARRAY(pdCurGradient);
133  SAFE_DELETE_ARRAY(pdCurParams);
134 
135  return true;
136  }
137  void Solve::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir)
138  {
139  /* gradient descent */
140  for (int i = 0; i < m_pfunc->m_nParamNum; i++)
141  pdDir[i] = -pdGradient[i];
142  }
143  double Solve::LineSearch(double *pdDir, double dValue, const double *pdCurParam, const double *pdGradient)
144  {
145  /*
146  需要额外的SetParam的代价 */ double *pdNextParam = new double[m_pfunc->m_nParamNum]; /// 算法参加Numerical Optimization,P57,介绍的interpolation算法 double a0 = 0, a1 = 0, a2 = 0; //步长 double phi0 = 0, phi1 = 0, phi2 = 0; //phi(ai) double c = 1e-4; double phi_t = VecProduct(pdGradient, pdDir, m_pfunc->m_nParamNum); // phi'(0) a2 = 1.0; //初始步长设为1 for (int k = 1; a2 > 0; k++) { // x = x + a * p for (int n = 0; n < m_pfunc->m_nParamNum; n++) pdNextParam[n] = pdCurParam[n] + a2 * pdDir[n]; m_pfunc->SetParam(pdNextParam); phi2 = m_pfunc->GetValue(); // phi(a2) if (phi2 <= dValue + c * a2 * phi_t) break; //保存前两次的结果 a0 = a1; a1 = a2; phi0 = phi1; phi1 = phi2; if (k == 1) { a2 = -phi_t*a1*a1 / 2 / (phi1 - dValue - phi_t * a1); } else { double v1 = phi1 - dValue - phi_t * a1; double v2 = phi0 - dValue - phi_t * a0; double a = a0*a0*v1 - a1*a1*v2; double b = -a0*a0*a0*v1 + a1*a1*a1*v2; double t = a0*a0*a1*a1*(a1 - a0); a /= t; b /= t; a2 = (-b + sqrt(b*b - 3 * a*phi_t)) / (3 * a); } //如果a2与a1太接近或差的太远,则取a2=a1/2 if (fabs(a2 - a1) < 1e-5 || a1 / a2 > 10) a2 = a1 / 2; //如果a2过小,则取一个不太小的值 if (a2 < 1e-10) { lout_warning("[Solve] LineSearch: a2 is too small < 1e-10, break") a2 = 1e-5; break; } } SAFE_DELETE_ARRAY(pdNextParam); return a2; } void Solve::Update(double *pdParam, const double *pdDir, double dStep) { for (int i = 0; i < m_pfunc->m_nParamNum; i++) { pdParam[i] += pdDir[i] * dStep; } } bool Solve::StopDecision(int k, double dValue, const double *pdGradient) { if (VecNorm(pdGradient, m_pfunc->m_nParamNum) < m_dStop) { return true; } if (k == m_nIterMax) { //防止因为迭代次数终止后,又进行一次额外的迭代 return true; } return false; } double Solve::VecProduct(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pdVec1[i] * pdVec2[i]; return d; } double Solve::VecNorm(const double *pdVec, int nSize) { return sqrt(VecProduct(pdVec, pdVec, nSize)); } double Solve::VecDist(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < 
nSize; i++) d += pow(pdVec1[i] - pdVec2[i], 2); return sqrt(d); } void LBFGS::IterInit() { SAFE_DELETE_ARRAY(m_pdPrevParam); SAFE_DELETE_ARRAY(m_pdPrevGradient); SAFE_DELETE_ARRAY(m_pdAlpha); m_pdPrevParam = new double[m_pfunc->GetParamNum()]; m_pdPrevGradient = new double[m_pfunc->GetParamNum()]; m_pdAlpha = new double[m_nLimitiedNum]; CirQueueBuf_Release(); CirQueueBuf_Init(); } void LBFGS::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir) { if (k > 1) { // 保存用于LBFGS的vector CirQueueBuf_In(m_pd_s, m_pd_y); for (int n = 0; n < m_pfunc->m_nParamNum; n++) { m_pd_s[n] = pdParam[n] - m_pdPrevParam[n]; m_pd_y[n] = pdGradient[n] - m_pdPrevGradient[n]; } } /* 计算LBFGS direction */ double *pd_s = NULL; double *pd_y = NULL; int nVecLen = m_pfunc->m_nParamNum; lout_Solve << "LBFGS dir computer" << endl; int nBound = min(m_nLimitiedNum, k-1); //最多只计算前m个结果 //将梯度赋给q memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen); //确保新来的向量不能为0向量 if (nBound >= 1) { CirQueueBuf_Prev(1, pd_s, pd_y); } //第一个循环 for (int i = 1; i <= nBound; i++) { CirQueueBuf_Prev(i, pd_s, pd_y); double dProd = VecProduct(pd_s, pd_y, nVecLen); m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd; for (int n = 0; n < nVecLen; n++) pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n]; } //计算gamma,即初始的H^0 double dGamma = 1; if (k > 1) { CirQueueBuf_Prev(1, pd_s, pd_y); dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen); } // r = H^0 * q for (int n = 0; n < nVecLen; n++) pdDir[n] *= dGamma; //第二个循环 for (int i = nBound; i >= 1; i--) { CirQueueBuf_Prev(i, pd_s, pd_y); double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen); for (int n = 0; n < nVecLen; n++) pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta); } //方向需要取反 for (int n = 0; n < nVecLen; n++) pdDir[n] = -pdDir[n]; /* Save the previsou parameter and gradient */ memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum); memcpy(m_pdPrevGradient, pdGradient, 
sizeof(pdGradient[0])*m_pfunc->m_nParamNum); } void LBFGS::CirQueueBuf_Init() { m_pCirQueueBuf = new sy[m_nLimitiedNum]; for (int i = 0; i < m_nLimitiedNum; i++) { m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum]; m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum]; } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Release() { if (m_pCirQueueBuf) { for (int i = 0; i < m_nLimitiedNum; i++) { SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s); SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y); } SAFE_DELETE_ARRAY(m_pCirQueueBuf); } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y) { i = (m_nLimitiedNum + m_nCirQueueBufTail - i) % m_nLimitiedNum; pd_s = m_pCirQueueBuf[i].s; pd_y = m_pCirQueueBuf[i].y; } void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y) { pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s; pd_y = m_pCirQueueBuf[m_nCirQueueBufTail].y; m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum; } }
147  */
148  double *pdNextParam = new double[m_pfunc->m_nParamNum];
149 
151  double a0 = 0, a1 = 0, a2 = 0; //步长
152  double phi0 = 0, phi1 = 0, phi2 = 0; //phi(ai)
153  double c = 1e-4;
154  double phi_t = VecProduct(pdGradient, pdDir, m_pfunc->m_nParamNum); // phi'(0)
155 
156  a2 = 1.0; //初始步长设为1
157  for (int k = 1; a2 > 0; k++)
158  {
159  // x = x + a * p
160  for (int n = 0; n < m_pfunc->m_nParamNum; n++)
161  pdNextParam[n] = pdCurParam[n] + a2 * pdDir[n];
162 
163  m_pfunc->SetParam(pdNextParam);
164  phi2 = m_pfunc->GetValue(); // phi(a2)
165 
166  if (phi2 <= dValue + c * a2 * phi_t)
167  break;
168 
169  //保存前两次的结果 a0 = a1; a1 = a2; phi0 = phi1; phi1 = phi2; if (k == 1) { a2 = -phi_t*a1*a1 / 2 / (phi1 - dValue - phi_t * a1); } else { double v1 = phi1 - dValue - phi_t * a1; double v2 = phi0 - dValue - phi_t * a0; double a = a0*a0*v1 - a1*a1*v2; double b = -a0*a0*a0*v1 + a1*a1*a1*v2; double t = a0*a0*a1*a1*(a1 - a0); a /= t; b /= t; a2 = (-b + sqrt(b*b - 3 * a*phi_t)) / (3 * a); } //如果a2与a1太接近或差的太远,则取a2=a1/2 if (fabs(a2 - a1) < 1e-5 || a1 / a2 > 10) a2 = a1 / 2; //如果a2过小,则取一个不太小的值 if (a2 < 1e-10) { lout_warning("[Solve] LineSearch: a2 is too small < 1e-10, break") a2 = 1e-5; break; } } SAFE_DELETE_ARRAY(pdNextParam); return a2; } void Solve::Update(double *pdParam, const double *pdDir, double dStep) { for (int i = 0; i < m_pfunc->m_nParamNum; i++) { pdParam[i] += pdDir[i] * dStep; } } bool Solve::StopDecision(int k, double dValue, const double *pdGradient) { if (VecNorm(pdGradient, m_pfunc->m_nParamNum) < m_dStop) { return true; } if (k == m_nIterMax) { //防止因为迭代次数终止后,又进行一次额外的迭代 return true; } return false; } double Solve::VecProduct(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pdVec1[i] * pdVec2[i]; return d; } double Solve::VecNorm(const double *pdVec, int nSize) { return sqrt(VecProduct(pdVec, pdVec, nSize)); } double Solve::VecDist(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pow(pdVec1[i] - pdVec2[i], 2); return sqrt(d); } void LBFGS::IterInit() { SAFE_DELETE_ARRAY(m_pdPrevParam); SAFE_DELETE_ARRAY(m_pdPrevGradient); SAFE_DELETE_ARRAY(m_pdAlpha); m_pdPrevParam = new double[m_pfunc->GetParamNum()]; m_pdPrevGradient = new double[m_pfunc->GetParamNum()]; m_pdAlpha = new double[m_nLimitiedNum]; CirQueueBuf_Release(); CirQueueBuf_Init(); } void LBFGS::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir) { if (k > 1) { // 保存用于LBFGS的vector CirQueueBuf_In(m_pd_s, m_pd_y); for (int n = 0; n < m_pfunc->m_nParamNum; 
n++) { m_pd_s[n] = pdParam[n] - m_pdPrevParam[n]; m_pd_y[n] = pdGradient[n] - m_pdPrevGradient[n]; } } /* 计算LBFGS direction */ double *pd_s = NULL; double *pd_y = NULL; int nVecLen = m_pfunc->m_nParamNum; lout_Solve << "LBFGS dir computer" << endl; int nBound = min(m_nLimitiedNum, k-1); //最多只计算前m个结果 //将梯度赋给q memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen); //确保新来的向量不能为0向量 if (nBound >= 1) { CirQueueBuf_Prev(1, pd_s, pd_y); } //第一个循环 for (int i = 1; i <= nBound; i++) { CirQueueBuf_Prev(i, pd_s, pd_y); double dProd = VecProduct(pd_s, pd_y, nVecLen); m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd; for (int n = 0; n < nVecLen; n++) pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n]; } //计算gamma,即初始的H^0 double dGamma = 1; if (k > 1) { CirQueueBuf_Prev(1, pd_s, pd_y); dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen); } // r = H^0 * q for (int n = 0; n < nVecLen; n++) pdDir[n] *= dGamma; //第二个循环 for (int i = nBound; i >= 1; i--) { CirQueueBuf_Prev(i, pd_s, pd_y); double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen); for (int n = 0; n < nVecLen; n++) pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta); } //方向需要取反 for (int n = 0; n < nVecLen; n++) pdDir[n] = -pdDir[n]; /* Save the previsou parameter and gradient */ memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum); memcpy(m_pdPrevGradient, pdGradient, sizeof(pdGradient[0])*m_pfunc->m_nParamNum); } void LBFGS::CirQueueBuf_Init() { m_pCirQueueBuf = new sy[m_nLimitiedNum]; for (int i = 0; i < m_nLimitiedNum; i++) { m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum]; m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum]; } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Release() { if (m_pCirQueueBuf) { for (int i = 0; i < m_nLimitiedNum; i++) { SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s); SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y); } SAFE_DELETE_ARRAY(m_pCirQueueBuf); } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, 
double *&pd_y) { i = (m_nLimitiedNum + m_nCirQueueBufTail - i) % m_nLimitiedNum; pd_s = m_pCirQueueBuf[i].s; pd_y = m_pCirQueueBuf[i].y; } void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y) { pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s; pd_y = m_pCirQueueBuf[m_nCirQueueBufTail].y; m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum; } }
170  a0 = a1;
171  a1 = a2;
172  phi0 = phi1;
173  phi1 = phi2;
174  if (k == 1) {
175  a2 = -phi_t*a1*a1 / 2 / (phi1 - dValue - phi_t * a1);
176  }
177  else {
178  double v1 = phi1 - dValue - phi_t * a1;
179  double v2 = phi0 - dValue - phi_t * a0;
180  double a = a0*a0*v1 - a1*a1*v2;
181  double b = -a0*a0*a0*v1 + a1*a1*a1*v2;
182  double t = a0*a0*a1*a1*(a1 - a0);
183  a /= t;
184  b /= t;
185  a2 = (-b + sqrt(b*b - 3 * a*phi_t)) / (3 * a);
186  }
187 
188  //如果a2与a1太接近或差的太远,则取a2=a1/2
189  if (fabs(a2 - a1) < 1e-5 ||
190  a1 / a2 > 10)
191  a2 = a1 / 2;
192  //如果a2过小,则取一个不太小的值
193  if (a2 < 1e-10) {
194  lout_warning("[Solve] LineSearch: a2 is too small < 1e-10, break")
195  a2 = 1e-5;
196  break;
197  }
198  }
199 
200  SAFE_DELETE_ARRAY(pdNextParam);
201 
202  return a2;
203  }
204  void Solve::Update(double *pdParam, const double *pdDir, double dStep)
205  {
206  for (int i = 0; i < m_pfunc->m_nParamNum; i++) {
207  pdParam[i] += pdDir[i] * dStep;
208  }
209  }
210  bool Solve::StopDecision(int k, double dValue, const double *pdGradient)
211  {
212  if (VecNorm(pdGradient, m_pfunc->m_nParamNum) < m_dStop) {
213  return true;
214  }
215  if (k == m_nIterMax) { //防止因为迭代次数终止后,又进行一次额外的迭代 return true; } return false; } double Solve::VecProduct(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pdVec1[i] * pdVec2[i]; return d; } double Solve::VecNorm(const double *pdVec, int nSize) { return sqrt(VecProduct(pdVec, pdVec, nSize)); } double Solve::VecDist(const double *pdVec1, const double *pdVec2, int nSize) { double d = 0; for (int i = 0; i < nSize; i++) d += pow(pdVec1[i] - pdVec2[i], 2); return sqrt(d); } void LBFGS::IterInit() { SAFE_DELETE_ARRAY(m_pdPrevParam); SAFE_DELETE_ARRAY(m_pdPrevGradient); SAFE_DELETE_ARRAY(m_pdAlpha); m_pdPrevParam = new double[m_pfunc->GetParamNum()]; m_pdPrevGradient = new double[m_pfunc->GetParamNum()]; m_pdAlpha = new double[m_nLimitiedNum]; CirQueueBuf_Release(); CirQueueBuf_Init(); } void LBFGS::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir) { if (k > 1) { // 保存用于LBFGS的vector CirQueueBuf_In(m_pd_s, m_pd_y); for (int n = 0; n < m_pfunc->m_nParamNum; n++) { m_pd_s[n] = pdParam[n] - m_pdPrevParam[n]; m_pd_y[n] = pdGradient[n] - m_pdPrevGradient[n]; } } /* 计算LBFGS direction */ double *pd_s = NULL; double *pd_y = NULL; int nVecLen = m_pfunc->m_nParamNum; lout_Solve << "LBFGS dir computer" << endl; int nBound = min(m_nLimitiedNum, k-1); //最多只计算前m个结果 //将梯度赋给q memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen); //确保新来的向量不能为0向量 if (nBound >= 1) { CirQueueBuf_Prev(1, pd_s, pd_y); } //第一个循环 for (int i = 1; i <= nBound; i++) { CirQueueBuf_Prev(i, pd_s, pd_y); double dProd = VecProduct(pd_s, pd_y, nVecLen); m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd; for (int n = 0; n < nVecLen; n++) pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n]; } //计算gamma,即初始的H^0 double dGamma = 1; if (k > 1) { CirQueueBuf_Prev(1, pd_s, pd_y); dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen); } // r = H^0 * q for (int n = 0; n < nVecLen; n++) pdDir[n] *= dGamma; //第二个循环 for (int i 
= nBound; i >= 1; i--) { CirQueueBuf_Prev(i, pd_s, pd_y); double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen); for (int n = 0; n < nVecLen; n++) pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta); } //方向需要取反 for (int n = 0; n < nVecLen; n++) pdDir[n] = -pdDir[n]; /* Save the previsou parameter and gradient */ memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum); memcpy(m_pdPrevGradient, pdGradient, sizeof(pdGradient[0])*m_pfunc->m_nParamNum); } void LBFGS::CirQueueBuf_Init() { m_pCirQueueBuf = new sy[m_nLimitiedNum]; for (int i = 0; i < m_nLimitiedNum; i++) { m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum]; m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum]; } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Release() { if (m_pCirQueueBuf) { for (int i = 0; i < m_nLimitiedNum; i++) { SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s); SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y); } SAFE_DELETE_ARRAY(m_pCirQueueBuf); } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y) { i = (m_nLimitiedNum + m_nCirQueueBufTail - i) % m_nLimitiedNum; pd_s = m_pCirQueueBuf[i].s; pd_y = m_pCirQueueBuf[i].y; } void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y) { pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s; pd_y = m_pCirQueueBuf[m_nCirQueueBufTail].y; m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum; } }
216  return true;
217  }
218  return false;
219  }
220  double Solve::VecProduct(const double *pdVec1, const double *pdVec2, int nSize)
221  {
222  double d = 0;
223  for (int i = 0; i < nSize; i++)
224  d += pdVec1[i] * pdVec2[i];
225  return d;
226  }
227  double Solve::VecNorm(const double *pdVec, int nSize)
228  {
229  return sqrt(VecProduct(pdVec, pdVec, nSize));
230  }
231  double Solve::VecDist(const double *pdVec1, const double *pdVec2, int nSize)
232  {
233  double d = 0;
234  for (int i = 0; i < nSize; i++)
235  d += pow(pdVec1[i] - pdVec2[i], 2);
236  return sqrt(d);
237  }
238 
239 
240 
242  {
243  SAFE_DELETE_ARRAY(m_pdPrevParam);
244  SAFE_DELETE_ARRAY(m_pdPrevGradient);
245  SAFE_DELETE_ARRAY(m_pdAlpha);
246 
247  m_pdPrevParam = new double[m_pfunc->GetParamNum()];
248  m_pdPrevGradient = new double[m_pfunc->GetParamNum()];
249  m_pdAlpha = new double[m_nLimitiedNum];
250 
251  CirQueueBuf_Release();
252  CirQueueBuf_Init();
253  }
254  void LBFGS::ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir)
255  {
256  if (k > 1) {
257  // 保存用于LBFGS的vector
258  CirQueueBuf_In(m_pd_s, m_pd_y);
259  for (int n = 0; n < m_pfunc->m_nParamNum; n++) {
260  m_pd_s[n] = pdParam[n] - m_pdPrevParam[n];
261  m_pd_y[n] = pdGradient[n] - m_pdPrevGradient[n];
262  }
263  }
264 
265  /*
266  计算LBFGS direction
267  */
268 
269  double *pd_s = NULL;
270  double *pd_y = NULL;
271  int nVecLen = m_pfunc->m_nParamNum;
272 
273  lout_Solve << "LBFGS dir computer" << endl;
274  int nBound = min(m_nLimitiedNum, k-1); //最多只计算前m个结果 //将梯度赋给q memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen); //确保新来的向量不能为0向量 if (nBound >= 1) { CirQueueBuf_Prev(1, pd_s, pd_y); } //第一个循环 for (int i = 1; i <= nBound; i++) { CirQueueBuf_Prev(i, pd_s, pd_y); double dProd = VecProduct(pd_s, pd_y, nVecLen); m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd; for (int n = 0; n < nVecLen; n++) pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n]; } //计算gamma,即初始的H^0 double dGamma = 1; if (k > 1) { CirQueueBuf_Prev(1, pd_s, pd_y); dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen); } // r = H^0 * q for (int n = 0; n < nVecLen; n++) pdDir[n] *= dGamma; //第二个循环 for (int i = nBound; i >= 1; i--) { CirQueueBuf_Prev(i, pd_s, pd_y); double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen); for (int n = 0; n < nVecLen; n++) pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta); } //方向需要取反 for (int n = 0; n < nVecLen; n++) pdDir[n] = -pdDir[n]; /* Save the previsou parameter and gradient */ memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum); memcpy(m_pdPrevGradient, pdGradient, sizeof(pdGradient[0])*m_pfunc->m_nParamNum); } void LBFGS::CirQueueBuf_Init() { m_pCirQueueBuf = new sy[m_nLimitiedNum]; for (int i = 0; i < m_nLimitiedNum; i++) { m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum]; m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum]; } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Release() { if (m_pCirQueueBuf) { for (int i = 0; i < m_nLimitiedNum; i++) { SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s); SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y); } SAFE_DELETE_ARRAY(m_pCirQueueBuf); } m_nCirQueueBufTail = 0; } void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y) { i = (m_nLimitiedNum + m_nCirQueueBufTail - i) % m_nLimitiedNum; pd_s = m_pCirQueueBuf[i].s; pd_y = m_pCirQueueBuf[i].y; } void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y) { pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s; pd_y = 
m_pCirQueueBuf[m_nCirQueueBufTail].y; m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum; } }
275 
276  //将梯度赋给q
277  memcpy(pdDir, pdGradient, sizeof(pdDir[0])*nVecLen);
278 
279 
280  //确保新来的向量不能为0向量
281  if (nBound >= 1) {
282  CirQueueBuf_Prev(1, pd_s, pd_y);
283  }
284 
285 
286 
287  //第一个循环
288  for (int i = 1; i <= nBound; i++)
289  {
290  CirQueueBuf_Prev(i, pd_s, pd_y);
291 
292  double dProd = VecProduct(pd_s, pd_y, nVecLen);
293  m_pdAlpha[i - 1] = VecProduct(pd_s, pdDir, nVecLen) / dProd;
294  for (int n = 0; n < nVecLen; n++)
295  pdDir[n] -= m_pdAlpha[i - 1] * pd_y[n];
296  }
297 
298 
299  //计算gamma,即初始的H^0
300  double dGamma = 1;
301  if (k > 1) {
302  CirQueueBuf_Prev(1, pd_s, pd_y);
303  dGamma = VecProduct(pd_s, pd_y, nVecLen) / VecProduct(pd_y, pd_y, nVecLen);
304  }
305 
306  // r = H^0 * q
307  for (int n = 0; n < nVecLen; n++)
308  pdDir[n] *= dGamma;
309 
310  //第二个循环
311  for (int i = nBound; i >= 1; i--)
312  {
313  CirQueueBuf_Prev(i, pd_s, pd_y);
314  double dBeta = VecProduct(pd_y, pdDir, nVecLen) / VecProduct(pd_y, pd_s, nVecLen);
315  for (int n = 0; n < nVecLen; n++)
316  pdDir[n] += pd_s[n] * (m_pdAlpha[i - 1] - dBeta);
317  }
318 
319 
320 
321  //方向需要取反
322  for (int n = 0; n < nVecLen; n++)
323  pdDir[n] = -pdDir[n];
324 
325 
326 
327  /*
328  Save the previsou parameter and gradient
329  */
330  memcpy(m_pdPrevParam, pdParam, sizeof(pdParam[0])*m_pfunc->m_nParamNum);
331  memcpy(m_pdPrevGradient, pdGradient, sizeof(pdGradient[0])*m_pfunc->m_nParamNum);
332  }
333 
335  {
336  m_pCirQueueBuf = new sy[m_nLimitiedNum];
337  for (int i = 0; i < m_nLimitiedNum; i++) {
338  m_pCirQueueBuf[i].s = new double[m_pfunc->m_nParamNum];
339  m_pCirQueueBuf[i].y = new double[m_pfunc->m_nParamNum];
340  }
341  m_nCirQueueBufTail = 0;
342  }
344  {
345  if (m_pCirQueueBuf) {
346  for (int i = 0; i < m_nLimitiedNum; i++) {
347  SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].s);
348  SAFE_DELETE_ARRAY(m_pCirQueueBuf[i].y);
349  }
350  SAFE_DELETE_ARRAY(m_pCirQueueBuf);
351  }
352  m_nCirQueueBufTail = 0;
353  }
354  void LBFGS::CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y)
355  {
356  i = (m_nLimitiedNum + m_nCirQueueBufTail - i) % m_nLimitiedNum;
357  pd_s = m_pCirQueueBuf[i].s;
358  pd_y = m_pCirQueueBuf[i].y;
359  }
360  void LBFGS::CirQueueBuf_In(double *&pd_s, double *&pd_y)
361  {
362  pd_s = m_pCirQueueBuf[m_nCirQueueBufTail].s;
363  pd_y = m_pCirQueueBuf[m_nCirQueueBufTail].y;
364  m_nCirQueueBufTail = (m_nCirQueueBufTail + 1) % m_nLimitiedNum;
365  }
366 }
int m_nIterMin
minimum iteration number
Definition: wb-solve.h:87
virtual void ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir)
Calculate the update direction p_k.
Definition: wb-solve.cpp:137
virtual void SetParam(double *pdParams)=0
set the parameter.
static double VecDist(const double *pdVec1, const double *pdVec2, int nSize)
calculate the distance of two vectors
Definition: wb-solve.cpp:231
virtual void IterInit()
initialize the iteration; a hook for derived classes.
Definition: wb-solve.h:114
clock - used to record the time
Definition: wb-win.h:95
virtual void GetGradient(double *pdGradient)=0
calculate the gradient g(x)
clock_t Begin()
begin to record
Definition: wb-win.cpp:138
static double ToSecond(clock_t t)
transform the clock_t to second
Definition: wb-win.h:115
int m_nIterNum
current iteration number; iterates from m_nIterMin to m_nIterMax
Definition: wb-solve.h:86
virtual double GetValue()=0
calculate the function value f(x)
virtual void IterInit()
iter init
Definition: wb-solve.cpp:241
static double VecProduct(const double *pdVec1, const double *pdVec2, int nSize)
calculate the dot of two vectors
Definition: wb-solve.cpp:220
#define SAFE_DELETE_ARRAY(p)
Definition: wb-vector.h:50
virtual double LineSearch(double *pdDir, double dValue, const double *pdParam, const double *pdGradient)
line search.
Definition: wb-solve.cpp:143
int m_nIterMax
maximum iteration number
Definition: wb-solve.h:88
void CirQueueBuf_In(double *&pd_s, double *&pd_y)
enqueue; returns the pointers to the s and y buffers of the slot.
Definition: wb-solve.cpp:360
int GetParamNum() const
get the parameter number
Definition: wb-solve.h:52
#define lout_warning(x)
Definition: wb-log.h:184
static double VecNorm(const double *pdVec, int nSize)
calculate the norm of a vector
Definition: wb-solve.cpp:227
virtual void ComputeDir(int k, const double *pdParam, const double *pdGradient, double *pdDir)
Calculate the update direction p_k, referring to "Numerical Optimization", P178, Algorithm 7...
Definition: wb-solve.cpp:254
double * m_pdRoot
save the root of the function
Definition: wb-solve.h:84
virtual bool StopDecision(int k, double dValue, const double *pdGradient)
Stop decision.
Definition: wb-solve.cpp:210
double m_dGain
iteration step. ==0 means using the line search.
Definition: wb-solve.h:93
Func * m_pfunc
pointer to the function
Definition: wb-solve.h:82
void CirQueueBuf_Release()
release the circular queue
Definition: wb-solve.cpp:343
double m_dStop
stop threshold
Definition: wb-solve.h:92
Log lout
the definition is in wb-log.cpp
Definition: wb-log.cpp:22
define the framework of iterative algorithms, such as gradient descent or LBFGS.
static const int cn_exvalue_max_num
Definition: wb-solve.h:59
virtual int GetExtraValues(int k, double *pdValues)
calculate extra values which will be printed at each iteration
Definition: wb-solve.h:66
int m_nParamNum
the parameter number
Definition: wb-solve.h:45
clock_t End()
record end and return the time
Definition: wb-win.cpp:143
void CirQueueBuf_Init()
Init the circular queue.
Definition: wb-solve.cpp:334
virtual bool Run(const double *pInitParams=NULL)
Run iteration. input the init-parameters.
Definition: wb-solve.cpp:22
#define lout_Solve
Definition: wb-solve.h:69
virtual void Update(double *pdParam, const double *pdDir, double dStep)
Update.
Definition: wb-solve.cpp:204
double m_dSpendMinute
record the time spent on the iterations (in minutes)
Definition: wb-solve.h:90
define all the code written by Bin Wang.
Definition: wb-file.cpp:21
void CirQueueBuf_Prev(int i, double *&pd_s, double *&pd_y)
find the previous i-th data, i<=m_nLimitiedNum
Definition: wb-solve.cpp:354