TRF Language Model
trf-sa-train.cpp
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2014-2015 Tsinghua University
// Author: wb.th08@gmail.com (Bin Wang), ozj@tsinghua.edu.cn (Zhijian Ou)
//
// All h, cpp, cc, and script files (e.g. bat, sh, pl, py) should include the above
// license declaration. Different coding languages may use different comment styles.


#include "trf-sa-train.h"
#include "wb-log.h"

namespace trf
{
    ThreadData::~ThreadData()
    {
        for (int i = 0; i < aSeqs.GetNum(); i++) {
            SAFE_DELETE(aSeqs[i]);
        }
    }
    void ThreadData::Create(int maxlen, Model *pModel)
    {
        aSeqs.SetNum(maxlen + 1);
        aSeqs.Fill(NULL);
        for (int i = 1; i < aSeqs.GetNum(); i++) {
            aSeqs[i] = new Seq(i);
            aSeqs[i]->Random(pModel->m_pVocab);
        }
    }

    void SAfunc::Reset(Model *pModel, CorpusBase *pTrain, CorpusBase *pValid /* = NULL */, CorpusBase *pTest /* = NULL */, int nMinibatch /* = 100 */)
    {
        MLfunc::Reset(pModel, pTrain, pValid, pTest);
        GetEmpVar(pTrain, m_vEmpiricalVar);

        m_nMiniBatchSample = nMinibatch;

        /*
        sampling pi
        */
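        // Note: m_samplePi flattens the empirical length distribution m_trainPi:
        // every length below the mode is raised to the modal probability and all
        // lengths are floored at 1e-5, so no length is starved during sampling.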
        lout << "Smoothing the pi" << endl;
        double dMax = 0;
        int iMax = 0;
        for (int i = 1; i < m_trainPi.GetSize(); i++) {
            if (m_trainPi[i] > dMax) {
                dMax = m_trainPi[i];
                iMax = i;
            }
        }
        m_samplePi.Copy(m_trainPi);
        for (int i = 1; i < iMax; i++) {
            m_samplePi[i] = dMax;
        }
        for (int i = 1; i < m_samplePi.GetSize(); i++) {
            m_samplePi[i] = max((double)m_samplePi[i], 1e-5);
        }
        LineNormalize(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);

        lout << "sample-pi = [ "; lout.output(m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1); lout << "]" << endl;
        m_pModel->SetPi(m_samplePi.GetBuf());

        /* save the sample counts */
        m_vAllSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
        m_vCurSampleLenCount.Reset(m_pModel->GetMaxLen() + 1);
        m_vAllSampleLenCount.Fill(0);
        m_nTotalSample = 0;

        /* For SA estimation there are two sets of parameters,
           i.e. the feature weights \lambda and the normalization constants \zeta.
        */
        m_nParamNum = m_pModel->GetParamNum() + m_pModel->GetMaxLen() + 1;

        m_nCDSampleTimes = 1;
        m_nSASampleTimes = 1;

    }
    void SAfunc::PrintInfo()
    {
        lout << "[SAfunc] *** Info: *** " << endl;
        lout << " "; lout_variable(m_nMiniBatchSample);
        lout << " "; lout_variable(m_var_gap);
        lout << " "; lout_variable(m_fRegL2);
        lout << "[SAfunc] *** [End] ***" << endl;
    }
    void SAfunc::RandSeq(Seq &seq, int nLen /* = -1 */)
    {
        if (nLen == -1) {
            nLen = rand() % m_pModel->GetMaxLen() + 1;
        }

        seq.Reset(nLen);
        seq.Random(m_pModel->m_pVocab);
    }
    void SAfunc::SetParam(double *pdParams)
    {
        if (pdParams == NULL)
            return;

        m_value.Reset(m_pModel->GetParamNum());
        for (int i = 0; i < m_value.GetSize(); i++)
            m_value[i] = (PValue)pdParams[i];
        m_pModel->SetParam(m_value.GetBuf());
        m_pModel->ExactNormalize(1); // only calculate Z_1

        /* set zeta */
        m_pModel->SetZeta(pdParams + m_pModel->GetParamNum());

        if (m_fparm.Good()) {
            m_fparm.PrintArray("%f ", pdParams, m_nParamNum);
        }
    }
    void SAfunc::GetParam(double *pdParams)
    {
        if (pdParams == NULL)
            return;

        /* get lambda */
        m_value.Reset(m_pModel->GetParamNum());
        m_pModel->GetParam(m_value.GetBuf());
        for (int i = 0; i < m_value.GetSize(); i++)
            pdParams[i] = m_value[i];
        /* get zeta */
        pdParams += m_pModel->GetParamNum();
        for (int i = 0; i <= m_pModel->GetMaxLen(); i++) {
            pdParams[i] = m_pModel->m_zeta[i];
        }
    }

    int qsort_compare_double(const void * a, const void * b)
    {
        if (*(double*)a < *(double*)b) return -1;
        if (*(double*)a > *(double*)b) return 1;
        return 0; // equal; also guarantees a return on every path
    }

    void SAfunc::GetEmpVar(CorpusBase *pCorpus, Vec<double> &vVar)
    {
        int nThread = omp_get_max_threads();

        // the true length distribution
        Prob *pi = m_trainPi.GetBuf();

        vVar.Fill(0);

        Vec<double> vExpf2(m_pModel->GetParamNum());
        Vec<double> vExp_l(m_pModel->GetParamNum());

        Mat<double> matExpf2(nThread, vExpf2.GetSize());
        Mat<double> matExp_l(nThread, vExp_l.GetSize());

        vExpf2.Fill(0);
        vExp_l.Fill(0);
        matExpf2.Fill(0);
        matExp_l.Fill(0);

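        // Empirical variance of each feature f_i:
        //   Var[f_i] = E[f_i^2] - sum_l pi_l * (E_l[f_i])^2,
        // estimated below in two passes: first E[f^2] over the whole corpus,
        // then the per-length means E_l[f].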
        Array<VocabID> aSeq;
        lout.Progress(0, true, pCorpus->GetNum() - 1, "[SAfunc] E[f^2]:");
#pragma omp parallel for firstprivate(aSeq)
        for (int l = 0; l < pCorpus->GetNum(); l++) {

            double *pExpf2 = matExpf2[omp_get_thread_num()].GetBuf();

            pCorpus->GetSeq(l, aSeq);
            Seq seq;
            seq.Set(aSeq, m_pModel->m_pVocab);

            int nLen = min(m_pModel->GetMaxLen(), seq.GetLen());

            LHash<int, int> aFeatNum;
            bool bFound;
            Array<int> afeat;
            m_pModel->m_pFeat->Find(afeat, seq);
            for (int i = 0; i < afeat.GetNum(); i++) {
                int *p = aFeatNum.Insert(afeat[i], bFound);
                if (!bFound) *p = 0;
                (*p) += 1;
            }
            LHashIter<int, int> iter(&aFeatNum);
            int *pCount;
            int nFeat;
            while ((pCount = iter.Next(nFeat)) != NULL) {
                pExpf2[nFeat] += pow((double)(*pCount), 2);
            }
#pragma omp critical
            {
                lout.Progress();
            }
        }

        vExpf2.Fill(0);
        for (int t = 0; t < nThread; t++) {
            vExpf2 += matExpf2[t];
        }
        vExpf2 /= pCorpus->GetNum();


        //lout_variable(aExpFeatSqu[38272]);

        /* calculate the length-conditional expectations E_l[f] */
        lout.Progress(0, true, m_pModel->GetMaxLen(), "[SAfunc] E_l[f]:");
        for (int nLen = 1; nLen <= m_pModel->GetMaxLen(); nLen++)
        {
            matExp_l.Fill(0);

            // collect the indices of the sequences of length nLen
            Array<int> aSeqId;
            for (int i = 0; i < pCorpus->GetNum(); i++) {
                pCorpus->GetSeq(i, aSeq);
                int nSeqLen = aSeq.GetNum();
                if (nLen == m_pModel->GetMaxLen()) {
                    if (nSeqLen < nLen)
                        continue;
                }
                else {
                    if (nSeqLen != nLen)
                        continue;
                }
                aSeqId.Add(i);
            }

#pragma omp parallel for firstprivate(aSeq)
            for (int k = 0; k < aSeqId.GetNum(); k++)
            {
                pCorpus->GetSeq(aSeqId[k], aSeq);

                Seq seq;
                seq.Set(aSeq, m_pModel->m_pVocab);
                m_pModel->FeatCount(seq, matExp_l[omp_get_thread_num()].GetBuf());
            }

            if (aSeqId.GetNum() > 0) {
                vExp_l.Fill(0);
                for (int t = 0; t < nThread; t++) {
                    vExp_l += matExp_l[t];
                }
                vExp_l /= aSeqId.GetNum();
            }
            else {
                vExp_l.Fill(0);
            }

            for (int i = 0; i < m_pModel->GetParamNum(); i++)
                vExpf2[i] -= pi[nLen] * pow(vExp_l[i], 2);

            lout.Progress(nLen);
        }

        int nZero = 0;
        int nDownGap = 0;
        double dMinVarOverZero = 100;
        for (int i = 0; i < m_nParamNum; i++) {
            if (vExpf2[i] == 0)
                nZero++;
            else
                dMinVarOverZero = min(vExpf2[i], dMinVarOverZero);

            if (vExpf2[i] < m_var_gap) {
                nDownGap++;
                vExpf2[i] = m_var_gap;
            }
        }
        if (nZero > 0) {
            lout_warning("[EmpiricalVar] Exist zero expectation (zero-num=" << nZero << ")");
        }
        lout << "[EmpiricalVar] the number of ( var < gap=" << m_var_gap << " ) is " << nDownGap << endl;
        lout << "[EmpiricalVar] min variance value (over 0) is " << dMinVarOverZero << endl;


        vVar.Copy(vExpf2);

        // Write
        if (m_fmean.Good()) {
            lout << "Write Empirical Mean ..." << endl;
            Vec<PValue> aLogExp(m_vEmpiricalExp.GetSize());
            for (int i = 0; i < aLogExp.GetSize(); i++) aLogExp[i] = log(m_vEmpiricalExp[i]);
            m_pModel->m_pFeat->WriteT(m_fmean, aLogExp.GetBuf());
            //m_fmean.PrintArray("%f\n", m_vEmpiricalExp.GetBuf(), m_vEmpiricalExp.GetSize());
        }
        if (m_fvar.Good()) {
            lout << "Write Empirical Var ..." << endl;
            Vec<PValue> aLogVar(vVar.GetSize());
            for (int i = 0; i < vVar.GetSize(); i++) aLogVar[i] = log(vVar[i]);
            m_pModel->m_pFeat->WriteT(m_fvar, aLogVar.GetBuf());
            //m_fvar.PrintArray("%f\n", vVar.GetBuf(), vVar.GetSize());
        }
    }

    void SAfunc::GetSampleExp(VecShell<double> &vExp, VecShell<double> &vExp2, VecShell<double> &vLen)
    {
        int nThread = omp_get_max_threads();
        m_matSampleExp.Reset(nThread, m_pModel->GetParamNum());
        m_matSampleExp2.Reset(nThread, m_pModel->GetParamNum());
        m_matSampleLen.Reset(nThread, m_pModel->GetMaxLen() + 1);
        //Vec<int> vNum(nThread); // record the sample number of each thread

        m_matSampleExp.Fill(0);
        m_matSampleExp2.Fill(0);
        m_matSampleLen.Fill(0);
        //vNum.Fill(0);


        // init the sequence
        if (m_threadSeq.GetNum() != nThread) {
            for (int i = 0; i < nThread; i++) {
                m_threadSeq[i] = new Seq;
                RandSeq(*m_threadSeq[i]);
            }
        }

        /* sampling */
        //lout.Progress(0, true, m_nMiniBatchSample-1, "[SA] sample:");
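        // Each thread advances its own persistent chain (Model::Sample) and
        // accumulates feature counts weighted by m_trainPi[l] / m_pi[l], so the
        // averages below estimate expectations under the training length
        // distribution although sampling uses the smoothed m_samplePi.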
#pragma omp parallel for
        for (int sample = 0; sample < m_nMiniBatchSample; sample++)
        {
            int tid = omp_get_thread_num();
            Vec<double> aCurCount(m_pModel->GetParamNum());
            m_pModel->Sample(*m_threadSeq[tid]);

            int nLen = min(m_pModel->GetMaxLen(), m_threadSeq[tid]->GetLen());

            m_pModel->FeatCount(*m_threadSeq[tid], aCurCount.GetBuf(), m_trainPi[nLen] / m_pModel->m_pi[nLen]);
            //m_pModel->FeatCount(*m_threadSeq[tid], m_matSampleExp[tid].GetBuf(), m_trainPi[nLen] / m_pModel->m_pi[nLen]);
            for (int i = 0; i < aCurCount.GetSize(); i++) {
                m_matSampleExp[tid][i] += aCurCount[i];
                m_matSampleExp2[tid][i] += pow(aCurCount[i], 2);
            }
            m_matSampleLen[tid][nLen]++;

#pragma omp critical
            {
                if (m_fsamp.Good()) {
                    m_threadSeq[tid]->Print(m_fsamp);
                }
                //lout.Progress();
            }

        }
        lout << " len-jump acc-rate=";
        lout_variable_rate(m_pModel->m_nLenJumpAccTimes, m_pModel->m_nLenJumpTotalTime);
        m_pModel->m_nLenJumpAccTimes = 0;
        m_pModel->m_nLenJumpTotalTime = 0;
        lout << " class-propose acc-rate=";
        lout_variable_rate(m_pModel->m_nSampleHAccTimes, m_pModel->m_nSampleHTotalTimes);
        m_pModel->m_nSampleHAccTimes = 0;
        m_pModel->m_nSampleHTotalTimes = 0;
        lout << endl;



        // summarization
        vExp.Fill(0);
        vExp2.Fill(0);
        vLen.Fill(0);
        for (int t = 0; t < nThread; t++) {
            vExp += m_matSampleExp[t];
            vExp2 += m_matSampleExp2[t];
            vLen += m_matSampleLen[t];
        }
        m_vAllSampleLenCount += vLen;
        m_vCurSampleLenCount.Copy(vLen);
        m_nTotalSample += m_nMiniBatchSample;

        vExp /= m_nMiniBatchSample;
        vExp2 /= m_nMiniBatchSample;
        vLen /= m_nMiniBatchSample;
    }

    void SAfunc::IterEnd(double *pFinalParams)
    {
        SetParam(pFinalParams);
        // set the pi as the len-prob in the training set.
        m_pModel->SetPi(m_trainPi.GetBuf());
    }
    void SAfunc::WriteModel(int nEpoch)
    {
        String strTempModel;
        String strName = String(m_pathOutputModel).FileName();
#ifdef __linux
        strTempModel.Format("%s.n%d.model", strName.GetBuffer(), nEpoch);
#else
        strTempModel.Format("%s.n%d.model", strName.GetBuffer(), nEpoch);
#endif
        // set the pi as the pi of the training set
        m_pModel->SetPi(m_trainPi.GetBuf());
        m_pModel->WriteT(strTempModel);
        m_pModel->SetPi(m_samplePi.GetBuf());
    }
    void SAfunc::GetGradient(double *pdGradient)
    {
        int nWeightNum = m_pModel->GetParamNum();
        m_vSampleExp.Reset(nWeightNum);
        m_vSampleExp2.Reset(nWeightNum);
        m_vSampleLen.Reset(m_pModel->GetMaxLen() + 1);


        /* get the sample expectation */
        GetSampleExp(m_vSampleExp, m_vSampleExp2, m_vSampleLen);

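        // Gradient w.r.t. lambda: empirical expectation minus model (sample)
        // expectation, with L2 regularization. In the default branch each
        // component is additionally divided by its empirical variance, which
        // acts as a diagonal preconditioner (a per-feature learning rate).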
#if defined(_Adam)
        for (int i = 0; i < nWeightNum; i++) {
            pdGradient[i] = m_vEmpiricalExp[i] - m_vSampleExp[i]
                - m_fRegL2 * m_pModel->m_value[i]; // the L2 regularization
        }

#elif defined(_Hession)
        for (int i = 0; i < nWeightNum; i++) {
            pdGradient[i] = m_vEmpiricalExp[i] - m_vSampleExp[i]
                - m_fRegL2 * m_pModel->m_value[i]; // the L2 regularization
        }
#else
        /* Calculate the gradient */
        for (int i = 0; i < nWeightNum; i++) {
            pdGradient[i] = (
                m_vEmpiricalExp[i] - m_vSampleExp[i]
                - m_fRegL2 * m_pModel->m_value[i] // the L2 regularization
                ) / (m_vEmpiricalVar[i] + m_fRegL2); // rescaled by variance
        }
#endif



        /*
        Zeta update
        */
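        // Gradient w.r.t. zeta_l: the relative frequency of sampled length l
        // divided by its design probability pi_l. When the zeta estimates are
        // exact this ratio is the same for every l, so the update drives the
        // sampled length distribution towards pi.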
        for (int l = 0; l <= m_pModel->GetMaxLen(); l++) {
            if (m_pModel->m_pi[l] > 0) {
                pdGradient[nWeightNum + l] = m_vSampleLen[l] / m_pModel->m_pi[l];
            }
            else {
                pdGradient[nWeightNum + l] = 0;
            }
        }


        if (m_fgrad.Good()) {
            m_fgrad.PrintArray("%f ", pdGradient, m_nParamNum);
            m_fgrad.Print("\n");
        }
        if (m_fexp.Good()) {
            m_fexp.PrintArray("%f ", m_vSampleExp.GetBuf(), m_vSampleExp.GetSize());
            m_fexp.Print("\n");
        }



    }
    int SAfunc::GetExtraValues(int t, double *pdValues)
    {
        int nValue = 0;

        // set the training pi
        m_pModel->SetPi(m_trainPi.GetBuf());

        Vec<Prob> samsZeta(m_pModel->m_zeta.GetSize());
        Vec<Prob> trueZeta(m_pModel->m_zeta.GetSize());
        //Vec<double> trueLogZ(m_pModel->m_logz.GetSize());
        samsZeta.Fill(0);
        trueZeta.Fill(0);
        samsZeta = m_pModel->m_zeta;

        /* calculate the p(v) */
        Vec<double> vLL;
        if (m_pCorpusTrain) {
            pdValues[nValue++] = -GetLL(m_pCorpusTrain, -1, &vLL);
            if (m_ftrainLL.Good()) {
                m_ftrainLL.Reopen("wt");
                m_ftrainLL.PrintArray("%f\n", vLL.GetBuf(), vLL.GetSize());
            }
        }
        if (m_pCorpusValid) {
            pdValues[nValue++] = -GetLL(m_pCorpusValid, -1, &vLL);
            if (m_fvallidLL.Good()) {
                m_fvallidLL.Reopen("wt");
                m_fvallidLL.PrintArray("%f\n", vLL.GetBuf(), vLL.GetSize());
            }
        }
        if (m_pCorpusTest) {
            pdValues[nValue++] = -GetLL(m_pCorpusTest, -1, &vLL);
            if (m_ftestLL.Good()) {
                m_ftestLL.Reopen("wt");
                m_ftestLL.PrintArray("%f\n", vLL.GetBuf(), vLL.GetSize());
            }
        }

        /* true Z_L to get the LL */
        if (m_pModel->m_pVocab->GetSize() < 100 && m_pModel->GetMaxOrder() < 4) {

            m_pModel->ExactNormalize(); // normalization
            trueZeta.Copy(m_pModel->m_zeta);
            if (m_pCorpusTrain) pdValues[nValue++] = -GetLL(m_pCorpusTrain);
            if (m_pCorpusValid) pdValues[nValue++] = -GetLL(m_pCorpusValid);
            if (m_pCorpusTest) pdValues[nValue++] = -GetLL(m_pCorpusTest);

            m_pModel->SetZeta(samsZeta.GetBuf());
        }


        /* output debug */
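        // The debug file gets one row per call: the sampled length frequencies
        // (current minibatch, then accumulated), the sampling pi, and the exact
        // vs. estimated zeta.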
        if (!m_fdbg.Good()) {
            m_fdbg.Open("SAfunc.dbg", "wt");
        }
        m_vAllSampleLenCount *= 1.0 / m_nTotalSample;
        m_vCurSampleLenCount *= 1.0 / m_nMiniBatchSample;
        m_fdbg.PrintArray("%f ", m_vCurSampleLenCount.GetBuf() + 1, m_vCurSampleLenCount.GetSize() - 1);
        m_fdbg.PrintArray("%f ", m_vAllSampleLenCount.GetBuf() + 1, m_vAllSampleLenCount.GetSize() - 1);
        m_fdbg.PrintArray("%f ", m_samplePi.GetBuf() + 1, m_samplePi.GetSize() - 1);
        m_fdbg.PrintArray("%f ", trueZeta.GetBuf() + 1, trueZeta.GetSize() - 1);
        m_fdbg.PrintArray("%f ", samsZeta.GetBuf() + 1, samsZeta.GetSize() - 1);
        m_fdbg.Print("\n");
        m_vAllSampleLenCount *= m_nTotalSample;
        m_vCurSampleLenCount *= m_nMiniBatchSample;

        m_pModel->SetPi(m_samplePi.GetBuf());

        return nValue;
    }

    void LearningRate::Reset(const char *pstr, int p_t0)
    {
        sscanf(pstr, "%lf,%lf", &tc, &beta);
        t0 = p_t0;
        //lout << "[Learning Rate] tc=" << tc << " beta=" << beta << " t0=" << t0 << endl;
    }
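    // Learning-rate schedule: gamma(t) = 1/(tc + t^beta) while t <= t0;
    // afterwards the decay switches to 1/(tc + t0^beta + (t - t0)), i.e. a
    // 1/t tail, the standard choice for stochastic-approximation step sizes.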
    double LearningRate::Get(int t)
    {
        double gamma;
        if (t <= t0) {
            gamma = 1.0 / (tc + pow(t, beta));
        }
        else {
            gamma = 1.0 / (tc + pow(t0, beta) + t - t0);
        }
        return gamma;
    }


    bool SAtrain::Run(const double *pInitParams /* = NULL */)
    {
        if (!m_pfunc) {
            lout_Solve << "m_pFunc == NULL" << endl;
            return false;
        }
        Clock ck;
        m_dSpendMinute = 0;

        SAfunc *pSA = (SAfunc*)m_pfunc;
        //int nIterPerEpoch = pSA->m_pCorpusTrain->GetNum() / pSA->m_nMiniBatchSample + 1;
        //lout_variable(nIterPerEpoch);


        double *pdCurParams = new double[m_pfunc->GetParamNum()];
        double *pdCurGradient = new double[m_pfunc->GetParamNum()];
        double *pdCurDir = new double[m_pfunc->GetParamNum()]; // current update direction
        double dCurValue = 0;
        double dExValues[Func::cn_exvalue_max_num];
        int nExValueNum;

        // if average
        bool bAvg = (m_nAvgBeg > 0);
        double *pdAvgParams = NULL;
        if (bAvg) {
            pdAvgParams = new double[m_pfunc->GetParamNum()];
        }



        for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
            pdCurParams[i] = (pInitParams) ? pInitParams[i] : 1;
        }
        memset(pdCurGradient, 0, sizeof(double)*m_pfunc->GetParamNum());
        memset(pdCurDir, 0, sizeof(double)*m_pfunc->GetParamNum());

        IterInit();
        m_pfunc->SetParam(pdCurParams);
        //pSA->WriteModel(0);

        // iteration begin
        lout_Solve << "************* Training Begin *****************" << endl;
        lout_Solve << "print-per-iter=" << m_nPrintPerIter << endl;
        lout.bOutputCmd() = false;
        ck.Begin();
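        // Main SA loop: each iteration sets the current parameters, draws a
        // minibatch of MCMC samples to estimate the gradient, then updates
        // (lambda, zeta) along the preconditioned direction with the learning
        // rates gamma_lambda and gamma_zeta.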
        for (m_nIterNum = m_nIterMin; m_nIterNum <= m_nIterMax; m_nIterNum++)
        {
            // epoch number
            m_fEpochNun = 1.0 * m_nIterNum * pSA->m_nMiniBatchSample / pSA->m_pCorpusTrain->GetNum();

            // set the parameters
            m_pfunc->SetParam(pdCurParams);
            // get the gradient
            m_pfunc->GetGradient(pdCurGradient);
            // get the function value
            dCurValue = m_pfunc->GetValue();
            // get the averaged parameters
            if (bAvg) {
                if (m_nIterNum <= m_nAvgBeg) {
                    memcpy(pdAvgParams, pdCurParams, sizeof(pdCurParams[0])*m_pfunc->GetParamNum());
                }
                else {
                    for (int i = 0; i < m_pfunc->GetParamNum(); i++) {
                        pdAvgParams[i] += (pdCurParams[i] - pdAvgParams[i]) / (m_nIterNum - m_nAvgBeg);
                    }
                }
            }

            // print
            if (m_nIterNum % m_nPrintPerIter == 0 || m_nIterNum == m_nIterMax)
            {
                m_dSpendMinute = ck.ToSecond(ck.Get()) / 60;
                bool bPrintCmd;

                bPrintCmd = lout.bOutputCmd();
                lout.bOutputCmd() = true;
                lout_Solve << "t=" << m_nIterNum;
                cout << setprecision(4) << setiosflags(ios::fixed);
                lout << " epoch=" << m_fEpochNun;
                cout << setprecision(2) << setiosflags(ios::fixed);
                lout << " time=" << m_dSpendMinute << "m";
                lout << (bAvg ? " [Avg]" : " ");
                lout.bOutputCmd() = bPrintCmd;


                // get the ex-values
                if (bAvg) pSA->SetParam(pdAvgParams);
                // This function will use AIS to normalize the model
                nExValueNum = pSA->GetExtraValues(m_nIterNum, dExValues);

                bPrintCmd = lout.bOutputCmd();
                lout.bOutputCmd() = true;
                lout << "ExValues={ ";
                cout << setprecision(2) << setiosflags(ios::fixed);
                for (int i = 0; i < nExValueNum; i++)
                    lout << dExValues[i] << " ";
                lout << "}" << endl;

                // write model
                if (m_aWriteAtIter.Find(m_nIterNum) != -1)
                    pSA->WriteModel(m_nIterNum);

                lout.bOutputCmd() = bPrintCmd;

                if (bAvg) pSA->SetParam(pdCurParams);
            }
            //lout.Progress(m_nIterNum % m_nPrintPerIter, true, m_nPrintPerIter - 1, "Train:");




            /* Stop Decision */
            if (StopDecision(m_nIterNum, dCurValue, pdCurGradient)) {
                break;
            }


            // update the learning rate gamma
            UpdateGamma(m_nIterNum);

            // update the direction
            UpdateDir(pdCurDir, pdCurGradient, pdCurParams);

            // update the parameters
            Update(pdCurParams, pdCurDir, 0);
        }

        lout_Solve << "************* Training End *****************" << endl;
        lout_Solve << "iter=" << m_nIterNum << " time=" << m_dSpendMinute << "m" << endl;
        lout_Solve << "********************************************" << endl;

        // do something at the end of the iteration
        if (bAvg) pSA->IterEnd(pdAvgParams);
        else pSA->IterEnd(pdCurParams);

        SAFE_DELETE_ARRAY(pdCurGradient);
        SAFE_DELETE_ARRAY(pdCurDir);
        SAFE_DELETE_ARRAY(pdCurParams);
        SAFE_DELETE_ARRAY(pdAvgParams);
        return true;
    }

    void SAtrain::UpdateGamma(int nIterNum)
    {
        m_gamma_lambda = m_gain_lambda.Get(nIterNum);
        m_gamma_zeta = m_gain_zeta.Get(nIterNum);

        lout_Solve << "g_lambda=" << m_gamma_lambda
            << " g_zeta=" << m_gamma_zeta
            << endl;
    }
    void SAtrain::UpdateDir(double *pDir, double *pGradient, const double *pdParam)
    {
        /* using the momentum */
        // pDir holds the final update step (learning rate times gradient)

        SAfunc* pSA = (SAfunc*)m_pfunc;
        int nWeightNum = pSA->GetFeatNum();
        int nZetaNum = pSA->GetZetaNum();

        lout_assert(nWeightNum + nZetaNum == m_pfunc->GetParamNum());



#if defined(_Adam)
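        // Adam update: exponential moving averages of the gradient (adam_m) and
        // squared gradient (adam_v), bias-corrected by 1/(1 - beta^t), with the
        // step scaled by adam_alpha / (sqrt(v_hat) + adam_sigma).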
        for (int i = 0; i < nWeightNum; i++) {
            double g = pGradient[i];
            adam_m[i] = adam_beta1 * adam_m[i] + (1 - adam_beta1) * g;
            adam_v[i] = adam_beta2 * adam_v[i] + (1 - adam_beta2) * g*g;
            double m_hat = adam_m[i] / (1 - pow(adam_beta1, m_nIterNum));
            double v_hat = adam_v[i] / (1 - pow(adam_beta2, m_nIterNum));
            pDir[i] = adam_alpha * m_hat / (sqrt(v_hat) + adam_sigma);
        }
#elif defined(_Hession)
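        // Diagonal Newton-like update: for an exponential-family model the
        // second derivative of log Z w.r.t. lambda_i is Var[f_i], estimated
        // here by E[f_i^2] - E[f_i]^2 from the current samples and smoothed
        // with a running average before dividing the gradient.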
        for (int i = 0; i < nWeightNum; i++) {
            double h = pSA->m_vSampleExp2[i] - pow(pSA->m_vSampleExp[i], 2) + pSA->m_fRegL2;
            m_avgHes[i] += m_gamma_lambda * (h - m_avgHes[i]);
            pDir[i] = m_gamma_lambda * pGradient[i] / max(1e-4, m_avgHes[i]);
        }
#else
        // update lambda
        for (int i = 0; i < nWeightNum; i++) {
            //m_avgGrad[i] = 0.9*m_avgGrad[i] + 0.1*pGradient[i];
            //pDir[i] = m_gamma_lambda * m_avgGrad[i];
            pDir[i] = m_gamma_lambda * pGradient[i];
        }

        if (m_dir_gap > 0) {
            int n_dgap_cutnum = 0;
            for (int i = 0; i < nWeightNum; i++) {
                if (pDir[i] > m_dir_gap) {
                    pDir[i] = m_dir_gap;
                    n_dgap_cutnum++;
                }
                else if (pDir[i] < -m_dir_gap) {
                    pDir[i] = -m_dir_gap;
                    n_dgap_cutnum++;
                }
            }
            lout_variable_precent(n_dgap_cutnum, nWeightNum);
        }
#endif


        // update zeta
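        // The zeta step size is capped: gamma_zeta is limited by
        // maxlen * pi_l, so lengths with small design probability pi_l (hence
        // few samples and a noisy gradient) receive proportionally smaller
        // updates.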
        for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
            // limit the update of zeta
            pDir[i] = min(m_gamma_zeta, 1.0*pSA->m_pModel->GetMaxLen()*pSA->m_pModel->m_pi[i - nWeightNum]) * pGradient[i];
        }

    }
    void SAtrain::Update(double *pdParam, const double *pdDir, double dStep)
    {
        // pdDir is the update step computed in UpdateDir

        SAfunc* pSA = (SAfunc*)m_pfunc;
        int nWeightNum = pSA->GetFeatNum();
        int nZetaNum = pSA->GetZetaNum();

        //lout_assert(nWeightNum == nNgramFeatNum + nVHsize + nCHsize + nHHsize);

        // update lambda
        if (m_bUpdate_lambda) {
            for (int i = 0; i < nWeightNum; i++) {
                pdParam[i] += pdDir[i];
            }
        }



        // update zeta
        if (m_bUpdate_zeta) {
            for (int i = nWeightNum; i < nWeightNum + nZetaNum; i++) {
                pdParam[i] += pdDir[i];
            }
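            // zeta_l is defined as log(Z_l / Z_1), so it is only meaningful
            // relative to this anchor; subtracting zeta[1] keeps zeta[1] at 0.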
            double zeta1 = pdParam[nWeightNum + 1];
            for (int i = nWeightNum + 1; i < nWeightNum + nZetaNum; i++) {
                pdParam[i] -= zeta1; // minus the zeta[1]
            }
        }


    }

#define GAIN_INFO(g) lout<<" "#g"\ttc="<<g.tc<<" beta="<<g.beta<<" t0="<<g.t0<<endl;
    void SAtrain::PrintInfo()
    {
        lout << "[SATrain] *** Info: ***" << endl;
        GAIN_INFO(m_gain_lambda);
        GAIN_INFO(m_gain_zeta);
        lout << " " << "m_dir_gap=" << m_dir_gap << endl;
        lout << "[SATrain] *** [End] ***" << endl;
    }
}
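
// A minimal usage sketch (hypothetical driver, not part of this file; the
// constructor signatures below are assumptions, only Reset()/Run() appear above):
//
//   trf::SAfunc func;
//   func.Reset(&model, pTrainCorpus, pValidCorpus, pTestCorpus, 100); // minibatch = 100
//   trf::SAtrain solver(&func);                  // assumed: solver bound to the SA function
//   solver.m_gain_lambda.Reset("10,0.6", 500);   // tc=10, beta=0.6, switch to 1/t decay at t0=500
//   solver.m_gain_zeta.Reset("10,0.6", 500);
//   solver.Run(NULL);                            // NULL initializes all parameters to 1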
Run iteration. input the init-parameters.