BFGSOpt.h

//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <math.h>
#include <RDGeneral/Invariant.h>
// RDKit::Snapshot / RDKit::SnapshotVect and boost::shared_array are used
// below; these two includes supply them:
#include <GraphMol/Trajectory/Snapshot.h>
#include <boost/shared_array.hpp>
#include <cstring>
#include <vector>
#include <algorithm>

namespace BFGSOpt {
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;  //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
   See Numerical Recipes in C, Section 9.7 for a description of the
   algorithm.

   \param dim     the dimensionality of the space.
   \param oldPt   the current position, as an array.
   \param oldVal  the current function value.
   \param grad    the value of the function gradient at oldPt
   \param dir     the minimization direction
   \param newPt   used to return the final position
   \param newVal  used to return the final function value
   \param func    the function to minimize
   \param maxStep the maximum allowable step size
   \param resCode used to return the results of the search.

   Possible values for resCode on return are:
    - 0: success
    - 1: the stepsize got too small. This probably indicates success.
    - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) sum += dir[i] * dir[i];
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) dir[i] *= maxStep / sum;
  }

  // make sure our direction has at least some component along
  // -grad
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  // find the largest relative component of the step and use it to set
  // the smallest lambda we're willing to take:
  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) test = temp;
  }

  lambdaMin = MOVETOL / test;
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    // std::cerr << "\t" << it << " : " << lambda << " " << lambdaMin
    //           << std::endl;
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
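    // Backtracking strategy (Numerical Recipes, Sec. 9.7): with
    // g(lambda) = func(oldPt + lambda*dir), we know g(0) = oldVal,
    // g'(0) = slope, and the just-computed g(lambda) = newVal. The first
    // backtrack minimizes a quadratic model fit to those values; later
    // backtracks fit a cubic through the two most recent (lambda, g)
    // pairs, capping the new lambda at half the previous one. In all
    // cases the new lambda is kept at no less than 0.1 of the old one.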
    // if we made it this far, we need to backtrack:
    if (it == 0) {
      // it's the first step:
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // no acceptable step was found; return the starting point:
  // std::cerr << " RETURN AT END: " << it << " " << resCode << std::endl;
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
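
// Illustrative sketch (not part of the original header): calling
// linearSearch() directly on a simple quadratic "energy". ExampleEnergy and
// exampleLinearSearch are hypothetical names introduced only for this
// example.
struct ExampleEnergy {
  // E(p) = p0^2 + p1^2, minimized at the origin
  double operator()(double *pt) const {
    return pt[0] * pt[0] + pt[1] * pt[1];
  }
};

inline int exampleLinearSearch() {
  double oldPt[2] = {1.0, 1.0};
  double grad[2] = {2.0, 2.0};   // analytic gradient of E at oldPt
  double dir[2] = {-2.0, -2.0};  // search downhill along -grad
  double newPt[2];
  double newVal;
  int resCode;
  ExampleEnergy func;
  linearSearch(2, oldPt, func(oldPt), grad, dir, newPt, newVal, func,
               MAXSTEP, resCode);
  return resCode;  // 0: converged; 1: step got too small; -1: bad direction
}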

// free the heap allocations made by minimize(); called before each return:
#define CLEANUP()        \
  {                      \
    delete[] grad;       \
    delete[] dGrad;      \
    delete[] hessDGrad;  \
    delete[] newPos;     \
    delete[] xi;         \
    delete[] invHessian; \
  }
//! Do a BFGS minimization of a function.
/*!
   See Numerical Recipes in C, Section 10.7 for a description of the
   algorithm.

   \param dim      the dimensionality of the space.
   \param pos      the starting position, as an array.
   \param gradTol  tolerance for gradient convergence
   \param numIters used to return the number of iterations required
   \param funcVal  used to return the final function value
   \param func     the function to minimize
   \param gradFunc calculates the gradient of func
   \param snapshotFreq frequency (in iterations) at which snapshots of the
                       minimization trajectory are stored; defaults to 0
                       (no snapshots stored)
   \param snapshotVect pointer to a std::vector<Snapshot> object that will
                       receive the coordinates and energies every
                       snapshotFreq steps; defaults to NULL (no snapshots
                       stored)
   \param funcTol  tolerance for changes in the function value for
                   convergence (note: currently unused)
   \param maxIts   maximum number of iterations allowed

   \return a flag indicating success (or type of failure). Possible values
   are:
    - 0: success
    - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, unsigned int snapshotFreq,
             RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  double sum, maxStep, fp;

  double *grad, *dGrad, *hessDGrad;
  double *newPos, *xi;
  double *invHessian;

  grad = new double[dim];
  dGrad = new double[dim];
  hessDGrad = new double[dim];
  newPos = new double[dim];
  xi = new double[dim];
  invHessian = new double[dim * dim];
  snapshotFreq = std::min(snapshotFreq, maxIts);

  // evaluate the function and gradient in our current position:
  fp = func(pos);
  gradFunc(pos, grad);

  sum = 0.0;
  memset(invHessian, 0, dim * dim * sizeof(double));
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to be identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; iter++) {
    numIters = iter;
    int status;

    // do the line search:
    linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
                 status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) test = temp;
      dGrad[i] = grad[i];
    }
    // std::cerr << " iter: " << iter << " " << fp << " " << test << " "
    //           << TOLX << std::endl;
    if (test < TOLX) {
      // converged on the position change:
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = NULL;
      }
      CLEANUP();
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad);

    // is the gradient converged?
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      // store the change in the gradient:
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    // std::cerr << " " << gradScale << " " << test << " " << gradTol
    //           << std::endl;
    if (test < gradTol) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = NULL;
      }
      CLEANUP();
      return 0;
    }

    // compute invHessian * dGrad, along with the dot products used in the
    // update below:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
#if 0
      unsigned int itab = i * dim;
      hessDGrad[i] = 0.0;
      for (unsigned int j = 0; j < dim; j++) {
        hessDGrad[i] += invHessian[itab + j] * dGrad[j];
      }
#else
      // pointer-walking version of the same matrix-vector product:
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad;
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
#endif
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
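    // What follows is the BFGS update of the inverse Hessian, H
    // (Numerical Recipes, Sec. 10.7). Writing the step as s = xi and the
    // gradient change as y = dGrad, the update is
    //   H <- H + (s s^T)/(s.y) - (H y y^T H)/(y.H y) + (y.H y) u u^T
    // with u = s/(s.y) - (H y)/(y.H y). In the code below: fac holds s.y,
    // fae holds y.H y, hessDGrad holds H y, and dGrad is overwritten with
    // u. The update is skipped unless fac is sufficiently positive, which
    // keeps H positive definite.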
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      // store u in dGrad:
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
#if 0
        for (unsigned int j = i; j < dim; j++) {
          invHessian[itab + j] += fac * xi[i] * xi[j] -
                                  fad * hessDGrad[i] * hessDGrad[j] +
                                  fae * dGrad[i] * dGrad[j];
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#else
        // pointer-walking version of the same symmetric update:
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#endif
      }
    }
    // generate the next direction to move: xi = -invHessian * grad
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
#if 0
      for (unsigned int j = 0; j < dim; j++) {
        xi[i] -= invHessian[itab + j] * grad[j];
      }
#else
      double &pxi = xi[i], *ivh = &(invHessian[itab]), *gj = grad;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
#endif
    }
    if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
      RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
      snapshotVect->push_back(s);
      newPos = new double[dim];
    }
  }
  CLEANUP();
  return 1;
}

//! Do a BFGS minimization of a function.
/*!
   \param dim      the dimensionality of the space.
   \param pos      the starting position, as an array.
   \param gradTol  tolerance for gradient convergence
   \param numIters used to return the number of iterations required
   \param funcVal  used to return the final function value
   \param func     the function to minimize
   \param gradFunc calculates the gradient of func
   \param funcTol  tolerance for changes in the function value for
                   convergence
   \param maxIts   maximum number of iterations allowed

   \return a flag indicating success (or type of failure). Possible values
   are:
    - 0: success
    - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  // convenience overload: forwards to the version above without collecting
  // snapshots
  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0,
                  NULL, funcTol, maxIts);
}

}  // namespace BFGSOpt
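
// Illustrative sketch (not part of the original header): minimizing a simple
// quadratic with BFGSOpt::minimize(). ExampleQuadratic, ExampleGradient and
// exampleMinimize are hypothetical names introduced only for this example.
namespace BFGSOptExample {
struct ExampleQuadratic {
  // E(p) = (p0 - 1)^2 + (p1 + 2)^2, minimized at (1, -2)
  double operator()(double *pt) const {
    return (pt[0] - 1.0) * (pt[0] - 1.0) + (pt[1] + 2.0) * (pt[1] + 2.0);
  }
};
struct ExampleGradient {
  // writes dE/dp into grad; the return value is used by minimize() as a
  // scale factor in its gradient-convergence test
  double operator()(double *pt, double *grad) const {
    grad[0] = 2.0 * (pt[0] - 1.0);
    grad[1] = 2.0 * (pt[1] + 2.0);
    return 1.0;
  }
};

inline int exampleMinimize() {
  double pos[2] = {5.0, 5.0};  // starting position
  unsigned int numIters;
  double funcVal;
  int res = BFGSOpt::minimize(2, pos, 1e-4, numIters, funcVal,
                              ExampleQuadratic(), ExampleGradient());
  // on success res == 0, pos is near (1, -2), and funcVal is near 0
  return res;
}
}  // namespace BFGSOptExample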