MUSE Pipeline Reference Manual  0.18.5
muse_optimize.c
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set sw=2 sts=2 et cin: */
3 /*
4  *
5  * This file is part of the MUSE Instrument Pipeline
6  * Copyright (C) 2005-2011 European Southern Observatory
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 
26 #include <stdlib.h>
27 #include <math.h>
28 #include <float.h>
29 #include <string.h>
30 #ifndef _OPENMP
31 #define omp_get_max_threads() 1
32 #else
33 #include <omp.h>
34 #endif
35 
36 /*----------------------------------------------------------------------------*
37  The following lines come from lmmin.h.
38  *----------------------------------------------------------------------------*/
39 /* User-supplied subroutines. */
40 
41 /* Compact high-level interface. */
42 
43 /* Collection of control parameters. */
44 typedef struct {
45  double ftol; /* relative error desired in the sum of squares. */
46  double xtol; /* relative error between last two approximations. */
47  double gtol; /* orthogonality desired between fvec and its derivs. */
48  double epsilon; /* step used to calculate the jacobian. */
49  double stepbound; /* initial bound to steps in the outer loop. */
50  double fnorm; /* norm of the residue vector fvec. */
51  int maxcall; /* maximum number of iterations. */
52  int nfev; /* actual number of iterations. */
53  int info; /* status of minimization. */
54 } lm_control_type;
55 
56 /* Initialize control parameters with default values. */
57 static void lm_initialize_control(lm_control_type * control);
58 
59 /* Refined calculation of Eucledian norm, typically used in printout routine. */
60 static double lm_enorm(int, double *);
61 
62 /* The actual minimization. */
63 static void lm_minimize(int m_dat, int n_par, double *par,
64  void (*evaluate) (double *par, int m_dat, double *fvec,
65  void *data, int *info),
66  void (*printout) (const char *func, int n_par,
67  double *par, int m_dat,
68  double *fvec, void *data, int iflag,
69  int iter, int nfev),
70  void *data, lm_control_type * control);
71 
72 
73 /* Legacy low-level interface. */
74 
75 /* Alternative to lm_minimize, allowing full control, and read-out
76  of auxiliary arrays. For usage, see implementation of lm_minimize. */
77 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
78  double xtol, double gtol, int maxfev, double epsfcn,
79  double *diag, int mode, double factor, int *info,
80  int *nfev, double *fjac, int *ipvt, double *qtf,
81  double *wa1, double *wa2, double *wa3, double *wa4,
82  void (*evaluate) (double *par, int m_dat, double *fvec,
83  void *data, int *info),
84  void (*printout) (const char *func, int n_par,
85  double *par, int m_dat,
86  double *fvec, void *data, int iflag,
87  int iter, int nfev),
88  void *data);
89 
90 /* static const char *lm_infmsg[]; */
91 /* static const char *lm_shortmsg[]; */
92 /*---------------------------------------------------------------------------*
93  End of lmmin.h.
94  *---------------------------------------------------------------------------*/
95 
96 /*---------------------------------------------------------------------------*
97  Definition of high level CPL interface.
98  *---------------------------------------------------------------------------*/
99 
100 #include "muse_optimize.h"
101 #include "muse_utils.h"
107 typedef struct {
109  int npar;
110  void *data;
111  cpl_error_code retval;
112 } muse_cpl_optimize_eval_struct;
113 
120 static void
121 lm_evaluate_cpl (double *par, int m_dat, double *fvec, void *data, int *info) {
122  muse_cpl_optimize_eval_struct *s = data;
123 
124  cpl_array *cpl_par = cpl_array_wrap_double(par, s->npar);
125  cpl_array *cpl_fvec = cpl_array_wrap_double(fvec, m_dat);
126 
127  s->retval = (*(s->func))(s->data, cpl_par, cpl_fvec);
128 
129  cpl_array_unwrap(cpl_par);
130  cpl_array_unwrap(cpl_fvec);
131 
132  if (s->retval != CPL_ERROR_NONE) {
133  *info = -1;
134  }
135 }
136 
137 static const cpl_error_code error_code_tbl[] = { // see also lm_infmsg
138  CPL_ERROR_ILLEGAL_INPUT,
139  CPL_ERROR_NONE,
140  CPL_ERROR_NONE,
141  CPL_ERROR_NONE,
142  CPL_ERROR_SINGULAR_MATRIX,
143  CPL_ERROR_CONTINUE,
144  CPL_ERROR_CONTINUE,
145  CPL_ERROR_CONTINUE,
146  CPL_ERROR_CONTINUE,
147  CPL_ERROR_ILLEGAL_OUTPUT,
148  CPL_ERROR_NONE,
149 };
150 
151 static const char *lm_infmsg[] = { /* from lmmin.c */
152  "fatal coding error (improper input parameters)",
153  "success (the relative error in the sum of squares is at most tol)",
154  "success (the relative error between x and the solution is at most tol)",
155  "success (both errors are at most tol)",
156  "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
157  "timeout (number of calls to fcn has reached maxcall*(n+1))",
158  "failure (ftol<tol: cannot reduce sum of squares any further)",
159  "failure (xtol<tol: cannot improve approximate solution any further)",
160  "failure (gtol<tol: cannot improve approximate solution any further)",
161  "exception (not enough memory)",
162  "exception (break requested within function evaluation)"
163 };
164 
177 static void
178 muse_cpl_optimize_print(const char *func, int n_par, double *par,
179  int m_dat, double *fvec,
180  void *data, int iflag, int iter, int nfev) {
181  if (iflag == 2) {
182  cpl_msg_debug(func, "trying step in gradient direction");
183  } else if (iflag == 1) {
184  cpl_msg_debug(func, "determining gradient (iteration %d)", iter);
185  } else if (iflag == 0) {
186  cpl_msg_debug(func, "starting minimization");
187  } else if (iflag == -1) {
188  cpl_msg_debug(func, "terminated after %d evaluations", nfev);
189  }
190 
191  char *parstring = cpl_calloc(n_par * 15 + 30, sizeof(char));
192  snprintf(parstring, 5, "par:");
193  int i;
194  for (i = 0; i < n_par; ++i) {
195  snprintf(parstring + strlen(parstring), 15, " %7.3g", par[i]);
196  }
197  snprintf(parstring + strlen(parstring), 25, " => norm: %7g",
198  lm_enorm(m_dat, fvec));
199  cpl_msg_debug(func, "%s", parstring);
200  cpl_free(parstring);
201 
202 #if 0 /* still not adopted */
203  double f, y, t;
204  muse_cpl_optimize_eval_struct *mydata
205  = (muse_cpl_optimize_eval_struct *) data;
206 
207  if (iflag == -1) {
208  cpl_msg_debug(func, " fitting data as follows:", mydata);
209  for (i = 0; i < m_dat; ++i) {
210  t = (mydata->tvec)[i];
211  y = (mydata->yvec)[i];
212  f = mydata->f(t, par);
213  cpl_msg_debug(func, " t[%2d]=%12g y=%12g fit=%12g residue=%12g",
214  i, t, y, f, y - f);
215  }
216  }
217 #else /* just to silence "unused parameter" warning: */
218  UNUSED_ARGUMENT(data);
219 #endif
220 }
221 
251 cpl_error_code
252 muse_cpl_optimize_lvmq(void *aData, cpl_array *aPar, int aSize,
253  muse_cpl_evaluate_func *aFunction,
255 
256  cpl_ensure_code(aPar != NULL, CPL_ERROR_NULL_INPUT);
257  int npars = cpl_array_get_size(aPar);
258  cpl_ensure_code(npars > 0, CPL_ERROR_ILLEGAL_INPUT);
259  cpl_ensure_code(aSize > 0, CPL_ERROR_ILLEGAL_INPUT);
260  cpl_ensure_code(aFunction != NULL, CPL_ERROR_NULL_INPUT);
261 
262  muse_cpl_optimize_eval_struct s = {
263  aFunction,
264  npars,
265  aData,
266  CPL_ERROR_NONE
267  };
268 
269  void (*debug_func) (const char *, int, double *, int, double *, void *,
270  int, int, int) = NULL;
271  lm_control_type control;
272  lm_initialize_control(&control);
273  if (aCtrl != NULL) {
274  if (aCtrl->maxcall > 0) {
275  control.maxcall = aCtrl->maxcall;
276  }
277  if (aCtrl->ftol > 0) {
278  control.ftol = aCtrl->ftol;
279  }
280  if (aCtrl->xtol > 0) {
281  control.xtol = aCtrl->xtol;
282  }
283  if (aCtrl->gtol > 0) {
284  control.gtol = aCtrl->gtol;
285  }
286  if (aCtrl->debug == CPL_TRUE) {
287  debug_func = muse_cpl_optimize_print;
288  }
289  }
290 
291  double timeinit = cpl_test_get_walltime();
292  double cpuinit = cpl_test_get_cputime();
293  lm_minimize(aSize,
294  cpl_array_get_size(aPar),
295  cpl_array_get_data_double(aPar),
296  lm_evaluate_cpl,
297  debug_func,
298  &s, &control);
299 
300  double timefini = cpl_test_get_walltime();
301  double cpufini = cpl_test_get_cputime();
302 
303  cpl_msg_debug(__func__, "Minimizing finished after %i steps: %s",
304  control.nfev,
305  lm_infmsg[control.info]);
306 
307  cpl_msg_debug(__func__, "processing time %.3fs (%.3fs CPU, %d CPUs)",
308  timefini - timeinit, cpufini - cpuinit, omp_get_max_threads());
309 
310 
311  cpl_error_code res = error_code_tbl[control.info];
312 
313  cpl_ensure_code(res == CPL_ERROR_NONE, res);
314 
315  return s.retval;
316 }
317 
320 /*----------------------------------------------------------------------------*
321  This comes from lmmin.c.
322  *----------------------------------------------------------------------------*/
323 /*
324  * Project: LevenbergMarquardtLeastSquaresFitting
325  *
326  * File: lmmin.c
327  *
328  * Contents: Levenberg-Marquardt core implementation,
329  * and simplified user interface.
330  *
331  * Authors: Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
332  * (lmdif and other routines from the public-domain library
333  * netlib::minpack, Argonne National Laboratories, March 1980);
334  * Steve Moshier (initial C translation);
335  * Joachim Wuttke (conversion into C++ compatible ANSI style,
336  * corrections, comments, wrappers, hosting).
337  *
338  * Homepage: www.messen-und-deuten.de/lmfit
339  *
340  * Licence: Public domain.
341  *
342  * Make: For instance: gcc -c lmmin.c; ar rc liblmmin.a lmmin.o
343  */
344 
345 /* *********************** high-level interface **************************** */
346 
347 /* machine-dependent constants from float.h */
348 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
349 #define LM_DWARF DBL_MIN /* smallest nonzero number */
350 #if 0
351 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
352 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
353 #else
354 #define LM_SQRT_DWARF 1.e-150
355 #define LM_SQRT_GIANT 1.e150
356 #endif
357 #define LM_USERTOL 30*LM_MACHEP /* users are recommened to require this */
358 
359 /* If the above values do not work, the following seem good for an x86:
360  LM_MACHEP .555e-16
361  LM_DWARF 9.9e-324
362  LM_SQRT_DWARF 1.e-160
363  LM_SQRT_GIANT 1.e150
364  LM_USER_TOL 1.e-14
365  The following values should work on any machine:
366  LM_MACHEP 1.2e-16
367  LM_DWARF 1.0e-38
368  LM_SQRT_DWARF 3.834e-20
369  LM_SQRT_GIANT 1.304e19
370  LM_USER_TOL 1.e-14
371 */
372 
373 
374 static void lm_initialize_control( lm_control_type * control ) {
375  control->maxcall = 100;
376  control->epsilon = LM_USERTOL;
377  control->stepbound = 100.;
378  control->ftol = LM_USERTOL;
379  control->xtol = LM_USERTOL;
380  control->gtol = LM_USERTOL;
381 }
382 
383 static void lm_minimize( int m_dat, int n_par, double *par,
384  void (*evaluate) (double *par, int m_dat, double *fvec,
385  void *data, int *info),
386  void (*printout) (const char *func,
387  int n_par, double *par, int m_dat,
388  double *fvec, void *data, int iflag,
389  int iter, int nfev),
390  void *data, lm_control_type * control ) {
391 
392 /* allocate work space. */
393 
394  double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wa4;
395  int *ipvt;
396 
397  int n = n_par;
398  int m = m_dat;
399 
400  if ( (fvec = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
401  (diag = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
402  (qtf = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
403  (fjac = (double *) cpl_malloc(n*m*sizeof(double))) == NULL ||
404  (wa1 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
405  (wa2 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
406  (wa3 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
407  (wa4 = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
408  (ipvt = (int *) cpl_malloc(n * sizeof(int) )) == NULL ) {
409  control->info = 9;
410  return;
411  }
412 
413 /* perform fit. */
414 
415  control->info = 0;
416  control->nfev = 0;
417 
418  /* this goes through the modified legacy interface: */
419  lm_lmdif( m, n, par, fvec, control->ftol, control->xtol, control->gtol,
420  control->maxcall * (n + 1), control->epsilon, diag, 1,
421  control->stepbound, &(control->info),
422  &(control->nfev), fjac, ipvt, qtf, wa1, wa2, wa3, wa4,
423  evaluate, printout, data );
424 
425  if ( printout )
426  (*printout) (__func__, n, par, m, fvec, data, -1, 0, control->nfev);
427  control->fnorm = lm_enorm(m, fvec);
428  if ( control->info < 0 )
429  control->info = 10;
430 
431 /* clean up. */
432 
433  cpl_free(fvec);
434  cpl_free(diag);
435  cpl_free(qtf);
436  cpl_free(fjac);
437  cpl_free(wa1);
438  cpl_free(wa2);
439  cpl_free(wa3);
440  cpl_free(wa4);
441  cpl_free(ipvt);
442 } /* lm_minimize. */
443 
444 
445 /* the following messages are indexed by the variable info. */
446 
447 #if 0 /* moved definition of lm_infmsg to the beginning of the file */
448 static const char *lm_infmsg[] = {
449  "fatal coding error (improper input parameters)",
450  "success (the relative error in the sum of squares is at most tol)",
451  "success (the relative error between x and the solution is at most tol)",
452  "success (both errors are at most tol)",
453  "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
454  "timeout (number of calls to fcn has reached maxcall*(n+1))",
455  "failure (ftol<tol: cannot reduce sum of squares any further)",
456  "failure (xtol<tol: cannot improve approximate solution any further)",
457  "failure (gtol<tol: cannot improve approximate solution any further)",
458  "exception (not enough memory)",
459  "exception (break requested within function evaluation)"
460 };
461 
462 static const char *lm_shortmsg[] = {
463  "invalid input",
464  "success (f)",
465  "success (p)",
466  "success (f,p)",
467  "degenerate",
468  "call limit",
469  "failed (f)",
470  "failed (p)",
471  "failed (o)",
472  "no memory",
473  "user break"
474 };
475 #endif
476 
477 
478 /* ************************** implementation ******************************* */
479 
480 #define BUG 0
481 #if BUG
482 #include <stdio.h>
483 #endif
484 
485 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
486  double *rdiag, double *acnorm, double *wa);
487 static void lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
488  double *qtb, double *x, double *sdiag, double *wa);
489 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
490  double *qtb, double delta, double *par, double *x,
491  double *sdiag, double *wa1, double *wa2);
492 
493 #define MIN(a,b) (((a)<=(b)) ? (a) : (b))
494 #define MAX(a,b) (((a)>=(b)) ? (a) : (b))
495 #define SQR(x) (x)*(x)
496 
497 
498 /* the low-level legacy interface for full control. */
499 
674 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
675  double xtol, double gtol, int maxfev, double epsfcn,
676  double *diag, int mode, double factor, int *info,
677  int *nfev, double *fjac, int *ipvt, double *qtf,
678  double *wa1, double *wa2, double *wa3, double *wa4,
679  void (*evaluate) (double *par, int m_dat, double *fvec,
680  void *data, int *info),
681  void (*printout) (const char *func, int n_par, double *par, int m_dat,
682  double *fvec, void *data, int iflag,
683  int iter, int nfev),
684  void *data)
685 {
686  int iter, j;
687  double actred, delta, dirder, eps, fnorm, fnorm1, gnorm, par, pnorm,
688  prered, ratio, sum, temp, temp1, temp2, temp3, xnorm;
689  static double p1 = 0.1;
690  static double p0001 = 1.0e-4;
691 
692  *nfev = 0; /* function evaluation counter */
693  iter = 1; /* outer loop counter */
694  par = 0; /* levenberg-marquardt parameter */
695  delta = 0; /* to prevent a warning (initialization within if-clause) */
696  xnorm = 0; /* ditto */
697  temp = MAX(epsfcn, LM_MACHEP);
698  eps = sqrt(temp); /* for calculating the Jacobian by forward differences */
699 
700 /* lmdif: check input parameters for errors. */
701 
702  if ((n <= 0) || (m < n) || (ftol < 0.)
703  || (xtol < 0.) || (gtol < 0.) || (maxfev <= 0) || (factor <= 0.)) {
704  *info = 0; // invalid parameter
705  return;
706  }
707  if (mode == 2) { /* scaling by diag[] */
708  for (j = 0; j < n; j++) { /* check for nonpositive elements */
709  if (diag[j] <= 0.0) {
710  *info = 0; // invalid parameter
711  return;
712  }
713  }
714  }
715 #if BUG
716  printf("lmdif\n");
717 #endif
718 
719 /* lmdif: evaluate function at starting point and calculate norm. */
720 
721  *info = 0;
722  (*evaluate) (x, m, fvec, data, info); ++(*nfev);
723  if( printout )
724  (*printout) (__func__, n, x, m, fvec, data, 0, 0, *nfev);
725  if (*info < 0)
726  return;
727  fnorm = lm_enorm(m, fvec);
728 
729 /* lmdif: the outer loop. */
730 
731  do {
732 #if BUG
733  cpl_msg_debug(__func__, "iter=%d nfev=%d fnorm=%g\n",
734  iter, *nfev, fnorm);
735 #endif
736 
737 /* outer: calculate the jacobian matrix. */
738  *info = 0;
739 #if 1
740  #pragma omp parallel for default(none) /* as req. by Ralf */ \
741  shared(n, info, m, x, eps, evaluate, data, printout, nfev, iter, \
742  fjac, fvec)
743 #endif
744  for (j = 0; j < n; j++) {
745  if (*info < 0)
746  continue;
747  double *wa5 = (double *) cpl_malloc(m * sizeof(double));
748  double *xd = (double *) cpl_malloc(n * sizeof(double));
749  memcpy(xd, x, n * sizeof(double));
750  double step = eps * fabs(xd[j]);
751  if (step == 0.)
752  step = eps;
753  xd[j] += step;
754  int infod = 0;
755  (*evaluate) (xd, m, wa5, data, &infod);
756  if( printout )
757  (*printout) (__func__, n, xd, m, wa5, data, 1, iter, ++(*nfev));
758  if (infod < 0) {/* user requested break */
759  *info = infod;
760  cpl_free(wa5);
761  cpl_free(xd);
762  continue;
763  }
764  int i;
765  double diff = xd[j] - x[j];
766  double *fjacm = fjac + j * m;
767  for (i = 0; i < m; i++) { /* changed in 2.3, Mark Bydder */
768  fjacm[i] = (wa5[i] - fvec[i]) / diff;
769  }
770  cpl_free(wa5);
771  cpl_free(xd);
772  }
773  if (*info < 0)
774  return; /* user requested break */
775 
776 #if BUG>1
777  /* DEBUG: print the entire matrix */
778  {
779  int i;
780  for (i = 0; i < m; i++) {
781  for (j = 0; j < n; j++)
782  printf("%.5e ", fjac[j * m + i]);
783  printf("\n");
784  }
785  }
786 #endif
787 
788 /* outer: compute the qr factorization of the jacobian. */
789 
790  lm_qrfac(m, n, fjac, 1, ipvt, wa1, wa2, wa3);
791 
792  if (iter == 1) { /* first iteration */
793  if (mode != 2) {
794  /* diag := norms of the columns of the initial jacobian */
795  for (j = 0; j < n; j++) {
796  diag[j] = wa2[j];
797  if (wa2[j] == 0.)
798  diag[j] = 1.;
799  }
800  }
801  /* use diag to scale x, then calculate the norm */
802  for (j = 0; j < n; j++)
803  wa3[j] = diag[j] * x[j];
804  xnorm = lm_enorm(n, wa3);
805  /* initialize the step bound delta. */
806  delta = factor * xnorm;
807  if (delta == 0.)
808  delta = factor;
809  }
810 
811 /* outer: form (q transpose)*fvec and store first n components in qtf. */
812 
813  int i;
814  for (i = 0; i < m; i++)
815  wa4[i] = fvec[i];
816 
817  for (j = 0; j < n; j++) {
818  temp3 = fjac[j * m + j];
819  if (temp3 != 0.) {
820  sum = 0;
821  for (i = j; i < m; i++)
822  sum += fjac[j * m + i] * wa4[i];
823  temp = -sum / temp3;
824  for (i = j; i < m; i++)
825  wa4[i] += fjac[j * m + i] * temp;
826  }
827  fjac[j * m + j] = wa1[j];
828  qtf[j] = wa4[j];
829  }
830 
831 /* outer: compute norm of scaled gradient and test for convergence. */
832 
833  gnorm = 0;
834  if (fnorm != 0) {
835  for (j = 0; j < n; j++) {
836  if (wa2[ipvt[j]] == 0)
837  continue;
838 
839  sum = 0.;
840  for (i = 0; i <= j; i++)
841  sum += fjac[j * m + i] * qtf[i] / fnorm;
842  gnorm = MAX(gnorm, fabs(sum / wa2[ipvt[j]]));
843  }
844  }
845 
846  if (gnorm <= gtol) {
847  *info = 4;
848  return;
849  }
850 
851 /* outer: rescale if necessary. */
852 
853  if (mode != 2) {
854  for (j = 0; j < n; j++)
855  diag[j] = MAX(diag[j], wa2[j]);
856  }
857 
858 /* the inner loop. */
859  do {
860 #if BUG
861  printf("lmdif/ inner loop iter=%d nfev=%d\n", iter, *nfev);
862 #endif
863 
864 /* inner: determine the levenberg-marquardt parameter. */
865 
866  lm_lmpar(n, fjac, m, ipvt, diag, qtf, delta, &par,
867  wa1, wa2, wa3, wa4);
868 
869 /* inner: store the direction p and x + p; calculate the norm of p. */
870 
871  for (j = 0; j < n; j++) {
872  wa1[j] = -wa1[j];
873  wa2[j] = x[j] + wa1[j];
874  wa3[j] = diag[j] * wa1[j];
875  }
876  pnorm = lm_enorm(n, wa3);
877 
878 /* inner: on the first iteration, adjust the initial step bound. */
879 
880  if (*nfev <= 1 + n)
881  delta = MIN(delta, pnorm);
882 
883  /* evaluate the function at x + p and calculate its norm. */
884 
885  *info = 0;
886  (*evaluate) (wa2, m, wa4, data, info); ++(*nfev);
887  if( printout )
888  (*printout) (__func__, n, x, m, wa4, data, 2, iter, *nfev);
889  if (*info < 0)
890  return; /* user requested break. */
891 
892  fnorm1 = lm_enorm(m, wa4);
893 #if BUG
894  printf("lmdif/ pnorm %.10e fnorm1 %.10e fnorm %.10e"
895  " delta=%.10e par=%.10e\n",
896  pnorm, fnorm1, fnorm, delta, par);
897 #endif
898 
899 /* inner: compute the scaled actual reduction. */
900 
901  if (p1 * fnorm1 < fnorm)
902  actred = 1 - SQR(fnorm1 / fnorm);
903  else
904  actred = -1;
905 
906 /* inner: compute the scaled predicted reduction and
907  the scaled directional derivative. */
908 
909  for (j = 0; j < n; j++) {
910  wa3[j] = 0;
911  for (i = 0; i <= j; i++)
912  wa3[i] += fjac[j * m + i] * wa1[ipvt[j]];
913  }
914  temp1 = lm_enorm(n, wa3) / fnorm;
915  temp2 = sqrt(par) * pnorm / fnorm;
916  prered = SQR(temp1) + 2 * SQR(temp2);
917  dirder = -(SQR(temp1) + SQR(temp2));
918 
919 /* inner: compute the ratio of the actual to the predicted reduction. */
920 
921  ratio = prered != 0 ? actred / prered : 0;
922 #if BUG
923  printf("lmdif/ actred=%.10e prered=%.10e ratio=%.10e"
924  " sq(1)=%.10e sq(2)=%.10e dd=%.10e\n",
925  actred, prered, prered != 0 ? ratio : 0.,
926  SQR(temp1), SQR(temp2), dirder);
927 #endif
928 
929 /* inner: update the step bound. */
930 
931  if (ratio <= 0.25) {
932  if (actred >= 0.)
933  temp = 0.5;
934  else
935  temp = 0.5 * dirder / (dirder + 0.55 * actred);
936  if (p1 * fnorm1 >= fnorm || temp < p1)
937  temp = p1;
938  delta = temp * MIN(delta, pnorm / p1);
939  par /= temp;
940  } else if (par == 0. || ratio >= 0.75) {
941  delta = pnorm / 0.5;
942  par *= 0.5;
943  }
944 
945 /* inner: test for successful iteration. */
946 
947  if (ratio >= p0001) {
948  /* yes, success: update x, fvec, and their norms. */
949  for (j = 0; j < n; j++) {
950  x[j] = wa2[j];
951  wa2[j] = diag[j] * x[j];
952  }
953  for (i = 0; i < m; i++)
954  fvec[i] = wa4[i];
955  xnorm = lm_enorm(n, wa2);
956  fnorm = fnorm1;
957  iter++;
958  }
959 #if BUG
960  else {
961  printf("ATTN: iteration considered unsuccessful\n");
962  }
963 #endif
964 
965 /* inner: tests for convergence ( otherwise *info = 1, 2, or 3 ). */
966 
967  *info = 0; /* do not terminate (unless overwritten by nonzero) */
968  if (fabs(actred) <= ftol && prered <= ftol && 0.5 * ratio <= 1)
969  *info = 1;
970  if (delta <= xtol * xnorm)
971  *info += 2;
972  if (*info != 0)
973  return;
974 
975 /* inner: tests for termination and stringent tolerances. */
976 
977  if (*nfev >= maxfev)
978  *info = 5;
979  if (fabs(actred) <= LM_MACHEP &&
980  prered <= LM_MACHEP && 0.5 * ratio <= 1)
981  *info = 6;
982  if (delta <= LM_MACHEP * xnorm)
983  *info = 7;
984  if (gnorm <= LM_MACHEP)
985  *info = 8;
986  if (*info != 0)
987  return;
988 
989 /* inner: end of the loop. repeat if iteration unsuccessful. */
990 
991  } while (ratio < p0001);
992 
993 /* outer: end of the loop. */
994 
995  } while (1);
996 
997 } /* lm_lmdif. */
998 
1069 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
1070  double *qtb, double delta, double *par, double *x,
1071  double *sdiag, double *wa1, double *wa2)
1072 {
1073  int i, iter, j, nsing;
1074  double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
1075  double sum, temp;
1076  static double p1 = 0.1;
1077 
1078 #if BUG
1079  printf("lmpar\n");
1080 #endif
1081 
1082 /* lmpar: compute and store in x the gauss-newton direction. if the
1083  jacobian is rank-deficient, obtain a least squares solution. */
1084 
1085  nsing = n;
1086  for (j = 0; j < n; j++) {
1087  wa1[j] = qtb[j];
1088  if (r[j * ldr + j] == 0 && nsing == n)
1089  nsing = j;
1090  if (nsing < n)
1091  wa1[j] = 0;
1092  }
1093 #if BUG
1094  printf("nsing %d ", nsing);
1095 #endif
1096  for (j = nsing - 1; j >= 0; j--) {
1097  wa1[j] = wa1[j] / r[j + ldr * j];
1098  temp = wa1[j];
1099  for (i = 0; i < j; i++)
1100  wa1[i] -= r[j * ldr + i] * temp;
1101  }
1102 
1103  for (j = 0; j < n; j++)
1104  x[ipvt[j]] = wa1[j];
1105 
1106 /* lmpar: initialize the iteration counter, evaluate the function at the
1107  origin, and test for acceptance of the gauss-newton direction. */
1108 
1109  iter = 0;
1110  for (j = 0; j < n; j++)
1111  wa2[j] = diag[j] * x[j];
1112  dxnorm = lm_enorm(n, wa2);
1113  fp = dxnorm - delta;
1114  if (fp <= p1 * delta) {
1115 #if BUG
1116  printf("lmpar/ terminate (fp<p1*delta)\n");
1117 #endif
1118  *par = 0;
1119  return;
1120  }
1121 
1122 /* lmpar: if the jacobian is not rank deficient, the newton
1123  step provides a lower bound, parl, for the 0. of
1124  the function. otherwise set this bound to 0.. */
1125 
1126  parl = 0;
1127  if (nsing >= n) {
1128  for (j = 0; j < n; j++)
1129  wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
1130 
1131  for (j = 0; j < n; j++) {
1132  sum = 0.;
1133  for (i = 0; i < j; i++)
1134  sum += r[j * ldr + i] * wa1[i];
1135  wa1[j] = (wa1[j] - sum) / r[j + ldr * j];
1136  }
1137  temp = lm_enorm(n, wa1);
1138  parl = fp / delta / temp / temp;
1139  }
1140 
1141 /* lmpar: calculate an upper bound, paru, for the 0. of the function. */
1142 
1143  for (j = 0; j < n; j++) {
1144  sum = 0;
1145  for (i = 0; i <= j; i++)
1146  sum += r[j * ldr + i] * qtb[i];
1147  wa1[j] = sum / diag[ipvt[j]];
1148  }
1149  gnorm = lm_enorm(n, wa1);
1150  paru = gnorm / delta;
1151  if (paru == 0.)
1152  paru = LM_DWARF / MIN(delta, p1);
1153 
1154 /* lmpar: if the input par lies outside of the interval (parl,paru),
1155  set par to the closer endpoint. */
1156 
1157  *par = MAX(*par, parl);
1158  *par = MIN(*par, paru);
1159  if (*par == 0.)
1160  *par = gnorm / dxnorm;
1161 #if BUG
1162  printf("lmpar/ parl %.4e par %.4e paru %.4e\n", parl, *par, paru);
1163 #endif
1164 
1165 /* lmpar: iterate. */
1166 
1167  for (;; iter++) {
1168 
1169  /* evaluate the function at the current value of par. */
1170 
1171  if (*par == 0.)
1172  *par = MAX(LM_DWARF, 0.001 * paru);
1173  temp = sqrt(*par);
1174  for (j = 0; j < n; j++)
1175  wa1[j] = temp * diag[j];
1176  lm_qrsolv(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
1177  for (j = 0; j < n; j++)
1178  wa2[j] = diag[j] * x[j];
1179  dxnorm = lm_enorm(n, wa2);
1180  fp_old = fp;
1181  fp = dxnorm - delta;
1182 
1183  /* if the function is small enough, accept the current value
1184  of par. Also test for the exceptional cases where parl
1185  is zero or the number of iterations has reached 10. */
1186 
1187  if (fabs(fp) <= p1 * delta
1188  || (parl == 0. && fp <= fp_old && fp_old < 0.)
1189  || iter == 10)
1190  break; /* the only exit from the iteration. */
1191 
1192  /* compute the Newton correction. */
1193 
1194  for (j = 0; j < n; j++)
1195  wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
1196 
1197  for (j = 0; j < n; j++) {
1198  wa1[j] = wa1[j] / sdiag[j];
1199  for (i = j + 1; i < n; i++)
1200  wa1[i] -= r[j * ldr + i] * wa1[j];
1201  }
1202  temp = lm_enorm(n, wa1);
1203  parc = fp / delta / temp / temp;
1204 
1205  /* depending on the sign of the function, update parl or paru. */
1206 
1207  if (fp > 0)
1208  parl = MAX(parl, *par);
1209  else if (fp < 0)
1210  paru = MIN(paru, *par);
1211  /* the case fp==0 is precluded by the break condition */
1212 
1213  /* compute an improved estimate for par. */
1214 
1215  *par = MAX(parl, *par + parc);
1216 
1217  }
1218 
1219 } /* lm_lmpar. */
1220 
1271 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
1272  double *rdiag, double *acnorm, double *wa)
1273 {
1274  int i, j, k, minmn;
1275  double ajnorm;
1276  static double p05 = 0.05;
1277 
1278 /* qrfac: compute initial column norms and initialize several arrays. */
1279 
1280  for (j = 0; j < n; j++) {
1281  acnorm[j] = lm_enorm(m, &a[j * m]);
1282  rdiag[j] = acnorm[j];
1283  wa[j] = rdiag[j];
1284  if (pivot)
1285  ipvt[j] = j;
1286  }
1287 #if BUG
1288  printf("qrfac\n");
1289 #endif
1290 
1291 /* qrfac: reduce a to r with householder transformations. */
1292 
1293  minmn = MIN(m, n);
1294  for (j = 0; j < minmn; j++) {
1295  double *am = a + j *m;
1296  if (pivot) {
1297 
1298  /* bring the column of largest norm into the pivot position. */
1299 
1300  int kmax = j;
1301  for (k = j + 1; k < n; k++)
1302  if (rdiag[k] > rdiag[kmax])
1303  kmax = k;
1304  if (kmax != j) {
1305  double *akmaxm = a + kmax * m;
1306  for (i = 0; i < m; i++) {
1307  double temp = am[i];
1308  am[i] = akmaxm[i];
1309  akmaxm[i] = temp;
1310  }
1311  rdiag[kmax] = rdiag[j];
1312  wa[kmax] = wa[j];
1313  int ktmp = ipvt[j];
1314  ipvt[j] = ipvt[kmax];
1315  ipvt[kmax] = ktmp;
1316  }
1317  }
1318  /* compute the Householder transformation to reduce the
1319  j-th column of a to a multiple of the j-th unit vector. */
1320 
1321  ajnorm = lm_enorm(m - j, &am[j]);
1322  if (ajnorm == 0.) {
1323  rdiag[j] = 0;
1324  continue;
1325  }
1326 
1327  if (am[j] < 0.)
1328  ajnorm = -ajnorm;
1329  for (i = j; i < m; i++)
1330  am[i] /= ajnorm;
1331  am[j] += 1;
1332 
1333  /* apply the transformation to the remaining columns
1334  and update the norms. */
1335 
1336  for (k = j + 1; k < n; k++) {
1337  double sum = 0;
1338  double *akm = a + k * m;
1339 
1340  for (i = j; i < m; i++)
1341  sum += am[i] * akm[i];
1342 
1343  double temp = sum / am[j];
1344 
1345  for (i = j; i < m; i++)
1346  akm[i] -= temp * am[i];
1347 
1348  if (pivot && rdiag[k] != 0.) {
1349  temp = akm[j] / rdiag[k];
1350  temp = MAX(0., 1 - temp * temp);
1351  rdiag[k] *= sqrt(temp);
1352  temp = rdiag[k] / wa[k];
1353  if (p05 * SQR(temp) <= LM_MACHEP) {
1354  rdiag[k] = lm_enorm(m - j - 1, &akm[j + 1]);
1355  wa[k] = rdiag[k];
1356  }
1357  }
1358  }
1359 
1360  rdiag[j] = -ajnorm;
1361  }
1362 }
1363 
1425 static void
1426 lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
1427  double *qtb, double *x, double *sdiag, double *wa)
1428 {
1429  int i, kk, j, k, nsing;
1430  double qtbpj, sum, temp;
1431  double _sin, _cos, _tan, _cot; /* local variables, not functions */
1432 
1433 /* qrsolv: copy r and (q transpose)*b to preserve input and initialize s.
1434  in particular, save the diagonal elements of r in x. */
1435 
1436  for (j = 0; j < n; j++) {
1437  for (i = j; i < n; i++)
1438  r[j * ldr + i] = r[i * ldr + j];
1439  x[j] = r[j * ldr + j];
1440  wa[j] = qtb[j];
1441  }
1442 #if BUG
1443  printf("qrsolv\n");
1444 #endif
1445 
1446 /* qrsolv: eliminate the diagonal matrix d using a givens rotation. */
1447 
1448  for (j = 0; j < n; j++) {
1449 
1450 /* qrsolv: prepare the row of d to be eliminated, locating the
1451  diagonal element using p from the qr factorization. */
1452 
1453  if (diag[ipvt[j]] == 0.)
1454  goto L90;
1455  for (k = j; k < n; k++)
1456  sdiag[k] = 0.;
1457  sdiag[j] = diag[ipvt[j]];
1458 
1459 /* qrsolv: the transformations to eliminate the row of d modify only
1460  a single element of (q transpose)*b beyond the first n, which is
1461  initially 0.. */
1462 
1463  qtbpj = 0.;
1464  for (k = j; k < n; k++) {
1465 
1466  /* determine a givens rotation which eliminates the
1467  appropriate element in the current row of d. */
1468 
1469  if (sdiag[k] == 0.)
1470  continue;
1471  kk = k + ldr * k;
1472  if (fabs(r[kk]) < fabs(sdiag[k])) {
1473  _cot = r[kk] / sdiag[k];
1474  _sin = 1 / sqrt(1 + SQR(_cot));
1475  _cos = _sin * _cot;
1476  } else {
1477  _tan = sdiag[k] / r[kk];
1478  _cos = 1 / sqrt(1 + SQR(_tan));
1479  _sin = _cos * _tan;
1480  }
1481 
1482  /* compute the modified diagonal element of r and
1483  the modified element of ((q transpose)*b,0). */
1484 
1485  r[kk] = _cos * r[kk] + _sin * sdiag[k];
1486  temp = _cos * wa[k] + _sin * qtbpj;
1487  qtbpj = -_sin * wa[k] + _cos * qtbpj;
1488  wa[k] = temp;
1489 
1490  /* accumulate the tranformation in the row of s. */
1491 
1492  for (i = k + 1; i < n; i++) {
1493  temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
1494  sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
1495  r[k * ldr + i] = temp;
1496  }
1497  }
1498 
1499  L90:
1500  /* store the diagonal element of s and restore
1501  the corresponding diagonal element of r. */
1502 
1503  sdiag[j] = r[j * ldr + j];
1504  r[j * ldr + j] = x[j];
1505  }
1506 
1507 /* qrsolv: solve the triangular system for z. if the system is
1508  singular, then obtain a least squares solution. */
1509 
1510  nsing = n;
1511  for (j = 0; j < n; j++) {
1512  if (sdiag[j] == 0. && nsing == n)
1513  nsing = j;
1514  if (nsing < n)
1515  wa[j] = 0;
1516  }
1517 
1518  for (j = nsing - 1; j >= 0; j--) {
1519  sum = 0;
1520  for (i = j + 1; i < nsing; i++)
1521  sum += r[j * ldr + i] * wa[i];
1522  wa[j] = (wa[j] - sum) / sdiag[j];
1523  }
1524 
1525 /* qrsolv: permute the components of z back to components of x. */
1526 
1527  for (j = 0; j < n; j++)
1528  x[ipvt[j]] = wa[j];
1529 
1530 } /* lm_qrsolv. */
1531 
1548 static double
1549 lm_enorm(int n, double *x) {
1550  int i;
1551  double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
1552 
1553  s1 = 0;
1554  s2 = 0;
1555  s3 = 0;
1556  x1max = 0;
1557  x3max = 0;
1558  agiant = LM_SQRT_GIANT / ((double) n);
1559 
1560  /* sum squares. */
1561  for (i = 0; i < n; i++) {
1562  xabs = fabs(x[i]);
1563  if (xabs > LM_SQRT_DWARF && xabs < agiant) {
1564  /* sum for intermediate components. */
1565  s2 += xabs * xabs;
1566  continue;
1567  }
1568 
1569  if (xabs > LM_SQRT_DWARF) {
1570  /* sum for large components. */
1571  if (xabs > x1max) {
1572  temp = x1max / xabs;
1573  s1 = 1 + s1 * SQR(temp);
1574  x1max = xabs;
1575  } else {
1576  temp = xabs / x1max;
1577  s1 += SQR(temp);
1578  }
1579  continue;
1580  }
1581  /* sum for small components. */
1582  if (xabs > x3max) {
1583  temp = x3max / xabs;
1584  s3 = 1 + s3 * SQR(temp);
1585  x3max = xabs;
1586  } else {
1587  if (xabs != 0.) {
1588  temp = xabs / x3max;
1589  s3 += SQR(temp);
1590  }
1591  }
1592  }
1593 
1594  /* calculation of norm. */
1595 
1596  if (s1 != 0)
1597  return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1598  if (s2 != 0) {
1599  if (s2 >= x3max)
1600  return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1601  else
1602  return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1603  }
1604 
1605  return x3max * sqrt(s3);
1606 
1607 } /* lm_enorm. */
1608 
cpl_error_code muse_cpl_optimize_lvmq(void *aData, cpl_array *aPar, int aSize, muse_cpl_evaluate_func *aFunction, muse_cpl_optimize_control_t *aCtrl)
Minimize a function with the Levenberg-Marquardt algorithm.
int debug
Flag to switch on debugging messages. Default value: CPL_FALSE.
Definition: muse_optimize.h:71
int maxcall
Maximum number of iterations. Default value (when set to -1): 100.
Definition: muse_optimize.h:66
double xtol
Relative error between last two approximations. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:51
double ftol
Relative error desired in the sum of squares. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:42
double gtol
Orthogonality desired between fvec and its derivs. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:61
Optimization control parameters.
Definition: muse_optimize.h:33
cpl_error_code( muse_cpl_evaluate_func)(void *aData, cpl_array *aPar, cpl_array *aRetval)
User provided function to evaluate in muse_cpl_optimize_lvmq().
Definition: muse_optimize.h:89
static void muse_cpl_optimize_print(const char *func, int n_par, double *par, int m_dat, double *fvec, void *data, int iflag, int iter, int nfev)
Default printout method adopted to CPL.