00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 #include <config.h>
00023 #endif
00024
00025 #include <stdlib.h>
00026 #include <math.h>
00027 #include <float.h>
00028 #include <string.h>
00029 #ifndef _OPENMP
00030 #define omp_get_max_threads() 1
00031 #else
00032 #include <omp.h>
00033 #endif
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 typedef struct {
00044 double ftol;
00045 double xtol;
00046 double gtol;
00047 double epsilon;
00048 double stepbound;
00049 double fnorm;
00050 int maxcall;
00051 int nfev;
00052 int info;
00053 } lm_control_type;
00054
00055
00056 static void lm_initialize_control(lm_control_type * control);
00057
00058
00059 static double lm_enorm(int, double *);
00060
00061
00062 static void lm_minimize(int m_dat, int n_par, double *par,
00063 void (*evaluate) (double *par, int m_dat, double *fvec,
00064 void *data, int *info),
00065 void (*printout) (const char *func, int n_par,
00066 double *par, int m_dat,
00067 double *fvec, void *data, int iflag,
00068 int iter, int nfev),
00069 void *data, lm_control_type * control);
00070
00071
00072
00073
00074
00075
00076 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
00077 double xtol, double gtol, int maxfev, double epsfcn,
00078 double *diag, int mode, double factor, int *info,
00079 int *nfev, double *fjac, int *ipvt, double *qtf,
00080 double *wa1, double *wa2, double *wa3, double *wa4,
00081 void (*evaluate) (double *par, int m_dat, double *fvec,
00082 void *data, int *info),
00083 void (*printout) (const char *func, int n_par,
00084 double *par, int m_dat,
00085 double *fvec, void *data, int iflag,
00086 int iter, int nfev),
00087 void *data);
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099 #include "muse_optimize.h"
00100 #include "muse_utils.h"
00106 typedef struct {
00107 muse_cpl_evaluate_func *func;
00108 int npar;
00109 void *data;
00110 cpl_error_code retval;
00111 } muse_cpl_optimize_eval_struct;
00112
00119 static void
00120 lm_evaluate_cpl (double *par, int m_dat, double *fvec, void *data, int *info) {
00121 muse_cpl_optimize_eval_struct *s = data;
00122
00123 cpl_array *cpl_par = cpl_array_wrap_double(par, s->npar);
00124 cpl_array *cpl_fvec = cpl_array_wrap_double(fvec, m_dat);
00125
00126 s->retval = (*(s->func))(s->data, cpl_par, cpl_fvec);
00127
00128 cpl_array_unwrap(cpl_par);
00129 cpl_array_unwrap(cpl_fvec);
00130
00131 if (s->retval != CPL_ERROR_NONE) {
00132 *info = -1;
00133 }
00134 }
00135
00136 static const cpl_error_code error_code_tbl[] = {
00137 CPL_ERROR_ILLEGAL_INPUT,
00138 CPL_ERROR_NONE,
00139 CPL_ERROR_NONE,
00140 CPL_ERROR_NONE,
00141 CPL_ERROR_SINGULAR_MATRIX,
00142 CPL_ERROR_CONTINUE,
00143 CPL_ERROR_CONTINUE,
00144 CPL_ERROR_CONTINUE,
00145 CPL_ERROR_CONTINUE,
00146 CPL_ERROR_ILLEGAL_OUTPUT,
00147 CPL_ERROR_NONE,
00148 };
00149
00150 static const char *lm_infmsg[] = {
00151 "fatal coding error (improper input parameters)",
00152 "success (the relative error in the sum of squares is at most tol)",
00153 "success (the relative error between x and the solution is at most tol)",
00154 "success (both errors are at most tol)",
00155 "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
00156 "timeout (number of calls to fcn has reached maxcall*(n+1))",
00157 "failure (ftol<tol: cannot reduce sum of squares any further)",
00158 "failure (xtol<tol: cannot improve approximate solution any further)",
00159 "failure (gtol<tol: cannot improve approximate solution any further)",
00160 "exception (not enough memory)",
00161 "exception (break requested within function evaluation)"
00162 };
00163
00176 static void
00177 muse_cpl_optimize_print(const char *func, int n_par, double *par,
00178 int m_dat, double *fvec,
00179 void *data, int iflag, int iter, int nfev) {
00180 if (iflag == 2) {
00181 cpl_msg_debug(func, "trying step in gradient direction");
00182 } else if (iflag == 1) {
00183 cpl_msg_debug(func, "determining gradient (iteration %d)", iter);
00184 } else if (iflag == 0) {
00185 cpl_msg_debug(func, "starting minimization");
00186 } else if (iflag == -1) {
00187 cpl_msg_debug(func, "terminated after %d evaluations", nfev);
00188 }
00189
00190 char *parstring = cpl_calloc(n_par * 15 + 30, sizeof(char));
00191 snprintf(parstring, 5, "par:");
00192 int i;
00193 for (i = 0; i < n_par; ++i) {
00194 snprintf(parstring + strlen(parstring), 15, " %7.3g", par[i]);
00195 }
00196 snprintf(parstring + strlen(parstring), 25, " => norm: %7g",
00197 lm_enorm(m_dat, fvec));
00198 cpl_msg_debug(func, "%s", parstring);
00199 cpl_free(parstring);
00200
00201 #if 0
00202 double f, y, t;
00203 muse_cpl_optimize_eval_struct *mydata
00204 = (muse_cpl_optimize_eval_struct *) data;
00205
00206 if (iflag == -1) {
00207 cpl_msg_debug(func, " fitting data as follows:", mydata);
00208 for (i = 0; i < m_dat; ++i) {
00209 t = (mydata->tvec)[i];
00210 y = (mydata->yvec)[i];
00211 f = mydata->f(t, par);
00212 cpl_msg_debug(func, " t[%2d]=%12g y=%12g fit=%12g residue=%12g",
00213 i, t, y, f, y - f);
00214 }
00215 }
00216 #else
00217 UNUSED_ARGUMENT(data);
00218 #endif
00219 }
00220
00250 cpl_error_code
00251 muse_cpl_optimize_lvmq(void *aData, cpl_array *aPar, int aSize,
00252 muse_cpl_evaluate_func *aFunction,
00253 muse_cpl_optimize_control_t *aCtrl) {
00254
00255 cpl_ensure_code(aPar != NULL, CPL_ERROR_NULL_INPUT);
00256 int npars = cpl_array_get_size(aPar);
00257 cpl_ensure_code(npars > 0, CPL_ERROR_ILLEGAL_INPUT);
00258 cpl_ensure_code(aSize > 0, CPL_ERROR_ILLEGAL_INPUT);
00259 cpl_ensure_code(aFunction != NULL, CPL_ERROR_NULL_INPUT);
00260
00261 muse_cpl_optimize_eval_struct s = {
00262 aFunction,
00263 npars,
00264 aData,
00265 CPL_ERROR_NONE
00266 };
00267
00268 void (*debug_func) (const char *, int, double *, int, double *, void *,
00269 int, int, int) = NULL;
00270 lm_control_type control;
00271 lm_initialize_control(&control);
00272 if (aCtrl != NULL) {
00273 if (aCtrl->maxcall > 0) {
00274 control.maxcall = aCtrl->maxcall;
00275 }
00276 if (aCtrl->ftol > 0) {
00277 control.ftol = aCtrl->ftol;
00278 }
00279 if (aCtrl->xtol > 0) {
00280 control.xtol = aCtrl->xtol;
00281 }
00282 if (aCtrl->gtol > 0) {
00283 control.gtol = aCtrl->gtol;
00284 }
00285 if (aCtrl->debug == CPL_TRUE) {
00286 debug_func = muse_cpl_optimize_print;
00287 }
00288 }
00289
00290 double timeinit = cpl_test_get_walltime();
00291 double cpuinit = cpl_test_get_cputime();
00292 lm_minimize(aSize,
00293 cpl_array_get_size(aPar),
00294 cpl_array_get_data_double(aPar),
00295 lm_evaluate_cpl,
00296 debug_func,
00297 &s, &control);
00298
00299 double timefini = cpl_test_get_walltime();
00300 double cpufini = cpl_test_get_cputime();
00301
00302 cpl_msg_debug(__func__, "Minimizing finished after %i steps: %s",
00303 control.nfev / ((int)cpl_array_get_size(aPar)+1),
00304 lm_infmsg[control.info]);
00305
00306 cpl_msg_debug(__func__, "processing time %.3fs (%.3fs CPU, %d CPUs)",
00307 timefini - timeinit, cpufini - cpuinit, omp_get_max_threads());
00308
00309
00310 cpl_error_code res = error_code_tbl[control.info];
00311
00312 cpl_ensure_code(res == CPL_ERROR_NONE, res);
00313
00314 return s.retval;
00315 }
00316
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347 #define LM_MACHEP DBL_EPSILON
00348 #define LM_DWARF DBL_MIN
00349 #if 0
00350 #define LM_SQRT_DWARF sqrt(DBL_MIN)
00351 #define LM_SQRT_GIANT sqrt(DBL_MAX)
00352 #else
00353 #define LM_SQRT_DWARF 1.e-150
00354 #define LM_SQRT_GIANT 1.e150
00355 #endif
00356 #define LM_USERTOL 30*LM_MACHEP
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373 static void lm_initialize_control( lm_control_type * control ) {
00374 control->maxcall = 100;
00375 control->epsilon = LM_USERTOL;
00376 control->stepbound = 100.;
00377 control->ftol = LM_USERTOL;
00378 control->xtol = LM_USERTOL;
00379 control->gtol = LM_USERTOL;
00380 }
00381
00382 static void lm_minimize( int m_dat, int n_par, double *par,
00383 void (*evaluate) (double *par, int m_dat, double *fvec,
00384 void *data, int *info),
00385 void (*printout) (const char *func,
00386 int n_par, double *par, int m_dat,
00387 double *fvec, void *data, int iflag,
00388 int iter, int nfev),
00389 void *data, lm_control_type * control ) {
00390
00391
00392
00393 double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wa4;
00394 int *ipvt;
00395
00396 int n = n_par;
00397 int m = m_dat;
00398
00399 if ( (fvec = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
00400 (diag = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
00401 (qtf = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
00402 (fjac = (double *) cpl_malloc(n*m*sizeof(double))) == NULL ||
00403 (wa1 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
00404 (wa2 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
00405 (wa3 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
00406 (wa4 = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
00407 (ipvt = (int *) cpl_malloc(n * sizeof(int) )) == NULL ) {
00408 control->info = 9;
00409 return;
00410 }
00411
00412
00413
00414 control->info = 0;
00415 control->nfev = 0;
00416
00417
00418 lm_lmdif( m, n, par, fvec, control->ftol, control->xtol, control->gtol,
00419 control->maxcall * (n + 1), control->epsilon, diag, 1,
00420 control->stepbound, &(control->info),
00421 &(control->nfev), fjac, ipvt, qtf, wa1, wa2, wa3, wa4,
00422 evaluate, printout, data );
00423
00424 if ( printout )
00425 (*printout) (__func__, n, par, m, fvec, data, -1, 0, control->nfev);
00426 control->fnorm = lm_enorm(m, fvec);
00427 if ( control->info < 0 )
00428 control->info = 10;
00429
00430
00431
00432 cpl_free(fvec);
00433 cpl_free(diag);
00434 cpl_free(qtf);
00435 cpl_free(fjac);
00436 cpl_free(wa1);
00437 cpl_free(wa2);
00438 cpl_free(wa3);
00439 cpl_free(wa4);
00440 cpl_free(ipvt);
00441 }
00442
00443
00444
00445
00446 #if 0
00447 static const char *lm_infmsg[] = {
00448 "fatal coding error (improper input parameters)",
00449 "success (the relative error in the sum of squares is at most tol)",
00450 "success (the relative error between x and the solution is at most tol)",
00451 "success (both errors are at most tol)",
00452 "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
00453 "timeout (number of calls to fcn has reached maxcall*(n+1))",
00454 "failure (ftol<tol: cannot reduce sum of squares any further)",
00455 "failure (xtol<tol: cannot improve approximate solution any further)",
00456 "failure (gtol<tol: cannot improve approximate solution any further)",
00457 "exception (not enough memory)",
00458 "exception (break requested within function evaluation)"
00459 };
00460
00461 static const char *lm_shortmsg[] = {
00462 "invalid input",
00463 "success (f)",
00464 "success (p)",
00465 "success (f,p)",
00466 "degenerate",
00467 "call limit",
00468 "failed (f)",
00469 "failed (p)",
00470 "failed (o)",
00471 "no memory",
00472 "user break"
00473 };
00474 #endif
00475
00476
00477
00478
00479 #define BUG 0
00480 #if BUG
00481 #include <stdio.h>
00482 #endif
00483
00484 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
00485 double *rdiag, double *acnorm, double *wa);
00486 static void lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
00487 double *qtb, double *x, double *sdiag, double *wa);
00488 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
00489 double *qtb, double delta, double *par, double *x,
00490 double *sdiag, double *wa1, double *wa2);
00491
00492 #define MIN(a,b) (((a)<=(b)) ? (a) : (b))
00493 #define MAX(a,b) (((a)>=(b)) ? (a) : (b))
00494 #define SQR(x) (x)*(x)
00495
00496
00497
00498
00673 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
00674 double xtol, double gtol, int maxfev, double epsfcn,
00675 double *diag, int mode, double factor, int *info,
00676 int *nfev, double *fjac, int *ipvt, double *qtf,
00677 double *wa1, double *wa2, double *wa3, double *wa4,
00678 void (*evaluate) (double *par, int m_dat, double *fvec,
00679 void *data, int *info),
00680 void (*printout) (const char *func, int n_par, double *par, int m_dat,
00681 double *fvec, void *data, int iflag,
00682 int iter, int nfev),
00683 void *data)
00684 {
00685 int iter, j;
00686 double actred, delta, dirder, eps, fnorm, fnorm1, gnorm, par, pnorm,
00687 prered, ratio, sum, temp, temp1, temp2, temp3, xnorm;
00688 static double p1 = 0.1;
00689 static double p0001 = 1.0e-4;
00690
00691 *nfev = 0;
00692 iter = 1;
00693 par = 0;
00694 delta = 0;
00695 xnorm = 0;
00696 temp = MAX(epsfcn, LM_MACHEP);
00697 eps = sqrt(temp);
00698
00699
00700
00701 if ((n <= 0) || (m < n) || (ftol < 0.)
00702 || (xtol < 0.) || (gtol < 0.) || (maxfev <= 0) || (factor <= 0.)) {
00703 *info = 0;
00704 return;
00705 }
00706 if (mode == 2) {
00707 for (j = 0; j < n; j++) {
00708 if (diag[j] <= 0.0) {
00709 *info = 0;
00710 return;
00711 }
00712 }
00713 }
00714 #if BUG
00715 printf("lmdif\n");
00716 #endif
00717
00718
00719
00720 *info = 0;
00721 (*evaluate) (x, m, fvec, data, info); ++(*nfev);
00722 if( printout )
00723 (*printout) (__func__, n, x, m, fvec, data, 0, 0, *nfev);
00724 if (*info < 0)
00725 return;
00726 fnorm = lm_enorm(m, fvec);
00727
00728
00729
00730 do {
00731 #if BUG
00732 cpl_msg_debug(__func__, "iter=%d nfev=%d fnorm=%g\n",
00733 iter, *nfev, fnorm);
00734 #endif
00735
00736
00737 *info = 0;
00738 #if 1
00739 #pragma omp parallel for default(none) \
00740 shared(n, info, m, x, eps, evaluate, data, printout, nfev, iter, \
00741 fjac, fvec)
00742 #endif
00743 for (j = 0; j < n; j++) {
00744 if (*info < 0)
00745 continue;
00746 double *wa5 = (double *) cpl_malloc(m * sizeof(double));
00747 double *xd = (double *) cpl_malloc(n * sizeof(double));
00748 memcpy(xd, x, n * sizeof(double));
00749 double step = eps * fabs(xd[j]);
00750 if (step == 0.)
00751 step = eps;
00752 xd[j] += step;
00753 int infod = 0;
00754 (*evaluate) (xd, m, wa5, data, &infod); ++(*nfev);
00755 if( printout )
00756 (*printout) (__func__, n, xd, m, wa5, data, 1, iter, *nfev);
00757 if (infod < 0) {
00758 *info = infod;
00759 cpl_free(wa5);
00760 cpl_free(xd);
00761 continue;
00762 }
00763 int i;
00764 double diff = xd[j] - x[j];
00765 double *fjacm = fjac + j * m;
00766 for (i = 0; i < m; i++) {
00767 fjacm[i] = (wa5[i] - fvec[i]) / diff;
00768 }
00769 cpl_free(wa5);
00770 cpl_free(xd);
00771 }
00772 if (*info < 0)
00773 return;
00774
00775 #if BUG>1
00776
00777 {
00778 int i;
00779 for (i = 0; i < m; i++) {
00780 for (j = 0; j < n; j++)
00781 printf("%.5e ", fjac[j * m + i]);
00782 printf("\n");
00783 }
00784 }
00785 #endif
00786
00787
00788
00789 lm_qrfac(m, n, fjac, 1, ipvt, wa1, wa2, wa3);
00790
00791 if (iter == 1) {
00792 if (mode != 2) {
00793
00794 for (j = 0; j < n; j++) {
00795 diag[j] = wa2[j];
00796 if (wa2[j] == 0.)
00797 diag[j] = 1.;
00798 }
00799 }
00800
00801 for (j = 0; j < n; j++)
00802 wa3[j] = diag[j] * x[j];
00803 xnorm = lm_enorm(n, wa3);
00804
00805 delta = factor * xnorm;
00806 if (delta == 0.)
00807 delta = factor;
00808 }
00809
00810
00811
00812 int i;
00813 for (i = 0; i < m; i++)
00814 wa4[i] = fvec[i];
00815
00816 for (j = 0; j < n; j++) {
00817 temp3 = fjac[j * m + j];
00818 if (temp3 != 0.) {
00819 sum = 0;
00820 for (i = j; i < m; i++)
00821 sum += fjac[j * m + i] * wa4[i];
00822 temp = -sum / temp3;
00823 for (i = j; i < m; i++)
00824 wa4[i] += fjac[j * m + i] * temp;
00825 }
00826 fjac[j * m + j] = wa1[j];
00827 qtf[j] = wa4[j];
00828 }
00829
00830
00831
00832 gnorm = 0;
00833 if (fnorm != 0) {
00834 for (j = 0; j < n; j++) {
00835 if (wa2[ipvt[j]] == 0)
00836 continue;
00837
00838 sum = 0.;
00839 for (i = 0; i <= j; i++)
00840 sum += fjac[j * m + i] * qtf[i] / fnorm;
00841 gnorm = MAX(gnorm, fabs(sum / wa2[ipvt[j]]));
00842 }
00843 }
00844
00845 if (gnorm <= gtol) {
00846 *info = 4;
00847 return;
00848 }
00849
00850
00851
00852 if (mode != 2) {
00853 for (j = 0; j < n; j++)
00854 diag[j] = MAX(diag[j], wa2[j]);
00855 }
00856
00857
00858 do {
00859 #if BUG
00860 printf("lmdif/ inner loop iter=%d nfev=%d\n", iter, *nfev);
00861 #endif
00862
00863
00864
00865 lm_lmpar(n, fjac, m, ipvt, diag, qtf, delta, &par,
00866 wa1, wa2, wa3, wa4);
00867
00868
00869
00870 for (j = 0; j < n; j++) {
00871 wa1[j] = -wa1[j];
00872 wa2[j] = x[j] + wa1[j];
00873 wa3[j] = diag[j] * wa1[j];
00874 }
00875 pnorm = lm_enorm(n, wa3);
00876
00877
00878
00879 if (*nfev <= 1 + n)
00880 delta = MIN(delta, pnorm);
00881
00882
00883
00884 *info = 0;
00885 (*evaluate) (wa2, m, wa4, data, info); ++(*nfev);
00886 if( printout )
00887 (*printout) (__func__, n, x, m, wa4, data, 2, iter, *nfev);
00888 if (*info < 0)
00889 return;
00890
00891 fnorm1 = lm_enorm(m, wa4);
00892 #if BUG
00893 printf("lmdif/ pnorm %.10e fnorm1 %.10e fnorm %.10e"
00894 " delta=%.10e par=%.10e\n",
00895 pnorm, fnorm1, fnorm, delta, par);
00896 #endif
00897
00898
00899
00900 if (p1 * fnorm1 < fnorm)
00901 actred = 1 - SQR(fnorm1 / fnorm);
00902 else
00903 actred = -1;
00904
00905
00906
00907
00908 for (j = 0; j < n; j++) {
00909 wa3[j] = 0;
00910 for (i = 0; i <= j; i++)
00911 wa3[i] += fjac[j * m + i] * wa1[ipvt[j]];
00912 }
00913 temp1 = lm_enorm(n, wa3) / fnorm;
00914 temp2 = sqrt(par) * pnorm / fnorm;
00915 prered = SQR(temp1) + 2 * SQR(temp2);
00916 dirder = -(SQR(temp1) + SQR(temp2));
00917
00918
00919
00920 ratio = prered != 0 ? actred / prered : 0;
00921 #if BUG
00922 printf("lmdif/ actred=%.10e prered=%.10e ratio=%.10e"
00923 " sq(1)=%.10e sq(2)=%.10e dd=%.10e\n",
00924 actred, prered, prered != 0 ? ratio : 0.,
00925 SQR(temp1), SQR(temp2), dirder);
00926 #endif
00927
00928
00929
00930 if (ratio <= 0.25) {
00931 if (actred >= 0.)
00932 temp = 0.5;
00933 else
00934 temp = 0.5 * dirder / (dirder + 0.55 * actred);
00935 if (p1 * fnorm1 >= fnorm || temp < p1)
00936 temp = p1;
00937 delta = temp * MIN(delta, pnorm / p1);
00938 par /= temp;
00939 } else if (par == 0. || ratio >= 0.75) {
00940 delta = pnorm / 0.5;
00941 par *= 0.5;
00942 }
00943
00944
00945
00946 if (ratio >= p0001) {
00947
00948 for (j = 0; j < n; j++) {
00949 x[j] = wa2[j];
00950 wa2[j] = diag[j] * x[j];
00951 }
00952 for (i = 0; i < m; i++)
00953 fvec[i] = wa4[i];
00954 xnorm = lm_enorm(n, wa2);
00955 fnorm = fnorm1;
00956 iter++;
00957 }
00958 #if BUG
00959 else {
00960 printf("ATTN: iteration considered unsuccessful\n");
00961 }
00962 #endif
00963
00964
00965
00966 *info = 0;
00967 if (fabs(actred) <= ftol && prered <= ftol && 0.5 * ratio <= 1)
00968 *info = 1;
00969 if (delta <= xtol * xnorm)
00970 *info += 2;
00971 if (*info != 0)
00972 return;
00973
00974
00975
00976 if (*nfev >= maxfev)
00977 *info = 5;
00978 if (fabs(actred) <= LM_MACHEP &&
00979 prered <= LM_MACHEP && 0.5 * ratio <= 1)
00980 *info = 6;
00981 if (delta <= LM_MACHEP * xnorm)
00982 *info = 7;
00983 if (gnorm <= LM_MACHEP)
00984 *info = 8;
00985 if (*info != 0)
00986 return;
00987
00988
00989
00990 } while (ratio < p0001);
00991
00992
00993
00994 } while (1);
00995
00996 }
00997
01068 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
01069 double *qtb, double delta, double *par, double *x,
01070 double *sdiag, double *wa1, double *wa2)
01071 {
01072 int i, iter, j, nsing;
01073 double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
01074 double sum, temp;
01075 static double p1 = 0.1;
01076
01077 #if BUG
01078 printf("lmpar\n");
01079 #endif
01080
01081
01082
01083
01084 nsing = n;
01085 for (j = 0; j < n; j++) {
01086 wa1[j] = qtb[j];
01087 if (r[j * ldr + j] == 0 && nsing == n)
01088 nsing = j;
01089 if (nsing < n)
01090 wa1[j] = 0;
01091 }
01092 #if BUG
01093 printf("nsing %d ", nsing);
01094 #endif
01095 for (j = nsing - 1; j >= 0; j--) {
01096 wa1[j] = wa1[j] / r[j + ldr * j];
01097 temp = wa1[j];
01098 for (i = 0; i < j; i++)
01099 wa1[i] -= r[j * ldr + i] * temp;
01100 }
01101
01102 for (j = 0; j < n; j++)
01103 x[ipvt[j]] = wa1[j];
01104
01105
01106
01107
01108 iter = 0;
01109 for (j = 0; j < n; j++)
01110 wa2[j] = diag[j] * x[j];
01111 dxnorm = lm_enorm(n, wa2);
01112 fp = dxnorm - delta;
01113 if (fp <= p1 * delta) {
01114 #if BUG
01115 printf("lmpar/ terminate (fp<p1*delta)\n");
01116 #endif
01117 *par = 0;
01118 return;
01119 }
01120
01121
01122
01123
01124
01125 parl = 0;
01126 if (nsing >= n) {
01127 for (j = 0; j < n; j++)
01128 wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
01129
01130 for (j = 0; j < n; j++) {
01131 sum = 0.;
01132 for (i = 0; i < j; i++)
01133 sum += r[j * ldr + i] * wa1[i];
01134 wa1[j] = (wa1[j] - sum) / r[j + ldr * j];
01135 }
01136 temp = lm_enorm(n, wa1);
01137 parl = fp / delta / temp / temp;
01138 }
01139
01140
01141
01142 for (j = 0; j < n; j++) {
01143 sum = 0;
01144 for (i = 0; i <= j; i++)
01145 sum += r[j * ldr + i] * qtb[i];
01146 wa1[j] = sum / diag[ipvt[j]];
01147 }
01148 gnorm = lm_enorm(n, wa1);
01149 paru = gnorm / delta;
01150 if (paru == 0.)
01151 paru = LM_DWARF / MIN(delta, p1);
01152
01153
01154
01155
01156 *par = MAX(*par, parl);
01157 *par = MIN(*par, paru);
01158 if (*par == 0.)
01159 *par = gnorm / dxnorm;
01160 #if BUG
01161 printf("lmpar/ parl %.4e par %.4e paru %.4e\n", parl, *par, paru);
01162 #endif
01163
01164
01165
01166 for (;; iter++) {
01167
01168
01169
01170 if (*par == 0.)
01171 *par = MAX(LM_DWARF, 0.001 * paru);
01172 temp = sqrt(*par);
01173 for (j = 0; j < n; j++)
01174 wa1[j] = temp * diag[j];
01175 lm_qrsolv(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
01176 for (j = 0; j < n; j++)
01177 wa2[j] = diag[j] * x[j];
01178 dxnorm = lm_enorm(n, wa2);
01179 fp_old = fp;
01180 fp = dxnorm - delta;
01181
01182
01183
01184
01185
01186 if (fabs(fp) <= p1 * delta
01187 || (parl == 0. && fp <= fp_old && fp_old < 0.)
01188 || iter == 10)
01189 break;
01190
01191
01192
01193 for (j = 0; j < n; j++)
01194 wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
01195
01196 for (j = 0; j < n; j++) {
01197 wa1[j] = wa1[j] / sdiag[j];
01198 for (i = j + 1; i < n; i++)
01199 wa1[i] -= r[j * ldr + i] * wa1[j];
01200 }
01201 temp = lm_enorm(n, wa1);
01202 parc = fp / delta / temp / temp;
01203
01204
01205
01206 if (fp > 0)
01207 parl = MAX(parl, *par);
01208 else if (fp < 0)
01209 paru = MIN(paru, *par);
01210
01211
01212
01213
01214 *par = MAX(parl, *par + parc);
01215
01216 }
01217
01218 }
01219
01270 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
01271 double *rdiag, double *acnorm, double *wa)
01272 {
01273 int i, j, k, minmn;
01274 double ajnorm;
01275 static double p05 = 0.05;
01276
01277
01278
01279 for (j = 0; j < n; j++) {
01280 acnorm[j] = lm_enorm(m, &a[j * m]);
01281 rdiag[j] = acnorm[j];
01282 wa[j] = rdiag[j];
01283 if (pivot)
01284 ipvt[j] = j;
01285 }
01286 #if BUG
01287 printf("qrfac\n");
01288 #endif
01289
01290
01291
01292 minmn = MIN(m, n);
01293 for (j = 0; j < minmn; j++) {
01294 double *am = a + j *m;
01295 if (pivot) {
01296
01297
01298
01299 int kmax = j;
01300 for (k = j + 1; k < n; k++)
01301 if (rdiag[k] > rdiag[kmax])
01302 kmax = k;
01303 if (kmax != j) {
01304 double *akmaxm = a + kmax * m;
01305 for (i = 0; i < m; i++) {
01306 double temp = am[i];
01307 am[i] = akmaxm[i];
01308 akmaxm[i] = temp;
01309 }
01310 rdiag[kmax] = rdiag[j];
01311 wa[kmax] = wa[j];
01312 int ktmp = ipvt[j];
01313 ipvt[j] = ipvt[kmax];
01314 ipvt[kmax] = ktmp;
01315 }
01316 }
01317
01318
01319
01320 ajnorm = lm_enorm(m - j, &am[j]);
01321 if (ajnorm == 0.) {
01322 rdiag[j] = 0;
01323 continue;
01324 }
01325
01326 if (am[j] < 0.)
01327 ajnorm = -ajnorm;
01328 for (i = j; i < m; i++)
01329 am[i] /= ajnorm;
01330 am[j] += 1;
01331
01332
01333
01334
01335 for (k = j + 1; k < n; k++) {
01336 double sum = 0;
01337 double *akm = a + k * m;
01338
01339 for (i = j; i < m; i++)
01340 sum += am[i] * akm[i];
01341
01342 double temp = sum / am[j];
01343
01344 for (i = j; i < m; i++)
01345 akm[i] -= temp * am[i];
01346
01347 if (pivot && rdiag[k] != 0.) {
01348 temp = akm[j] / rdiag[k];
01349 temp = MAX(0., 1 - temp * temp);
01350 rdiag[k] *= sqrt(temp);
01351 temp = rdiag[k] / wa[k];
01352 if (p05 * SQR(temp) <= LM_MACHEP) {
01353 rdiag[k] = lm_enorm(m - j - 1, &akm[j + 1]);
01354 wa[k] = rdiag[k];
01355 }
01356 }
01357 }
01358
01359 rdiag[j] = -ajnorm;
01360 }
01361 }
01362
01424 static void
01425 lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
01426 double *qtb, double *x, double *sdiag, double *wa)
01427 {
01428 int i, kk, j, k, nsing;
01429 double qtbpj, sum, temp;
01430 double _sin, _cos, _tan, _cot;
01431
01432
01433
01434
01435 for (j = 0; j < n; j++) {
01436 for (i = j; i < n; i++)
01437 r[j * ldr + i] = r[i * ldr + j];
01438 x[j] = r[j * ldr + j];
01439 wa[j] = qtb[j];
01440 }
01441 #if BUG
01442 printf("qrsolv\n");
01443 #endif
01444
01445
01446
01447 for (j = 0; j < n; j++) {
01448
01449
01450
01451
01452 if (diag[ipvt[j]] == 0.)
01453 goto L90;
01454 for (k = j; k < n; k++)
01455 sdiag[k] = 0.;
01456 sdiag[j] = diag[ipvt[j]];
01457
01458
01459
01460
01461
01462 qtbpj = 0.;
01463 for (k = j; k < n; k++) {
01464
01465
01466
01467
01468 if (sdiag[k] == 0.)
01469 continue;
01470 kk = k + ldr * k;
01471 if (fabs(r[kk]) < fabs(sdiag[k])) {
01472 _cot = r[kk] / sdiag[k];
01473 _sin = 1 / sqrt(1 + SQR(_cot));
01474 _cos = _sin * _cot;
01475 } else {
01476 _tan = sdiag[k] / r[kk];
01477 _cos = 1 / sqrt(1 + SQR(_tan));
01478 _sin = _cos * _tan;
01479 }
01480
01481
01482
01483
01484 r[kk] = _cos * r[kk] + _sin * sdiag[k];
01485 temp = _cos * wa[k] + _sin * qtbpj;
01486 qtbpj = -_sin * wa[k] + _cos * qtbpj;
01487 wa[k] = temp;
01488
01489
01490
01491 for (i = k + 1; i < n; i++) {
01492 temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
01493 sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
01494 r[k * ldr + i] = temp;
01495 }
01496 }
01497
01498 L90:
01499
01500
01501
01502 sdiag[j] = r[j * ldr + j];
01503 r[j * ldr + j] = x[j];
01504 }
01505
01506
01507
01508
01509 nsing = n;
01510 for (j = 0; j < n; j++) {
01511 if (sdiag[j] == 0. && nsing == n)
01512 nsing = j;
01513 if (nsing < n)
01514 wa[j] = 0;
01515 }
01516
01517 for (j = nsing - 1; j >= 0; j--) {
01518 sum = 0;
01519 for (i = j + 1; i < nsing; i++)
01520 sum += r[j * ldr + i] * wa[i];
01521 wa[j] = (wa[j] - sum) / sdiag[j];
01522 }
01523
01524
01525
01526 for (j = 0; j < n; j++)
01527 x[ipvt[j]] = wa[j];
01528
01529 }
01530
01547 static double
01548 lm_enorm(int n, double *x) {
01549 int i;
01550 double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
01551
01552 s1 = 0;
01553 s2 = 0;
01554 s3 = 0;
01555 x1max = 0;
01556 x3max = 0;
01557 agiant = LM_SQRT_GIANT / ((double) n);
01558
01559
01560 for (i = 0; i < n; i++) {
01561 xabs = fabs(x[i]);
01562 if (xabs > LM_SQRT_DWARF && xabs < agiant) {
01563
01564 s2 += xabs * xabs;
01565 continue;
01566 }
01567
01568 if (xabs > LM_SQRT_DWARF) {
01569
01570 if (xabs > x1max) {
01571 temp = x1max / xabs;
01572 s1 = 1 + s1 * SQR(temp);
01573 x1max = xabs;
01574 } else {
01575 temp = xabs / x1max;
01576 s1 += SQR(temp);
01577 }
01578 continue;
01579 }
01580
01581 if (xabs > x3max) {
01582 temp = x3max / xabs;
01583 s3 = 1 + s3 * SQR(temp);
01584 x3max = xabs;
01585 } else {
01586 if (xabs != 0.) {
01587 temp = xabs / x3max;
01588 s3 += SQR(temp);
01589 }
01590 }
01591 }
01592
01593
01594
01595 if (s1 != 0)
01596 return x1max * sqrt(s1 + (s2 / x1max) / x1max);
01597 if (s2 != 0) {
01598 if (s2 >= x3max)
01599 return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
01600 else
01601 return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
01602 }
01603
01604 return x3max * sqrt(s3);
01605
01606 }
01607