GRAVI Pipeline Reference Manual 1.9.0
Loading...
Searching...
No Matches
gravi_pca.c
Go to the documentation of this file.
1/*
2 * This file is part of the GRAVI Pipeline
3 * Copyright (C) 2002,2003 European Southern Observatory
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
30#include <gsl/gsl_linalg.h>
31#include <gsl/gsl_matrix.h>
32#include <gsl/gsl_bspline.h>
33#include <gsl/gsl_sort_vector.h>
34#include <gsl/gsl_multifit.h>
35#include <gsl/gsl_multimin.h>
36#include <gsl/gsl_statistics.h>
37
38#include "gravi_pca.h"
39#include "gravi_utils.h"
40
41/*
42 * this function copied from GSL 2.7
43 * if pipeline dependency is updated, it can be removed from here
44 */
45static double _gsl_vector_sum(const gsl_vector *a)
46{
47 const size_t N = a->size;
48 const size_t stride = a->stride;
49 double sum = 0.0;
50 size_t i;
51
52 for (i = 0; i < N; i++)
53 sum += a->data[i * stride];
54 return sum;
55}
56
64{
65 int n_obs, n_wave; // matrix dimensions
66 int n_valid; // number of nonzero singular values = min(n_obs, n_wave)
67 gsl_matrix *data; // original data matrix
68 gsl_matrix_int *mask; // mask for invalid data values
69 gsl_vector *mean; // columnwise mean of original data matrix
70
71 gsl_matrix *Vt; // PCA components
72 gsl_vector *S; // singular values
73 gsl_vector *signs; // sign convention for PCA components, size (n_comp,)
74
75 int n_comp; // number of components fits evaluated for
76 gsl_matrix *data_fit; // matrix of fit to data, size (n_obs, n_wave)
77};
78
87{
88 int n_comp; // number of components fits evaluated for
89 int n_wave; // length of wavelength axis
90 gsl_matrix *median_components; // median-average of components from all results, size (n_comp, n_wave)
91 gsl_matrix *component_fits; // matrix of fits to each PCA component, size (n_comp, n_wave)
92};
93
100 if (!self)
101 return;
102
103 FREE(gsl_matrix_free, self->data);
104 FREE(gsl_matrix_int_free, self->mask);
105 FREE(gsl_vector_free, self->mean);
106 FREE(gsl_matrix_free, self->Vt);
107 FREE(gsl_vector_free, self->S);
108 FREE(gsl_vector_free, self->signs);
109 FREE(gsl_matrix_free, self->data_fit);
110 cpl_free(self);
111}
112
119 if (!self)
120 return;
121
122 FREE(gsl_matrix_free, self->median_components);
123 FREE(gsl_matrix_free, self->component_fits);
124 cpl_free(self);
125}
126
138static int svd_wrapper(gsl_matrix **A, gsl_matrix **V, gsl_vector **S)
139{
140 int m = (*A)->size1, n = (*A)->size2, ret;
141 gsl_vector *w;
142
143 if (m >= n) {
144 *V = gsl_matrix_alloc(n, n);
145 *S = gsl_vector_alloc(n);
146 w = gsl_vector_alloc(n);
147 ret = gsl_linalg_SV_decomp(*A /* becomes U */, *V, *S, w);
148 } else {
149 /* Transpose A */
150 gsl_matrix *At = gsl_matrix_alloc(n, m);
151 gsl_matrix_transpose_memcpy(At, *A);
152
153 *V = gsl_matrix_alloc(m, m);
154 *S = gsl_vector_alloc(m);
155 w = gsl_vector_alloc(m);
156 ret = gsl_linalg_SV_decomp(At /* becomes U */, *V, *S, w);
157
158 /* Rearrange the result matrices: V becomes At, At becomes V */
159 gsl_matrix_free(*A);
160 *A = *V; *V = At;
161 }
162
163 gsl_vector_free(w);
164 return ret;
165}
166
175static void svd_flip(gsl_matrix *U, gsl_matrix *V)
176{
177 /* sign of max value in each column of U */
178 gsl_vector *signs = gsl_vector_alloc(U->size2);
179
180 for (size_t j = 0; j < U->size2; j++) {
181 gsl_vector_const_view Ucol = gsl_matrix_const_column(U, j);
182 double max_abs_val = -INFINITY;
183 size_t max_abs_ind = -1;
184 for(size_t j = 0; j < U->size2; j++) {
185 double abs_val = fabs(gsl_vector_get(&Ucol.vector, j));
186 if (abs_val > max_abs_val) max_abs_ind = j;
187 }
188
189 double max_val = gsl_vector_get(&Ucol.vector, max_abs_ind);
190 gsl_vector_set(signs, j, max_val > 0 ? 1 : -1);
191 }
192
193 /* adjust U */
194 for (size_t i = 0; i < U->size1; i++) {
195 gsl_vector_view Urow = gsl_matrix_row(U, i);
196 gsl_vector_mul(&Urow.vector, signs);
197 }
198
199 /* adjust V */
200 for (size_t i = 0; i < V->size1; i++) {
201 gsl_vector_view Vrow = gsl_matrix_row(V, i);
202 gsl_vector_mul(&Vrow.vector, signs);
203 }
204
205 gsl_vector_free(signs);
206}
207
219gravi_pca_result *gravi_pca_create_result(const cpl_matrix *data, const cpl_matrix *mask)
220{
221 cpl_ensure(data != NULL, CPL_ERROR_NULL_INPUT, NULL);
222
223 cpl_size n_obs = cpl_matrix_get_nrow(data);
224 cpl_size n_wave = cpl_matrix_get_ncol(data);
225 cpl_size nvalid = CPL_MIN(n_obs, n_wave);
226
227 /* Copy from cpl_matrix to gsl_matrix */
228 gsl_matrix_const_view data_gsl_view = gsl_matrix_const_view_array(
229 cpl_matrix_get_data_const(data), n_obs, n_wave);
230
231 gsl_matrix_int *use_mask = gsl_matrix_int_alloc(n_obs, n_wave);
232 if (mask) {
233 for(cpl_size i = 0; i < n_obs; i++) {
234 for (cpl_size j = 0; j < n_wave; j++)
235 gsl_matrix_int_set(use_mask, i, j,
236 cpl_matrix_get(mask, i, j) > 0 ? 0 : 1);
237 }
238 } else {
239 gsl_matrix_int_set_all(use_mask, 1);
240 }
241
242 /* Calculate mean over columns */
243 gsl_vector* column_mean = gsl_vector_alloc(n_wave);
244 for(int j = 0; j < n_wave; j++) {
245 gsl_vector_const_view col = gsl_matrix_const_column(&data_gsl_view.matrix, j);
246 double mu = (1.0 / n_obs) * _gsl_vector_sum(&col.vector);
247 gsl_vector_set(column_mean, j, mu);
248 }
249
250 /* Centre data by subtracting mean */
251 gsl_matrix* mean_subtracted_data = gsl_matrix_alloc(n_obs, n_wave);
252 gsl_matrix_memcpy(mean_subtracted_data, &data_gsl_view.matrix);
253 for(cpl_size i = 0; i < n_obs; i++) {
254 gsl_vector_view mean_subtracted_row_view = gsl_matrix_row(mean_subtracted_data, i);
255 gsl_vector_sub(&mean_subtracted_row_view.vector, column_mean);
256 }
257
258 gravi_pca_result *result = cpl_malloc(sizeof(gravi_pca_result));
259 result->n_obs = n_obs;
260 result->n_wave = n_wave;
261 result->n_valid = nvalid;
262 result->data = mean_subtracted_data;
263 result->mask = use_mask;
264 result->mean = column_mean;
265
266 /* Initialise PCA data to known values */
267 result->S = NULL;
268 result->Vt = NULL;
269
270 /* Initialise fitting data to known values */
271 result->n_comp = -1;
272 result->signs = NULL;
273 result->data_fit = NULL;
274
275 return result;
276}
277
284{
285 cpl_ensure_code(self, CPL_ERROR_NULL_INPUT);
286
287 unsigned int n_obs = self->n_obs;
288 unsigned int n_wave = self->n_wave;
289 unsigned int n_valid = self->n_valid;
290 int svd_ret;
291
292 /* Perform SVD */
293 gsl_vector *S;
294 gsl_matrix *V;
295 gsl_matrix *U = gsl_matrix_alloc(n_obs, n_wave);
296 gsl_matrix_memcpy(U, self->data);
297 svd_ret = svd_wrapper(&U, &V, &S);
298
299 if (svd_ret)
300 return CPL_ERROR_ILLEGAL_INPUT;
301
302 svd_flip(U, V);
303 gsl_matrix_free(U);
304
305 /* First nvalid singular values */
306 self->S = S;
307
308 /* explained variance of each component */
309 // const int max_components = 10;
310 // gsl_vector *Ssum = gsl_vector_alloc(max_components);
311 // gsl_vector_set(Ssum, 0, gsl_vector_get(S, 0));
312 // for (int i = 1; i < max_components; i++) {
313 // double accum = gsl_vector_get(Ssum, i - 1) + gsl_vector_get(S, i);
314 // gsl_vector_set(Ssum, i, accum);
315 // }
316 // gsl_vector_scale(Ssum, 1.0 / gsl_vector_get(Ssum, max_components - 1));
317 // printf("Explained Variance:\n");
318 // gsl_vector_fprintf(stdout, Ssum, "%f");
319 // gsl_vector_free(Ssum);
320
321 /* First nvalid rows of Vt */
322 self->Vt = gsl_matrix_alloc(n_valid, n_wave);
323 gsl_matrix_transpose_memcpy(self->Vt, V);
324 gsl_matrix_free(V);
325
326 return CPL_ERROR_NONE;
327}
328
337static double gravi_pca_get_component_sign(const gsl_vector *visphi, const gsl_vector *wave)
338{
339 /* wavelength ranges where components should be positive (2 ranges, upper/lower limits) */
340 const double ranges[2][2] = {{2.02, 2.04}, {2.34, 2.36}};
341 int range_indices[2][2];
342
343 /* Find start and end indices in the wavelength array for each range value */
344 int nwave = wave->size, irange = 0;
345 for (int w = 0; w < nwave; w++) {
346 if (gsl_vector_get(wave, w) > ranges[irange / 2][irange % 2]) {
347 range_indices[irange / 2][irange % 2] = w;
348 ++irange;
349 }
350 if (irange == 4)
351 break;
352 }
353
354 int range_length[2] = {
355 range_indices[0][1] - range_indices[0][0],
356 range_indices[1][1] - range_indices[1][0]
357 };
358 int total_length = range_length[0] + range_length[1];
359
360 /* Extract visphi segments for each range */
361 int offset = 0;
362 gsl_vector *visphi_range = gsl_vector_alloc(total_length);
363 for (int r = 0; r < 2; r++) {
364 gsl_vector_const_view range = gsl_vector_const_subvector(
365 visphi, range_indices[r][0], range_length[r]);
366 gsl_vector_view range_dest = gsl_vector_subvector(
367 visphi_range, offset, range_length[r]);
368 gsl_vector_memcpy(&range_dest.vector, &range.vector);
369 offset += range_length[r];
370 }
371
372 /* Check sign */
373 // double result = gsl_stats_median(visphi_range->data, 1, visphi_range->size) < 0 ? -1.0 : 1.0;
374 gsl_sort_vector(visphi_range);
375 double result = gsl_stats_median_from_sorted_data(
376 visphi_range->data, 1, visphi_range->size
377 ) < 0 ? -1.0 : 1.0;
378
379 gsl_vector_free(visphi_range);
380 return result;
381}
382
393cpl_error_code gravi_pca_set_component_signs(gravi_pca_result *self, const cpl_array *wave, const int ncomp)
394{
395 cpl_ensure_code(wave, CPL_ERROR_NULL_INPUT);
396 cpl_ensure_code(ncomp > 0 && ncomp <= self->n_valid, CPL_ERROR_ILLEGAL_INPUT);
397
398 const int n_wave = self->n_wave;
399 gsl_vector_const_view wave_vec = gsl_vector_const_view_array(
400 cpl_array_get_data_double_const(wave), n_wave);
401 gsl_vector *signs = gsl_vector_alloc(ncomp);
402
403 for (int c = 0; c < ncomp; c++) {
404 gsl_vector_const_view components = gsl_matrix_const_row(self->Vt, c);
405 double sign = gravi_pca_get_component_sign(&components.vector, &wave_vec.vector);
406 gsl_vector_set(signs, c, sign);
407 }
408
409 self->signs = signs;
410 return CPL_ERROR_NONE;
411}
412
423gravi_pca_model *gravi_pca_create_model(const gravi_pca_result **results, int nres, int ncomp)
424{
425 cpl_ensure(results, CPL_ERROR_NULL_INPUT, NULL);
426 cpl_ensure(ncomp > 0 && ncomp <= results[0]->n_valid, CPL_ERROR_ILLEGAL_INPUT, NULL);
427 for (int r = 0; r < nres; r++)
428 cpl_ensure(results[r]->signs, CPL_ERROR_ILLEGAL_INPUT, NULL);
429
430 const int n_wave = results[0]->n_wave;
431
432 gsl_matrix *all_comp_vals = gsl_matrix_alloc(nres, n_wave);
433 gsl_matrix *median_comp_vals = gsl_matrix_alloc(ncomp, n_wave);
434 gsl_matrix *flip_vals = gsl_matrix_alloc(nres, n_wave);
435 gsl_vector *flip_vec = gsl_vector_alloc(n_wave);
436
437 for (int c = 0; c < ncomp; c++) {
438 /* build temp matrix with all values for c'th component */
439 for (int r = 0; r < nres; r++) {
440 gsl_vector_const_view rth_components = gsl_matrix_const_row(results[r]->Vt, c);
441 gsl_matrix_set_row(all_comp_vals, r, &rth_components.vector);
442
443 /* determine flip vals */
444 double sign = gsl_vector_get(results[r]->signs, c);
445 gsl_vector_set_all(flip_vec, sign);
446 gsl_matrix_set_row(flip_vals, r, flip_vec);
447 }
448
449 /* multiply by flip values */
450 gsl_matrix_mul_elements(all_comp_vals, flip_vals);
451
452 /* take median average down columns */
453 for (int j = 0; j < n_wave; j++) {
454 gsl_vector_view column_comp_vals = gsl_matrix_column(all_comp_vals, j);
455 gsl_sort_vector(&column_comp_vals.vector);
456 double median_val = gsl_stats_median_from_sorted_data(
457 column_comp_vals.vector.data, n_wave, nres
458 );
459 gsl_matrix_set(median_comp_vals, c, j, median_val);
460 }
461 }
462 gsl_matrix_free(all_comp_vals);
463 gsl_matrix_free(flip_vals);
464 gsl_vector_free(flip_vec);
465
466 gravi_pca_model *model = cpl_malloc(sizeof(gravi_pca_model));
467 model->n_wave = n_wave;
468 model->n_comp = ncomp;
469 model->median_components = median_comp_vals;
470 model->component_fits = NULL;
471 return model;
472}
473
483gravi_pca_model *gravi_pca_load_model(const cpl_matrix *components)
484{
485 int n_comp = cpl_matrix_get_nrow(components);
486 int n_wave = cpl_matrix_get_ncol(components);
487
488 /* Copy from cpl_matrix to gsl_matrix */
489 gsl_matrix_const_view comps_gsl_view = gsl_matrix_const_view_array(
490 cpl_matrix_get_data_const(components), n_comp, n_wave);
491 gsl_matrix *my_comps = gsl_matrix_alloc(n_comp, n_wave);
492 gsl_matrix_memcpy(my_comps, &comps_gsl_view.matrix);
493
494 gravi_pca_model *result = cpl_malloc(sizeof(gravi_pca_model));
495 result->n_comp = n_comp;
496 result->n_wave = n_wave;
497 result->median_components = NULL;
498 result->component_fits = my_comps;
499
500 return result;
501}
502
511cpl_error_code gravi_pca_fit_components_bspline(gravi_pca_model *self, const cpl_array *wave, int degree, int ncomp)
512{
513 cpl_ensure_code(self, CPL_ERROR_NULL_INPUT);
514 cpl_ensure_code(self->median_components, CPL_ERROR_ILLEGAL_INPUT);
515
516 cpl_ensure_code(wave, CPL_ERROR_NULL_INPUT);
517 cpl_ensure_code(degree > 0, CPL_ERROR_ILLEGAL_INPUT);
518 cpl_ensure_code(ncomp > 0 && ncomp <= self->n_comp, CPL_ERROR_ILLEGAL_INPUT);
519
520 const int n_coeffs = degree;
521 const int k = 4; // k=4 => cubic spline
522 const int nbreak = n_coeffs - k + 2;
523 const int n_wave = self->n_wave;
524
525 gsl_bspline_workspace *bw = gsl_bspline_alloc(k, nbreak);
526 const gsl_multifit_robust_type *T = gsl_multifit_robust_default;
527 gsl_multifit_robust_workspace *mw = gsl_multifit_robust_alloc(T, n_wave, n_coeffs);
528 gsl_multifit_robust_maxiter(1000, mw);
529
530 gsl_vector *B = gsl_vector_alloc(n_coeffs);
531 gsl_vector *coeffs = gsl_vector_alloc(n_coeffs);
532 gsl_matrix *cov = gsl_matrix_alloc(n_coeffs, n_coeffs);
533 double wvi, Bj, fit_val, fit_err;
534
535 gsl_bspline_knots_uniform(cpl_array_get(wave, 0, NULL), cpl_array_get(wave, n_wave-1, NULL), bw);
536
537 gsl_matrix *result = gsl_matrix_alloc(ncomp, n_wave);
538
539 /* Loop over components */
540 for (int c = 0; c < ncomp; c++) {
541 gsl_vector_const_view cv = gsl_matrix_const_row(self->median_components, c);
542
543 /* construct fit matrix X */
544 gsl_matrix *X = gsl_matrix_alloc(n_wave, n_coeffs);
545 for (int i = 0; i < n_wave; i++) {
546 wvi = cpl_array_get(wave, i, NULL);
547 gsl_bspline_eval(wvi, B, bw);
548
549 for (int j = 0; j < n_coeffs; j++) {
550 Bj = gsl_vector_get(B, j);
551 gsl_matrix_set(X, i, j, Bj);
552 }
553 }
554
555 /* do fit */
556 gsl_multifit_robust(X, &cv.vector, coeffs, cov, mw);
557
558 /* store fit results */
559 for (int j = 0; j < n_wave; j++) {
560 wvi = cpl_array_get(wave, j, NULL);
561 gsl_bspline_eval(wvi, B, bw);
562 gsl_multifit_robust_est(B, coeffs, cov, &fit_val, &fit_err);
563 gsl_matrix_set(result, c, j, fit_val);
564 }
565 gsl_matrix_free(X);
566 }
567
568 self->n_comp = ncomp;
569 self->component_fits = result;
570
571 gsl_vector_free(B);
572 gsl_vector_free(coeffs);
573 gsl_matrix_free(cov);
574 gsl_bspline_free(bw);
575 gsl_multifit_robust_free(mw);
576 return CPL_ERROR_NONE;
577}
578
587cpl_error_code gravi_pca_fit_components_polynomial(gravi_pca_model *self, const cpl_array *wave, int degree, int ncomp)
588{
589 cpl_ensure_code(self, CPL_ERROR_NULL_INPUT);
590 cpl_ensure_code(self->median_components, CPL_ERROR_ILLEGAL_INPUT);
591
592 cpl_ensure_code(wave, CPL_ERROR_NULL_INPUT);
593 cpl_ensure_code(degree > 0, CPL_ERROR_ILLEGAL_INPUT);
594 cpl_ensure_code(ncomp > 0 && ncomp <= self->n_comp, CPL_ERROR_ILLEGAL_INPUT);
595
596 const int n_wave = self->n_wave;
597 const int n_coeffs = degree;
598
599 const gsl_multifit_robust_type *T = gsl_multifit_robust_default;
600 gsl_multifit_robust_workspace *mw = gsl_multifit_robust_alloc(T, n_wave, n_coeffs);
601 gsl_multifit_robust_maxiter(1000, mw);
602
603 gsl_vector *coeffs = gsl_vector_alloc(n_coeffs);
604 gsl_matrix *cov = gsl_matrix_alloc(n_coeffs, n_coeffs);
605 double wvi, Xij, fit_val, fit_err;
606
607 gsl_matrix *result = gsl_matrix_alloc(ncomp, n_wave);
608
609 /* Loop over components */
610 for (int c = 0; c < ncomp; c++) {
611 gsl_vector_const_view cv = gsl_matrix_const_row(self->median_components, c);
612
613 /* construct fit matrix X */
614 gsl_matrix *X = gsl_matrix_alloc(n_wave, n_coeffs);
615 for (int i = 0; i < n_wave; i++) {
616 wvi = cpl_array_get(wave, i, NULL);
617
618 for (int j = 0; j < n_coeffs; j++) {
619 Xij = pow(wvi, j);
620 gsl_matrix_set(X, i, j, Xij);
621 }
622 }
623
624 /* do fit */
625 gsl_multifit_robust(X, &cv.vector, coeffs, cov, mw);
626
627 /* store fit results */
628 for (int j = 0; j < n_wave; j++) {
629 gsl_vector_const_view Xi = gsl_matrix_const_row(X, j);
630 gsl_multifit_robust_est(&Xi.vector, coeffs, cov, &fit_val, &fit_err);
631 gsl_matrix_set(result, c, j, fit_val);
632 }
633 gsl_matrix_free(X);
634 }
635
636 self->n_comp = ncomp;
637 self->component_fits = result;
638
639 gsl_vector_free(coeffs);
640 gsl_matrix_free(cov);
641 gsl_multifit_robust_free(mw);
642 return CPL_ERROR_NONE;
643}
644
652cpl_error_code gravi_pca_refine_mean(gravi_pca_result *self, const cpl_matrix *residual)
653{
654 cpl_ensure_code(self, CPL_ERROR_NULL_INPUT);
655 cpl_ensure_code(residual, CPL_ERROR_NULL_INPUT);
656
657 int n_obs = self->n_obs;
658 int n_wave = self->n_wave;
659
660 /* Copy from cpl_matrix to gsl_matrix */
661 gsl_matrix_const_view resid_gsl_view = gsl_matrix_const_view_array(
662 cpl_matrix_get_data_const(residual), n_obs, n_wave);
663
664 /* Calculate new mean over columns */
665 gsl_vector* refined_mean = gsl_vector_alloc(n_wave);
666 for(int j = 0; j < n_wave; j++) {
667 gsl_vector_const_view col = gsl_matrix_const_column(&resid_gsl_view.matrix, j);
668 double mu = (1.0 / n_obs) * _gsl_vector_sum(&col.vector);
669 gsl_vector_set(refined_mean, j, mu);
670 }
671
672 /* Re-centre the data using the new mean */
673 for (int i = 0; i < n_obs; i++) {
674 gsl_vector_view row = gsl_matrix_row(self->data, i);
675 gsl_vector_add(&row.vector, self->mean);
676 gsl_vector_sub(&row.vector, refined_mean);
677 }
678
679 /* Store the new mean */
680 gsl_vector_swap(self->mean, refined_mean);
681 gsl_vector_free(refined_mean);
682
683 return CPL_ERROR_NONE;
684}
685
694cpl_vector *gravi_pca_get_component_median(const gravi_pca_model *self, int component)
695{
696 cpl_ensure(self, CPL_ERROR_NULL_INPUT, NULL);
697 cpl_ensure(self->median_components, CPL_ERROR_ILLEGAL_INPUT, NULL);
698 cpl_ensure(component > 0 && component <= self->n_comp, CPL_ERROR_ILLEGAL_INPUT, NULL);
699
700 cpl_vector *result = cpl_vector_new(self->n_wave);
701 gsl_vector_const_view row = gsl_matrix_const_row(self->median_components, component-1);
702 for(int i = 0; i < self->n_wave; i++)
703 cpl_vector_set(result, i, gsl_vector_get(&row.vector, i));
704 return result;
705}
706
716cpl_vector *gravi_pca_get_component_fit(const gravi_pca_model *self, int component)
717{
718 cpl_ensure(self, CPL_ERROR_NULL_INPUT, NULL);
719 cpl_ensure(self->component_fits, CPL_ERROR_ILLEGAL_INPUT, NULL);
720 cpl_ensure(component > 0 && component <= self->n_comp, CPL_ERROR_ILLEGAL_INPUT, NULL);
721
722 cpl_vector *result = cpl_vector_new(self->n_wave);
723 gsl_vector_const_view row = gsl_matrix_const_row(self->component_fits, component-1);
724 for(int i = 0; i < self->n_wave; i++)
725 cpl_vector_set(result, i, gsl_vector_get(&row.vector, i));
726 return result;
727}
728
733 int n_data; // number of data values
734 int n_components; // number of PCA components in fit
735 const gsl_matrix *components; // component fit values
736 gsl_vector_const_view data; // data vector
737 gsl_vector_int_const_view mask; // mask vector
739
745static double gravi_pca_model_chi2(const gsl_vector *x, void *params)
746{
747 gravi_pca_model_params *p = params;
748 gsl_vector *model = gsl_vector_calloc(p->n_data);
749 gsl_vector *scratch = gsl_vector_alloc(p->n_data);
750 gsl_vector *chi2 = gsl_vector_alloc(p->n_data);
751
752 /* model = C1 * c1vals + C2 * c2vals + ... */
753 for (int i = 0; i < p->n_components; i++) {
754 gsl_vector_const_view cv = gsl_matrix_const_row(p->components, i);
755 gsl_vector_memcpy(scratch, &cv.vector);
756 gsl_vector_scale(scratch, gsl_vector_get(x, i));
757 gsl_vector_add(model, scratch);
758 }
759
760 /* form chi2 */
761 gsl_vector_memcpy(chi2, &(p->data.vector));
762 gsl_vector_sub(chi2, model);
763 gsl_vector_mul(chi2, chi2);
764
765 int n_valid = 0;
766 double chi2_sum = 0.0;
767 for (int i = 0; i < p->n_data; i++) {
768 if(gsl_vector_int_get(&(p->mask.vector), i) > 0) {
769 chi2_sum += gsl_vector_get(chi2, i);
770 n_valid++;
771 }
772 }
773
774 /* Check for no valid data */
775 if (n_valid <= p->n_components)
776 return -1;
777
778 chi2_sum /= (n_valid - p->n_components);
779
780 gsl_vector_free(chi2);
781 gsl_vector_free(scratch);
782 gsl_vector_free(model);
783 return chi2_sum;
784}
785
792cpl_error_code gravi_pca_fit_model(gravi_pca_result *self, const gravi_pca_model *model, cpl_boolean fit_mean_subtracted, cpl_boolean verbose)
793{
794 cpl_ensure_code(self, CPL_ERROR_NULL_INPUT);
795 if (fit_mean_subtracted)
796 cpl_ensure_code(self->mean, CPL_ERROR_ILLEGAL_INPUT);
797
798 cpl_ensure_code(model, CPL_ERROR_NULL_INPUT);
799 cpl_ensure_code(model->component_fits, CPL_ERROR_ILLEGAL_INPUT);
800
801 const int MAX_ITERATIONS = 1000;
802 const int n_obs = self->n_obs;
803 const int n_wave = model->n_wave;
804 const int n_comp = model->n_comp;
805
806 const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2rand;
807 gsl_multimin_fminimizer *mini = gsl_multimin_fminimizer_alloc(T, n_comp);
808 int status;
809
810 gsl_vector *coeffs = gsl_vector_alloc(n_comp);
811 gsl_vector *step_size = gsl_vector_alloc(n_comp);
812
813 /* Add mean back on to mean-subtracted data */
814 gsl_matrix *data = gsl_matrix_calloc(n_obs, n_wave);
815 if (!fit_mean_subtracted) {
816 for (int i = 0; i < n_obs; i++)
817 gsl_matrix_set_row(data, i, self->mean);
818 }
819 gsl_matrix_add(data, self->data);
820
821 self->data_fit = gsl_matrix_calloc(n_obs, n_wave);
822 self->n_comp = n_comp;
823
824 /* Loop over rows in the original data */
825 for (int i = 0; i < n_obs; i++) {
826 gsl_vector_set_all(coeffs, 1.0);
827 gsl_vector_set_all(step_size, 0.1);
828
829 /* Prepare container of function parameters */
830 gravi_pca_model_params params = {
831 .n_data = n_wave, .n_components = n_comp,
832 .components = model->component_fits,
833 .data = gsl_matrix_const_row(data, i),
834 .mask = gsl_matrix_int_const_row(self->mask, i)
835 };
836
837 /* Prepare minimization function object */
838 gsl_multimin_function func = {
840 .n = n_comp,
841 .params = &params
842 };
843
844 gsl_multimin_fminimizer_set(mini, &func, coeffs, step_size);
845
846 int iterations = 0;
847 double size;
848 if (verbose)
849 printf("iterations c_0 c_1 size chi2\n");
850 do {
851 iterations++;
852 status = gsl_multimin_fminimizer_iterate(mini);
853
854 /* Error taking optimisation step */
855 if (status)
856 break;
857
858 /* All data was invalid */
859 if (mini->fval < 0) {
860 cpl_error_set_message(cpl_func, CPL_ERROR_ILLEGAL_INPUT, "No valid visphi data for fitting\n");
861 status = GSL_EINVAL;
862 break;
863 }
864
865 size = gsl_multimin_fminimizer_size(mini);
866 status = gsl_multimin_test_size(size, 1e-3);
867
868 if (verbose)
869 printf("%d %f %f %f %f\n", iterations, mini->x->data[0], mini->x->data[1], size, mini->fval);
870 } while (status == GSL_CONTINUE && iterations < MAX_ITERATIONS);
871
872 /* Store best-fit model */
873 if (status == GSL_CONTINUE) {
874 cpl_msg_warning(cpl_func, "model-fitting did not converge");
875 } else if (status != GSL_SUCCESS) {
876 cpl_msg_error(cpl_func, "model-fitting failed with error code %d (%s)", status, gsl_strerror(status));
877 break;
878 }
879 gsl_vector_view fit_vals = gsl_matrix_row(self->data_fit, i);
880 for (int c = 0; c < n_comp; c++) {
881 gsl_vector *term = gsl_vector_alloc_row_from_matrix(model->component_fits, c);
882 gsl_vector_scale(term, gsl_vector_get(mini->x, c));
883 gsl_vector_add(&fit_vals.vector, term);
884 gsl_vector_free(term);
885 }
886
887 if (fit_mean_subtracted) {
888 gsl_vector_add(&fit_vals.vector, self->mean);
889 }
890 }
891
892 gsl_matrix_free(data);
893 gsl_vector_free(coeffs);
894 gsl_vector_free(step_size);
895 gsl_multimin_fminimizer_free(mini);
896 return status ? CPL_ERROR_ILLEGAL_OUTPUT : CPL_ERROR_NONE;
897}
898
907cpl_vector *gravi_pca_get_component(const gravi_pca_result *self, int component)
908{
909 cpl_ensure(self, CPL_ERROR_NULL_INPUT, NULL);
910 cpl_ensure(component >= 0 && component <= self->n_valid, CPL_ERROR_ILLEGAL_INPUT, NULL);
911
912 cpl_vector *result = cpl_vector_new(self->n_wave);
913 if (component == 0) {
914 /* Return the mean */
915 for(int i = 0; i < self->n_wave; i++)
916 cpl_vector_set(result, i, gsl_vector_get(self->mean, i));
917 } else {
918 /* Return the n'th component */
919 gsl_vector_const_view row = gsl_matrix_const_row(self->Vt, component-1);
920 for(int i = 0; i < self->n_wave; i++) {
921 cpl_vector_set(result, i, gsl_vector_get(&row.vector, i));
922 }
923 cpl_vector_multiply_scalar(result, gsl_vector_get(self->signs, component-1));
924 }
925 return result;
926}
927
936{
937 cpl_ensure(self, CPL_ERROR_NULL_INPUT, NULL);
938 cpl_ensure(self->data_fit, CPL_ERROR_NULL_INPUT, NULL);
939
940 cpl_matrix *result = cpl_matrix_new(self->n_obs, self->n_wave);
941 for(int i = 0; i < self->n_obs; i++) {
942 for(int j = 0; j < self->n_wave; j++) {
943 cpl_matrix_set(result, i, j, gsl_matrix_get(self->data_fit, i, j));
944 }
945 }
946 return result;
947}
948
957{
958 cpl_ensure(self, CPL_ERROR_NULL_INPUT, NULL);
959 cpl_ensure(self->data, CPL_ERROR_NULL_INPUT, NULL);
960 cpl_ensure(self->mean, CPL_ERROR_NULL_INPUT, NULL);
961 cpl_ensure(self->data_fit, CPL_ERROR_NULL_INPUT, NULL);
962
963 /* Add mean back onto centred data, then subtract model */
964 gsl_matrix *resid = gsl_matrix_alloc(self->n_obs, self->n_wave);
965 gsl_matrix_memcpy(resid, self->data);
966
967 for(int i = 0; i < self->n_obs; i++) {
968 gsl_vector_view row_view = gsl_matrix_row(resid, i);
969 gsl_vector_add(&row_view.vector, self->mean);
970 }
971
972 gsl_matrix_sub(resid, self->data_fit);
973
974 cpl_matrix *result = cpl_matrix_new(self->n_obs, self->n_wave);
975 for(int i = 0; i < self->n_obs; i++) {
976 for(int j = 0; j < self->n_wave; j++) {
977 cpl_matrix_set(result, i, j, gsl_matrix_get(resid, i, j));
978 }
979 }
980 return result;
981}
cpl_error_code gravi_pca_fit_components_polynomial(gravi_pca_model *self, const cpl_array *wave, int degree, int ncomp)
Fit polynomial model to each of a set of median-averaged PCA components.
Definition: gravi_pca.c:587
cpl_error_code gravi_pca_refine_mean(gravi_pca_result *self, const cpl_matrix *residual)
Override the mean component calculated from the PCA decomposition. In conjunction with the fit_mean_s...
Definition: gravi_pca.c:652
cpl_matrix * gravi_pca_get_data_residual(const gravi_pca_result *self)
Get residual (data - model).
Definition: gravi_pca.c:956
static int svd_wrapper(gsl_matrix **A, gsl_matrix **V, gsl_vector **S)
Handle computing SVD for MxN matrix, M<N.
Definition: gravi_pca.c:138
cpl_error_code gravi_pca_set_component_signs(gravi_pca_result *self, const cpl_array *wave, const int ncomp)
Impose sign convention on PCA components.
Definition: gravi_pca.c:393
gravi_pca_model * gravi_pca_create_model(const gravi_pca_result **results, int nres, int ncomp)
Compute median values of PCA components over a set of decomposition results.
Definition: gravi_pca.c:423
static double gravi_pca_model_chi2(const gsl_vector *x, void *params)
Evaluate chi^2 statistic for fit.
Definition: gravi_pca.c:745
void gravi_pca_result_delete(gravi_pca_result *self)
Deallocate a gravi_pca_result object.
Definition: gravi_pca.c:99
gravi_pca_result * gravi_pca_create_result(const cpl_matrix *data, const cpl_matrix *mask)
Construct a new gravi_pca_result object from a matrix of visphi data.
Definition: gravi_pca.c:219
cpl_vector * gravi_pca_get_component(const gravi_pca_result *self, int component)
Get components from PCA decomposition.
Definition: gravi_pca.c:907
cpl_matrix * gravi_pca_get_data_fit(const gravi_pca_result *self)
Get noise-free model.
Definition: gravi_pca.c:935
cpl_vector * gravi_pca_get_component_median(const gravi_pca_model *self, int component)
Get median-averaged component from a set of PCA decompositions.
Definition: gravi_pca.c:694
struct _gravi_pca_model_params_ gravi_pca_model_params
Type to hold parameters for fitting PCA components to data.
cpl_error_code gravi_pca_decomp_matrix_svd(gravi_pca_result *self)
Perform PCA decomposition by calculating singular value decomposition.
Definition: gravi_pca.c:283
static double _gsl_vector_sum(const gsl_vector *a)
Definition: gravi_pca.c:45
void gravi_pca_model_delete(gravi_pca_model *self)
Deallocate a gravi_pca_model object.
Definition: gravi_pca.c:118
cpl_error_code gravi_pca_fit_components_bspline(gravi_pca_model *self, const cpl_array *wave, int degree, int ncomp)
Fit B-spline model to each of a set of median-averaged PCA components.
Definition: gravi_pca.c:511
static double gravi_pca_get_component_sign(const gsl_vector *visphi, const gsl_vector *wave)
Calculate sign convention on visphi components.
Definition: gravi_pca.c:337
cpl_vector * gravi_pca_get_component_fit(const gravi_pca_model *self, int component)
Get fit to components from PCA decomposition.
Definition: gravi_pca.c:716
static void svd_flip(gsl_matrix *U, gsl_matrix *V)
Adjust U and V so that loadings for most significant components are positive.
Definition: gravi_pca.c:175
gravi_pca_model * gravi_pca_load_model(const cpl_matrix *components)
Create PCA model from existing set of components.
Definition: gravi_pca.c:483
cpl_error_code gravi_pca_fit_model(gravi_pca_result *self, const gravi_pca_model *model, cpl_boolean fit_mean_subtracted, cpl_boolean verbose)
Fit model formed from linear combination of PCA components to data.
Definition: gravi_pca.c:792
#define FREE(function, variable)
Definition: gravi_utils.h:69
Type to hold average (median) components obtained from a set of PCA decompositions and/or best-fit mo...
Definition: gravi_pca.c:87
gsl_matrix * median_components
Definition: gravi_pca.c:90
gsl_matrix * component_fits
Definition: gravi_pca.c:91
Type to hold parameters for fitting PCA components to data.
Definition: gravi_pca.c:732
gsl_vector_const_view data
Definition: gravi_pca.c:736
gsl_vector_int_const_view mask
Definition: gravi_pca.c:737
const gsl_matrix * components
Definition: gravi_pca.c:735
Type to hold results of a PCA decomposition.
Definition: gravi_pca.c:64
gsl_matrix * data_fit
Definition: gravi_pca.c:76
gsl_matrix_int * mask
Definition: gravi_pca.c:68
gsl_matrix * Vt
Definition: gravi_pca.c:71
gsl_vector * mean
Definition: gravi_pca.c:69
gsl_matrix * data
Definition: gravi_pca.c:67
gsl_vector * signs
Definition: gravi_pca.c:73
gsl_vector * S
Definition: gravi_pca.c:72