/* $Id: cpl_ppm.c,v 1.3 2007/07/16 09:19:52 cizzo Exp $ * * This file is part of the ESO Common Pipeline Library * Copyright (C) 2002-2007 European Southern Observatory * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * $Author: cizzo $ * $Date: 2007/07/16 09:19:52 $ * $Revision: 1.3 $ * $Name: $ */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include /** * @defgroup cpl_ppm Point pattern patching module * * @par Synopsis: * @code * #include "cpl_ppm.h" * @endcode */ /**@{*/ /** * @brief * Match 1-D patterns. * * @param peaks List of observed positions (e.g., of emission peaks) * @param lines List of positions in searched pattern (e.g., wavelengths) * @param min_disp Min expected scale (e.g., spectral dispersion in A/pixel) * @param max_disp Max expected scale (e.g., spectral dispersion in A/pixel) * @param tolerance Tolerance for interval ratio comparison * @param seq_peaks Returned: index of identified peaks in input @em peaks * @param seq_lines Returned: index of identified lines in input @em lines * * @return List of all matching points positions * * This function attempts to find the reference pattern @em lines in a * list of observed positions @em peaks. In the following documentation * a terminology drawn from the context of arc lamp spectra calibration * is used for simplicity: the reference pattern is then a list of * wavelengths corresponding to a set of reference arc lamp emission * lines - the so-called line catalog; while the observed positions are * the positions (in pixel) on the CCD, measured along the dispersion * direction, of any significant peak of the signal. To identify the * observed peaks means to associate them with the right reference * wavelength. This is attempted here with a point-pattern matching * technique, where the pattern is contained in the vector @em lines, * and is searched in the vector @em peak. * * In order to work, this method just requires a rough expectation * value of the spectral dispersion (in Angstrom/pixel), and a line * catalog. The line catalog @em lines should just include lines that * are expected somewhere in the CCD exposure of the calibration lamp * (note, however, that a catalog including extra lines at its blue * and/or red ends is still allowed). * * Typically, the arc lamp lines candidates @em peak will include * light contaminations, hot pixels, and other unwanted signal, * but only in extreme cases this prevents the pattern-recognition * algorithm from identifying all the spectral lines. The pattern * is detected even in the case @em peak contained more arc lamp * lines than actually listed in the input line catalog. * * This method is based on the assumption that the relation between * wavelengths and CCD positions is with good approximation @em locally * linear (this is always true, for any modern spectrograph). * * The ratio between consecutive intervals pairs in wavelength and in * pixel is invariant to linear transformations, and therefore this * quantity can be used in the recognition of local portions of the * searched pattern. All the examined sub-patterns will overlap, leading * to the final identification of the whole pattern, notwithstanding the * overall non-linearity of the relation between pixels and wavelengths. * * Ambiguous cases, caused by exceptional regularities in the pattern, * or by a number of undetected (but expected) peaks that disrupt the * pattern on the data, are recovered by linear interpolation and * extrapolation of the safely identified peaks. * * More details about the applied algorithm can be found in the comments * to the function code. * * The @em seq_peaks and @em seq_lines are array reporting the positions * of matching peaks and wavelengths in the input @em peaks and @em lines * vectors. This functionality is not yet supported: this arguments * should always be set to NULL or a CPL_ERROR_UNSUPPORTED_MODE would * be set. */ cpl_bivector *cpl_ppm_match_positions(cpl_vector *peaks, cpl_vector *lines, double min_disp, double max_disp, double tolerance, cpl_array **seq_peaks, cpl_array **seq_lines) { int i, j, k, l; int nlint, npint; int minpos; float min; double lratio, pratio; double lo_start, lo_end, hi_start, hi_end, denom; double disp, variation, prev_variation; int max, maxpos, minl, mink; int ambiguous; int npeaks_lo, npeaks_hi; int *peak_lo; int *peak_hi; int **ident; int *nident; int *lident; double *peak; double *line; int npeaks, nlines; double *xpos; double *lambda; int *ilambda; double *tmp_xpos; double *tmp_lambda; int *tmp_ilambda; int *flag; int n = 0; int nn; int nseq = 0; int gap; int *seq_length; int found; if (seq_lines || seq_peaks) { cpl_error_set(cpl_func, CPL_ERROR_UNSUPPORTED_MODE); return NULL; } peak = cpl_vector_get_data(peaks); npeaks = cpl_vector_get_size(peaks); line = cpl_vector_get_data(lines); nlines = cpl_vector_get_size(lines); if (npeaks < 4) return NULL; peak_lo = cpl_malloc(npeaks * sizeof(int)); peak_hi = cpl_malloc(npeaks * sizeof(int)); nident = cpl_calloc(npeaks, sizeof(int)); lident = cpl_calloc(nlines, sizeof(int)); xpos = cpl_calloc(npeaks, sizeof(double)); lambda = cpl_calloc(npeaks, sizeof(double)); ilambda = cpl_calloc(npeaks, sizeof(int)); tmp_xpos = cpl_calloc(npeaks, sizeof(double)); tmp_lambda = cpl_calloc(npeaks, sizeof(double)); tmp_ilambda = cpl_calloc(npeaks, sizeof(int)); flag = cpl_calloc(npeaks, sizeof(int)); seq_length = cpl_calloc(npeaks, sizeof(int)); ident = cpl_malloc(npeaks * sizeof(int *)); for (i = 0; i < npeaks; i++) ident[i] = cpl_malloc(3 * npeaks * sizeof(int)); /* * This is just the number of intervals - one less than the number * of points (catalog wavelengths, or detected peaks). */ nlint = nlines - 1; npint = npeaks - 1; /* * Here the big loops on catalog lines begins. */ for (i = 1; i < nlint; i++) { /* * For each catalog wavelength I take the previous and the next one, * and compute the ratio of the corresponding wavelength intervals. * This ratio will be compared to all the ratios obtained doing the * same with all the detected peaks positions. */ lratio = (line[i+1] - line[i]) / (line[i] - line[i-1]); /* * Here the loop on detected peaks positions begins. */ for (j = 1; j < npint; j++) { /* * Not all peaks are used for computing ratios: just the ones * that are compatible with the expected spectral dispersion * are taken into consideration. Therefore, I define the pixel * intervals before and after any peak that are compatible with * the specified dispersion interval, and select just the peaks * within such intervals. If either of the two intervals doesn't * contain any peak, then I skip the current peak and continue * with the next. */ lo_start = peak[j] - (line[i] - line[i-1]) / min_disp; lo_end = peak[j] - (line[i] - line[i-1]) / max_disp; hi_start = peak[j] + (line[i+1] - line[i]) / max_disp; hi_end = peak[j] + (line[i+1] - line[i]) / min_disp; for (npeaks_lo = 0, k = 0; k < npeaks; k++) { if (peak[k] > lo_end) break; if (peak[k] > lo_start) { peak_lo[npeaks_lo] = k; ++npeaks_lo; } } if (npeaks_lo == 0) continue; for (npeaks_hi = 0, k = 0; k < npeaks; k++) { if (peak[k] > hi_end) break; if (peak[k] > hi_start) { peak_hi[npeaks_hi] = k; ++npeaks_hi; } } if (npeaks_hi == 0) continue; /* * Now I have all peaks that may help for a local identification. * peak_lo[k] is the sequence number of the k-th peak of the lower * interval; peak_hi[l] is the sequence number of the l-th peak of * the higher interval. j is, of course, the sequence number of the * current peak (second big loop). */ prev_variation = 1000.0; minl = mink = 0; for (k = 0; k < npeaks_lo; k++) { denom = peak[j] - peak[peak_lo[k]]; for (l = 0; l < npeaks_hi; l++) { /* * For any pair of peaks - one from the lower and the other * from the higher interval - I compute the same ratio that * was computed with the current line catalog wavelength. */ pratio = (peak[peak_hi[l]] - peak[j]) / denom; /* * If the two ratios are compatible within the specified * tolerance, we have a preliminary identification. This * will be marked in the matrix ident[][], where the first * index corresponds to a peak sequence number, and the second * index is the counter of the identifications made during * this whole process. The array of counters is nident[]. * If more than one interval pair fulfills the specified * tolerance, the closest to the expected ratio is selected. */ variation = fabs(lratio-pratio) / pratio; if (variation < tolerance) { if (variation < prev_variation) { prev_variation = variation; minl = l; mink = k; } } } } if (prev_variation < tolerance) { ident[j][nident[j]] = i; ident[peak_hi[minl]][nident[peak_hi[minl]]] = i + 1; ident[peak_lo[mink]][nident[peak_lo[mink]]] = i - 1; ++nident[j]; ++nident[peak_hi[minl]]; ++nident[peak_lo[mink]]; } } /* End loop on positions */ } /* End loop on lines */ /* * At this point I have filled the ident matrix with all my preliminary * identifications. Ambiguous identifications must be eliminated. */ for (i = 0; i < npeaks; i++) { /* * I don't take into consideration peaks that were never identified. * They are likely contaminations, or emission lines that were not * listed in the input wavelength catalog. */ if (nident[i] > 1) { /* * Initialise the histogram of wavelengths assigned to the i-th peak. */ for (j = 0; j < nlines; j++) lident[j] = 0; /* * Count how many times each catalog wavelength was assigned * to the i-th peak. */ for (j = 0; j < nident[i]; j++) ++lident[ident[i][j]]; /* * What wavelength was most frequently assigned to the i-th peak? */ max = 0; maxpos = 0; for (j = 0; j < nlines; j++) { if (max < lident[j]) { max = lident[j]; maxpos = j; } } /* * Were there other wavelengths assigned with the same frequency? * This would be the case of an ambiguous identification. It is * safer to reject this peak... */ ambiguous = 0; for (k = maxpos + 1; k < nlines; k++) { if (lident[k] == max) { ambiguous = 1; break; } } if (ambiguous) continue; /* * Otherwise, I assign to the i-th peak the wavelength that was * most often assigned to it. */ tmp_xpos[n] = peak[i]; tmp_lambda[n] = line[maxpos]; tmp_ilambda[n] = maxpos; ++n; } } /* * Check on identified peaks. Contaminations from other spectra might * be present and should be excluded: this type of contamination * consists of peaks that have been _correctly_ identified! The non- * spectral type of light contamination should have been almost all * removed already in the previous steps, but it may still be present. * Here, the self-consistent sequences of identified peaks are * separated one from the other. At the moment, just the longest of * such sequences is selected (in other words, spectral multiplexing * is ignored). */ if (n > 1) { nn = 0; /* Number of peaks in the list of sequences */ nseq = 0; /* Current sequence */ for (k = 0; k < n; k++) { if (flag[k] == 0) { /* Was peak k already assigned to a sequence? */ flag[k] = 1; xpos[nn] = tmp_xpos[k]; /* Begin the nseq-th sequence */ lambda[nn] = tmp_lambda[k]; ilambda[nn] = tmp_ilambda[k]; ++seq_length[nseq]; ++nn; /* * Now look for all the following peaks that are compatible * with the expected spectral dispersion, and add them in * sequence to xpos. Note that missing peaks are not a problem... */ i = k; while (i < n - 1) { found = 0; for (j = i + 1; j < n; j++) { if (flag[j] == 0) { disp = (tmp_lambda[j] - tmp_lambda[i]) / (tmp_xpos[j] - tmp_xpos[i]); if (disp >= min_disp && disp <= max_disp) { flag[j] = 1; xpos[nn] = tmp_xpos[j]; lambda[nn] = tmp_lambda[j]; ilambda[nn] = tmp_ilambda[j]; ++seq_length[nseq]; ++nn; i = j; found = 1; break; } } } if (!found) break; } /* * Current sequence is completed: begin new sequence on the * excluded peaks, starting the loop on peaks again. */ ++nseq; k = 0; } } /* * Find the longest sequence of self-consistent peaks. */ max = 0; maxpos = 0; for (i = 0; i < nseq; i++) { if (seq_length[i] > max) { max = seq_length[i]; maxpos = i; } } /* * Find where this sequence starts in the whole peak position * storage. */ nn = 0; for (i = 0; i < maxpos; i++) nn += seq_length[i]; /* * Move the longest sequence at the beginning of the returned lists */ n = max; for (i = 0; i < n; i++, nn++) { xpos[i] = xpos[nn]; lambda[i] = lambda[nn]; ilambda[i] = ilambda[nn]; } /* * Are some wavelengths missing? Recover them. */ for (i = 1; i < n; i++) { gap = ilambda[i] - ilambda[i-1]; for (j = 1; j < gap; j++) { if (j == 1) { /* * Determine the local dispersion from the current pair of peaks */ disp = (lambda[i] - lambda[i-1]) / (xpos[i] - xpos[i-1]); } /* * With this, find the expected position of the missing * peak by linear interpolation. */ hi_start = xpos[i-1] + (line[ilambda[i-1] + j] - lambda[i-1]) / disp; /* * Is there a peak at that position? Here a peak from the * original list is searched, that is closer than 2 pixels * to the expected position. If it is found, insert it at * the current position on the list of identified peaks, * and leave immediately the loop (taking the new position * for the following linear interpolation, in case more * than one peak is missing in the current interval). * If it is not found, stay in the loop, looking for * the following missing peaks in this interval. */ found = 0; for (k = 0; k < npeaks; k++) { if (fabs(peak[k] - hi_start) < 2) { for (l = n; l > i; l--) { xpos[l] = xpos[l-1]; lambda[l] = lambda[l-1]; ilambda[l] = ilambda[l-1]; } xpos[i] = peak[k]; lambda[i] = line[ilambda[i-1] + j]; ilambda[i] = ilambda[i-1] + j; ++n; found = 1; break; } } if (found) break; } } /* * Try to extrapolate forward */ found = 1; while (ilambda[n-1] < nlines - 1 && found) { /* * Determine the local dispersion from the last pair of * identified peaks */ if (n > 1) disp = (lambda[n-1] - lambda[n-2]) / (xpos[n-1] - xpos[n-2]); else disp = 0.0; if (disp > max_disp || disp < min_disp) break; /* * With this, find the expected position of the missing * peak by linear interpolation. */ hi_start = xpos[n-1] + (line[ilambda[n-1] + 1] - lambda[n-1]) / disp; /* * Is there a peak at that position? Here a peak from the * original list is searched, that is closer than 6 pixels * to the expected position. If it is found, insert it at * the end of the list of identified peaks. If it is not * found, leave the loop. */ found = 0; min = fabs(peak[0] - hi_start); minpos = 0; for (k = 1; k < npeaks; k++) { if (min > fabs(peak[k] - hi_start)) { min = fabs(peak[k] - hi_start); minpos = k; } } if (min < 6 && fabs(peak[minpos] - xpos[n-1]) > 1.0) { xpos[n] = peak[minpos]; lambda[n] = line[ilambda[n-1] + 1]; ilambda[n] = ilambda[n-1] + 1; ++n; found = 1; } } /* * Try to extrapolate backward */ found = 1; while (ilambda[0] > 0 && found) { /* * Determine the local dispersion from the first pair of * identified peaks */ disp = (lambda[1] - lambda[0]) / (xpos[1] - xpos[0]); if (disp > max_disp || disp < min_disp) break; /* * With this, find the expected position of the missing * peak by linear interpolation. */ hi_start = xpos[0] - (lambda[0] - line[ilambda[0] - 1]) / disp; /* * Is there a peak at that position? Here a peak from the * original list is searched, that is closer than 6 pixels * to the expected position. If it is found, insert it at * the beginning of the list of identified peaks. If it is not * found, leave the loop. */ found = 0; min = fabs(peak[0] - hi_start); minpos = 0; for (k = 1; k < npeaks; k++) { if (min > fabs(peak[k] - hi_start)) { min = fabs(peak[k] - hi_start); minpos = k; } } if (min < 6 && fabs(peak[minpos] - xpos[0]) > 1.0) { for (j = n; j > 0; j--) { xpos[j] = xpos[j-1]; lambda[j] = lambda[j-1]; ilambda[j] = ilambda[j-1]; } xpos[0] = peak[minpos]; lambda[0] = line[ilambda[0] - 1]; ilambda[0] = ilambda[0] - 1; ++n; found = 1; } } } /* * At this point all peaks are processed. Free memory, and return * the result. */ /************************************************+ for (i = 0; i < npeaks; i++) { printf("Peak %d:\n ", i); for (j = 0; j < nident[i]; j++) printf("%.2f, ", line[ident[i][j]]); printf("\n"); } printf("\n"); for (i = 0; i < n; i++) printf("%.2f, %.2f\n", xpos[i], lambda[i]); +************************************************/ for (i = 0; i < npeaks; i++) cpl_free(ident[i]); cpl_free(ident); cpl_free(nident); cpl_free(lident); cpl_free(ilambda); cpl_free(tmp_xpos); cpl_free(tmp_lambda); cpl_free(tmp_ilambda); cpl_free(peak_lo); cpl_free(flag); cpl_free(seq_length); cpl_free(peak_hi); if (n == 0) { cpl_free(xpos); cpl_free(lambda); return NULL; } return cpl_bivector_wrap_vectors(cpl_vector_wrap(n, xpos), cpl_vector_wrap(n, lambda)); } /**@}*/