/* $Id: cpl_ppm.c,v 1.3 2007/07/16 09:19:52 cizzo Exp $
 *
 * This file is part of the ESO Common Pipeline Library
 * Copyright (C) 2002-2007 European Southern Observatory
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

/*
 * $Author: cizzo $
 * $Date: 2007/07/16 09:19:52 $
 * $Revision: 1.3 $
 * $Name:  $
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include <time.h>

#include <cpl_memory.h>
#include <cpl_ppm.h>


/**
 * @defgroup cpl_ppm Point pattern patching module
 *
 * @par Synopsis:
 * @code
 *   #include "cpl_ppm.h"
 * @endcode
 */

/**@{*/

/**
 * @brief
 *   Match 1-D patterns.
 *   
 * @param peaks     List of observed positions (e.g., of emission peaks)
 * @param lines     List of positions in searched pattern (e.g., wavelengths)
 * @param min_disp  Min expected scale (e.g., spectral dispersion in A/pixel)
 * @param max_disp  Max expected scale (e.g., spectral dispersion in A/pixel)
 * @param tolerance Tolerance for interval ratio comparison
 * @param seq_peaks Returned: index of identified peaks in input @em peaks
 * @param seq_lines Returned: index of identified lines in input @em lines
 *  
 * @return List of all matching points positions
 * 
 * This function attempts to find the reference pattern @em lines in a 
 * list of observed positions @em peaks. In the following documentation 
 * a terminology drawn from the context of arc lamp spectra calibration
 * is used for simplicity: the reference pattern is then a list of
 * wavelengths corresponding to a set of reference arc lamp emission
 * lines - the so-called line catalog; while the observed positions are 
 * the positions (in pixel) on the CCD, measured along the dispersion 
 * direction, of any significant peak of the signal. To identify the
 * observed peaks means to associate them with the right reference
 * wavelength. This is attempted here with a point-pattern matching 
 * technique, where the pattern is contained in the vector @em lines, 
 * and is searched in the vector @em peak. 
 *
 * In order to work, this method just requires a rough expectation
 * value of the spectral dispersion (in Angstrom/pixel), and a line
 * catalog. The line catalog @em lines should just include lines that 
 * are expected somewhere in the CCD exposure of the calibration lamp
 * (note, however, that a catalog including extra lines at its blue 
 * and/or red ends is still allowed).
 * 
 * Typically, the arc lamp lines candidates @em peak will include 
 * light contaminations, hot pixels, and other unwanted signal, 
 * but only in extreme cases this prevents the pattern-recognition 
 * algorithm from identifying all the spectral lines. The pattern 
 * is detected even in the case @em peak contained more arc lamp 
 * lines than actually listed in the input line catalog.
 * 
 * This method is based on the assumption that the relation between
 * wavelengths and CCD positions is with good approximation @em locally
 * linear (this is always true, for any modern spectrograph).
 * 
 * The ratio between consecutive intervals pairs in wavelength and in 
 * pixel is invariant to linear transformations, and therefore this 
 * quantity can be used in the recognition of local portions of the 
 * searched pattern. All the examined sub-patterns will overlap, leading 
 * to the final identification of the whole pattern, notwithstanding the 
 * overall non-linearity of the relation between pixels and wavelengths.
 *
 * Ambiguous cases, caused by exceptional regularities in the pattern,
 * or by a number of undetected (but expected) peaks that disrupt the
 * pattern on the data, are recovered by linear interpolation and 
 * extrapolation of the safely identified peaks. 
 *
 * More details about the applied algorithm can be found in the comments
 * to the function code.
 *
 * The @em seq_peaks and @em seq_lines are array reporting the positions
 * of matching peaks and wavelengths in the input @em peaks and @em lines
 * vectors. This functionality is not yet supported: this arguments
 * should always be set to NULL or a CPL_ERROR_UNSUPPORTED_MODE would
 * be set.
 */

cpl_bivector *cpl_ppm_match_positions(cpl_vector *peaks, cpl_vector *lines,
                                      double min_disp, double max_disp,
                                      double tolerance, cpl_array **seq_peaks, 
                                      cpl_array **seq_lines)
{

  int      i, j, k, l;
  int      nlint, npint;
  int      minpos;
  float    min;
  double   lratio, pratio;
  double   lo_start, lo_end, hi_start, hi_end, denom;
  double   disp, variation, prev_variation;
  int      max, maxpos, minl, mink;
  int      ambiguous;
  int      npeaks_lo, npeaks_hi;
  int     *peak_lo;
  int     *peak_hi;
  int    **ident;
  int     *nident;
  int     *lident;

  double  *peak;
  double  *line;
  int      npeaks, nlines;

  double  *xpos;
  double  *lambda;
  int     *ilambda;
  double  *tmp_xpos;
  double  *tmp_lambda;
  int     *tmp_ilambda;
  int     *flag;
  int      n = 0;
  int      nn;
  int      nseq = 0;
  int      gap;
  int     *seq_length;
  int      found;


  if (seq_lines || seq_peaks) {
      cpl_error_set(cpl_func, CPL_ERROR_UNSUPPORTED_MODE);
      return NULL;
  }

  peak        = cpl_vector_get_data(peaks);
  npeaks      = cpl_vector_get_size(peaks);
  line        = cpl_vector_get_data(lines);
  nlines      = cpl_vector_get_size(lines);

  if (npeaks < 4)
      return NULL;

  peak_lo     = cpl_malloc(npeaks * sizeof(int));
  peak_hi     = cpl_malloc(npeaks * sizeof(int));
  nident      = cpl_calloc(npeaks, sizeof(int));
  lident      = cpl_calloc(nlines, sizeof(int));
  xpos        = cpl_calloc(npeaks, sizeof(double));
  lambda      = cpl_calloc(npeaks, sizeof(double));
  ilambda     = cpl_calloc(npeaks, sizeof(int));
  tmp_xpos    = cpl_calloc(npeaks, sizeof(double));
  tmp_lambda  = cpl_calloc(npeaks, sizeof(double));
  tmp_ilambda = cpl_calloc(npeaks, sizeof(int));
  flag        = cpl_calloc(npeaks, sizeof(int));
  seq_length  = cpl_calloc(npeaks, sizeof(int));
  ident       = cpl_malloc(npeaks * sizeof(int *));
  for (i = 0; i < npeaks; i++)
    ident[i]  = cpl_malloc(3 * npeaks * sizeof(int));

  /*
   * This is just the number of intervals - one less than the number
   * of points (catalog wavelengths, or detected peaks).
   */

  nlint = nlines - 1;
  npint = npeaks - 1;


  /*
   * Here the big loops on catalog lines begins.
   */

  for (i = 1; i < nlint; i++) {


    /*
     * For each catalog wavelength I take the previous and the next one, 
     * and compute the ratio of the corresponding wavelength intervals.
     * This ratio will be compared to all the ratios obtained doing the
     * same with all the detected peaks positions.
     */

    lratio = (line[i+1] - line[i]) / (line[i] - line[i-1]);


    /*
     * Here the loop on detected peaks positions begins.
     */

    for (j = 1; j < npint; j++) {

      /*
       * Not all peaks are used for computing ratios: just the ones
       * that are compatible with the expected spectral dispersion
       * are taken into consideration. Therefore, I define the pixel
       * intervals before and after any peak that are compatible with
       * the specified dispersion interval, and select just the peaks
       * within such intervals. If either of the two intervals doesn't
       * contain any peak, then I skip the current peak and continue
       * with the next.
       */

      lo_start = peak[j] - (line[i] - line[i-1]) / min_disp;
      lo_end   = peak[j] - (line[i] - line[i-1]) / max_disp;
      hi_start = peak[j] + (line[i+1] - line[i]) / max_disp;
      hi_end   = peak[j] + (line[i+1] - line[i]) / min_disp;

      for (npeaks_lo = 0, k = 0; k < npeaks; k++) {
        if (peak[k] > lo_end)
          break;
        if (peak[k] > lo_start) {
          peak_lo[npeaks_lo] = k;
          ++npeaks_lo;
        }
      }

      if (npeaks_lo == 0)
        continue;

      for (npeaks_hi = 0, k = 0; k < npeaks; k++) {
        if (peak[k] > hi_end)
          break;
        if (peak[k] > hi_start) {
          peak_hi[npeaks_hi] = k;
          ++npeaks_hi;
        }
      }

      if (npeaks_hi == 0)
        continue;


      /*
       * Now I have all peaks that may help for a local identification.
       * peak_lo[k] is the sequence number of the k-th peak of the lower
       * interval; peak_hi[l] is the sequence number of the l-th peak of
       * the higher interval. j is, of course, the sequence number of the
       * current peak (second big loop).
       */

      prev_variation = 1000.0;
      minl = mink = 0;

      for (k = 0; k < npeaks_lo; k++) {
        denom = peak[j] - peak[peak_lo[k]];
        for (l = 0; l < npeaks_hi; l++) {

          /*
           * For any pair of peaks - one from the lower and the other
           * from the higher interval - I compute the same ratio that
           * was computed with the current line catalog wavelength.
           */

          pratio = (peak[peak_hi[l]] - peak[j]) / denom;

          /*
           * If the two ratios are compatible within the specified
           * tolerance, we have a preliminary identification. This
           * will be marked in the matrix ident[][], where the first
           * index corresponds to a peak sequence number, and the second
           * index is the counter of the identifications made during
           * this whole process. The array of counters is nident[].
           * If more than one interval pair fulfills the specified
           * tolerance, the closest to the expected ratio is selected.
           */

          variation = fabs(lratio-pratio) / pratio;

          if (variation < tolerance) {
            if (variation < prev_variation) {
              prev_variation = variation;
              minl = l;
              mink = k;
            }
          }
        }
      }
      if (prev_variation < tolerance) {
        ident[j][nident[j]]                         = i;
        ident[peak_hi[minl]][nident[peak_hi[minl]]] = i + 1;
        ident[peak_lo[mink]][nident[peak_lo[mink]]] = i - 1;
        ++nident[j];
        ++nident[peak_hi[minl]];
        ++nident[peak_lo[mink]];
      }
    }   /* End loop on positions */
  }    /* End loop on lines     */


  /*
   * At this point I have filled the ident matrix with all my preliminary
   * identifications. Ambiguous identifications must be eliminated.
   */


  for (i = 0; i < npeaks; i++) {


    /*
     * I don't take into consideration peaks that were never identified.
     * They are likely contaminations, or emission lines that were not
     * listed in the input wavelength catalog.
     */

    if (nident[i] > 1) {


      /*
       * Initialise the histogram of wavelengths assigned to the i-th peak.
       */

      for (j = 0; j < nlines; j++)
        lident[j] = 0;


      /*
       * Count how many times each catalog wavelength was assigned
       * to the i-th peak.
       */

      for (j = 0; j < nident[i]; j++)
        ++lident[ident[i][j]];


      /*
       * What wavelength was most frequently assigned to the i-th peak?
       */

      max = 0;
      maxpos = 0;
      for (j = 0; j < nlines; j++) {
        if (max < lident[j]) {
          max = lident[j];
          maxpos = j;
        }
      }


      /*
       * Were there other wavelengths assigned with the same frequency?
       * This would be the case of an ambiguous identification. It is
       * safer to reject this peak...
       */

      ambiguous = 0;

      for (k = maxpos + 1; k < nlines; k++) {
        if (lident[k] == max) {
          ambiguous = 1;
          break;
        }
      }

      if (ambiguous)
        continue;


      /*
       * Otherwise, I assign to the i-th peak the wavelength that was
       * most often assigned to it.
       */

      tmp_xpos[n]   = peak[i];
      tmp_lambda[n] = line[maxpos];
      tmp_ilambda[n] = maxpos;

      ++n;

    }

  }


  /*
   * Check on identified peaks. Contaminations from other spectra might 
   * be present and should be excluded: this type of contamination 
   * consists of peaks that have been _correctly_ identified! The non-
   * spectral type of light contamination should have been almost all 
   * removed already in the previous steps, but it may still be present.
   * Here, the self-consistent sequences of identified peaks are
   * separated one from the other. At the moment, just the longest of
   * such sequences is selected (in other words, spectral multiplexing
   * is ignored).
   */

  if (n > 1) {
    nn = 0;                  /* Number of peaks in the list of sequences */
    nseq = 0;                /* Current sequence */
    for (k = 0; k < n; k++) {
      if (flag[k] == 0) {    /* Was peak k already assigned to a sequence? */
        flag[k] = 1;
        xpos[nn] = tmp_xpos[k];       /* Begin the nseq-th sequence */
        lambda[nn] = tmp_lambda[k];
        ilambda[nn] = tmp_ilambda[k];
        ++seq_length[nseq];
        ++nn;

        /*
         * Now look for all the following peaks that are compatible
         * with the expected spectral dispersion, and add them in 
         * sequence to xpos. Note that missing peaks are not a problem...
         */
         
        i = k;
        while (i < n - 1) {
          found = 0;
          for (j = i + 1; j < n; j++) {
            if (flag[j] == 0) {
              disp = (tmp_lambda[j] - tmp_lambda[i])
                   / (tmp_xpos[j] - tmp_xpos[i]);
              if (disp >= min_disp && disp <= max_disp) {
                flag[j] = 1;
                xpos[nn] = tmp_xpos[j];
                lambda[nn] = tmp_lambda[j];
                ilambda[nn] = tmp_ilambda[j];
                ++seq_length[nseq];
                ++nn;
                i = j;
                found = 1;
                break;
              }
            }
          }
          if (!found)
            break;
        }

        /*
         * Current sequence is completed: begin new sequence on the
         * excluded peaks, starting the loop on peaks again.
         */

        ++nseq;
        k = 0;
      }
    }


    /*
     * Find the longest sequence of self-consistent peaks.
     */

    max = 0;
    maxpos = 0;
    for (i = 0; i < nseq; i++) {
      if (seq_length[i] > max) {
        max = seq_length[i];
        maxpos = i;
      }
    }

    /*
     * Find where this sequence starts in the whole peak position
     * storage.
     */

    nn = 0;
    for (i = 0; i < maxpos; i++)
      nn += seq_length[i];

    /*
     * Move the longest sequence at the beginning of the returned lists
     */

    n = max;
    for (i = 0; i < n; i++, nn++) {
      xpos[i] = xpos[nn];
      lambda[i] = lambda[nn];
      ilambda[i] = ilambda[nn];
    }


    /*
     * Are some wavelengths missing? Recover them.
     */

    for (i = 1; i < n; i++) {
      gap = ilambda[i] - ilambda[i-1];
      for (j = 1; j < gap; j++) {

        if (j == 1) {

          /*
           * Determine the local dispersion from the current pair of peaks
           */
  
          disp = (lambda[i] - lambda[i-1]) / (xpos[i] - xpos[i-1]);
        }

        /*
         * With this, find the expected position of the missing
         * peak by linear interpolation.
         */

        hi_start = xpos[i-1] + (line[ilambda[i-1] + j] - lambda[i-1]) / disp;

        /*
         * Is there a peak at that position? Here a peak from the
         * original list is searched, that is closer than 2 pixels
         * to the expected position. If it is found, insert it at
         * the current position on the list of identified peaks,
         * and leave immediately the loop (taking the new position
         * for the following linear interpolation, in case more
         * than one peak is missing in the current interval).
         * If it is not found, stay in the loop, looking for 
         * the following missing peaks in this interval.
         */

        found = 0;
        for (k = 0; k < npeaks; k++) {
          if (fabs(peak[k] - hi_start) < 2) {
            for (l = n; l > i; l--) {
              xpos[l] = xpos[l-1];
              lambda[l] = lambda[l-1];
              ilambda[l] = ilambda[l-1];
            }
            xpos[i] = peak[k];
            lambda[i] = line[ilambda[i-1] + j];
            ilambda[i] = ilambda[i-1] + j;
            ++n;
            found = 1;
            break;
          }
        }
        if (found)
          break;
      }
    }


    /*
     * Try to extrapolate forward
     */

    found = 1;
    while (ilambda[n-1] < nlines - 1 && found) {

      /*
       * Determine the local dispersion from the last pair of 
       * identified peaks
       */

      if (n > 1)
          disp = (lambda[n-1] - lambda[n-2]) / (xpos[n-1] - xpos[n-2]);
      else
          disp = 0.0;

      if (disp > max_disp || disp < min_disp)
        break;


      /*
       * With this, find the expected position of the missing
       * peak by linear interpolation.
       */

      hi_start = xpos[n-1] + (line[ilambda[n-1] + 1] - lambda[n-1]) / disp;

      /*
       * Is there a peak at that position? Here a peak from the
       * original list is searched, that is closer than 6 pixels
       * to the expected position. If it is found, insert it at
       * the end of the list of identified peaks. If it is not
       * found, leave the loop.
       */

      found = 0;
      min = fabs(peak[0] - hi_start);
      minpos = 0;
      for (k = 1; k < npeaks; k++) {
        if (min > fabs(peak[k] - hi_start)) {
            min = fabs(peak[k] - hi_start);
            minpos = k;
        }
      }
      if (min < 6 && fabs(peak[minpos] - xpos[n-1]) > 1.0) {
        xpos[n] = peak[minpos];
        lambda[n] = line[ilambda[n-1] + 1];
        ilambda[n] = ilambda[n-1] + 1;
        ++n;
        found = 1;
      }
    }


    /*
     * Try to extrapolate backward
     */

    found = 1;
    while (ilambda[0] > 0 && found) {

      /*
       * Determine the local dispersion from the first pair of
       * identified peaks
       */

      disp = (lambda[1] - lambda[0]) / (xpos[1] - xpos[0]);

      if (disp > max_disp || disp < min_disp)
        break;


      /*
       * With this, find the expected position of the missing
       * peak by linear interpolation.
       */

      hi_start = xpos[0] - (lambda[0] - line[ilambda[0] - 1]) / disp;


      /*
       * Is there a peak at that position? Here a peak from the
       * original list is searched, that is closer than 6 pixels
       * to the expected position. If it is found, insert it at
       * the beginning of the list of identified peaks. If it is not
       * found, leave the loop.
       */

      found = 0;
      min = fabs(peak[0] - hi_start);
      minpos = 0;
      for (k = 1; k < npeaks; k++) {
        if (min > fabs(peak[k] - hi_start)) {
            min = fabs(peak[k] - hi_start);
            minpos = k;
        }
      }
      if (min < 6 && fabs(peak[minpos] - xpos[0]) > 1.0) {
        for (j = n; j > 0; j--) {
          xpos[j] = xpos[j-1];
          lambda[j] = lambda[j-1];
          ilambda[j] = ilambda[j-1];
        }
        xpos[0] = peak[minpos];
        lambda[0] = line[ilambda[0] - 1];
        ilambda[0] = ilambda[0] - 1;
        ++n;
        found = 1;
      }
    }
  }


  /*
   * At this point all peaks are processed. Free memory, and return
   * the result.
   */

/************************************************+
  for (i = 0; i < npeaks; i++) {
    printf("Peak %d:\n   ", i);
    for (j = 0; j < nident[i]; j++)
      printf("%.2f, ", line[ident[i][j]]);
    printf("\n");
  }

  printf("\n");

  for (i = 0; i < n; i++)
    printf("%.2f, %.2f\n", xpos[i], lambda[i]);
+************************************************/
  for (i = 0; i < npeaks; i++)
    cpl_free(ident[i]);
  cpl_free(ident);
  cpl_free(nident);
  cpl_free(lident);
  cpl_free(ilambda);
  cpl_free(tmp_xpos);
  cpl_free(tmp_lambda);
  cpl_free(tmp_ilambda);
  cpl_free(peak_lo);
  cpl_free(flag);
  cpl_free(seq_length);
  cpl_free(peak_hi);

  if (n == 0) {
    cpl_free(xpos);
    cpl_free(lambda);
    return NULL;
  }

  return cpl_bivector_wrap_vectors(cpl_vector_wrap(n, xpos), 
                                   cpl_vector_wrap(n, lambda));
}

/**@}*/