; $Id: sign_test.pro,v 1.5 2001/01/15 22:28:22 scottm Exp $
;
;  Copyright (c) 1991-2001, Research Systems Inc.  All rights
;  reserved. Unauthorized reproduction prohibited.


function gaussint1,x
; GAUSSINT1 returns the probability of obtaining x or something more
; extreme, based on GAUSSINT:
;    x <= 0 :  GAUSSINT(x)
;    x >  0 :  1 - GAUSSINT(x)

  ; Positive argument: complement of GAUSSINT.
  if x gt 0 then return, 1 - gaussint(x)

  ; Zero or negative argument: GAUSSINT is used directly.
  return, gaussint(x)

end


pro sign_test,Data, Diff,Prob,Names=Names,List_Name=Ln,   $
              Missing=M,NoPrint=NP
;+
; NAME:
;	SIGN_TEST
;
; PURPOSE:
;	To test the null hypothesis that two populations have the same
;	distribution, i.e. F(x) = G(x), against the alternative that their
;	distributions differ in location, i.e. F(x) = G(x+a).  SIGN_TEST
;	tests every pair of populations in Data.
;
; CATEGORY:
;	Statistics.
;
; CALLING SEQUENCE:
;	SIGN_TEST, Data [, Diff, Prob, NAMES=Names, LIST_NAME=File, $
;	           MISSING=Value, /NOPRINT]
;
; INPUTS:
;	Data:	Two-dimensional array. Data(i,j) = the jth observation from
;		the ith population.
;
; KEYWORDS:
;	NAMES:	Vector of user supplied names for the populations to be
;		used in the output.  If it is undefined, or holds fewer
;		names than there are populations, default names of the
;		form 'pop<index>' are generated for the missing entries
;		and written back to the caller's variable.
;
;    LIST_NAME:	Name of output file. Default is to the screen.
;
;      MISSING:	Value used as a place holder for missing data.  Missing
;		data are handled pairwise: an observation is dropped from
;		a pair if either member equals this value.  The comparison
;		uses EQ, so a NaN place holder will not be recognised.
;
;      NOPRINT:	Flag, if set, to suppress the two output tables.
;		Diagnostic messages are still written.
;
; OUTPUT:
;	Table showing for each pair of populations the number of positive
;	differences in observations.  Also, table of probabilities for
;	each population pair giving the two-tailed significance of the
;	results in the first table.
;
; OPTIONAL OUTPUT PARAMETERS:
;	Diff:	Two-dimensional array of positive differences.
;		Diff(i,j) = number of observations in population
;		i greater than the corresponding observation in population j.
;		Diagonal entries are 0.
;
;	Prob:	Two-dimensional array. Prob(i,j) = probability of
;		Diff(i,j) or something more extreme.  Diagonal entries are 1.
;
;	For a pair of populations with no usable non-zero difference
;	(all observations equal or missing), Diff(i,j), Diff(j,i),
;	Prob(i,j) and Prob(j,i) are all set to -1.
;
; RESTRICTIONS:
;	All populations have the same sample size.
;
; COMMON BLOCKS:
;	None.
;
; PROCEDURE:
;	For each pair of populations, the difference between corresponding
;	observations is computed and a count is made of the positive and
;	negative differences (zero differences are discarded).  The
;	probability of the count is computed under the assumption that the
;	distributions are the same - i.e. the probability of a negative
;	difference = the probability of a positive difference = .5.
;	For more than 25 usable differences, the binomial distribution is
;	approximated with a normal distribution for computing Prob.
;-


On_Error,2
SD = size(Data)

; Write to the named file when LIST_NAME is supplied, else to the screen.
if (N_Elements(Ln) NE 0) THEN openw,unit,/Get_Lun,Ln else unit=-1

if (SD(0) NE 2) THEN BEGIN
   printf,unit, 'sign_test- Data array has wrong dimension'
   goto, DONE
ENDIF


C = SD(1)        ; number of populations (first dimension of Data)
R = SD(2)        ; number of observations per population


Diff = Fltarr(C,C)
Prob = Diff + 1.0

for i = 0l,C-2 DO BEGIN

   ; Differences between every later population and population i.
   ; D1 repeats the observations of population i once per later population.
   D1 = Replicate(1.0,C-i-1) # Data(i,*)
   Temp = Data(i+1:*,*) - D1

   if (N_Elements(M) NE 0) THEN BEGIN   ; Handle missing data
      ; A pair with a missing member is treated like a zero difference,
      ; which removes it from the counts below.
      here = where(Data(i+1:*,*) EQ M, count)
      if count NE 0 THEN Temp(here) = 0
      here = where(D1 EQ M, count)
      if count NE 0 THEN Temp(here) = 0
   ENDIF

   here = where(Temp NE 0, count)
   if (count NE 0) THEN BEGIN

      ; Number of usable (non-zero) differences for each later population.
      Temp1 = Temp
      Temp1(here) = 1          ; If diff = 0 then discard observation
      PopSize = Temp1 # Replicate(1,R)

      ; Number of positive differences for each later population.
      here = where(Temp LE 0, count)
      if (count NE 0) THEN Temp(here) = 0
      here = where(Temp NE 0, count)
      if (count NE 0) THEN Temp(here) = 1
      PosNo = Temp # Replicate(1,R)

      Diff(i+1:*,i) = PosNo

      for j = long(i+1),C-1 DO BEGIN
         k = j-i-1
         nPairs = PopSize(k)

         if nPairs EQ 0 THEN BEGIN
            printf,unit,"sign_test- Data are all the same or missing"
            printf,unit,"           for columns ",i, " and ",j
            Diff(i,j) = -1 & Diff(j,i) = -1
            Prob(i,j) = -1 & Prob(j,i) = -1
         ENDIF ELSE BEGIN
            nPos = Diff(j,i)

            ; Mirror entry: differences that are not positive are negative.
            ; This is done per pair so that the -1 markers set above for
            ; unusable pairs are never overwritten.
            Diff(i,j) = nPairs - nPos

            if nPairs GT 25 THEN BEGIN
               ; approximate binomial with normal
               z = (2*nPos - nPairs)/sqrt(nPairs)
               Prob(j,i) = 2*Gaussint1(z)
            ENDIF ELSE if nPos GT nPairs/2 THEN BEGIN
               Prob(j,i) = 2*binomial(nPos,nPairs,.5)
            ENDIF ELSE if nPos EQ nPairs/2 THEN BEGIN
               Prob(j,i) = 1
            ENDIF ELSE BEGIN
               Prob(j,i) = 2*(1 - binomial(nPos+1,nPairs,.5))
            ENDELSE

            ; The two-tailed probability is the same in both directions.
            Prob(i,j) = Prob(j,i)
         ENDELSE

      ENDFOR

   ENDIF ELSE BEGIN
      printf,unit,'sign_test- all data are missing for column ',i
      printf,unit,'           or data the same in all columns after column',i
      printf,unit," "
      Diff(i+1:*,i) = -1 & Diff(i,i+1:*) = -1
      Prob(i,i+1:*) = -1 & Prob(i+1:*,i) = -1
   ENDELSE
ENDFOR

; Supply default population names.  N_Elements is used rather than
; Size(Names) so that a scalar name counts as one name (the second element
; of Size() is the type code, not a length, for a scalar).
nNames = N_Elements(Names)
if (nNames EQ 0) THEN BEGIN
   Names = ['pop' + StrTrim(Indgen(C),2)]
ENDIF ELSE if (nNames LT C) THEN BEGIN
   Idx = Indgen(C)
   printf,unit,'sign_test- missing names'
   Names = [Names, 'pop' + StrTrim(Idx(nNames:C-1),2)]
ENDIF


if (Not keyword_set(NP)) THEN BEGIN

   printf,unit, " Table of Count Differences"
   printf,unit, " "
   printf,unit, format ='(8X,16(A8,2x))', Names

   for i = 0,C-1 do                       $
      printf,unit, format='(A8,16(I8,2X))',Names(i),Diff(*,i)

   printf,unit, " "
   printf,unit, "Table of Probabilities:"
   printf,unit," "
   printf,unit, format ='(8X,16(A10,2x))', Names

   for i = 0,C-1 do                       $
      printf,unit, format='(A8,16(G10.5,2X))',Names(i),  $
                   Prob(*,i)
ENDIF


DONE:
   ; Release the output file, if one was opened.
   if (unit NE -1) THEN Free_Lun,unit
   RETURN
   END