C @(#)clst.for 17.1.1.1 (ES0-DMD) 01/25/02 17:16:37 C=========================================================================== C Copyright (C) 1995 European Southern Observatory (ESO) C C This program is free software; you can redistribute it and/or C modify it under the terms of the GNU General Public License as C published by the Free Software Foundation; either version 2 of C the License, or (at your option) any later version. C C This program is distributed in the hope that it will be useful, C but WITHOUT ANY WARRANTY; without even the implied warranty of C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the C GNU General Public License for more details. C C You should have received a copy of the GNU General Public C License along with this program; if not, write to the Free C Software Foundation, Inc., 675 Massachusetss Ave, Cambridge, C MA 02139, USA. C C Corresponding concerning ESO-MIDAS should be addressed as follows: C Internet e-mail: midas@eso.org C Postal address: European Southern Observatory C Data Management Division C Karl-Schwarzschild-Strasse 2 C D 85748 Garching bei Muenchen C GERMANY C=========================================================================== C C C++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ C C.IDENTIFICATION: C CLST VERSION 1 85.01.21 C F. MURTAGH ST-ECF, MUNICH C C.KEYWORDS: C APPLICATIONS, STATISTICS, CLUSTERING. C C.PURPOSE: C HIERARCHICAL CLUSTERING USING (USER-SPECIFIED) ALTERNATIVE C CRITERIA. C N * M ARRAY PASSED TO PROGRAM IN "DATA", C SEQUENCE OF N-1 AGGLOMERATIONS OF ROW POINTS RETURNED IN C VECTORS "IA" AND "IB", WITH CRITERION VALUES IN "CRIT". C C.ALGORITHM: C LANCE-WILLIAMS DISSIMILARITY RECURRENCE FORMULA USED; C SEVEN CLUSTER UPDATE CRITERIA ARE AVAILABLE; C THE DISSIMILARITY MATRIX IS SYMMETRIC AT ALL TIMES, AND C THE UPPER DIAGONAL HALF MATRIX ONLY IS STORED. C C.INPUT/OUTPUT: C PARAMETERS. C N, M, DATA : DIMENSIONS OF, AND ARRAY, OF DATA; C DISS, LEN : DISSIMILARITIES IN LOWER HALF DIAG. STORAGE, C LEN = N.N-1 / 2; C IOPT : CLUSTERING CRITERION TO BE USED; C IA, IB, CRIT: HISTORY OF AGGLOMERATIONS (1ST N-1 VALS. USED); C MEMBR : CARDINALITIES OF CLUSTERS; C NN, DISNN : NEAREST NEIGHBOURS AND ASSOC. DISSIMILARITIES, C USED TO SPEED UP SEARCH OF "DISS"; C FLAG : INDICATOR OF AGGLOMERABLE OBJECTS/CLUSTERS. C *** NOTE *** IJKLMN VARIABLE TYPE CONVENTION FOLLOWED. C C--------------------------------------------------------------- C SUBROUTINE CLST(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN, X FLAG,DISS) REAL DATA(N,M),MEMBR(N),DISS(LEN),X INTEGER IA(N),IB(N) REAL CRIT(N),DISNN(N),DMIN,XX INTEGER NN(N) INTEGER N,M,LEN,IOPT,I,J,K,I2,J2,JJ INTEGER IND,IOFFSET,NCL,JM,IM,IND1,IND2,IND3 LOGICAL FLAG(N) REAL INF DATA INF/1.E+20/ C C C... INITIALIZATIONS: DO I=1,N MEMBR(I)=1. FLAG(I)=.TRUE. ENDDO NCL=N C C C... CONSTRUCT DISSIMILARITY MATRIX: DO I=1,N-1 DO J=I+1,N IND=IOFFSET(N,I,J) DISS(IND)=0. DO K=1,M DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2 ENDDO IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2. C (ABOVE IS DONE FOR THE CASE OF THE MIN. VAR. METHOD, C WHERE MERGING CRITERIA ARE DEFINED IN TERMS OF VARIANCES C RATHER THAN DISTANCES.) ENDDO ENDDO C C C... CARRY OUT AN AGGLOMERATION - FIRST CREATE LIST OF NN'S: DO I=1,N-1 DMIN=INF DO J=I+1,N IND=IOFFSET(N,I,J) IF (DISS(IND).GE.DMIN) GOTO 500 DMIN=DISS(IND) JM=J 500 CONTINUE ENDDO NN(I)=JM DISNN(I)=DMIN ENDDO C 400 CONTINUE C... NEXT, DET. LEAST DISS. FROM LIST OF NN'S: DMIN=INF DO I=1,N-1 IF (.NOT.FLAG(I)) GOTO 600 IF (DISNN(I).GE.DMIN) GOTO 600 DMIN=DISNN(I) IM=I JM=NN(I) 600 CONTINUE ENDDO NCL=NCL-1 C C C... THIS ALLOWS AN AGGLOMERATION TO BE CARRIED OUT: I2=MIN0(IM,JM) J2=MAX0(IM,JM) IA(N-NCL)=I2 IB(N-NCL)=J2 CRIT(N-NCL)=DMIN C C C... UPDATE DISS.'S FROM NEW CLUSTER: FLAG(J2)=.FALSE. DMIN=INF DO K=1,N IF (.NOT.FLAG(K)) GOTO 800 IF (K.EQ.I2) GOTO 800 X=MEMBR(I2)+MEMBR(J2)+MEMBR(K) IF (I2.LT.K) THEN IND1=IOFFSET(N,I2,K) ELSE IND1=IOFFSET(N,K,I2) ENDIF IF (J2.LT.K) THEN IND2=IOFFSET(N,J2,K) ELSE IND2=IOFFSET(N,K,J2) ENDIF IND3=IOFFSET(N,I2,J2) XX=DISS(IND3) C C... WARD'S MINIMUM VARIANCE METHOD - IOPT=1. IF (IOPT.EQ.1) THEN DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+ X (MEMBR(J2)+MEMBR(K))*DISS(IND2)- X MEMBR(K)*XX DISS(IND1)=DISS(IND1)/X ENDIF C C... SINGLE LINK METHOD - IOPT=2. IF (IOPT.EQ.2) THEN DISS(IND1)=MIN(DISS(IND1),DISS(IND2)) ENDIF C C... COMPLETE LINK METHOD - IOPT=3. IF (IOPT.EQ.3) THEN DISS(IND1)=MAX(DISS(IND1),DISS(IND2)) ENDIF C C... AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4. IF (IOPT.EQ.4) THEN DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/ X (MEMBR(I2)+MEMBR(J2)) ENDIF C C... MCQUITTY'S METHOD - IOPT=5. IF (IOPT.EQ.5) THEN DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2) ENDIF C C... MEDIAN (GOWER'S) METHOD - IOPT=6. IF (IOPT.EQ.6) THEN DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX ENDIF C C... CENTROID METHOD - IOPT=7. IF (IOPT.EQ.7) THEN DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)- X MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/ X (MEMBR(I2)+MEMBR(J2)) ENDIF C IF (I2.GT.K) GOTO 800 IF (DISS(IND1).GE.DMIN) GOTO 800 DMIN=DISS(IND1) JJ=K 800 CONTINUE ENDDO MEMBR(I2)=MEMBR(I2)+MEMBR(J2) DISNN(I2)=DMIN NN(I2)=JJ C C C... UPDATE LIST OF NN'S INSOFAR AS THIS IS REQUIRED: DO I=1,N-1 IF (.NOT.FLAG(I)) GOTO 900 IF (NN(I).EQ.I2) GOTO 850 IF (NN(I).EQ.J2) GOTO 850 GOTO 900 850 CONTINUE C (REDETERMINE NN OF I:) DMIN=INF DO J=I+1,N IND=IOFFSET(N,I,J) IF (.NOT.FLAG(J)) GOTO 870 IF (I.EQ.J) GOTO 870 IF (DISS(IND).GE.DMIN) GOTO 870 DMIN=DISS(IND) JJ=J 870 CONTINUE ENDDO NN(I)=JJ DISNN(I)=DMIN 900 CONTINUE ENDDO C C C... REPEAT PREV. STEPS UNTIL N-1 AGGLOMERATIONS EFFECTED: IF (NCL.GT.1) GOTO 400 C C C RETURN END C C C INTEGER FUNCTION IOFFSET(N,I,J) C... MAP ROW (I) AND COL. (J) OF UPPER HALF DIAGONAL SYMMETRIC C MATRIX ONTO VECTOR. INTEGER I,J,N IOFFSET=J+(I-1)*N-(I*(I+1))/2 RETURN END