﻿* Encoding: windows-1252.

* This macro works only with SPSS version 22 or higher.

* Names the active dataset stimuli so that the genClust macro can re-activate it.

DATASET NAME stimuli.

* Suppresses all Viewer output until OMSEND is issued.

OMS
/SELECT ALL
/DESTINATION VIEWER = NO.

* Creates an empty dataset named valid_clusters that collects one summary row per cluster analysis.
* The one-column DATA LIST fields would get F1 formats, so wider display formats are assigned explicitly.

DATA LIST /numClusters 1 validClusters 3 stimuliPerCondition 5 sumOfDistances 7.
BEGIN DATA
END DATA.
FORMATS numClusters validClusters stimuliPerCondition (F8.0) sumOfDistances (F12.4).
DATASET NAME valid_clusters.

* genClust runs a k-means analysis (QUICK CLUSTER) for every number of clusters from 2 to maxClusters.
* Each run appends one summary row to the dataset named valid_clusters.
* A dataset named stimuli (the data) and a dataset named valid_clusters (the results) must already exist.
* Arguments.
*   maxClusters      - largest number of clusters to try.
*   condVar          - numeric variable holding the condition code of each stimulus.
*   firstCond        - first condition code (codes are looped from firstCond to lastCond).
*   lastCond         - last condition code.
*   variablesToMatch - variables used for the clustering (everything up to the end of the command).
* Comments inside the macro body are quote-free inline comments so that they cannot disturb the expanded commands.

DEFINE genClust (maxClusters=!TOKENS(1) 
 /condVar=!TOKENS(1)
 /firstCond=!TOKENS(1)
 /lastCond=!TOKENS(1)
 /variablesToMatch=!CMDEND)

/* One k-means run per number of clusters, from 2 up to maxClusters */
!DO !numClusters=2 !TO !maxClusters

DATASET ACTIVATE stimuli.

/* SAVE adds QCL_1 (cluster membership) and QCL_2 (distance to the cluster centre) */
QUICK CLUSTER !variablesToMatch
  /MISSING=LISTWISE
  /CRITERIA=CLUSTER(!numClusters) MXITER(50) CONVERGE(0)
  /METHOD=KMEANS(NOUPDATE)
  /SAVE CLUSTER DISTANCE.

/* Squared distance from each stimulus to the centre of its cluster */
COMPUTE distance=QCL_2 * QCL_2.
EXECUTE.

/* temp1 gets one row per cluster with the number of stimuli of each condition */
/* and the summed squared distance of the cluster */
DATASET DECLARE temp1.
AGGREGATE
  /OUTFILE='temp1'
  /BREAK=QCL_1 
!DO !numCond=!firstCond !TO !lastCond
  !LET !condName = !CONCAT(!condVar,!numCond)
  /!condName= CIN(!condVar !numCond !numCond) 
!DOEND
  /distance=SUM(distance).

/* Remove the variables added by this run before the next run starts */
DELETE VARIABLES QCL_1 QCL_2 distance.

!LET !lastCondName = !CONCAT(!condVar,!lastCond)
!LET !firstCondName = !CONCAT(!condVar,!firstCond)

/* A cluster is valid when every condition has at least one stimulus in it */
/* valid is then the smallest per-condition count and stays missing otherwise */
DATASET ACTIVATE temp1.
IF (MIN(!firstCondName to !lastCondName)>=1) valid=MIN(!firstCondName to !lastCondName).
EXECUTE.

SORT CASES BY valid (D).

/* temp2 collapses temp1 into a single summary row for this run */
DATASET DECLARE temp2.
AGGREGATE
  /OUTFILE='temp2'
  /numClusters=N(QCL_1)
  /validClusters=N(valid) 
  /stimuliPerCondition=SUM(valid)
  /sumOfDistances=SUM(distance).

/* Append the summary row of this run to the results dataset */
DATASET ACTIVATE valid_clusters.
ADD FILES /FILE=*
  /FILE='temp2'.
EXECUTE.

!DOEND

/* Close the temporary datasets */
DATASET CLOSE temp1.
DATASET CLOSE temp2.

!ENDDEFINE.

* Arguments of the genClust call below.
*   maxClusters      = k-max value (number of stimuli of the condition with the fewest stimuli).
*   condVar          = name of the variable that holds the conditions.
*   firstCond        = number of the first condition.
*   lastCond         = number of the last condition.
*   variablesToMatch = names of the variables to be matched.

genClust maxClusters      = 60
         condVar          = COND
         firstCond        = 1
         lastCond         = 3
         variablesToMatch = ZLNG ZSYL ZN ZNHF ZBFQ ZIMA.

* Ends the output suppression that was started before the macro ran.

OMSEND.

* Makes the dataset holding the outcome of the genClust macro the active one.

DATASET ACTIVATE valid_clusters.

* Scatterplot of the valid stimuli per condition against the number of clusters.

GRAPH /SCATTERPLOT(BIVAR) = numClusters WITH stimuliPerCondition /MISSING = LISTWISE.

* Scatterplot of the sum of distances against the number of clusters.

GRAPH /SCATTERPLOT(BIVAR) = numClusters WITH sumOfDistances /MISSING = LISTWISE.


