/* ---------------------------------------------------------------------
   file: mammalsteeth.sas

   Example of cluster analysis taken from Example 4 of the SAS
   documentation to PROC CLUSTER.

   Steps:
     1. read the teeth counts for 32 mammals (data set TEETH)
     2. score principal components            (data set TEETH2)
     3. average linkage cluster analysis      (data set TTREE)
     4. cut the tree into four clusters       (data set TTREE2)
     5. merge component scores with clusters  (data set TEETH3)
     6. plot and list the mammals by cluster
   --------------------------------------------------------------------- */

options nocenter nodate pageno=1 linesize=132;

title  h = 1 j = l 'File: cluster.mammalsteeth.sas';
title2 h = 1 j = l 'Cluster Analysis of Mammals'' teeth data';

/* ---------------------------------------------------------------------
   Read the raw data.
   The INPUT statement is column based: the mammal name occupies
   columns 1-16 and the eight single-digit tooth counts occupy
   columns 21-28.  The data records below must therefore keep their
   exact column alignment (spaces, not tabs) and must not be indented.
   --------------------------------------------------------------------- */
data teeth;
   input mammal $ 1-16
         @21 (v1-v8) (1.);
   label v1 = 'Top incisors'
         v2 = 'Bottom incisors'
         v3 = 'Top canines'
         v4 = 'Bottom canines'
         v5 = 'Top premolars'
         v6 = 'Bottom premolars'
         v7 = 'Top molars'
         v8 = 'Bottom molars';
   datalines;
BROWN BAT           23113333
MOLE                32103333
SILVER HAIR BAT     23112333
PIGMY BAT           23112233
HOUSE BAT           23111233
RED BAT             13112233
PIKA                21002233
RABBIT              21003233
BEAVER              11002133
GROUNDHOG           11002133
GRAY SQUIRREL       11001133
HOUSE MOUSE         11000033
PORCUPINE           11001133
WOLF                33114423
BEAR                33114423
RACCOON             33114432
MARTEN              33114412
WEASEL              33113312
WOLVERINE           33114412
BADGER              33113312
RIVER OTTER         33114312
SEA OTTER           32113312
JAGUAR              33113211
COUGAR              33113211
FUR SEAL            32114411
SEA LION            32114411
GREY SEAL           32113322
ELEPHANT SEAL       21114411
REINDEER            04103333
ELK                 04103333
DEER                04003333
MOOSE               04003333
;
run;

/* ---------------------------------------------------------------------
   Principal components analysis of the teeth variables.
   The principal component scores are written, together with the
   original variables, to data set TEETH2.
   --------------------------------------------------------------------- */
proc princomp data=teeth out=teeth2;
   var v1-v8;
run;

/* ---------------------------------------------------------------------
   Average linkage cluster analysis.
   The clustering uses the original variables v1-v8 (not the component
   scores); the tree structure is saved in data set TTREE.
   --------------------------------------------------------------------- */
proc cluster data=teeth2 method=average outtree=ttree
             ccc pseudo rsquare;
   var v1-v8;
   id mammal;
run;

/* ---------------------------------------------------------------------
   PROC TREE prints the tree diagram (dendrogram).
   It also writes data set TTREE2, which assigns every mammal to one
   of four clusters.
   --------------------------------------------------------------------- */
proc tree data=ttree out=ttree2 nclusters=4;
   id mammal;
run;

/* ---------------------------------------------------------------------
   Sort both data sets by MAMMAL and merge the cluster assignments
   (TTREE2) with the principal component scores (TEETH2) into TEETH3.
   --------------------------------------------------------------------- */
proc sort data=teeth2;
   by mammal;
run;

proc sort data=ttree2;
   by mammal;
run;

data teeth3;
   merge teeth2 ttree2;
   by mammal;
run;

/* ---------------------------------------------------------------------
   Plot the first two principal components, using the cluster number
   as the plotting symbol.
   TEETH3 is named explicitly instead of relying on the most recently
   created data set, so that inserting another step above cannot
   silently change what is plotted, sorted or printed.
   --------------------------------------------------------------------- */
symbol1 c=black f=, v='1';
symbol2 c=black f=, v='2';
symbol3 c=black f=, v='3';
symbol4 c=black f=, v='4';

proc gplot data=teeth3;
   plot prin2*prin1=cluster;
run;
quit;   /* GPLOT supports RUN-groups; QUIT ends the procedure explicitly */

/* ---------------------------------------------------------------------
   List the mammals and their first two component scores by cluster.
   The sort is done in place on TEETH3, as BY-group printing requires
   the data to be ordered by CLUSTER.
   --------------------------------------------------------------------- */
proc sort data=teeth3;
   by cluster;
run;

proc print data=teeth3;
   by cluster;
   var mammal prin1 prin2;
run;