/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: CLUSTEX4                                            */
/*   TITLE: DOCUMENTATION EXAMPLE 4 FROM PROC CLUSTER           */
/* PRODUCT: STAT                                                */
/*  SYSTEM: ALL                                                 */
/*    KEYS: CLUSTER                                             */
/*   PROCS: CLUSTER TREE STANDARD                               */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: WSS                         UPDATE:                 */
/*     REF:                                                     */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/

title  'HIERARCHICAL CLUSTER ANALYSIS OF MAMMALS'' TEETH DATA';
title2 'Evaluating the Effects of Ties';

/* Read one record per mammal: the name comes from columns 1-16   */
/* and the eight single-digit tooth counts from columns 21-28.    */
/* The data records must start on the line after CARDS and must   */
/* keep this column layout, because INPUT uses column positions.  */
data teeth;
   input mammal $ 1-16 @21 (v1-v8) (1.);
   label v1='Top incisors'
         v2='Bottom incisors'
         v3='Top canines'
         v4='Bottom canines'
         v5='Top premolars'
         v6='Bottom premolars'
         v7='Top molars'
         v8='Bottom molars';
   cards;
BROWN BAT           23113333
MOLE                32103333
SILVER HAIR BAT     23112333
PIGMY BAT           23112233
HOUSE BAT           23111233
RED BAT             13112233
PIKA                21002233
RABBIT              21003233
BEAVER              11002133
GROUNDHOG           11002133
GRAY SQUIRREL       11001133
HOUSE MOUSE         11000033
PORCUPINE           11001133
WOLF                33114423
BEAR                33114423
RACCOON             33114432
MARTEN              33114412
WEASEL              33113312
WOLVERINE           33114412
BADGER              33113312
RIVER OTTER         33114312
SEA OTTER           32113312
JAGUAR              33113211
COUGAR              33113211
FUR SEAL            32114411
SEA LION            32114411
GREY SEAL           32113322
ELEPHANT SEAL       21114411
REINDEER            04103333
ELK                 04103333
DEER                04003333
MOOSE               04003333
;

/* Baseline analyses in the original observation order. */
proc cluster data=teeth method=average nonorm outtree=_null_;
   var v1-v8;
   id mammal;
   title3 'Raw Data';
run;

proc cluster data=teeth std method=average nonorm outtree=_null_;
   var v1-v8;
   id mammal;
   title3 'Standardized Data';
run;

options mprint;

*---------------------------------------------------------------------+
|
| The macro CLUSPERM randomly permutes the observations and runs a
| cluster analysis for each permutation. It writes one tree data set
| per permutation, named _tree_1 through _tree_nperm, and uses the
| work data sets _temp_ and _perm_. The arguments are as follows:
|
|    data    data set name
|    var     list of variables to cluster
|    id      id variable for proc cluster
|    method  clustering method (and possibly other options)
|    nperm   number of random permutations
|
+---------------------------------------------------------------------;

%macro CLUSPERM(data,var,id,method,nperm);

   /* keep the loop index private so a caller-level N is not clobbered */
   %local n;

   *------add one uniform random sort key per permutation------;
   data _temp_;
      set &data;
      array _random_ _ran_1-_ran_&nperm;
      do over _random_;
         _random_=ranuni(835297461);
      end;
   run;

   *------reorder by each random key in turn and cluster------;
   %do n=1 %to &nperm;
      proc sort data=_temp_(keep=_ran_&n &var &id) out=_perm_;
         by _ran_&n;
      proc cluster data=_perm_ method=&method noprint outtree=_tree_&n;
         var &var;
         id &id;
         ; /* extra semicolon needed for macro bug */
      run;
   %end;

%mend;

*---------------------------------------------------------------------+
|
| The macro PLOTPERM plots various cluster statistics against the
| number of clusters for each permutation. It reads the tree data
| sets _tree_1 through _tree_nperm and builds the data set _plot_.
| The arguments are as follows:
|
|    stat    names of variables from the tree data sets
|    nclus   maximum number of clusters to be plotted
|    nperm   number of random permutations
|
+---------------------------------------------------------------------;

%macro PLOTPERM(stat,nclus,nperm);

   /* keep the loop index private so a caller-level N is not clobbered */
   %local n;

   *------concatenate tree data sets for nclus or fewer clusters------;
   data _plot_;
      set
         %do n=1 %to &nperm;
            _tree_&n(in=_in_&n)
         %end;
      ;
      if _ncl_ <= &nclus;
      /* record which permutation each row came from */
      %do n=1 %to &nperm;
         if _in_&n then _perm_=&n;
      %end;
      label _perm_='permutation number';
      keep _ncl_ &stat _perm_;
   run;

   *------plot the requested statistics by number of clusters------;
   proc plot data=_plot_;
      plot (&stat)*_ncl_=_perm_;
   run;

%mend;

*---------------------------------------------------------------------+
|
| The macro TREEPERM generates cluster-membership variables for a
| specified number of clusters for each permutation. Proc print lists
| the objects in each cluster-combination and proc tabulate gives
| the frequencies and means. It reads the tree data sets _tree_1
| through _tree_nperm and builds _out_1 through _out_nperm and
| _merge_. The arguments are as follows:
|
|    var      list of variables to cluster (explicit names only,
|             no - or : list shorthand allowed)
|    id       id variable for proc cluster
|    meanfmt  format for printing means in proc tabulate
|    nclus    number of clusters desired
|    nperm    number of random permutations
|
+---------------------------------------------------------------------;

%macro TREEPERM(var,id,meanfmt,nclus,nperm);

   /* keep the loop index private so a caller-level N is not clobbered */
   %local n;

   *------create data sets giving cluster membership------;
   %do n=1 %to &nperm;
      proc tree data=_tree_&n noprint n=&nclus
                out=_out_&n(drop=clusname rename=(cluster=_clus_&n));
         copy &var;
         id &id;
      proc sort data=_out_&n;
         by &id &var;
      run;
   %end;

   *------merge the cluster variables------;
   data _merge_;
      merge
         %do n=1 %to &nperm;
            _out_&n
         %end;
      ;
      by &id &var;
      /* all_clus holds the cluster numbers of every permutation, */
      /* three characters per permutation                         */
      length all_clus $ %eval(3 * &nperm);
      %do n=1 %to &nperm;
         substr( all_clus, %eval(1+(&n-1)*3), 3) = put( _clus_&n, 3.);
      %end;
   run;

   *------print and tabulate cluster combinations------;
   proc sort data=_merge_;
      by _clus_:;
   proc print data=_merge_;
      var &var;
      id &id;
      by all_clus notsorted;
   /* NOTE(review): formchar is a one-blank string here, which sets   */
   /* only the first formatting character. The sample may originally  */
   /* have supplied a longer all-blank string - confirm.              */
   proc tabulate data=_merge_ order=data formchar=' ';
      class all_clus;
      var &var;
      table all_clus, n='FREQ'*f=5. mean*f=&meanfmt*(&var) /
            rts=%eval(&nperm*3+1);
   run;

%mend;

%let vlist=v1 v2 v3 v4 v5 v6 v7 v8;

TITLE3 'raw data';

*------cluster raw data with average linkage------;
%CLUSPERM( teeth, &vlist, mammal, average, 10);

*------plot statistics for the last 20 levels------;
%PLOTPERM( _psf_ _pst2_ _ccc_, 20, 10);

*------analyze the 4-cluster level------;
%TREEPERM( &vlist, mammal, 9.1, 4, 10);

title3 'Standardized Data';

*------cluster standardized data with average linkage------;
%CLUSPERM( teeth, &vlist, mammal, average std, 10);

*------plot statistics for the last 20 levels------;
%PLOTPERM( _psf_ _pst2_ _ccc_, 20, 10);

*------analyze the 4-cluster level------;
%TREEPERM( &vlist, mammal, 9.1, 4, 10);