/* ---------------------------------------------------
   File:    anova.demonstration
   Purpose: Demonstrate that a one-way ANOVA and a regression on
            dummy-coded group indicators give the same result.
            Also demonstrates the logic of ANOVA by building the
            F test by hand from the group means and variances.

   Design:  4 groups, 8 observations per group (balanced),
            one dependent variable (y).
   --------------------------------------------------- */

* - fabricated data for this example: four groups and a single dependent variable;
DATA rawscore;
    INPUT i group y;

    * - this code creates the dummy codes dgroup1-dgroup4;
    ARRAY dgr [4] dgroup1 dgroup2 dgroup3 dgroup4;

    * - first initialize all values to 0;
    DO j = 1 TO 4;
        dgr[j] = 0;
    END;

    * - set the value of the dummy variable for the group of this observation to 1;
    /* group is used directly as the array subscript, so it must be 1..4 */
    dgr[group] = 1;

    DROP j;
    DATALINES;
1 1 34
2 1 62
3 1 52
4 1 56
5 1 34
6 1 58
7 1 50
8 1 56
1 2 44
2 2 44
3 2 48
4 2 61
5 2 53
6 2 39
7 2 52
8 2 57
1 3 53
2 3 62
3 3 43
4 3 33
5 3 38
6 3 23
7 3 62
8 3 49
1 4 47
2 4 49
3 4 54
4 4 43
5 4 29
6 4 48
7 4 49
8 4 56
;
run;

TITLE Demonstration of the logic of ANOVA;
proc print data=rawscore;
    var i group dgroup1 dgroup2 dgroup3 dgroup4 y;
run;

TITLE2 Means and within-group variances;
/* BY-group processing needs rawscore ordered by group;
   the in-stream data above are already entered in group order. */
proc means data=rawscore noprint;
    by group;
    var y;
    output out=stats1 mean=mean_y var=var_y;
run;

proc print data=stats1;
    var group mean_y var_y;
run;

TITLE2 Overall mean (mu_hat), variance of the means (s2_xbar);
TITLE3 and the average of the group variances (s2_error);
/* One row per group goes in; a single summary row comes out:
     mu_hat   = mean of the group means
     s2_error = mean of the within-group variances
     s2_xbar  = variance of the group means
   Each statistic is requested by name so that no placeholder
   output variable is needed. */
proc means data=stats1 noprint;
    var mean_y var_y;
    output out=stats2
        mean(mean_y) = mu_hat
        mean(var_y)  = s2_error
        var(mean_y)  = s2_xbar;
run;

proc print data=stats2;
    var mu_hat s2_xbar s2_error;
run;

TITLE2 An "ANOVA" table constructed from the variances;
/* Writes two rows, Model and Error, from the single row of stats2.
   The constants match the design of rawscore:
     8  = observations per group (n * variance of the means
          estimates the error variance under the null hypothesis)
     3  = model degrees of freedom  (4 groups - 1)
     28 = error degrees of freedom  (4 groups * (8 - 1)) */
data stats3;
    set stats2;

    source = 'Model';
    s2_hat = 8 * s2_xbar;
    f      = s2_hat / s2_error;
    pvalue = 1 - probf(f, 3, 28);
    output;

    * - the error row has a mean square but no F ratio or p value;
    source = 'Error';
    s2_hat = s2_error;
    f      = .;
    pvalue = .;
    output;
run;

proc print data=stats3;
    var source s2_hat f pvalue;
run;

TITLE2 Results from PROC GLM;
TITLE3 Compare the ANOVA table to the one on the previous page;
proc glm data=rawscore;
    class group;
    model y = group / solution;
run;
quit;

TITLE2 Results from PROC REG;
TITLE3 Compare the ANOVA table to the two above;
/* dgroup4 is left out of the model, so group 4 is the reference
   category and the intercept is the group 4 mean. */
proc reg data=rawscore;
    model y = dgroup1 dgroup2 dgroup3;
run;
quit;