/* ------------------------------------------------------------------
   FILE:    multreg5.sas
   DATA:    Sister Sally of the Salubrious Sabines' sixth grade
            ratings on who is most likely to become an axe murderer.
   PURPOSE: Illustrates the matrix algebra behind the normal
            equations for the GLM.

   OUTLINE: 1. Read five observations (name, Pd, age, rating).
            2. Regress Pd on age and rating with PROC REG and print
               the data together with predicted values and residuals.
            3. Reproduce the same coefficients, predicted values and
               residuals by hand in PROC IML using b = inv(X'X) X'y.
------------------------------------------------------------------- */

options nocenter nodate pageno=1;
title Sister Sals Sixth Grade;

/* Read the raw data in-stream.  The records must start on the line
   after the CARDS statement, one observation per line, and the block
   is closed by a line holding only a semicolon.                      */
data sal;
   length name $12;
   input name $ Pd age rating;
   cards;
Abercrombe 51 12 2
Binghamton 60 18 -2
Chatsworth 49 10 0
Dillsbarff 58 23 3
Evansbottom 53 14 1
;
run;

/* Perform the regression and output the data set with the predicted
   Pd scores (yhat) and the residuals (u).  OUT=sal deliberately
   replaces the input data set with the augmented copy, as in the
   original program.  DATA= is given explicitly so the step does not
   depend on which data set happens to have been created last, and
   QUIT ends the interactive procedure instead of leaving that to the
   next PROC statement.                                               */
proc reg data=sal;
   model pd = age rating;
   output out=sal predicted=yhat residual=u;
run;
quit;

/* List the observations with their predicted values and residuals. */
proc print data=sal;
run;

/* Now do the same using PROC IML (Interactive Matrix Language) */
proc iml;

   /* create the data matrices; the values repeat the in-stream data
      above and must be kept in step with it if that data changes    */

   /* column vector for y (the Pd scores) */
   y = {51, 60, 49, 58, 53};

   /* Design matrix or X: a column of ones for the intercept,
      then age, then rating                                          */
   x = {1 12  2,
        1 18 -2,
        1 10  0,
        1 23  3,
        1 14  1};

   /* calculate the coefficients from the normal equations,
      b = inv(X'X) * X'y.  The explicit inverse is kept because the
      purpose here is to show the algebra step by step.              */
   xtx    = t(x) * x;
   xty    = t(x) * y;
   xtxinv = inv(xtx);
   b      = xtxinv * xty;

   /* derive the predicted values and the residuals */
   yhat = x * b;
   u    = y - yhat;

   /* illustrate how the X transpose * residuals equals 0
      (expect entries of rounding-error size rather than exact zeros) */
   xtu = t(x) * u;

   /* print the results */
   print b yhat u xtu;

quit;