/* -------------------------------------------------- Example of a maximum likelihood analysis testing for Hardy-Weinberg equilibrium for genotypes of three flowers See handout (class notes) on maximum likelihood for background this program is on ~carey/p7291dir/maxlik.3x2.sas ---------------------------------------------------- */ title Example of Maximum Likelihood Hypothesis Testing; title2 Genotypic frequencies for flowers ; data flowers; input geno1 $ geno2 $ geno3 $ n1 n2 n3; cards; AA Aa aa 427 332 235 ; data general; set flowers; /* --- arrays --- */ array geno geno1-geno3; array n n1-n3; array prob prob1-prob3; ntot=n1+n2+n3; array obs obs1-obs3; do over obs; obs=n/ntot; end; /* --- Pearson chi square --- */ p = (n1 + n2/2)/ntot; q=1-p; prob1=p*p; prob2=2*p*q; prob3=q*q; pearchi=0; do over prob; con = ((obs - prob)**2)/prob; pearchi = pearchi + con; end; pearchi = ntot*pearchi; pearprob = 1 - probchi(pearchi,1); /* * ---------------------------------------------- * MAXIMUM LIKELIHOOD: two parameter model; * ---------------------------------------------- */ file print; put '-------------------' / ' General Model'/ '-------------------'/; link init; loop2: do p = plow to phigh by delta; do x = xlow to xhigh by delta; link getlogl; end; end; link deltait; if delta > .0001 then goto loop2; link printit; lg=maxlogl; /* * ---------------------------------------------- * MAXIMUM LIKELIHOOD: One parameter model; * ---------------------------------------------- */ put ///'-------------------' / ' Constrained Model'/ '-------------------'/; link init; loop1: do p = plow to phigh by delta; x=p; link getlogl; end; link deltait; if delta > .0001 then goto loop1; link printit; lc=maxlogl; lrchi = 2*(lg - lc); lrprob = 1 - probchi(lrchi,1); put / 'Likelihood ratio chi square' (lrchi) (7.3) +3 'Probability' (lrprob) (7.4); put ' Pearson chi square' (pearchi) (7.3) +3 'Probability' (pearprob) (7.4); /* stop the program */ stop; * initialization section; init: maxlogl=-10**10; plow=.01; phigh=.99; xlow=.01; xhigh=.99; delta=.01; return; /* * --- calculate the log likelihood; */ getlogl: prob1=p*x; prob2=2*p*(1-x); prob3=(1-p) - p*(1-x); logl=0; do over geno; if prob le 0 or prob ge 1 then logl=-10**10; else logl=logl + n * log(prob); end; if logl > maxlogl then do; pmax=p; xmax=x; maxlogl=logl; end; return; /* * change the delta value; */ deltait: delta = .1*delta; plow = pmax - 2*delta; phigh = pmax + 2*delta; xlow = xmax - 2*delta; xhigh= xmax + 2*delta; return; /* print out the results */ printit: put 'Log likelihood is ' (maxlogl) (8.4); put 'Estimate of p is ' (pmax) (7.4); put 'Estimate of x is ' (xmax) (7.4); p = pmax; x=xmax; link getlogl; put 'Geno Obs Pre'; do over prob; put (geno obs prob) (@3 $2. 2*8.4); end; return; run;