LIBNAME good 'c:\temp'; /* PROC LOGISTIC DATA=good.assignment COVOUT OUTEST=a DESCENDING; MODEL owncar= budget credits offcamp /TECHNIQUE=NEWTON COVB STB; TEST budget=offcamp=0; RUN; */ %LET dataset=good.assignment; %LET depend=owncar; %LET independ= budget credits offcamp; %LET var={"Budget", "Credits", "Offcamp" }; /* Variable names */ PROC IML; /* Fisher-scoring Method rather than Newton-Raphson */ USE &dataset; /* Convert variables of a dataset to matrices */ READ ALL VAR{&depend} INTO y; /* or USE good.assignment VAR{owncar}; READ ALL INTO y; */ READ ALL VAR{&independ} INTO x; CLOSE &dataset; /* Close the dataset */ parameter={"Intercepter"}//&var; variable=&var; N = NROW(x); /* the number of observations */ x = REPEAT(1, n, 1)||x; /* N by K matrix */ K = NCOL(x); beta = REPEAT(0, K, 1); /* K by 1 matrix */ beta0 = beta+1; weight = REPEAT(1, n ,1); /* row weights */ x_bar=x[:,]; /* means of variables */ x_sd=SQRT((x[##,]-N*(x_bar#x_bar))/(N-1)); /* Standard deviations of variables */ x_max=J(1,K,0); x_min=J(1,K,0); /* Maximum and minimum values of variables */ DO i=2 TO K; x_max[1, i]=MAX(x[,i]); x_min[1, i]=MIN(x[,i]); END; DO i=1 TO 50 WHILE(MAX(ABS(beta-beta0))>1e-8); beta0 = beta; xb = x*beta; /* N X 1 */ p = 1/(1+EXP(-xb)); /* Probability of Occuring the Event */ pdf = p#p#EXP(-xb); /* Standard Logistic Probability Density Function */ lnL = SUM(((y=1)#LOG(p) + (y=0)#LOG(1-p))#weight); /* log likelihood */ w = weight/(p#(1-p)); /* weighting */ g = pdf#x; /* Matrix of Partial Derivative of P with respect to betas */ covb = INV(T(g)*(w#g)); /* Covariance matrix of betas */ beta = beta + covb*(T(g)*(w#(y-p))); /* compuing next betas */ END; p0 = SUM((y=1)#weight)/SUM(weight); /* average response */ lnL0 = SUM(((y=1)#LOG(p0) + (y=0)#LOG(1-p0))#weight); /* log likelihood of restricted model */ LR = 2*(lnL-lnL0); /* Likelihood Ratio Chi-squared */ df = K-1; /* Degree of freedom */ p_LR = 1-PROBCHI(LR, df); /* P-value of the model */ aic=(1/N)*(-2*lnL+2*K); /* Akaike's Information Criterion */ bic=-2*lnL-(N-K)*LOG(N); /* Bayesian Information Criterion */ McFadden=1-(lnL/lnL0); /* McFadden's Pseudo R squared */ PRINT ,, 'Likelihood Ratio Test on the Null Hypothesis of BETA=0', lnL LR df p_LR; PRINT ,, 'Goodness of Fit', Mcfadden aic bic,,; se = SQRT(VECDIAG(covb)); /*Standard Errors of coefficients */ Z = beta/se; /* Z values of coefficients */ Wald = Z#Z; /* Wald Chi-squared statistics of coefficients */ p_B = 2*(1-PROBNORM(ABS(z))); /* P-values of coefficients */ odds=beta||p_B||EXP(beta)||EXP(beta#T(x_sd))||T(x_sd); PRINT ,, 'Maximum Likelihood Estimates', parameter beta se z Wald p_B ; PRINT ,, 'Estimated Covariance Matrix', covb[ROWNAME=parameter COLNAME=parameter]; PRINT ,, 'Odds Ratio Estimates', odds[ROWNAME=parameter COLNAME={"Beta" "P>|z|" "e^b" "e^(b*sd)" "x_sd"}]; /**************************************************************/ START wald; /* Hypothesis Testing: Wald test */ W=J(1,3,0); Qbr=q*beta-r; W[1,1]=T(Qbr)*INV(q*covb*T(q))*Qbr; W[1,2]=NROW(r); W[1,3]=1-PROBCHI(W[1,1], W[1,2]); PRINT ,, 'Linear Hypothesis Testing', W[COLNAME={"Wald" "df" "P>Chi2"}]; FINISH wald; q={0 1 0 0, 0 0 0 1}; /* Variables to be tested */ r={0, 0}; /* Restriction */ RUN wald; /**************************************************************/ START prtab; /* Predicted probabilities of ideal types */ ideal=J(NROW(row)+1, NCOL(col)+1, 0); DO i=1 to NROW(row); baseline[1,r]=row[i,1]; ideal[i+1,1]=row[i,1]; /* Label of row */ DO j=1 to NCOL(col); baseline[1,c]=col[1,j]; IF i=1 THEN ideal[1,j+1]=col[1,j]; /* Label of column */ ideal[i+1, j+1]=EXP(baseline*beta)/(1+EXP(baseline*beta)); END; /* End of j*/ END; /* End of i*/ PRINT ,, 'Predicted Probabilities of the Ideal Types', ideal; FINISH prtab; baseline=x_bar; *baseline={1 400 12.005 1}; r=4; row={0, 1}; /* Location and values of the row variable */ c=2; col={500 1000 1500}; /* Location and values of the column variable */ RUN prtab; /**************************************************************/ START prchange; /* Descrete and marginal changes */ dc=REPEAT(0, df, 5); /* Initializing the discrete change matrix */ p1=EXP(baseline*beta)/(1+EXP(baseline*beta)); DO i=1 TO 5; /* Discrete changes */ DO j=1 TO df; /* Independent variables */ x0=baseline; x1=baseline; /* Initialzing the baseline vector */ IF i=1 THEN; DO; x0[1,j+1]=x_min[1,j+1]; x1[1,j+1]=x_max[1,j+1]; END; ELSE IF i=2 THEN; DO; x0[1,j+1]=0; x1[1,j+1]=1; END; ELSE IF i=3 THEN; DO; x0[1,j+1]=x0[1,j+1]-.5; x1[1,j+1]=x1[1,j+1]+.5; END; ELSE IF i=4 THEN; DO; x0[1,j+1]=x0[1,j+1]-(x_sd[1,j+1]/2); x1[1,j+1]=x1[1,j+1]+(x_sd[1,j+1]/2); END; IF i=5 THEN; DO; dc[j,i]=p1*(1-p1)*beta[j+1,1]; END; /* Computing marginal changes*/ ELSE; DO; /* Computing discrete changes */ xb0=x0*beta; dc0=EXP(xb0)/(1+EXP(xb0)); /* starting */ xb1=x1*beta; dc1=EXP(xb1)/(1+EXP(xb1)); /* ending */ dc[j,i]=dc1-dc0; /* difference between the starting and ending */ END; /* End of IF */ END; /* End of j */ END; /* End of i */ PRINT ,, 'Discrete and Marginal Changes of the Predicted Probability'; PRINT dc[ROWNAME=variable COLNAME={"Min->Max" "0->1" "+-1/2" "+-sd/2" "Marginal"}]; PRINT "Prob(y=1|x) = " p1; PRINT baseline[COLNAME=parameter]; /* Baseline of X */ PRINT x_bar[COLNAME=parameter]; /* Mean of X */ PRINT x_sd[COLNAME=parameter]; /* Standard deviation of X */ FINISH prchange; /* End of discrete and marginal changes */ baseline=x_bar; *baseline={1 400 12.005 1}; /* To compute discrete changes */ RUN prchange; /**************************************************************/ START prgen; nGroup=NCOL(group); minGroup=MIN(group); pr_gen=J(cut+1, nGroup+1, 0); step=(end-start)/cut; DO i=minGroup TO nGroup; baseline[1, category]=i; DO j=1 TO cut+1; baseline[1, interval]=start+j*step; IF i=0 THEN pr_gen[j,1]=start+(j-1)*step; pr_gen[j,i+2]=EXP(baseline*beta)/(1+EXP(baseline*beta)); END; /* End of j */ END; /* End of i */ PRINT pr_gen; CREATE WORK.prgen FROM pr_gen; APPEND FROM pr_gen; CLOSE WORK.prgen; FINISH prgen; /**************************************************************/ baseline=x_bar; *baseline={1 400 12.005 1}; /* To compute predicted probabilities */ interval=2; start=500; end=1500; cut=20; category=4; group={0 1}; RUN prgen; QUIT; GOPTIONS RESET=GLOBAL GUNIT=PCT BORDER; TITLE1 'Predicted Probabilities of Y=1'; SYMBOL1 COLOR=RED INTERPOL=SPLINE VALUE=DOT HEIGHT=2; SYMBOL2 COLOR=BLUE INTERPOL=JOIN VALUE=DIAMOND HEIGHT=2; SYMBOL3 COLOR=GREEN INTERPOL=JOIN VALUE=SQUARE; axis1 LABEL=('Amount of Budget') MAJOR=(HEIGHT=1) OFFSET=(0) WIDTH=2 ; axis2 LABEL=('Pr(y=1)') MAJOR=(HEIGHT=1) OFFSET=(0) WIDTH=2; PROC GPLOT DATA=WORK.prgen; PLOT col2*col1 col3*col1 /OVERLAY HAXIS=axis1 VAXIS=axis2 HMINOR=5; RUN;