/* ==============================================================*/ /* The Ordinary Least Square (OLS) Program Written in SAS IML */ /* Programmed by Hun Myoung Park, Indiana University */ /* Last modified on May, 2000 */ /* */ /* USAGE: Adjust INFILE and INPUT statement to your data file */ /* Change "predict" matrix to get its prediction interval*/ /* */ /* Recently updated one avaiable at php.indiana.edu/~kucc625 */ /* ==============================================================*/ OPTIONS PAGESIZE=60 LINESIZE=100 NODATE; DATA source; INFILE 'a:\ols.txt' FIRSTOBS=3 LRECL=100 OBS=10; INPUT year gnp energy pop tax revenue; RUN; DATA depend (KEEP=gnp); SET source; RUN; DATA independ (DROP=gnp); SET source; RUN; /* ======================= Statr SAS IML=========================*/ PROC IML; USE independ; READ ALL INTO x; /* Independent variables matrix */ USE depend; READ ALL INTO y; /* Dependent variables matrix */ /* ================== Module for DATA ENTRY =====================*/ START entry; y_mean=y[:,]; n=NROW(x); h=1; /* to calculte the number of observations */ vector=J(n, 1); /* n X 1 vector */ predict={1 5 488 887 15 54}; /* 1 X k matrix for the prediction */ x=vector||x; /* merging (n X k) x matrix */ x_pi=x//predict; /* for prediction interval */ y_bar=y_mean*vector; /* n X 1 ybar matrix */ v=INV(x`*x); /* k X k covariance matrix */ beta=v*(x`*y); /* Inv(X'X)*X'y to produce kX1 beta matrix */ y_hat=x*beta; /* (n X k)*(k X 1) = n X 1 y hat matrix */ y_pi_hat=x_pi*beta; /* (n+1) X 1 y matrix for prediction */ residual=y-y_hat; /* n X 1 residual matrix */ k=NROW(beta); df=n-k; /* k=the total number of regressors */ FINISH entry; /* ================== Module for ANOVA Table ====================*/ START anova; anova=J(3,5,0); /* Establishing ANOVA Table */ anova[1,1]=k-h; anova[2,1]=df; anova[3,1]=n-h; /* dfs */ anova[1,2]=SSQ(y_hat-y_bar);/* ssr=T(y_hat-y_bar)*(y_hat-y_bar); */ anova[1,3]=anova[1,2]/anova[1,1]; /* msr=ssr/(k-h); */ anova[2,2]=SSQ(y-y_hat); /* sse=T(y-y_hat)*(y-y_hat); */ anova[2,3]= anova[2,2]/df; /* mse=sse/(n-k); */ mse=anova[2,3]; see=SQRT(mse); /* mse (s**2) and see (s); */ anova[3,2]=SSQ(y-y_bar); /* = T(y-y_bar)*(y-y_bar); */ anova[1,4]=anova[1,3]/mse; /* f value= msr/mse; */ anova[1,5]=1-PROBF(anova[1,4], k-h, df); /* p value */ r_square=anova[1,2]/anova[3,2]; /* r square =ssr/sst=1-sse/sst */ adj_r=1-((n-1)*(1-r_square)*(1/df)); /*adjusted r square */ anv_col={'DF' 'Sum of Square' 'Mean Square' 'F Value' 'Prob > F'}; anv_row={'Model', 'Error', 'Total'}; /* 3 X 1 title matrix */ PRINT anova[R=anv_row C=anv_col]; /* ANOVA table */ PRINT n k y_mean see r_square adj_r; /* see, r_square, adj_r*/ FINISH anova; /* ======== Module for Parameter Estimaters' Statistics =========*/ START beta; t=TINV(.975, df); /* critical t value at the 5% lever (two tail) */ b_cov=mse*v; /* s*s*INV(X'x) k X k covariance matrix */ b_se=SQRT(VECDIAG(v)*mse); /* standard error for betas */ b_t=beta/b_se; /* t value for H0: beta=0 */ b_p=1-PROBF(b_t#b_t, 1, df); /* = 2*(1-PROBT(ABS(b_t), df, 0)) */ b_lower=beta-t*b_se; /* confidence interval (lower) of betas */ b_upper=beta+t*b_se; /* confidence interval (upper) of betas */ b_differ=b_upper-b_lower; /* confidence interval (difference) */ b_stats=beta||b_se||b_t||b_p; b_corr=DIAG(1/b_se)*b_cov*DIAG(1/b_se); /* correlation matrix */ b_column={'Betas' 'Standard E' 'T for H0' 'Prob > |T|' }; PRINT b_stats[C=b_column] b_lower b_upper b_differ;/* beta stats */ PRINT "Covariance " b_cov; PRINT "Correlation " b_corr; FINISH beta; /*============ Module for 95% Confidence Intervals ==============*/ START interval; hat=x_pi*v*x_pi`; /* n X n "hat matrix"= X*INV(X`X)*X`*/ h_diag=VECDIAG(hat); /* n X 1 diagonal vector of hat */ ci_lower=y_pi_hat-t#SQRT(h_diag*mse);/*lower limit of mean value */ ci_upper=y_pi_hat+t#SQRT(h_diag*mse);/*upper limit of mean value */ ci_diff=2*(t#SQRT(h_diag*mse)); /* 95% Confidence intervals */ pi_lower=y_pi_hat-t#SQRT(h_diag*mse+mse);/* lower for individual */ pi_upper=y_pi_hat+t#SQRT(h_diag*mse+mse);/* upper for individual */ pi_diff=2*(t#SQRT(h_diag*mse+mse)); /* 95% Prediction intervals */ PRINT "X values" predict; PRINT y_hat residual ci_lower ci_upper ci_diff /* confidence */ pi_lower pi_upper pi_diff; /* prediction */ FINISH interval; /* ==================== Procedure Section ====================== */ RUN entry; RUN anova; RUN beta; RUN interval; * RUN egls; QUIT; /* to terminate SAS IML */