/** multiple linear regression example */
/** chapter 9 in "applied statistics and the sas programming language" by Cody & Smith */

/*
   Build the NONEXP data set from in-stream data using list input:
   one observation per data line, six numeric fields per observation.

   ID:     Observation identifier.
   ACH6:   Reading achievement at the end of sixth grade.
   ACH5:   Reading achievement at the end of fifth grade.
   APT:    A measure of verbal aptitude taken in the fifth grade.
   ATT:    A measure of attitude toward school taken in fifth grade.
   INCOME: A measure of parental income (in thousands of dollars per year).

   The purpose of this study is to understand what underlies the reading
   achievement of the students in the district.
*/
DATA NONEXP;
    INPUT ID ACH6 ACH5 APT ATT INCOME;
    /* In-stream records must begin on the line after DATALINES; the lone
       semicolon line marks the end of the data. */
DATALINES;
1 7.5 6.6 104 60 67
2 6.9 6.0 116 58 29
3 7.2 6.0 130 63 36
4 6.8 5.9 110 74 84
5 6.7 6.1 114 55 33
6 6.6 6.3 108 52 21
7 7.1 5.2 103 48 19
8 6.5 4.4 92 42 30
9 7.2 4.9 136 57 32
10 6.2 5.1 105 49 23
11 6.5 4.6 98 54 57
12 5.8 4.3 91 56 29
13 6.7 4.8 100 49 30
14 5.5 4.2 98 43 36
15 5.3 4.3 101 52 31
16 4.7 4.4 84 41 33
17 4.9 3.9 96 50 20
18 4.8 4.1 99 52 34
19 4.7 3.8 106 47 30
20 4.6 3.6 89 58 27
;

/* Full model: regress ACH6 on all four candidate predictors. */
PROC REG DATA=NONEXP;
    TITLE "Nonexperimental Design Example";
    /* The options "P" and "R" specify that we want predicted values and
       residuals to be computed. */
    MODEL ACH6 = ACH5 APT ATT INCOME / P R;
RUN;
QUIT;

/* Residual plot: residuals against predicted values.
   Note that this model uses ACH5 as the only regressor, not the full
   four-predictor model fitted above. */
PROC REG DATA=NONEXP;
    TITLE "Residual Plot";
    MODEL ACH6 = ACH5;
    PLOT RESIDUAL.*PRED.;
RUN;
/* PROC REG is interactive and stays active after RUN; end it explicitly,
   as the other PROC REG steps in this file do. */
QUIT;

/* QQ plot: descriptive statistics are requested for ACH6 and ACH5, but the
   Q-Q plot itself is drawn only for ACH5. No distribution option is given,
   so the procedure's default reference distribution is used. */
PROC UNIVARIATE DATA=NONEXP;
    TITLE "QQ Plot";
    VAR ACH6 ACH5;
    QQPLOT ACH5;
RUN;

/* Variable selection using the FORWARD method: starts with the best single
   regressor, then finds the best one to add to what is already in the model,
   then the next best, and so on. */
PROC REG DATA=NONEXP;
    TITLE "Nonexperimental Design Example";
    MODEL ACH6 = ACH5 APT ATT INCOME / SELECTION = FORWARD;
RUN;
QUIT;

/* Variable selection using the MAXR method: for each model size k, look for
   the k-variable regression model with the highest R^2. */
PROC REG DATA=NONEXP;
    TITLE "Nonexperimental Design Example";
    MODEL ACH6 = ACH5 APT ATT INCOME / SELECTION = MAXR;
RUN;
QUIT;