//========================================================= file = lr.c ====
//= Program to calculate linear regression equation for data               =
//==========================================================================
//= Notes:                                                                 =
//=  1) The method of least squares is used to perform linear              =
//=     regression on data. The equation of the line and                   =
//=     the coefficient of determination are computed.                     =
//=  2) The input file contains (x, y) pairs separated by commas           =
//=     and/or whitespace. No comments are allowed in the input file.      =
//=  3) See, R. Jain, "The Art of Computer Systems Performance             =
//=     Analysis," John Wiley & Sons, 1991 (pp. 221-228).                  =
//=  4) Malformed input or data with no spread in x is reported on stderr  =
//=     and the program exits with a failure status.                       =
//=------------------------------------------------------------------------=
//= Example "in.dat" file (from Jain, page 224):                           =
//=                                                                        =
//=    14, 2                                                               =
//=    16, 5                                                               =
//=    27, 7                                                               =
//=    42, 9                                                               =
//=    39, 10                                                              =
//=    50, 13                                                              =
//=    83, 20                                                              =
//=------------------------------------------------------------------------=
//= Example output (for above "in.dat"):                                   =
//=                                                                        =
//=   ---------------------------------------------------- lr.c -----      =
//=    Number of pairs = 7                                                 =
//=    Y = 0.243756*x + -0.008282 (R^2 = 0.971471)                         =
//=   ---------------------------------------------------------------      =
//=------------------------------------------------------------------------=
//= Build: bcc32 lr.c, cl lr.c, gcc lr.c -lm                               =
//=------------------------------------------------------------------------=
//= Execute: lr < in.dat                                                   =
//=------------------------------------------------------------------------=
//= Contact: Kenneth J. Christensen                                        =
//=          University of South Florida                                   =
//=          WWW: http://www.csee.usf.edu/~christen                        =
//=          Email: christen@csee.usf.edu                                  =
//=------------------------------------------------------------------------=
//= History: VJE (02/13/99) - Genesis                                      =
//=          KJC (03/09/99) - Minor clean-up                               =
//=          KJC (05/25/00) - Added include stdlib to make atof() work     =
//==========================================================================

//----- Include files ------------------------------------------------------
#include <stdio.h>              // Needed for printf(), scanf(), getchar()
#include <stdlib.h>             // Needed for EXIT_SUCCESS and EXIT_FAILURE
#include <ctype.h>              // Needed for isspace()
#include <math.h>               // Needed for sqrt() and pow()

//==========================================================================
//=  Function to read the next number from stdin                           =
//=------------------------------------------------------------------------=
//=  Inputs: value - address that receives the number read                 =
//=  Returns: 1 if a number was stored, 0 at end of input, -1 if the       =
//=           next token is not a number                                   =
//==========================================================================
static int read_value(double *value)
{
  int c;                        // Character read from stdin

  // Skip any run of separators (commas and/or whitespace)
  do
  {
    c = getchar();
  } while ((c == ',') || ((c != EOF) && isspace(c)));

  // Nothing but separators remained, so this is a clean end of input
  if (c == EOF)
    return(0);

  // Put back the first character of the token and let scanf() convert it;
  // converting directly avoids an intermediate fixed-size string buffer
  ungetc(c, stdin);
  if (scanf("%lf", value) != 1)
    return(-1);

  return(1);
}

//==========================================================================
//=  Main program                                                          =
//==========================================================================
int main(void)
{
  double   x;                   // Value of x read in from the file
  double   y;                   // Value of y read in from the file
  long int count;               // Counter for number of pairs
  double   accum_x;             // Accumulator for the sum of the x's
  double   accum_y;             // Accumulator for the sum of the y's
  double   accum_xy;            // Accumulator for the sum of (x*y)
  double   accum_x_squared;     // Accumulator for the sum of the x's squared
  double   accum_y_squared;     // Accumulator for the sum of the y's squared
  double   numerator;           // Work variable for numerator
  double   denominator;         // Work variable for denominator
  double   x_spread;            // count * sum(x^2) - sum(x)^2
  double   y_spread;            // count * sum(y^2) - sum(y)^2
  double   linear_corr;         // Linear correlation coefficient
  double   slope;               // Slope of the linear regression
  double   intercept;           // Intercept of the linear regression
  int      status;              // Return status from read_value()

  // Output a banner
  printf("---------------------------------------------------- lr.c ----- \n");

  // Main loop to read values and compute accumulated values
  count = 0;
  accum_x = accum_y = accum_xy = accum_x_squared = accum_y_squared = 0.0;
  for (;;)
  {
    // Read x; end of input is only legal between complete pairs
    status = read_value(&x);
    if (status == 0)
      break;

    // A bad x, or a missing or bad y, makes the data unusable
    if ((status < 0) || (read_value(&y) != 1))
    {
      fprintf(stderr,
        "*** ERROR - malformed value or incomplete pair after %ld pairs \n",
        count);
      return(EXIT_FAILURE);
    }
    count++;

    // Accumulate the applicable values from the input file
    accum_x = accum_x + x;                      // Sum of x
    accum_y = accum_y + y;                      // Sum of y
    accum_xy = accum_xy + (x*y);                // Sum of x*y
    accum_x_squared = accum_x_squared + (x*x);  // Sum of x squared
    accum_y_squared = accum_y_squared + (y*y);  // Sum of y squared
  }

  // The slope divides by x_spread, which is zero for fewer than two pairs
  // or when every x is the same. The negated test also rejects NaN.
  x_spread = (count * accum_x_squared) - (accum_x * accum_x);
  if ((count < 2) || !(x_spread > 0.0))
  {
    fprintf(stderr,
      "*** ERROR - need at least two pairs with differing x values \n");
    return(EXIT_FAILURE);
  }

  // Calculate the slope and intercept of the linear regression line
  numerator = (count * accum_xy) - (accum_x * accum_y);
  slope = numerator / x_spread;
  intercept = (accum_y / count) - (slope * (accum_x / count));

  // Calculate the linear correlation coefficient. It is undefined when
  // there is no spread in y; no special handling is done for that case.
  y_spread = (count * accum_y_squared) - (accum_y * accum_y);
  denominator = sqrt(x_spread) * sqrt(y_spread);
  linear_corr = numerator / denominator;

  // Output results (R^2 is the coefficient of determination)
  printf(" Number of pairs = %ld \n", count);
  printf(" Y = %f*x + %f (R^2 = %f) \n", slope, intercept,
    pow(linear_corr, 2.0));
  printf("--------------------------------------------------------------- \n");

  return(EXIT_SUCCESS);
}