//=================================================== file = summary2.c ===== //= Program to compute summary statistics for a series X of size N = //= - Computes median, 1%, 2%, 5%, 95%, 98%, and 99% = //=========================================================================== //= Notes: = //= 1) Input from input file "in.dat" to stdin (see example below) = //= * Comments are bounded by "&" characters at the beginning and = //= end of the comment block = //= 2) Output is to stdout = //=-------------------------------------------------------------------------= //= Example "in.dat" file: = //= = //= & Sample series of data which can be integers or reals. = //= There are 11 values in this file. & = //= 50 = //= 42 = //= 48 = //= 61 = //= 60 = //= 53 = //= 39 = //= 54 = //= 42 = //= 59 = //= 53 = //=-------------------------------------------------------------------------= //= Example output (for above "in.dat"): = //= = //= ---------------------------------------------- summary2.c ----- = //= Total of 11 values = //= 1% value = 39.000000 = //= 2% value = 39.000000 = //= 5% value = 39.000000 = //= 25% value = 42.000000 = //= 50% value = 53.000000 = //= 75% value = 59.000000 = //= 95% value = 61.000000 = //= 98% value = 61.000000 = //= 99% value = 61.000000 = //= --------------------------------------------------------------- = //=-------------------------------------------------------------------------= //= Build: bcc32 summary2.c = //=-------------------------------------------------------------------------= //= Execute: summary2 < in.dat = //=-------------------------------------------------------------------------= //= Author: Kenneth J. Christensen = //= University of South Florida = //= WWW: http://www.csee.usf.edu/~christen = //= Email: christen@csee.usf.edu = //=-------------------------------------------------------------------------= //= History: KJC (05/23/00) - Genesis = //= KJC (10/02/00) - Fixed cast problem with array indexes = //= KJC (06/06/02) - Changed MAX_SIZE to 2 million = //= KJC (08/26/03) - Median is computed correctly for odd even = //= KJC (05/27/13) - Modified to use malloc() for heap space = //= KJC (03/15/15) - Added 25% and 75% = //=========================================================================== //----- Include files ------------------------------------------------------- #include // Needed for printf() and feof() #include // Needed for exit() and atof() #include // Needed for strcmp() //----- Defines ------------------------------------------------------------- #define MAX_SIZE 10000000 // Maximum size of time series data array //----- Globals ------------------------------------------------------------- double *X; // Time series read from "in.dat" int N; // Number of values in "in.dat" //----- Function prototypes ------------------------------------------------- void load_X_array(void); // Load X array int comp(const void *p, const void *q); // Compare p and q for qsort() //=========================================================================== //= Main program = //=========================================================================== void main(void) { double median; // Median value double percent_1; // 1%-tile value double percent_2; // 2%-tile value double percent_5; // 5%-tile value double percent_25; // 25%-tile value double percent_75; // 75%-tile value double percent_95; // 95%-tile value double percent_98; // 98%-tile value double percent_99; // 99%-tile value int i; // Loop counter // Malloc space for X X = (double *) malloc(sizeof(double) * MAX_SIZE); if (X == NULL) { printf("*** ERROR - Could not malloc() enough space \n"); exit(1); } // Load the series X printf("---------------------------------------------- summary2.c -----\n"); load_X_array(); // Sort the series X qsort(X, N, sizeof(double), comp); // Compute median precisely if ((N % 2) == 1) median = X[N / 2]; else median = 0.5 * (X[(N - 1) / 2] + X[(N + 1) / 2]); // Compute percentiles with a round-down of index percent_1 = X[(int) (0.01 * N)]; percent_2 = X[(int) (0.02 * N)]; percent_5 = X[(int) (0.05 * N)]; percent_25 = X[(int) (0.25 * N)]; percent_75 = X[(int) (0.75 * N)]; percent_95 = X[(int) (0.95 * N)]; percent_98 = X[(int) (0.98 * N)]; percent_99 = X[(int) (0.99 * N)]; printf(" Total of %ld values \n", N); printf(" 1%% value = %f \n", percent_1); printf(" 2%% value = %f \n", percent_2); printf(" 5%% value = %f \n", percent_5); printf(" 25%% value = %f \n", percent_25); printf(" 50%% value = %f \n", median); printf(" 75%% value = %f \n", percent_75); printf(" 95%% value = %f \n", percent_95); printf(" 98%% value = %f \n", percent_98); printf(" 99%% value = %f \n", percent_99); printf("---------------------------------------------------------------\n"); // Free the malloced memory free(X); } //=========================================================================== //= Function to load X array from stdin and determine N = //=========================================================================== void load_X_array(void) { char temp_string[1024]; // Temporary string variable // Read all values into X N = 0; while(1) { scanf("%s", temp_string); if (feof(stdin)) goto end; // This handles a comment bounded by "&" symbols while (strcmp(temp_string, "&") == 0) { do { scanf("%s", temp_string); if (feof(stdin)) goto end; } while (strcmp(temp_string, "&") != 0); scanf("%s", temp_string); if (feof(stdin)) goto end; } // Enter value in array and increment array index X[N] = atof(temp_string); N++; // Check if MAX_SIZE data values exceeded if (N >= MAX_SIZE) { printf("*** ERROR - greater than %ld data values \n", MAX_SIZE); exit(1); } } // End-of-file escape end: return; } //=========================================================================== //= Function to compare two aruguments in an array of double = //= - Needed by qsort() = //=========================================================================== int comp(const void *p, const void *q) { double *ptr1 = (double *)(p); double *ptr2 = (double *)(q); if (*ptr1 < *ptr2) return -1; else if (*ptr1 == *ptr2) return 0; else return 1; }