/* The basic PDH algorithm implementation plus the following features: 1. By defining HIGH_TREE to 1, we allow the tree to be built with one more level, which hopefully may give more chance for resolving inter-cell distances; 2. All atoms are chained together. For intermediate nodes, their first_atom and last_atom pointers are fixed 3. When two nodes have few atoms in it (e.g., product < 4, 8, 16), do not call resolve_two_cells 4. Effective region in resolving two cells: only consider the MBR of a cell 5. Synthetic data To compile: g++ -Wall -o pdh -I/sw/include/postgresql/ -L/sw/lib/ PDH.c in a 64-bit Mac with Mac OS X 10.6. Send comments and questions to Yicheng Tu (ytu@cse.usf.edu), or Anand Kumar (akumar8@mail.usf.edu) */ #include #include #include #include #include #include #include #include #include "randlib/src/randlib.c" using namespace std; #define INF 999999999.0 /* not really infinite, but a number large enough */ #define END_OF_LIST -1 #define NO_VALUE -1 #define TREE_DEGREE 4 /* 8 for oct trees for 3D data */ #define HIGH_TREE 0 #define TREE_STATS 1 #define MBR 0 //#define MD_DATA 1 #define BOX_SIZE 25000 #define FALSE 0 // Boolean false #define TRUE 1 // Boolean true #define DEBUG 1 typedef struct atomdesc { int count; int i; int type; double x_pos; double y_pos; double z_pos; double q; double m; int next; /* to chain all data points in the same leaf page in the DM */ } atom; typedef struct hist_entry{ //float min; //float max; long long d_cnt; /* need a long long type as the count might be huge */ } bucket; typedef struct { long long a_cnt; double x_min; double x_max; double y_min; double y_max; //double z_min; //double z_max; #if MBR != 0 double ex_min; double ex_max; double ey_min; double ey_max; #endif int next; /* pointer for chaining all nodes on the same level */ int children; int parent; int first_atom; /* index of the first atom, for leaf nodes only */ int last_atom; /* end of the atom list, for leaf nodes only */ } dmEntry; typedef struct { /* tree structure info */ unsigned first_node; unsigned total_nodes; unsigned total_e_nodes; /* PDH performance info */ long long res_dis_cnt; long long res_cnt; long long resolvable_cnt; } treeLevelInfo; treeLevelInfo * treeInfo; dmEntry * densityMap; //dmEntry * tempDensityMap; bucket * histogram; //bucket * tempHistogram; atom * atom_list; int num_buckets; int PDH_algo; long long PDH_acnt; double PDH_min = 0.0; double PDH_res; double PDH_max; int PDH_levels = 999999; int PDH_start_level = NO_VALUE; /* recording DM_0 */ int PDH_accuracy = NO_VALUE; /* defining heuristics in approx algo */ int DM_total_nodes = 0; int DM_total_effective_nodes = 0; int DM_total_levels = 0; double Zfactor; /* order of the Zipf distribution */ int Rseed; double stdDeviation = 0.0; double xMean = 0, yMean = 0, zMean = 0; int numOfDistributions = 0; int flag = 0; double u, v; /* for computing heuristic 3 for the approximate algorithm */ /* some performance records */ long long dis_cnt; /* total number of calls to the distance function */ long long res_dis_cnt; /* pairwise distances resolved in densitymap */ long long res_cnt; /* successful resolution */ long long resolvable_cnt; /* number of calls to resolvable */ double processing_time; /* total time for running the algorithm */ unsigned debug_cnt = 0; unsigned debug_cnt1 = 0; /* just for debugging purposes */ long long debug_cnt2 = 0; /* for random data generation */ int first = TRUE; // Static first time flag for zipf /* for time tracing */ struct timezone Idunno; struct timeval startTime, endTime; int readline(char s[], int max); int PDH_baseline(); int PDH_densityMap(); int PDH_serialDualBuckets(); double p2p_distance(int ind1, int ind2); void output_histogram(int pos); void output_statistics(); double report_running_time(char *); int build_2D_dm(double, double, double, double); int locate_atom2D(double, double); int resolve_two_cells(int, int, int); int resolvable(int, int); int print_tree(int, int); double * Zipf_Init (int); void generate_data(int, double, double); int zipf(double alpha, int n); double rand_val(int seed); int randNumGen(int max); int main(int argc, char **argv) { #if MD_DATA != 0 char line[1000]; char *tokens[10]; int a_cnt = 0; int i; #endif double x_max = 0.0, y_max = 0.0, z_max = 0.0; double x_min = INF, y_min = INF, z_min = INF; //long long error_cnt = 0; gettimeofday(&startTime, &Idunno); PDH_algo = atoi(argv[1]); /* get the algorithm for processing PDH */ PDH_acnt = atoi(argv[2]); /* the total number of atoms to process */ PDH_res = atof(argv[3]); PDH_max = atof(argv[4]); PDH_min = atof(argv[5]); if(PDH_acnt % 2 == 1) { printf("\n%lld\t", --PDH_acnt); /* jsut some trick to format the output */ } num_buckets = (int)((PDH_max - PDH_min)/PDH_res) + 1; histogram = (bucket *)malloc(sizeof(bucket)*num_buckets); //tempHistogram = (bucket *)malloc(sizeof(bucket)*num_buckets); //printf("size of %d\n", num_buckets); atom_list = (atom *)malloc(sizeof(atom)*PDH_acnt); #if MD_DATA != 0 while (a_cnt < PDH_acnt) { readline(line, 1000); if (strcmp(line, "") == 0) { // stop on empty line break; } i = 0; tokens[i] = strtok(line, "\t"); while (tokens[i] != NULL && i < 9) { tokens[++i] = strtok(NULL, "\t"); } if (strcmp(tokens[0], "ATOM") == 0 && i >= 8) { atom_list[a_cnt].count = atoi(tokens[1]); atom_list[a_cnt].x_pos = atof(tokens[4])*1000.0; atom_list[a_cnt].y_pos = atof(tokens[5])*1000.0; atom_list[a_cnt].z_pos = atof(tokens[6])*1000.0; atom_list[a_cnt].next = END_OF_LIST; if(atom_list[a_cnt].x_pos < x_min) x_min = atom_list[a_cnt].x_pos; if(atom_list[a_cnt].y_pos < y_min) y_min = atom_list[a_cnt].y_pos; if(atom_list[a_cnt].z_pos < z_min) z_min = atom_list[a_cnt].z_pos; if(atom_list[a_cnt].x_pos > x_max) x_max = atom_list[a_cnt].x_pos; if(atom_list[a_cnt].y_pos > y_max) y_max = atom_list[a_cnt].y_pos; if(atom_list[a_cnt].z_pos > z_max) z_max = atom_list[a_cnt].z_pos; a_cnt++; //printf("Coordinates: %f, %f, %f\n", atof(tokens[4]), atof(tokens[5]), atof(tokens[6])); } } #else x_min = y_min = z_min = 0.0; x_max = y_max = z_max = (double) (BOX_SIZE * 1.0); generate_data(atoi(argv[6]), atof(argv[7]), atof(argv[8])); #endif gettimeofday(&startTime, &Idunno); build_2D_dm(x_min, x_max, y_min, y_max); res_dis_cnt = dis_cnt = 0; res_cnt = resolvable_cnt = 0; switch(PDH_algo) { case 0: /* brute-force way */ PDH_baseline(); break; case 1: /* in-memory density map method */ PDH_densityMap(); break; case 2: /* compute histogram by individual cutoffs */ PDH_serialDualBuckets(); break; case 3: /* approximate answers */ //free(atom_list); PDH_levels = atoi(argv[8]); PDH_accuracy = atoi(argv[9]); PDH_densityMap(); break; default: fprintf(stderr, "Wrong algorithm specified!\n"); } processing_time = report_running_time("processing PDH"); output_statistics(); output_histogram(atoi(argv[6])); fprintf(stderr, "%lld\t%lf\n", PDH_acnt, processing_time); return 0; } /* generate data using different spatial distributions */ void generate_data(int policy, double random_block_size, double Zfactor) { int num_random_blocks, num_random_segs, a_cnt, block, i; int *blocks; double x_base, y_base, new_cord; //srand(time(0)); srand48(2); srand(Rseed); switch(policy){ case 0: /* uniform distribution */ for(a_cnt = 0; a_cnt < PDH_acnt; a_cnt++) { atom_list[a_cnt].x_pos = ((double)(rand()) / RAND_MAX) * BOX_SIZE; atom_list[a_cnt].y_pos = ((double)(rand()) / RAND_MAX) * BOX_SIZE; atom_list[a_cnt].z_pos = ((double)(rand()) / RAND_MAX) * BOX_SIZE; atom_list[a_cnt].next = END_OF_LIST; } break; case 1: /* Zipf */ Zfactor = 1.0; num_random_segs = (int)(BOX_SIZE / random_block_size); num_random_blocks = num_random_segs * num_random_segs; /* these record a random shuffled array of blocks */ blocks = (int*)calloc(num_random_blocks, sizeof(int)); for(i = 0; i < num_random_blocks; i++) { blocks[i] = i; } first = TRUE; /* reset the starting point for zipf generator */ random_shuffle(blocks, blocks + num_random_blocks); /* generate (x, y) coordinates */ for(a_cnt = 0; a_cnt < PDH_acnt; a_cnt++) { i = zipf(Zfactor, num_random_blocks) - 1; /* get the block that we should put the next point in */ block = blocks[i]; /* base coordinates of the block. Points are uniformly distibuted within the block */ x_base = (double)((block % num_random_segs) * random_block_size); y_base = (double)((block / num_random_segs) * random_block_size); /* the real data point coordinates */ atom_list[a_cnt].x_pos = x_base + drand48() * random_block_size; atom_list[a_cnt].y_pos = y_base + drand48() * random_block_size; atom_list[a_cnt].next = END_OF_LIST; //printf("%lf %lf\n", atom_list[a_cnt].x_pos, atom_list[a_cnt].y_pos); } free(blocks); break; case 2: /* multiple Gaussian distributions */ numOfDistributions = (int) random_block_size; stdDeviation = Zfactor; for (i = 0; i < numOfDistributions; i++) { xMean = randNumGen(BOX_SIZE); yMean = randNumGen(BOX_SIZE); zMean = randNumGen(BOX_SIZE); /* generate x coordinates */ for (a_cnt = 0; a_cnt < (PDH_acnt/numOfDistributions); a_cnt++) { new_cord = gennor(xMean, stdDeviation); if(new_cord < 0.0 || new_cord > BOX_SIZE) atom_list[(i*(PDH_acnt/numOfDistributions))+a_cnt].x_pos = xMean; else atom_list[(i*(PDH_acnt/numOfDistributions))+a_cnt].x_pos = new_cord; } /* generate y coordinates */ for (a_cnt = 0; a_cnt < (PDH_acnt/numOfDistributions); a_cnt++) { new_cord = gennor(yMean, stdDeviation); if(new_cord < 0.0 || new_cord > BOX_SIZE) atom_list[(i*(PDH_acnt/numOfDistributions))+a_cnt].y_pos = yMean; else atom_list[(i*(PDH_acnt/numOfDistributions))+a_cnt].y_pos = new_cord; } } break; default: fprintf(stderr, "Wrong data policy !\n"); } } void output_statistics() { int i; long long total_dis_cnt = ((long long)PDH_acnt * ((long long)PDH_acnt - 1) / 2); if(PDH_algo != 0) { printf(" Level | res_dis_cnt | acc_dis_cnt | per_dis | res_cnt | res_call | avg_res_dis_cnt\n"); printf("---------------------------------------------------------------------------------------------------------------\n"); for(i = 0; i < DM_total_levels; i++) { res_dis_cnt += treeInfo[i].res_dis_cnt; res_cnt += treeInfo[i].res_cnt; resolvable_cnt += treeInfo[i].resolvable_cnt; printf("%6d | %15lld | %17lld | %lf | %12lld | %12lld | %18.2lf \n", i, treeInfo[i].res_dis_cnt, res_dis_cnt, (double)((res_dis_cnt * 1.0) / (total_dis_cnt)), treeInfo[i].res_cnt, treeInfo[i].resolvable_cnt, (treeInfo[i].res_cnt == 0) ? 0.0f : (double)(treeInfo[i].res_dis_cnt*1.0/treeInfo[i].res_cnt)); } printf("---------------------------------------------------------------------------------------------------------------\n"); } //printf("debug_cnt = %u, debug_cnt1 = %u\n", debug_cnt, debug_cnt1); printf("resolvable_cnt = %lld, res_cnt = %lld, res_dis_cnt = %lld, dis_cnt = %lld\n", resolvable_cnt, res_cnt, res_dis_cnt, dis_cnt); } /* print the counts in all buckets of the histogram */ void output_histogram(int pos){ int i = 0, line_num; long long total_cnt = 0, total_error_cnt = 0; char *line, *his_value; bucket * correct_histogram; FILE * fctrl = fopen("./results/TKDE/control", "r"); if(fctrl == NULL) { return; } line_num = pos*9 + (int)log2(PDH_acnt / 100000); //printf("Line_num is %d\n", line_num); line = (char*)malloc(1024); while(i++ <= line_num) { fgets(line, 1024, fctrl); } //printf("Line got: %s\n", line); correct_histogram = (bucket *)malloc(sizeof(bucket)*num_buckets); i = 0; while ( (his_value = strsep(&line, " \t")) != NULL) { if(*his_value != '\0') correct_histogram[i++].d_cnt = strtoll(his_value, NULL, 10); } for(i=0; i< num_buckets; i++) { //if(i%5 == 0) // printf("\n%02d: ", i); printf("%15lld ", histogram[i].d_cnt); total_cnt += histogram[i].d_cnt; total_error_cnt += abs(histogram[i].d_cnt - correct_histogram[i].d_cnt); } fprintf(stderr, "T:%lld\t%lf\t", total_cnt, (double)(total_error_cnt*1.0/total_cnt)); fclose(fctrl); delete line; } /* brute-force PDH solution */ int PDH_baseline() { int i, j, h_pos; double dist; for(i = 0; i < PDH_acnt; i++) { for(j = i+1; j < PDH_acnt; j++) { dist = p2p_distance(i,j); h_pos = (int) ((dist - PDH_min) / PDH_res); histogram[h_pos].d_cnt++; dis_cnt++; } } return 0; } /* This method transforms a histogram queries into a series of two bucket histogram queries */ int PDH_serialDualBuckets() { int i; int bucket_num = num_buckets; bucket * shadow_histogram = (bucket *)calloc(bucket_num, sizeof(bucket)); double min = PDH_min, max = PDH_max, res = PDH_res; double cutoff_pt; long long higher_count = 0; /* count of the current higher region of the histogram */ for(i = bucket_num - 1; i > 0; i--) { cutoff_pt = res * i; if(i > bucket_num / 2) { PDH_min = min; PDH_res = cutoff_pt - min; PDH_max = PDH_min + 2 * PDH_res; } else { PDH_max = max; PDH_res = max - cutoff_pt; PDH_min = PDH_max - 2 * PDH_res; } //num_buckets = 2; //printf("(%lf, %lf, %lf) ", PDH_min, PDH_max, PDH_res); PDH_densityMap(); shadow_histogram[i].d_cnt = histogram[1].d_cnt - higher_count; higher_count = histogram[1].d_cnt; //printf("histogram: [%lld, %lld, %lld]\n", histogram[0].d_cnt, histogram[1].d_cnt, histogram[2].d_cnt); histogram[0].d_cnt = histogram[1].d_cnt = histogram[2].d_cnt = 0; } printf("(%d, %lld, %lld) ", i, shadow_histogram[i].d_cnt, higher_count); shadow_histogram[i].d_cnt = (PDH_acnt - 1) * PDH_acnt / 2 - higher_count; for(i = 0; i < bucket_num; i++) histogram[i].d_cnt = shadow_histogram[i].d_cnt; return 0; } /* PDH solution based on the basic density map approach */ int PDH_densityMap() { int node = 0, i, j, resolvable_1st = 1, level = 0; long long dis_count = 0; double u_bound_1st_bucket;; /* PDH_min can be negative in algorithm 2, so we need to calculate the u bound of the first bucket. However, be careful about PDH_min = 0.0, sometimes we need to use 'PDH_min - 0.0 < 0.000000001' */ if(PDH_min > 0.0) u_bound_1st_bucket = PDH_min; else if (PDH_min == 0.0) u_bound_1st_bucket = PDH_res; else u_bound_1st_bucket = PDH_res - PDH_min * (-1.0); while( sqrt(2.0) * (densityMap[node].x_max - densityMap[node].x_min) > u_bound_1st_bucket) { if(densityMap[node].children == NO_VALUE) { /* we've reached the leaf level, non-resolvable */ resolvable_1st = 0; break; } node = densityMap[node].children; level++; } if(resolvable_1st) { /* resolve all intra-cell points on the entry level of nodes */ i = node; //printf("%d level is the beginning\n", level); while(i != END_OF_LIST) { if(densityMap[i].a_cnt > 0) { dis_count = densityMap[i].a_cnt * (densityMap[i].a_cnt - 1) / 2; histogram[0].d_cnt += dis_count; #if TREE_STATS != 0 treeInfo[level].res_dis_cnt+= dis_count; #endif //printf("resolved (%d) to %d with %lld\n", i, 0, dis_count); } #if TREE_STATS != 0 treeInfo[level].res_cnt++; #endif i = densityMap[i].next; } } PDH_start_level = level; /* resolve inter-cell points */ for(i = node; densityMap[i].next != END_OF_LIST; i = densityMap[i].next) { if(densityMap[i].a_cnt != 0) { for(j = densityMap[i].next; j != END_OF_LIST; j = densityMap[j].next) if(densityMap[j].a_cnt != 0) { resolve_two_cells(i, j, level); } } } return 0; } int resolve_two_cells(int ind1, int ind2, int level) { int his_index, i, j, h_pos, last1, last2; double dist; long long dis_count, temp_count; /*flag++; if(flag%100000 == 0){printf("temp: %6.6lf\n", report_running_time("temp test")); gettimeofday(&startTime, &Idunno); }*/ dis_count = densityMap[ind1].a_cnt * densityMap[ind2].a_cnt; //printf("resolving (%d, %d) - ", ind1, ind2); #if TREE_STATS != 0 treeInfo[level].resolvable_cnt++; #endif /* first base case: two cells are directly resolvable, we set a threshold to disallow resolving pairs of cells that have small number of atom pairs in them */ if((his_index = resolvable(ind1, ind2)) >= 0) { //dis_count = densityMap[ind1].a_cnt * densityMap[ind2].a_cnt; histogram[his_index].d_cnt += dis_count; #if TREE_STATS != 0 treeInfo[level].res_dis_cnt += dis_count; treeInfo[level].res_cnt++; #endif return 1; } /* second base case: the leaf level is reached or the cells have few atoms in them */ if(PDH_algo == 3 && (level - PDH_start_level >= PDH_levels || densityMap[ind1].children == NO_VALUE)) { switch(PDH_accuracy) { case 1: /* SKEW */ histogram[his_index*(-1) - 1].d_cnt += dis_count; dis_cnt += dis_count; break; case 2: /* EVEN */ histogram[his_index*(-1)].d_cnt += (dis_count / 2); histogram[his_index*(-1) - 1].d_cnt += (dis_count - dis_count / 2); dis_cnt += dis_count; break; case 3: /* PROP */ if(v - u >= PDH_res*2) printf("Something is wrong\n"); temp_count = (long long)(dis_count * (v - his_index*(-1)*PDH_res) / (v-u)); histogram[his_index*(-1)].d_cnt += temp_count; dis_cnt += temp_count; histogram[his_index*(-1) - 1].d_cnt += dis_count - temp_count; dis_cnt += dis_count - temp_count; break; default: return NO_VALUE; } return 0; } /* third base case: the leaf level is reached */ if(densityMap[ind1].children == NO_VALUE) { for(i = densityMap[ind1].first_atom, last1 = atom_list[densityMap[ind1].last_atom].next; i != last1; i = atom_list[i].next) { for(j = densityMap[ind2].first_atom, last2 = atom_list[densityMap[ind2].last_atom].next; j != last2; j = atom_list[j].next) { dist = p2p_distance(i,j); h_pos = (int) ((dist - PDH_min) / PDH_res); histogram[h_pos].d_cnt++; #if TREE_STATS != 0 dis_cnt++; #endif } } return 0; } /* recursive case: go to the lower level with higher resolution if not resolvable now */ for(i = 0; i < TREE_DEGREE; i++) { if(densityMap[densityMap[ind1].children + i].a_cnt != 0) for(j = 0; j < TREE_DEGREE; j++) if(densityMap[densityMap[ind2].children + j].a_cnt != 0) resolve_two_cells(densityMap[ind1].children + i, densityMap[ind2].children + j, level + 1); } return 2; } /* answers the following question: given two cells on the same level of the density map, does the possible range of all distances fall into a histogram bucket? If yes, returns the index of the bucket; otherwise it returns (-1*last bucket index) . */ int resolvable(int ind1, int ind2) { #if MBR == 0 double x1_low = densityMap[ind1].x_min; double x1_high = densityMap[ind1].x_max; double y1_low = densityMap[ind1].y_min; double y1_high = densityMap[ind1].y_max; double x2_low = densityMap[ind2].x_min; double x2_high = densityMap[ind2].x_max; double y2_low = densityMap[ind2].y_min; double y2_high = densityMap[ind2].y_max; double lateral = x1_high - x1_low; double temp; #else double x1_low = densityMap[ind1].ex_min; double x1_high = densityMap[ind1].ex_max; double y1_low = densityMap[ind1].ey_min; double y1_high = densityMap[ind1].ey_max; double x2_low = densityMap[ind2].ex_min; double x2_high = densityMap[ind2].ex_max; double y2_low = densityMap[ind2].ey_min; double y2_high = densityMap[ind2].ey_max; double sq_dis1, sq_dis2; #endif double y_diff1, y_diff2, x_diff1, x_diff2; double l_bound, u_bound; int l_ind, h_ind; #if MBR == 0 if(x1_low - x2_low > -0.000000001 && x1_low - x2_low < 0.000000001) { l_bound = (y1_low < y2_low) ? (y2_low - y1_high): (y1_low - y2_high); temp = (y1_low < y2_low) ? (y2_high - y1_low) : (y1_high - y2_low); u_bound = sqrt(lateral * lateral + temp * temp); } else if (y1_low - y2_low > -0.000000001 && y1_low - y2_low < 0.000000001) { l_bound = (x1_low < x2_low) ? (x2_low - x1_high) : (x1_low - x2_high); temp = (x1_low < x2_low) ? (x2_high - x1_low) : (x1_high - x2_low); u_bound = sqrt(lateral * lateral + temp * temp); } else { x_diff1 = (x1_low < x2_low) ? (x2_low - x1_high) : (x1_low - x2_high); x_diff2 = (x1_low < x2_low) ? (x2_high - x1_low) : (x1_high - x2_low); y_diff1 = (y1_low < y2_low) ? (y2_low - y1_high) : (y1_low - y2_high); y_diff2 = (y1_low < y2_low) ? (y2_high - y1_low) : (y1_high - y2_low); l_bound = sqrt(x_diff1 * x_diff1 + y_diff1 * y_diff1); u_bound = sqrt(x_diff2 * x_diff2 + y_diff2 * y_diff2); } #else if (x2_high >= x1_low && x1_high >= x2_low) { /* the two boxes' horizontal sides overlap */ if(y1_low >= y2_high) { l_bound = y1_low - y2_high; sq_dis1 = (x1_low - x2_high) * (x1_low - x2_high) + (y1_high - y2_low) * (y1_high - y2_low); sq_dis2 = (x1_high - x2_low) * (x1_high - x2_low) + (y1_high - y2_low) * (y1_high - y2_low); } else { l_bound = y2_low - y1_high; sq_dis1 = (x2_low - x1_high) * (x2_low - x1_high) + (y2_high - y1_low) * (y2_high - y1_low); sq_dis2 = (x2_high - x1_low) * (x2_high - x1_low) + (y2_high - y1_low) * (y2_high - y1_low); } u_bound = (sq_dis1 > sq_dis2) ? sqrt(sq_dis1) : sqrt(sq_dis2); } else if (y2_high >= y1_low && y1_high >= y2_low) { /* the two boxes' vertical sides overlap */ if(x1_low >= x2_high) { l_bound = x1_low - x2_high; sq_dis1 = (x2_low - x1_high) * (x2_low - x1_high) + (y2_high - y1_low) * (y2_high - y1_low); sq_dis2 = (x2_low - x1_high) * (x2_low - x1_high) + (y2_low - y1_high) * (y2_low - y1_high); } else { l_bound = x2_low - x1_high; sq_dis1 = (x1_low - x2_high) * (x1_low - x2_high) + (y1_high - y2_low) * (y1_high - y2_low); sq_dis2 = (x1_low - x2_high) * (x1_low - x2_high) + (y1_low - y2_high) * (y1_low - y2_high); } u_bound = (sq_dis1 > sq_dis2) ? sqrt(sq_dis1) : sqrt(sq_dis2); } else { x_diff1 = (x1_high < x2_low) ? (x2_low - x1_high) : (x1_low - x2_high); y_diff1 = (y1_high < y2_low) ? (y2_low - y1_high) : (y1_low - y2_high); x_diff2 = (x1_high < x2_low) ? (x2_high - x1_low) : (x1_high - x2_low); y_diff2 = (y1_high < y2_low) ? (y2_high - y1_low) : (y1_high - y2_low); l_bound = sqrt(x_diff1 * x_diff1 + y_diff1 * y_diff1); u_bound = sqrt(x_diff2 * x_diff2 + y_diff2 * y_diff2); } #endif l_ind = (int) ((l_bound - PDH_min) / PDH_res); h_ind = (int) ((u_bound - PDH_min) / PDH_res); u = l_bound; v = u_bound; if(l_ind == h_ind) return l_ind; return (-1) * h_ind; } /* set a checkpoint and show the (natural) running time in seconds */ double report_running_time(char * info_str) { long sec_diff, usec_diff; gettimeofday(&endTime, &Idunno); sec_diff = endTime.tv_sec - startTime.tv_sec; usec_diff= endTime.tv_usec-startTime.tv_usec; if(usec_diff < 0) { sec_diff --; usec_diff += 1000000; } //printf("Running time for %s: %ld.%06ld\n", info_str, sec_diff, usec_diff); //printf("--> %ld.%06ld\n", endTime.tv_sec, endTime.tv_usec); return (double)(sec_diff*1.0 + usec_diff/1000000.0); } /* distance of two points */ double p2p_distance(int ind1, int ind2) { double x1 = atom_list[ind1].x_pos; double x2 = atom_list[ind2].x_pos; double y1 = atom_list[ind1].y_pos; double y2 = atom_list[ind2].y_pos; return sqrt((x1 - x2)*(x1-x2) + (y1 - y2)*(y1 - y2)); } int readline(char s[], int max) { int c, i; /*store an input char, index*/ for(i=0; i < max && (c=getchar()) != '\n' && c != EOF; s[i++] = c); s[i] = '\0'; return i; } int build_2D_dm(double x_min, double x_max, double y_min, double y_max) { double x_span = x_max - x_min; double y_span = y_max - y_min; double lateral; double x_low, x_high, y_low, y_high, x, y; unsigned int atom_cnt = PDH_acnt, list_len = 1, one = 1; /* number of nodes in the density map */ int i, j, levels = 0, num_nodes = 1; int curr_parent, curr_node, curr_l_node; int prev_last_atom = NO_VALUE; if(x_span > y_span) { lateral = x_span + 0.1; x_low = x_min - 0.05; x_high = x_max + 0.05; //printf("x: %lf - %lf = %lf = %lf\n", x_high, x_low, x_high - x_low, lateral); y_low = y_min - 0.05; y_high = y_min + lateral - 0.05; //printf("y: %lf - %lf = %lf = %lf\n", y_high, y_low, y_high - y_low, lateral); } else { lateral = y_span + 0.1; y_low = y_min - 0.05; y_high = y_max + 0.05; //printf("y: %lf - %lf = %lf = %lf\n", y_high, y_low, y_high - y_low, lateral); x_low = x_min - 0.05; x_high = x_min + lateral; //printf("x: %lf - %lf = %lf = %lf\n", x_high, x_low, x_high - x_low, lateral); } /* for HIGH_TREE, more levels of nodes will be built */ while((atom_cnt = (atom_cnt >> 2)) != 0) { levels++; list_len += (one = one << 2) ; } for(i = 0; i < HIGH_TREE; i++) { levels++; list_len += (one = one << 2) ; } //printf("PDH_acnt = %lld, list_len = %d, one = %d, levels = %d\n", PDH_acnt, list_len, one, levels); DM_total_nodes = list_len; DM_total_levels = levels+1; treeInfo = (treeLevelInfo*)calloc(DM_total_levels, sizeof(treeLevelInfo)); densityMap = (dmEntry *)malloc(sizeof(dmEntry)*list_len); for(i = 0; (unsigned)i < list_len; i++) { densityMap[i].a_cnt = 0; densityMap[i].children = densityMap[i].parent = NO_VALUE; densityMap[i].first_atom = densityMap[i].last_atom = END_OF_LIST; } curr_parent = 0; curr_node = 1; densityMap[0].x_min = x_low; densityMap[0].x_max = x_high; densityMap[0].y_min = y_low; densityMap[0].y_max = y_high; densityMap[0].next = END_OF_LIST; for(i = 0; i < levels; i++) { if (i != 0 ) num_nodes *= 4; treeInfo[i].first_node = curr_parent; treeInfo[i].total_nodes = num_nodes; for(j = 1; j <= num_nodes; j++) { /* get the coordinates for the rectangle of the parent node */ x_low = densityMap[curr_parent].x_min; x_high= densityMap[curr_parent].x_max; y_low = densityMap[curr_parent].y_min; y_high = densityMap[curr_parent].y_max; lateral = (x_high - x_low) / 2.0; /* the children pointer points to the first child only */ densityMap[curr_parent].children = curr_node; /* NW node */ densityMap[curr_node].x_min = x_low; densityMap[curr_node].x_max = x_low + lateral; densityMap[curr_node].y_min = y_low + lateral; densityMap[curr_node].y_max = y_high; densityMap[curr_node].parent = curr_parent; densityMap[curr_node].next = curr_node + 1; curr_node++; /* NE node */ densityMap[curr_node].x_min = x_low + lateral; densityMap[curr_node].x_max = x_high; densityMap[curr_node].y_min = y_low + lateral; densityMap[curr_node].y_max = y_high; densityMap[curr_node].parent = curr_parent; densityMap[curr_node].next = curr_node + 1; curr_node++; /* SW node */ densityMap[curr_node].x_min = x_low; densityMap[curr_node].x_max = x_low + lateral; densityMap[curr_node].y_min = y_low; densityMap[curr_node].y_max = y_low + lateral; densityMap[curr_node].parent = curr_parent; densityMap[curr_node].next = curr_node + 1; curr_node++; /* SE node */ densityMap[curr_node].x_min = x_low + lateral; densityMap[curr_node].x_max = x_high; densityMap[curr_node].y_min = y_low; densityMap[curr_node].y_max = y_low + lateral; densityMap[curr_node].parent = curr_parent; if(j < num_nodes) densityMap[curr_node].next = curr_node + 1; else densityMap[curr_node].next = END_OF_LIST; curr_node++; /* add children to the next node */ curr_parent++; } } treeInfo[i].first_node = curr_parent; treeInfo[i].total_nodes = num_nodes << 2; //printf("curr_parent = %d, curr_node = %d\n", curr_parent, curr_node); #if MBR != 0 for(i = 0; i < DM_total_nodes; i++) { densityMap[i].ex_min = densityMap[0].x_max; densityMap[i].ex_max = densityMap[0].x_min; densityMap[i].ey_min = densityMap[0].y_max; densityMap[i].ey_max = densityMap[0].y_min; } #endif for(i = 0; i < PDH_acnt; i++) { x = atom_list[i].x_pos; y = atom_list[i].y_pos; curr_node = locate_atom2D(x, y); if (curr_node >= 0 && curr_node < (int)list_len && densityMap[curr_node].children == NO_VALUE) { if (densityMap[curr_node].last_atom == END_OF_LIST) { densityMap[curr_node].first_atom = densityMap[curr_node].last_atom = i; } else { atom_list[densityMap[curr_node].last_atom].next = i; densityMap[curr_node].last_atom = i; } /* this is the critical part, update atom counts in the leaf node and all its ancestors */ while (curr_node != NO_VALUE) { densityMap[curr_node].a_cnt++; /* this means we have one more node with a_cnt > 0 that should be included in the final density map */ if(densityMap[curr_node].a_cnt == 1) DM_total_effective_nodes++; #if MBR != 0 /* update the effective minimum bounding rectangle */ if(x < densityMap[curr_node].ex_min) densityMap[curr_node].ex_min = x; if(x > densityMap[curr_node].ex_max) densityMap[curr_node].ex_max = x; if(y < densityMap[curr_node].ey_min) densityMap[curr_node].ey_min = y; if(y > densityMap[curr_node].ey_max) densityMap[curr_node].ey_max = y; #endif curr_node = densityMap[curr_node].parent; } } else { fprintf(stderr, "ERROR: find an atom %d in wrong tree node %d!\n", i, curr_node); } } /* fix the first_atom pointer for all intermediate nodes */ for (curr_l_node = treeInfo[DM_total_levels - 1].first_node; curr_l_node != END_OF_LIST; curr_l_node = densityMap[curr_l_node].next) { //printf("having curr_L_node = %d\n", curr_l_node); if(densityMap[curr_l_node].a_cnt == 0) continue; curr_node = curr_l_node; curr_parent = densityMap[curr_node].parent; while(curr_parent != NO_VALUE && densityMap[curr_parent].first_atom == END_OF_LIST) { densityMap[curr_parent].first_atom = densityMap[curr_node].first_atom; curr_node = densityMap[curr_node].parent; curr_parent = densityMap[curr_parent].parent; } /* chain all the atoms together */ if(prev_last_atom == NO_VALUE) prev_last_atom = densityMap[curr_l_node].last_atom; else { atom_list[prev_last_atom].next = densityMap[curr_l_node].first_atom; prev_last_atom = densityMap[curr_l_node].last_atom; } } /* fix the last_atom pointer for all intermediate nodes */ i = treeInfo[DM_total_levels - 1].first_node; for(curr_l_node = DM_total_nodes - 1; curr_l_node >= i; curr_l_node--) { if(densityMap[curr_l_node].a_cnt == 0) continue; curr_node = curr_l_node; curr_parent = densityMap[curr_node].parent; while(curr_parent != NO_VALUE && densityMap[curr_parent].last_atom == END_OF_LIST) { densityMap[curr_parent].last_atom = densityMap[curr_node].last_atom; curr_node = densityMap[curr_node].parent; curr_parent = densityMap[curr_parent].parent; } } //print_tree(0, DM_total_nodes - 1); return 0; } /* find the node where a point belongs */ int locate_atom2D(double x_cor, double y_cor) { int curr_node = 0; double x_low, x_high, y_low, y_high, lateral; while(densityMap[curr_node].children != NO_VALUE) { x_low = densityMap[curr_node].x_min; x_high= densityMap[curr_node].x_max; y_low = densityMap[curr_node].y_min; y_high = densityMap[curr_node].y_max; lateral = (x_high - x_low) / 2.0; if(x_cor < x_low + lateral) { if (y_cor > y_low + lateral) curr_node = densityMap[curr_node].children; else curr_node = densityMap[curr_node].children + 2; } else { if (y_cor > y_low + lateral) curr_node = densityMap[curr_node].children + 1; else curr_node = densityMap[curr_node].children + 3; } } return curr_node; } int print_tree(int start, int end) { int i, j = 0, k; for(i = start; i <= end; i++) { if(i < DM_total_nodes) { //printf("(%d: a_cnt=%d, p=%d, c=%d, <%lf, %lf, %lf, %lf>, a_list: ", printf("(%d: a_cnt=%lld, p=%d, c=%d, a_list: ", i, densityMap[i].a_cnt, densityMap[i].parent, densityMap[i].children //, densityMap[i].x_min, densityMap[i].x_max, densityMap[i].y_min, densityMap[i].y_max ); if(densityMap[i].children == NO_VALUE) { for(k = densityMap[i].first_atom; ; k = atom_list[k].next) { if(k != END_OF_LIST) printf("%d ", k); if(k == densityMap[i].last_atom) break; } } else { printf("[%d, %d]", densityMap[i].first_atom, densityMap[i].last_atom); } printf("\n"); j++; } } //for(i = densityMap[0].first_atom; i != END_OF_LIST; i = atom_list[i].next) // printf("%d, ", i); //printf("End\n"); return j; } //=========================================================================== //= Function to generate Zipf (power law) distributed random variables = //= - Input: alpha and N = //= - Output: Returns with Zipf distributed random variable = //= This is modified from the code from Ken Christensen = //= Changed the linear search of probabilities into a binary search = //=========================================================================== int zipf(double alpha, int n) { static double c, *temp; // Normalization constant double z; // Uniform random number (0 < z < 1) double sum_prob; // Sum of probabilities double zipf_value; // Computed exponential value to be returned int i; // Loop counter int head, tail, mid; // for binary search // Compute normalization constant on first call only if (first == TRUE) { temp = (double *)calloc(n, sizeof(double)); for (i=1; i<=n; i++) { c = c + (1.0 / pow((double) i, alpha)); temp[i-1] = 0.0f; } c = 1.0 / c; sum_prob = 0.0; for (i=1; i<=n; i++) { sum_prob += c / pow((double) i, alpha); temp[i-1] = sum_prob; //printf("[%d, %lf]", i-1, sum_prob); } //printf("\n"); first = FALSE; } /* generate a uniform double variable within [0, 1.0) */ z = (double)(rand())/RAND_MAX; // Map z to the value sum_prob = 0; head = 0; tail = n; mid = n/2; //printf("For z = %lf\n", z); while(head <= tail) { //printf("working on [%d, %d, %d]\n", head, mid, tail); if(z > temp[mid]) { head = mid + 1; mid = head + (tail - head)/2; } else if (z > temp[mid-1]) { zipf_value = mid + 1; break; } else { tail = mid - 1 ; mid = head + (tail - head)/2; } } // Assert that zipf_value is between 1 and N assert((zipf_value >=1) && (zipf_value <= n)); return(zipf_value); } /* Generates a random integer from 0 to max. This is used to obtain a uniform, random, subset of the data set */ int randNumGen(int max) { return (int)(rand()%max); }