/*
Build:
  g++ merger.cpp memAllocation.cpp -O3 -o merger

Usage:
  merger WV.dat clusterCount.dat fulldata.bin count1.dat

  WV.dat            created by mstreaming.cpp
  clusterCount.dat  created by mstreaming.cpp
  fulldata.bin      full data set; used for computing quality
  count1.dat        size of the full data set; used for computing quality
*/


#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#include <sys/times.h>
#include <sys/resource.h>
#include "memheader.h"
#include "setup1.h"


/* Element type of the records read by wload_files(). */
typedef float DATA_TYPE; //for weighted clusters
/* MAXDATA is read from the file named by argv[2] and GMAXDATA from the file
   named by argv[4] (both in main); they are the example counts of the
   weighted data set and of the full data set respectively. */
long int MAXDATA, GMAXDATA;


/* Size in bytes of one value in the weighted file / in the full data file.
   DATA_TYPE1 is not defined in this file (presumably in setup1.h - confirm). */
#define DATA_BYTE sizeof(DATA_TYPE)
#define DATA_BYTE1 sizeof(DATA_TYPE1)
/* Sentinels used by init() to seed the per-feature min/max search. */
#define MIN -1.797e100
#define MAX  1.797e100 

/* Read buffers holding one record at a time; S and CLUSTER are not defined in
   this file (presumably in setup1.h - confirm). */
DATA_TYPE dbuff[S+1]; //S features followed by 1 extra slot for the example weight
DATA_TYPE1 dbuff1[S]; //one record of the full data set (original note: short int for mri)
/* X[f][e] is feature f of example e (filled by load_files / wload_files). */
float **X;
/* U[c][e] is the membership of example e in cluster c (written by updateU). */
float **U;
/* fweight[e] is the weight of example e (filled by wload_files). */
float *fweight;
/* V holds the current centroids; V1 receives the freshly computed centroids
   inside update_centroids() before they are copied back into V. */
float V[CLUSTER][S],V1[CLUSTER][S];


/* Prototypes of the functions defined later in this file. */
void load_files(FILE *ptr,int no, int dim);
void wload_files(FILE *ptr,int no, int dim );
void updateU(int no,int dim);
double ED(int k, float v2[]);
double NORM(float v1[], float v2[]);
double update_centroids(int no, int dim);
void  init(int no, int dim);
int get_random_example(float vec[],int no);
void spfcm(int no,int dim);
void compute_JM(int no,int dim,FILE *gdp,float per);
void writeV();
double timing_of(struct rusage start, struct rusage stop);
void initU(int no);


/* loop is the index of the current experiment (main's loop counter, also read
   by init and compute_JM); sp is only reset to 0 in main in the visible code. */
int sp,loop;
/* Not referenced anywhere in the visible code. */
float percent;
/* Scratch buffers for output file names ("minfo.dat", "mV.dat", ...). */
char fname[180],name[180];
/* Used first to read the count files, then as the handle of minfo.dat, to
   which spfcm() appends the iteration count. */
FILE *info;
/*
Example invocation:
  merger wdata.bin count.dat fulldata.bin count1.dat
*/
main(int argc,char *argv[])
{
int i,j;
FILE *gdp,*note,*fd;
int dim=S,tot,size,parts;

info=fopen(argv[2],"r");
fscanf(info,"%d",&MAXDATA);
printf("\nno of weighted clusters=%d\n",MAXDATA);
fclose(info);

info=fopen(argv[4],"r");
fscanf(info,"%d",&GMAXDATA);
printf("\nnfull data =%d\n",GMAXDATA);
fclose(info);


strcpy(fname,argv[1]);


struct rusage start_time, end_time;

srand(170);
srand48(260);

sprintf(fname,"minfo.dat");
info=fopen(fname,"w");
fprintf(info,"peps seps %f %f",peps,seps);

gdp=fopen(argv[1],"rb");
tot=0;sp=0;eps=peps; 
size=MAXDATA;


for(loop=0;loop<EXP;loop++)
{
getrusage(RUSAGE_SELF,&start_time);
allocate2Dfloat(&X,dim,size); 
allocate2Dfloat(&U,CLUSTER,size); 
allocate1Dfloat(&fweight,size);
printf("\nMemory Allocation Done..\n");

wload_files(gdp,size,dim);//pass only pointer of X
initU(size);init(size,dim);


spfcm(size,dim);

getrusage(RUSAGE_SELF,&end_time);
sprintf(name,"mtime.dat");
note=fopen(name,"a");
fprintf(note,"%f\n",timing_of(start_time,end_time));
fclose(note);

if(1)
{
writeV();
//
}

free2Dfloat(&X,dim,size);
free2Dfloat(&U,CLUSTER,size);
free1Dfloat(&fweight,size);
if(1)
{
fd=fopen(argv[3],"rb");
compute_JM(GMAXDATA,dim,fd,10);
fclose(fd);
}

}


fclose(gdp);
fclose(info);
}



/* Clear the membership matrix: U[c][e] = 0 for every cluster c and each of
   the first `no` examples e. */
void initU(int no){
    int cluster,example;

    for(cluster=0;cluster<CLUSTER;cluster++)
    {
        float *row=U[cluster];
        for(example=0;example<no;example++)
            row[example]=0;
    }
}


/* Append the current centroids V to "mV.dat", one cluster per line with its
   S coordinates separated by spaces. If the file cannot be opened the error
   is reported on stderr and nothing is written. */
void writeV(){
    int i,j;
    FILE *ptr;

    sprintf(name,"mV.dat");
    ptr=fopen(name,"a");
    if(ptr==NULL)
    {
        /* Writing through a NULL stream would crash; report and skip. */
        perror(name);
        return;
    }

    for(i=0;i<CLUSTER;i++)
    {
        for(j=0;j<S;j++)
            fprintf(ptr,"%f ",V[i][j]);
        fprintf(ptr,"\n");
    }
    fclose(ptr);
}

/* Run the clustering iteration on the loaded examples: compute the
   memberships once, then alternate update_centroids() and updateU() for as
   long as the centroid change returned by update_centroids() exceeds eps.
   The number of iterations performed is appended to the `info` file.
     no  - number of examples
     dim - number of features (forwarded to the helpers) */
void spfcm(int no,int dim)
{
    int pass=0;
    double change=100;

    printf("\nepsilon=%f m=%f\n",eps,m);

    updateU(no,dim);
    for(;;)
    {
        if(!(change>eps))
            break;

        pass++;
        change=update_centroids(no,dim);
        updateU(no,dim);
        printf("\niteration =%d\n",pass);
    }

    fprintf(info,"\niteration required %d\n",pass);
}



void load_files(FILE *ptr,int no, int dim )
{
int i,j,size;
printf("\nno %d dim %d\n",no,dim);


for(i=0;i<no;i++)
{
fread(dbuff1,DATA_BYTE1,dim,ptr);

for(j=0;j<dim;j++)
X[j][i]=dbuff1[j];
}

if(0)
{
for(i=0;i<no;i++) 
{for(j=0;j<dim;j++)
     printf("%f ",X[j][i]); 
printf("\n");
}
}
printf("\nLoaded %d examples into memory\n",no);
}



void wload_files(FILE *ptr,int no, int dim )
{
int i,j,size,dim1;
printf("\nno %d dim %d\n",no,dim);
dim1=dim+1;

for(i=0;i<no;i++)
{
fread(dbuff,DATA_BYTE,dim1,ptr);

for(j=0;j<dim;j++)
X[j][i]=dbuff[j];

fweight[i]=dbuff[j];//loading the weights
}

if(0)
{
for(i=0;i<no;i++) 
{for(j=0;j<dim;j++)
     printf("%f ",X[j][i]); 
  printf("%f ",fweight[i]);
printf("\n");
}
}
printf("\nLoaded %d examples into memory\n",no);
}







double update_centroids(int no, int dim)
{
  int i,k,x;
  double numerator[S], denominator;
  double U_ikm;
  double error=0; 
  /* For each cluster */
  for (i=0; i < CLUSTER; i++)  {
   
    /* Zero out numerator and denominator options */
    denominator=0;
    for (x=0; x < S; x++) 
      numerator[x]=0;


    /* Calculate numerator and denominator together */
    for (k=0; k < no; k++) {

      U_ikm=pow(U[i][k],m);
      denominator += U_ikm*fweight[k];
           

  for (x=0; x < S; x++) 
	numerator[x] += U_ikm * X[x][k]*fweight[k];
    }

    /* Calculate V */
  
  for (x=0; x < S; x++) {
      V1[i][x]= numerator[x] / denominator;

}

 }  /* endfor: C clusters */

if(0)
{
int j;
for(i=0;i<CLUSTER;i++) 
{for(j=0;j<dim;j++)
     printf("%f ",V1[i][j]); 
printf("\n");
}

}

for (i=0; i < CLUSTER; i++) 
 for (x=0; x < S; x++) 
  error=error+(V1[i][x]-V[i][x])*(V1[i][x]-V[i][x]);

error=error/float(CLUSTER*S);
error=sqrt(error);
for (i=0; i < CLUSTER; i++) 
 for (x=0; x < S; x++) 
V[i][x]=V1[i][x];

return error;
}





void updateU(int no,int dim)
{

  int i,j,k;
  double sum, D[CLUSTER];
  double sqdiff=0;
  float U1;
  int pos;

  for ( k=0; k < no; k++) {
 
pos=-1;   
 for (i=0; i < CLUSTER; i++) 
{    for (j=0; j < S; j++) 
	if ( X[j][k] != V[i][j] )
            break;      

if(j==S) pos=i;
}
if(pos!=-1)
{
for (i=0; i < CLUSTER; i++)
if ( i ==pos )
	  U[i][k]=1.0;
	else 
	  U[i][k]=0.0;

}
else
{
    for (i=0; i < CLUSTER; i++) 
      D[i]=ED(k, V[i]);//kth example of X

    for (i=0; i < CLUSTER; i++) {
      sum=0;

      for (j=0; j < CLUSTER; j++) {
	if ( i == j ) 
	  sum=sum+1;
	else
	  sum =sum+ pow( D[i] / D[j] , (2.0/ (m-1)));
 
  }


      U1=1.0/sum;
          
      U[i][k]=U1;
    }
}
}  


}




/* Euclidean distance between example k of X (column k, S features) and the
   S-element vector v2. */
double ED(int k, float v2[])
{
    double acc=0;
    int f=0;

    while(f<S)
    {
        acc += (X[f][k]-v2[f]) * (X[f][k]-v2[f]);
        f++;
    }

    return sqrt(acc);
}


/* Euclidean distance between the two S-element vectors v1 and v2. */
double NORM(float v1[], float v2[])
{
    double acc=0;
    int f=0;

    while(f<S)
    {
        acc += (v1[f]-v2[f]) * (v1[f]-v2[f]);
        f++;
    }

    return sqrt(acc);
}



/* Initialise the centroids V according to INIT_CENTER and append them to
   "mInit.dat".
     INIT_CENTER == 0  CLUSTER distinct randomly chosen examples
     INIT_CENTER == 1  rows loop*CLUSTER .. loop*CLUSTER+CLUSTER-1 of the
                       EXP*CLUSTER rows stored in "initialized.dat"
     INIT_CENTER == 2  centroids spread evenly between the per-feature
                       minimum and maximum of the data (mri initialisation)
     INIT_CENTER == 3  one randomly chosen run of CLUSTER consecutive examples
     no  - number of examples in X
     dim - unused; all S features are always processed
   Unusable input terminates the program with a message: fewer than CLUSTER
   examples for modes 0 and 3 (mode 0 would never find CLUSTER distinct
   examples, mode 3 would take a remainder by zero), or a missing/short
   "initialized.dat" for mode 1. */
void  init(int no, int dim)
{
    int i,j,selected[CLUSTER];
    double min[S],max[S],range[S];
    FILE *ptr;

    (void)dim;

    if((INIT_CENTER ==0 || INIT_CENTER ==3) && no<CLUSTER)
    {
        fprintf(stderr,"\ninit: %d examples cannot initialise %d clusters\n",no,CLUSTER);
        exit(EXIT_FAILURE);
    }

    if(INIT_CENTER ==0)
    {
        i=0;
        while(i<CLUSTER)
        {
            /* The candidate is written into V[i]; it is redrawn while it
               repeats an example that was already selected. */
            selected[i]=get_random_example(V[i],no);
            for(j=0;j<i;j++)
                if(selected[i]==selected[j]) break;

            if(j==i) i++;
        }
    }
    if(INIT_CENTER ==1)
    {
        float initbuffer[EXP*CLUSTER][S];
        int loc;
        loc=loop*CLUSTER;
        ptr=fopen("initialized.dat","r");
        if(ptr==NULL)
        {
            printf("\ninitialized.dat not found\n");
            exit(EXIT_FAILURE);
        }
        for(i=0;i<CLUSTER*EXP;i++)
            for(j=0;j<S;j++)
                if(fscanf(ptr,"%f",&initbuffer[i][j])!=1)
                {
                    fprintf(stderr,"\ninitialized.dat: could not read value %d of row %d\n",j+1,i+1);
                    fclose(ptr);
                    exit(EXIT_FAILURE);
                }
        fclose(ptr);

        for(i=0;i<CLUSTER;i++)
            for(j=0;j<S;j++)
                V[i][j]=initbuffer[loc+i][j];
    }
    if(INIT_CENTER ==2) //mri initialization
    {
        for(i=0;i<S;i++)
        {
            min[i]=MAX;
            max[i]=MIN;
            for(j=0;j<no;j++)
            {
                if(X[i][j]<min[i]) min[i]=X[i][j];
                if(X[i][j]>max[i]) max[i]=X[i][j];
            }
            range[i]=max[i]-min[i];
        }

        for(i=0;i<S;i++)
            range[i]=range[i]*1.0;//heuristic

        /* Centroid i sits at fraction (i+1)/(CLUSTER+1) of each feature range. */
        for(i=0;i<CLUSTER;i++)
            for(j=0;j<S;j++)
                V[i][j]=min[j]+((float)(i+1)/(float)(CLUSTER+1))*range[j];

        for(i=0;i<S;i++)
            printf("\nmin max of %d channel is %f %f : rangle=%f\n",i+1,min[i],max[i],range[i]);
    }
    if(INIT_CENTER ==3)//select any partition randomly
    {
        int pos1,noc;
        noc=no/CLUSTER;
        noc=lrand48()%noc;
        pos1=noc*CLUSTER;
        printf("\n chunk %d initialized: pos %d\n",noc,pos1);

        for(i=pos1;i<pos1+CLUSTER;i++)
            for(j=0;j<S;j++)
                V[i-pos1][j]=X[j][i];
    }

    ptr=fopen("mInit.dat","a");
    if(ptr==NULL)
    {
        /* The log is informational only; the centroids are already set. */
        perror("mInit.dat");
        return;
    }
    for(i=0;i<CLUSTER;i++)
    {
        for(j=0;j<S;j++)
            fprintf(ptr,"%f ",V[i][j]);
        fprintf(ptr,"\n");
    }
    fclose(ptr);
}


/* Pick one of the first `no` examples using lrand48(), copy its S features
   from X into vec and return the zero-based index of the chosen example.
   The one-based index is printed. */
int get_random_example(float vec[],int no)
{
    int feat=0;
    int pick;

    pick=lrand48()%no;
    printf("\nexample no %d selected\n",pick+1);

    while(feat<S)
    {
        vec[feat]=X[feat][pick];
        feat++;
    }

    return pick;
}




/* Compute the objective value J2 of the current centroids V over `no`
   examples read from `gdp`, print it and append it to "mJ2.dat".
   The data is processed in chunks of ceil(per/100 * no) examples; each chunk
   is loaded into a freshly allocated X by load_files() and released again.
   For every example,
       J2 += ( sum_j ED(example, V[j])^(2/(1-m)) )^(1-m),
   where centroids at distance zero are skipped, and an example whose inner
   sum is zero contributes nothing.
     no  - number of examples to read
     dim - number of features per example
     gdp - open binary stream positioned at the first example
     per - chunk size as a percentage of `no`
   The last chunk is clamped to `no`, the number of examples this call was
   asked to process. A non-positive chunk size processes nothing. If
   "mJ2.dat" cannot be opened the value is still printed and the failure is
   reported on stderr. */
void compute_JM(int no,int dim,FILE *gdp,float per)
{
    int i,j,p;
    int size,parts,tot=0;
    FILE *ptr;
    double temp,tempj,J2=0;

    size=(int)ceil((per/100.00)*no);
    /* Exact integer ceil(no/size); float division loses precision for
       large counts. */
    if(size>0 && no>0)
        parts=no/size+((no%size)!=0);
    else
        parts=0;
    printf("\nComputing J_m Loop %d Broken data size %d parts %d\n",loop+1,size,parts);

    for(p=0;p<parts;p++)
    {
        /* Shrink the final chunk so that exactly `no` examples are read. */
        if(size>no-tot)
            size=no-tot;
        tot=tot+size;

        allocate2Dfloat(&X,dim,size);
        load_files(gdp,size,dim);

        for(i=0;i<size;i++)
        {
            tempj=0;
            for(j=0;j<CLUSTER;j++)
            {
                temp=ED(i,V[j]);
                if(temp)
                    tempj=tempj+pow(temp,2.0/(1.0-m));
            }
            if(tempj)
                J2=J2+pow(tempj,1.0-m);
        }

        free2Dfloat(&X,dim,size);
        printf("\nparts =%d Computed J_m of %d examples\n",p+1,tot);
    }

    printf("\n The J2 value is %f \n",J2);

    sprintf(name,"mJ2.dat");
    ptr=fopen(name,"a");
    if(ptr==NULL)
    {
        perror(name);
        return;
    }
    fprintf(ptr,"%f\n",J2);
    fclose(ptr);
}



/* Get time, in seconds */
/* Return the CPU time, in seconds, that elapsed between two getrusage()
   snapshots: the user-time difference plus the system-time difference. */
double timing_of(struct rusage start, struct rusage stop)
{
    long user_sec, sys_sec;
    long user_usec, sys_usec;
    double user_time, sys_time;

    user_sec  = stop.ru_utime.tv_sec  - start.ru_utime.tv_sec;
    user_usec = stop.ru_utime.tv_usec - start.ru_utime.tv_usec;
    sys_sec   = stop.ru_stime.tv_sec  - start.ru_stime.tv_sec;
    sys_usec  = stop.ru_stime.tv_usec - start.ru_stime.tv_usec;

    /* Borrow one second whenever the microsecond difference is negative. */
    if (user_usec < 0) {
        user_sec  -= 1;
        user_usec += 1000000L;
    }
    if (sys_usec < 0) {
        sys_sec  -= 1;
        sys_usec += 1000000L;
    }

    user_time = user_sec + 0.000001 * user_usec;
    sys_time  = sys_sec  + 0.000001 * sys_usec;

    return user_time + sys_time;
}
