/*
Note: Modify setup.h to change the parameter settings.
Build: g++ mstreaming.cpp memAllocation.cpp -O3 -o online
Usage: spf fname.bin chunksize count.dat
(count.dat is a file containing the number of data points in the full data set;
this information is only used to compute quality)
*/
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#include <sys/times.h>
#include <sys/resource.h>
#include "memheader.h"
#include "setup.h"

/* Total number of condensed (history) centroids kept: CONDENSED_SLICE slices
   of CLUSTER centroids each. Fully parenthesised so the macro can be used
   safely inside larger expressions (e.g. after % or /). */
#define CONDENSED_EXAMPLE ((CONDENSED_SLICE)*(CLUSTER))
/* Size in bytes of one raw value as stored in the binary data file. */
#define DATA_BYTE (sizeof(DATA_TYPE))

/* Number of examples in the full data set; main reads it from the count file (argv[3]). */
long int MAXDATA;


/* Staging buffer for one raw example (S values) as read by load_files. */
DATA_TYPE dbuff[S];
/* Current data chunk, indexed X[feature][example]; allocated per chunk. */
float **X;
/* Memberships of the current chunk, indexed U[cluster][example]; allocated per chunk. */
float **U;
/* V: current centroids. V1: centroids freshly computed by update_centroids.
   LV: stored (condensed) centroids from earlier chunks. */
float V[CLUSTER][S],V1[CLUSTER][S],LV[CONDENSED_EXAMPLE][S];
/* W: weight of each condensed centroid. LU: memberships of the condensed
   centroids, indexed LU[cluster][condensed]. LW: per-cluster sum of U over
   the current chunk (filled by updateWeights). */
float 
W[CONDENSED_EXAMPLE],LU[CLUSTER][CONDENSED_EXAMPLE],LW[CLUSTER];
/* Number of condensed centroids taking part in the current chunk (set in main). */
int con_count;


/* Prototypes of the functions in this file. optimalMatch and
   evaluate_true_distance are only declared here; no active definition of
   them is visible in this file. */
void load_files(FILE *ptr,int no, int dim);
void updateU(int no,int dim);
double ED(int k, float v2[]);
double NORM(float v1[], float v2[]);
double update_centroids(int no, int dim);
void  init(int no, int dim);
void get_random_example(float vec[],int no);
void spfcm(int no,int dim);
void compute_JM(int no,int dim,FILE *gdp,float per);
void writeV();
void writeWV();
void updateWeights(int no, int dim, int timeline);
double timing_of(struct rusage start, struct rusage stop);
void initU(int no);
void evaluate_quality(char fname[],int NO, int dim, float p);
void random_initialization(int no, int dim);
float optimalMatch(double analysis[CLUSTER][CLUSTER],int pairs[CLUSTER][2],int choice );
int evaluate_true_distance();
void compute_PJM(int no,float per);


/* sp: 0 while the first chunk is processed, 1 afterwards (written in main only).
   loop: index of the current experiment (0 .. EXP-1). */
int sp,loop;
long int csize; //chunk size
/* Scratch buffers for building output file names. */
char fname[180],name[180];
/* Log file opened by main; spfcm writes the iteration counts to it. */
FILE *info;
/*
spf fname.bin chunksize count.dat
*/


/*
 * Entry point of the streaming fuzzy c-means driver.
 *
 * Usage: spf fname.bin chunksize count.dat
 *   argv[1]  binary data file (S values of DATA_TYPE per example)
 *   argv[2]  chunk size: number of examples clustered at a time
 *   argv[3]  text file holding the number of examples in the full data set
 *
 * For each of the EXP experiments the data file is read chunk by chunk;
 * every chunk is clustered with spfcm() together with the condensed
 * centroids kept from earlier chunks, after which the chunk weights and
 * centroids are stored. The CPU time of each experiment, minus the time
 * spent writing centroids and evaluating quality, is appended to
 * "<chunksize>cs<CONDENSED_SLICE>contime.dat".
 *
 * Returns 0 on success and EXIT_FAILURE when the arguments or the input
 * files are unusable.
 */
int main(int argc,char *argv[])
{
    int i;
    double timediff,tp,timetot;
    FILE *gdp,*note;
    int dim=S,tot,size,parts=0;
    char *endp;
    struct rusage start_time,end_time,tstart,tend;

    if(argc<4)
    {
        fprintf(stderr,"Usage: %s fname.bin chunksize count.dat\n",argc>0?argv[0]:"spf");
        return EXIT_FAILURE;
    }

    /* Total number of examples; MAXDATA is a long, so it must be read with %ld. */
    info=fopen(argv[3],"r");
    if(info==NULL)
    {
        perror(argv[3]);
        return EXIT_FAILURE;
    }
    if(fscanf(info,"%ld",&MAXDATA)!=1 || MAXDATA<=0 || (long)(int)MAXDATA!=MAXDATA)
    {
        fprintf(stderr,"%s: could not read a usable example count\n",argv[3]);
        fclose(info);
        return EXIT_FAILURE;
    }
    printf("\nno of examples=%ld\n",MAXDATA);
    fclose(info);

    /* The chunk counters below are ints, so the chunk size must fit in one. */
    csize=strtol(argv[2],&endp,10);
    if(endp==argv[2] || csize<=0 || (long)(int)csize!=csize)
    {
        fprintf(stderr,"invalid chunk size '%s'\n",argv[2]);
        return EXIT_FAILURE;
    }

    srand(104);
    srand48(104);

    snprintf(fname,sizeof fname,"%ldcs%dconinfo.dat",csize,CONDENSED_SLICE);
    info=fopen(fname,"w");
    if(info==NULL)
    {
        perror(fname);
        return EXIT_FAILURE;
    }
    fprintf(info,"peps seps %f %f",peps,seps);

    for(loop=0;loop<EXP;loop++)
    {
        size=(int)csize;
        /* Integer ceiling division; float arithmetic loses precision for large counts. */
        parts=(int)((MAXDATA+size-1)/size);
        printf("\nLoop %d Broken data size %d parts %d\n",loop+1,size,parts);
        getrusage(RUSAGE_SELF,&start_time);
        for(i=0;i<CONDENSED_EXAMPLE;i++)
            W[i]=0;
        con_count=0;
        timetot=0;

        gdp=fopen(argv[1],"rb");
        if(gdp==NULL)
        {
            perror(argv[1]);
            fclose(info);
            return EXIT_FAILURE;
        }
        tot=0;sp=0;eps=peps;
        for(i=0;i<parts;i++)
        {
            /* Number of condensed centroids available from earlier chunks. */
            if(i>=CONDENSED_SLICE)
                con_count=CONDENSED_EXAMPLE;
            else
                con_count=i*CLUSTER;

            /* From the second chunk on, use the second stopping threshold. */
            if(i==1) {sp=1;eps=seps;}

            /* The last chunk may be shorter than the requested chunk size. */
            tot=tot+size;
            if(tot>MAXDATA)
            {
                tot=tot-size;
                size=(int)(MAXDATA-tot);
                tot=tot+size;
            }
            printf("\nsize=%d\n",size);

            allocate2Dfloat(&X,dim,size);
            allocate2Dfloat(&U,CLUSTER,size);
            initU(size);
            printf("\nMemory Allocation Done..\n");

            load_files(gdp,size,dim);
            if(i==0){init(size,dim);}

            fprintf(info,"\ndata chunk=%d\n",i+1);
            spfcm(size,dim);
            updateWeights(size,dim,i);

            free2Dfloat(&X,dim,size);
            free2Dfloat(&U,CLUSTER,size);

            writeWV();

            /* Time spent below is bookkeeping, not clustering; it is subtracted later. */
            getrusage(RUSAGE_SELF,&tstart);
            writeV();
            if(PARTIAL_RM)
            {
                tp=100.0/(1+i);
                if(tp<1) tp=1;
                evaluate_quality(argv[1],tot,dim,tp);
            }
            getrusage(RUSAGE_SELF,&tend);
            timediff=timing_of(tstart,tend);
            timetot=timetot+timediff;
            printf("\ntime for computing quality=%f",timediff);

            printf("\nClustered %d examples of %ld: Chunk= %d\n",tot,MAXDATA,i+1);
        }
        getrusage(RUSAGE_SELF,&end_time);
        snprintf(name,sizeof name,"%ldcs%dcontime.dat",csize,CONDENSED_SLICE);
        note=fopen(name,"a");
        if(note==NULL)
            perror(name);
        else
        {
            fprintf(note,"%f\n",timing_of(start_time,end_time)-timetot);
            fclose(note);
        }
        fclose(gdp);
    }

    fclose(info);

    note=fopen("clusterCount.dat","w");
    if(note==NULL)
    {
        perror("clusterCount.dat");
        return EXIT_FAILURE;
    }
    fprintf(note,"%d\n",CLUSTER*parts);
    fclose(note);
    return 0;
}


/*
int evaluate_true_distance(int cenpos)
{
int i,j,pos;
float truecen[CLUSTER][S];
double analysis[CLUSTER][CLUSTER],tanalysis[CLUSTER][CLUSTER];
int pairs[CLUSTER][2];
double cost;
char temp[180];
FILE *ptr;
ptr=fopen("truecentroids.dat","r");
if(ptr==NULL)
{
printf("\ntruecentroids.dat file not found..skipping\n");
return -1;
} 
//travel to the right centroids
pos=cenpos*CLUSTER;
for(i=0;i<pos;i++)
 for(j=0;j<S;j++)
   fscanf(ptr,"%s",temp);

for(i=0;i<CLUSTER;i++)
 for(j=0;j<S;j++)
  {fscanf(ptr,"%s",temp);truecen[i][j]=atof(temp);}
fclose(ptr);
for(i=0;i<CLUSTER;i++)
  for(j=0;j<CLUSTER;j++)
  {tanalysis[i][j]=analysis[i][j]=NORM(V[i],truecen[j]);}
optimalMatch(analysis,pairs,1);
cost=0;
for(i=0;i<CLUSTER;i++)
cost=cost+tanalysis[pairs[i][0]-1][pairs[i][1]-1];
//cost=cost/double(CLUSTER*S);

sprintf(temp,"%dcs%dconTcendis.dat",csize,CONDENSED_SLICE);  
ptr=fopen(temp,"a");
fprintf(ptr,"%f\n",cost);
fclose(ptr);
printf("\nSum of distance to true centroids %f\n",cost);
return 1;
}
*/

/*
 * Re-open the data file and compute the clustering quality (J_m) of the
 * first NO examples with the current centroids, via compute_JM().
 *
 * fname  path of the binary data file
 * NO     number of examples to evaluate
 * dim    number of features per example
 * p      percentage of NO to load at a time
 *
 * If the file cannot be opened the evaluation is skipped and the reason is
 * reported on stderr.
 */
void evaluate_quality(char fname[],int NO, int dim, float p)
{
    /* The file holds raw binary values, so open it in binary mode. */
    FILE *dqual=fopen(fname,"rb");
    if(dqual==NULL)
    {
        perror(fname);
        return;
    }
    compute_JM(NO,dim,dqual,p);
    fclose(dqual);
}


/*
 * Summarise the chunk that was just clustered.
 *
 * LW[c] receives the sum of the memberships U[c][*] of the no examples in
 * the chunk. When condensed slices are enabled, these weights and the
 * current centroids V are stored in slot (timeline % CONDENSED_SLICE) of
 * the condensed history (W and LV).
 *
 * no        number of examples in the chunk
 * dim       unused
 * timeline  zero-based index of the chunk
 */
void updateWeights(int no, int dim, int timeline){
    int c,n,d,base;
    int slices=CONDENSED_SLICE;

    /* Disabled debug dump of the condensed memberships. */
    if(0){
        for(c=0;c<CLUSTER;c++)
        {
            for(n=0;n<CLUSTER;n++)
                printf("%f ",LU[c][n]);
            printf("\n");
        }
    }

    /* Weight of each cluster = total membership mass of the chunk. */
    for(c=0;c<CLUSTER;c++)
    {
        LW[c]=0;
        for(n=0;n<no;n++)
            LW[c]=LW[c]+U[c][n];
    }

    if(slices)
    {
        /* The history is a ring buffer of CONDENSED_SLICE slots. */
        base=(timeline%slices)*CLUSTER;
        printf("\n time=%d, pos=%d",timeline,base);

        for(c=0;c<CLUSTER;c++)
        {
            W[base+c]=LW[c];
            for(d=0;d<S;d++)
                LV[base+c][d]=V[c][d];
        }
    }

    /* Disabled debug dump of the condensed centroids and weights. */
    if(0)
    {
        FILE *ptr;
        double sum=0;
        ptr=fopen("info.dat","w");
        for(c=0;c<con_count;c++)
        {
            for(d=0;d<S;d++)
                fprintf(ptr,"%f ",LV[c][d]);
            fprintf(ptr,"%f",W[c]);
            fprintf(ptr,"\n");
        }
        fclose(ptr);
        printf("\ncurrent buffer weights and its sum\n");
        for(c=0;c<CLUSTER;c++)
            printf("\nW=%f\n",LW[c]);

        for(c=0;c<CLUSTER;c++)
            sum=sum+LW[c];
        printf("\ntot weights=%f\n",sum);
    }
}


/*
 * Clear the membership matrix U for a chunk of no examples
 * (all CLUSTER rows, columns 0 .. no-1).
 */
void initU(int no){
    int c,n;
    for(c=0;c<CLUSTER;c++)
    {
        float *row=U[c];
        for(n=0;n<no;n++)
            row[n]=0;
    }
}


/*
 * Append the current centroids and their chunk weights to "WV.dat".
 *
 * The record is a binary block of CLUSTER rows with S+1 floats each: the S
 * coordinates of the centroid followed by its weight LW. Failures to open
 * or write the file are reported on stderr instead of crashing.
 */
void writeWV(){
    int i,j;
    size_t nsize,written;
    float WV[CLUSTER][S+1];
    FILE *ptr;
    sprintf(name,"WV.dat");

    for(i=0;i<CLUSTER;i++)
    {
        for(j=0;j<S;j++)
            WV[i][j]=V[i][j];
        WV[i][S]=LW[i];
    }

    nsize=(size_t)CLUSTER*(S+1);
    ptr=fopen(name,"ab");
    if(ptr==NULL)
    {
        perror(name);
        return;
    }
    written=fwrite(WV,sizeof(float),nsize,ptr);
    if(written!=nsize)
        fprintf(stderr,"%s: short write (%lu of %lu values)\n",name,
                (unsigned long)written,(unsigned long)nsize);
    fclose(ptr);
}


/*
 * Append the current centroids V as text, one centroid per line, to
 * "<chunksize>cs<CONDENSED_SLICE>conV.dat".
 *
 * csize is a long and is therefore formatted with %ld. If the file cannot
 * be opened the centroids are not written and the reason goes to stderr.
 */
void writeV(){
    int i,j;
    FILE *ptr;
    snprintf(name,sizeof name,"%ldcs%dconV.dat",csize,CONDENSED_SLICE);
    ptr=fopen(name,"a");
    if(ptr==NULL)
    {
        perror(name);
        return;
    }
    for(i=0;i<CLUSTER;i++)
    {
        for(j=0;j<S;j++)
            fprintf(ptr,"%f ",V[i][j]);
        fprintf(ptr,"\n");
    }
    fclose(ptr);
}

/*
 * Run the alternating optimisation on the current chunk: memberships are
 * recomputed with updateU() and centroids with update_centroids() until
 * the centroid change reported by update_centroids() is no longer above
 * eps. The number of centroid updates is written to the info log.
 *
 * no   number of examples in the chunk
 * dim  number of features per example
 */
void spfcm(int no,int dim)
{
    int rounds=0;
    double shift=100;   /* starting value of the centroid-change measure */

    printf("\nepsilon=%f m=%f\n",eps,m);

    updateU(no,dim);
    for(;;)
    {
        if(!(shift>eps))
            break;

        ++rounds;
        shift=update_centroids(no,dim);
        updateU(no,dim);
    }

    fprintf(info,"\niteration required %d\n",rounds);
}



void load_files(FILE *ptr,int no, int dim)
{
int i,j,size;
printf("\nno %d dim %d\n",no,dim);


for(i=0;i<no;i++)
{
fread(dbuff,DATA_BYTE,dim,ptr);

for(j=0;j<dim;j++)
X[j][i]=dbuff[j];

}

if(0)
{
for(i=0;i<no;i++) 
{for(j=0;j<dim;j++)
     printf("%f ",X[j][i]); 
printf("\n");
}
}
printf("\nLoaded %d examples into memory\n",no);
}





double update_centroids(int no, int dim)
{
  int i,k,x;
  double numerator[S], denominator;
  double U_ikm;
  double error=0; 
  /* For each cluster */
  for (i=0; i < CLUSTER; i++)  {
   
    /* Zero out numerator and denominator options */
    denominator=0;
    for (x=0; x < S; x++) 
      numerator[x]=0;


    /* Calculate numerator and denominator together */
    for (k=0; k < no; k++) {

      U_ikm=pow(U[i][k],m);
      denominator += U_ikm;
           

  for (x=0; x < S; x++) 
	numerator[x] += U_ikm * X[x][k];
    }

if(CONDENSED_SLICE){
   for (k=0; k < con_count; k++) { 
       
      U_ikm=pow(LU[i][k],m); 
      denominator += U_ikm*W[k];
     
       
  for (x=0; x < S; x++)
        numerator[x] += U_ikm * LV[k][x]*W[k];
    } 



}

    /* Calculate V */
  
  for (x=0; x < S; x++) {
      V1[i][x]= numerator[x] / denominator;

}

 }  /* endfor: C clusters */

if(0)
{
int j;
for(i=0;i<CLUSTER;i++) 
{for(j=0;j<dim;j++)
     printf("%f ",V1[i][j]); 
printf("\n");
}

}

for (i=0; i < CLUSTER; i++) 
 for (x=0; x < S; x++) 
  error=error+(V1[i][x]-V[i][x])*(V1[i][x]-V[i][x]);

error=error/float(CLUSTER*S);
error=sqrt(error);
for (i=0; i < CLUSTER; i++) 
 for (x=0; x < S; x++) 
V[i][x]=V1[i][x];

return error;
}





void updateU(int no,int dim)
{

  int i,j,k;
  double sum, D[CLUSTER];
   float U1;
  int pos;

  for ( k=0; k < no; k++) {
 
pos=-1;   
 for (i=0; i < CLUSTER; i++) 
{    for (j=0; j < S; j++) 
	if ( X[j][k] != V[i][j] )
            break;      

if(j==S) pos=i;
}
if(pos!=-1)
{
for (i=0; i < CLUSTER; i++)
if ( i ==pos )
	  U[i][k]=1.0;
	else 
	  U[i][k]=0.0;

}
else
{
    for (i=0; i < CLUSTER; i++) 
      D[i]=ED(k, V[i]);//kth example of X

    for (i=0; i < CLUSTER; i++) {
      sum=0;

      for (j=0; j < CLUSTER; j++) {
	if ( i == j ) 
	  sum=sum+1;
	else
	  sum =sum+ pow( D[i] / D[j] , (2.0/ (m-1)));
 
  }


      U1=1.0/sum;

           
      U[i][k]=U1;
    }
}
}  

if(CONDENSED_SLICE)
{
  for ( k=0; k < con_count; k++) {
 
pos=-1;   
 for (i=0; i < CLUSTER; i++) 
{    for (j=0; j < S; j++) 
	if ( LV[k][j] != V[i][j] )
            break;      

if(j==S) pos=i;
}
if(pos!=-1)
{
for (i=0; i < CLUSTER; i++)
if ( i ==pos )
	  LU[i][k]=1.0;
	else 
	  LU[i][k]=0.0;

}
else
{
    for (i=0; i < CLUSTER; i++) 
      D[i]=NORM(LV[k], V[i]);

    for (i=0; i < CLUSTER; i++) {
      sum=0;

      for (j=0; j < CLUSTER; j++) {
	if ( i == j ) 
	  sum=sum+1;
	else
	  sum =sum+ pow( D[i] / D[j] , (2.0/ (m-1)));
 
  }


      U1=1.0/sum;

            
      LU[i][k]=U1;
    }
}

}
if(0)
{
int j;
for(i=0;i<CLUSTER;i++)
{for(j=0;j<CLUSTER;j++)
     printf("%f ",LU[i][j]);
printf("\n"); 
}  
} 

}

}




/*
 * Euclidean distance between example k of the current chunk (column k of
 * X) and the S-dimensional vector v2.
 */
double ED(int k, float v2[])
{
    double acc=0;
    int d;

    for(d=0;d<S;++d)
    {
        const float diff=X[d][k]-v2[d];
        acc += diff*diff;
    }

    return sqrt(acc);
}


/*
 * Euclidean distance between the S-dimensional vectors v1 and v2.
 */
double NORM(float v1[], float v2[])
{
    double acc=0;
    int d;

    for(d=0;d<S;++d)
    {
        const float diff=v1[d]-v2[d];
        acc += diff*diff;
    }

    return sqrt(acc);
}

/*
 * Replace every centroid in V by a randomly chosen example of the current
 * chunk (no examples). dim is unused.
 */
void random_initialization(int no, int dim)
{
    int c=0;
    while(c<CLUSTER)
    {
        get_random_example(V[c],no);
        ++c;
    }
}


void  init(int no, int dim)
{
  int i,j;
FILE *ptr; 
if(INIT_CENTER ==0)
{for(i=0;i<CLUSTER;i++)
  get_random_example(V[i],no);
}
else
{
float initbuffer[EXP*CLUSTER][S];
int loc;
loc=loop*CLUSTER;
ptr=fopen("initialized.dat","r");
if(ptr==NULL) printf("\ninitialized.dat not found\n");
for(i=0;i<CLUSTER*EXP;i++)
for(j=0;j<S;j++)
fscanf(ptr,"%f",&initbuffer[i][j]);
fclose(ptr);

for(i=0;i<CLUSTER;i++) 
for(j=0;j<S;j++)
V[i][j]=initbuffer[loc+i][j];
}


for(i=0;i<CLUSTER;i++)
 W[i]=0;

for (i=0;i<CLUSTER;i++)
for (j=0;j<CLUSTER;j++)
if(i==j) 
LU[i][j]=1;
else
LU[i][j]=0; 



sprintf(name,"%dcs%dconint.dat",csize,CONDENSED_SLICE);  
ptr=fopen(name,"a");
 for (i=0; i < CLUSTER; i++) {
    for (j=0; j < S; j++)
    fprintf(ptr,"%f ",V[i][j]);
fprintf(ptr,"\n");
}
fclose(ptr);
  
}


/*
 * Copy a randomly chosen example of the current chunk into vec.
 *
 * vec  destination with room for S values
 * no   number of examples in the chunk; the index is lrand48() % no
 */
void get_random_example(float vec[],int no)
{
    const int pick=lrand48()%no;
    int d=0;

    printf("\nexample no %d selected\n",pick+1);
    while(d<S)
    {
        vec[d]=X[d][pick];
        ++d;
    }
}

//this function doesnot load examples.It uses the current examples in X.
/*
 * Compute the normalised J2 value of the no examples currently held in X
 * (nothing is loaded here) for the centroids V and append it to
 * "<chunksize>cs<CONDENSED_SLICE>con.pJ2.dat".
 *
 * no   number of examples in X
 * per  unused
 *
 * Distances of exactly zero are skipped so that pow() is never given a
 * zero base with a negative exponent. If the output file cannot be opened
 * the value is still printed and the failure is reported on stderr.
 */
void compute_PJM(int no,float per)
{
    int i,j;
    FILE *ptr;
    double temp,tempj,J2=0;

    (void)per;
    /* csize is a long, hence %ld. */
    snprintf(name,sizeof name,"%ldcs%dcon.pJ2.dat",csize,CONDENSED_SLICE);

    for(i=0;i<no;i++)
    {
        tempj=0;
        for(j=0;j<CLUSTER;j++)
        {
            temp=ED(i,V[j]);
            if(temp)
                tempj=tempj+pow(temp,2.0/(1.0-m));
        }
        if(tempj)
            J2=J2+pow(tempj,1.0-m);
    }

    printf("\nComputed J_m of %d examples\n",no);

    if(no)
        J2=J2/(float)no;

    printf("\n The normalized J2 value of %d examples is %f \n",no,J2);

    ptr=fopen(name,"a");
    if(ptr==NULL)
    {
        perror(name);
        return;
    }
    fprintf(ptr,"%f\n",J2);
    fclose(ptr);
}




/*
 * Compute the J2 quality value of the first no examples of the stream gdp
 * for the current centroids V, normalised by no, and append it to
 * "<chunksize>cs<CONDENSED_SLICE>con.J2.dat".
 *
 * The examples are loaded into X in pieces of ceil(per% of no) examples so
 * that the whole prefix never has to be in memory at once.
 *
 * no   number of examples to evaluate
 * dim  number of features per example
 * gdp  data stream positioned at the first example
 * per  percentage of no to load at a time
 *
 * Never more than no examples (and never more than MAXDATA) are read, so
 * the sum matches the count it is normalised by. Nothing is done when no
 * is not positive, gdp is NULL or the output file cannot be opened.
 */
void compute_JM(int no,int dim,FILE *gdp,float per)
{
    int i,j,p;
    int size,parts,limit,tot=0;
    FILE *ptr;
    double temp,tempj,J2=0;

    /* csize is a long, hence %ld. */
    snprintf(name,sizeof name,"%ldcs%dcon.J2.dat",csize,CONDENSED_SLICE);

    if(no<=0 || gdp==NULL)
    {
        fprintf(stderr,"\ncompute_JM: nothing to evaluate\n");
        return;
    }

    ptr=fopen(name,"a");
    if(ptr==NULL)
    {
        perror(name);
        return;
    }

    limit=no;
    if((long)limit>MAXDATA)
        limit=(int)MAXDATA;

    size=(int)ceil((per/100.00)*no);
    if(size<1)
        size=1;                    /* guards the division below */
    parts=(no+size-1)/size;        /* integer ceiling of no/size */
    printf("\nComputing J_m Loop %d Broken data size %d parts %d\n",loop+1,size,parts);

    for(p=0;p<parts;p++)
    {
        /* Shorten the final piece so that exactly `limit` examples are read. */
        tot=tot+size;
        if(tot>limit)
        {
            size=size-(tot-limit);
            tot=limit;
        }
        if(size<=0)
            break;

        allocate2Dfloat(&X,dim,size);
        load_files(gdp,size,dim);

        for(i=0;i<size;i++)
        {
            tempj=0;
            for(j=0;j<CLUSTER;j++)
            {
                temp=ED(i,V[j]);
                /* A zero distance would give pow(0, negative); skip it. */
                if(temp)
                    tempj=tempj+pow(temp,2.0/(1.0-m));
            }
            if(tempj)
                J2=J2+pow(tempj,1.0-m);
        }

        free2Dfloat(&X,dim,size);
        printf("\nparts =%d Computed J_m of %d examples\n",p+1,tot);
    }

    J2=J2/(double)no;
    printf("\n The J2 value is %f \n",J2);
    fprintf(ptr,"%f\n",J2);
    fclose(ptr);
}



/* Get time, in seconds */
/* Seconds elapsed between two timeval stamps, borrowing one second when
   the microsecond part of `to` is smaller than that of `from`. */
static double span_seconds(struct timeval from, struct timeval to)
{
    long micros=to.tv_usec - from.tv_usec;

    if(micros<0)
    {
        to.tv_sec--;
        micros += 1000000L;
    }
    return (to.tv_sec - from.tv_sec) + 0.000001 * micros;
}

/*
 * CPU time, in seconds, consumed between two getrusage() snapshots:
 * user time plus system time.
 */
double timing_of(struct rusage start, struct rusage stop)
{
    const double user_part=span_seconds(start.ru_utime,stop.ru_utime);
    const double system_part=span_seconds(start.ru_stime,stop.ru_stime);

    return user_part+system_part;
}
