/* Flux-based Level Set Center Detection - Flux Based Level Set Method for Nucleus Center Detection in 3D data
 * Written in 2015 by Karol Mikula mikula@math.sk
 * Robert Cunderlik cunderli@svf.stuba.sk
 * Robert Spir spir.robert@gmail.com
 * Olga Drblikova-Stasova drblikova@math.sk
 * Alessandro Sarti alessandro.sarti@ehess.fr
 * Peter Frolkovic peter.frolkovic@stuba.sk
 * 
 * To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
 * You should have received a copy of the CC BY-NC-SA 4.0 Dedication along with this software. If not, see <https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode>.
*/


/* this program counts centers of nuclei */
/* threshold 20  */
/* also reads the header; the name is 18 characters long */

#include <fcntl.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <mpi.h>

/* Defined here but not referenced anywhere in the visible source. */
#define vtk
//#undef vtk

/* Index of the first interior grid cell along every axis. All interior loops
   start at p, and the padded array extents are n + 2*p + 1 (see main). */
#define  p 1
/* The time loop in main stops once its repeat counter (testkoniec) reaches
   this value. */
#define  pocetopakovani 5
/* The repeat counter is only advanced for time steps kk >= pckmin. */
#define  pckmin     10

/* Upper bound on the number of iterations in LinearSystemSolving. */
#define  maxpgsi 1000
/* Relaxation factor used in the update u += omega*(z-u). */
#define  omega 1.0 /*1.5*/
/* Relative tolerance: iterations continue while delta > tol*deltain. */
#define  tol 0.0001 /*0.000001*/
/* The residual is re-evaluated every pertol-th iteration. */
#define  pertol 1

/* Not referenced anywhere in the visible source. */
#define  poc_riad 10


/* Fixed-capacity storage for detected centres (coordinates) ... */
typedef  int pole2[500000];
/* ... and for per-time-step counts. */
typedef  long pole3[10000];

/* kk: current time step; pck: number of time steps requested; iz: points are
   written when kk % iz == 0; zapis: 1 enables Writing3D; inext/iprev:
   neighbouring ranks (MPI_PROC_NULL at the ends). skk is not used in the
   visible source. */
int    skk, kk, pck, iz, zapis,inext,iprev;
/* n1: planes per rank; n2, n3: input extents minus one; n1p/n2p/n3p: padded
   array extents. */
int    n1, n2, n3, n1p, n2p, n3p;
/* N1: input extent of the distributed axis minus one; myid/nprocs: MPI rank
   and size; n1last: plane count parameter of the last rank; po: index of the
   last interior plane on this rank; od is set in main but not read in the
   visible source. */
int    N1, myid, nprocs, n1last, od, po;
/* Scratch linear indices of the current cell and its neighbours; they are
   reassigned inside every loop that uses them. */
int    ind, idw, ide, ids, idn, idb, idt;
int    idws, idwb, idsb, idwsb, idnt, idet, iden, ident;
/* dwe = n2p*n3p and dsn = n3p are the index strides of the first and second
   axis; pp, ps, pi are scratch values; spolu is the nucleus count summed over
   all ranks. */
int    dwe, dsn, pp, ps, pi, spolu;

/* Model parameters read from standard input in main. */
double  F, D, treshold, epsilonD, epsilonF;
/* tau: time step; h: grid spacing (set to 0.01 in main); h2 = h*h; h4 = h2*h2;
   scale: accumulated tau; r = h2/tau; ih = 1/h. */
double  tau, h, h2, h4, scale, r, ih;
/* u: evolving field; s: 8-cell averages of u; aw..at, ap, b: coefficients and
   right-hand side of the linear system built in Coefficients. */
double  *u, *s, *aw, *ae, *as, *an, *ab, *at, *ap, *b;
/* Scratch values used in LinearSystemSolving. */
double 	pom, men;

/* Coordinates of the centres found by Jadra, stored from index 1. */
static pole2 jadrox, jadroy, jadroz;
/* Global and per-rank number of centres, indexed by time step. */
static pole3 pocetjadier, lok_pocetjadier;

/* Output streams and file-name buffers (allocated in main). */
FILE   *vystup, *vystup2, *vystup3;
char   *name, *namezac, *namezac0, *namezac1, *namekoniec;

/*
 * Return the smaller of two ints.
 *
 * Declared with a prototype instead of the old identifier-list form, so the
 * compiler checks and converts the arguments at every call site.
 */
int min(int a, int b)
{
 return (a <= b) ? a : b;
}

/*
 * Return the larger of two ints.
 *
 * Declared with a prototype instead of the old identifier-list form, so the
 * compiler checks and converts the arguments at every call site.
 */
int max(int a, int b)
{
 return (a >= b) ? a : b;
}


/*
 * Return the smaller of two doubles (b when the comparison a <= b is false,
 * which includes the case where either argument is NaN).
 *
 * Declared with a prototype instead of the old identifier-list form, so the
 * compiler checks and converts the arguments at every call site.
 */
double rmin(double a, double b)
{
 return (a <= b) ? a : b;
}

/*
 * Return the larger of two doubles (b when the comparison a >= b is false,
 * which includes the case where either argument is NaN).
 *
 * Declared with a prototype instead of the old identifier-list form, so the
 * compiler checks and converts the arguments at every call site.
 */
double rmax(double a, double b)
{
 return (a >= b) ? a : b;
}


/* Square of x. */
double  sqr(double x)
{
 const double squared = x * x;

 return squared;
}

/*
 * Weight for the diffusion coefficients: 1 / sqrt(epsilonD + v).
 * (model: mean curvature flow)
 */
double g(double v)
{
    const double shifted = epsilonD + v;

    return 1.0 / sqrt(shifted);
}


/*
 * Reciprocal of g: sqrt(epsilonD + v).
 * (model: mean curvature flow)
 */
double ginv(double v)
{
    const double shifted = epsilonD + v;

    return sqrt(shifted);
}

/*
 * Weight for the F-scaled terms: 1 / sqrt(epsilonF + v).
 * (model: mean curvature flow)
 */
double gF(double v)
{
    const double shifted = epsilonF + v;

    return 1.0 / sqrt(shifted);
}

/*--------------------------------------------------------------------*/

void Jadra()
{
 long citac;
 int  i,j,k,l;
 double a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26;

printf("111 \n");
/* reflexions for jadra*/

 for (i=p;i<=po;i++)
  for (j=p;j<=n2+p;j++)
  {
    pi = (i*n2p + j)*n3p + p;
    u[pi-1]=u[pi+1];
    u[pi+n3+1]=u[pi+n3-1];
  }

 if (myid==0)
  {
    pp = p*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
        {
        pi = (pp + j)*n3p + k;
        u[pi-dwe]=u[pi+dwe];
        }
  }

 if (myid==nprocs-1)
  {
    ps = po*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
        {
        pi = (ps + j)*n3p + k;
        u[pi+dwe]=u[pi-dwe];
        }
  }

 for (i=0;i<=po+1;i++)
  for (k=0;k<=n3+2*p;k++)
  {

    pi = (i*n2p + p)*n3p + k;
    u[pi-n3p]=u[pi+n3p];
    pi += n2*n3p;
    u[pi+n3p]=u[pi-n3p];
  }


//printf("som v jadra %d, %d\n",kk, pocetjadier[0]);

 citac=spolu=0;
 jadrox[citac]=0;
 jadroy[citac]=0;
 jadroz[citac]=0;
  for (k=p;k<=n3+p;k++)
     for (j=p;j<=n2+p;j++)
             for (i=p;i<=po;i++)
           {

           ind = ((i*n2p+j)*n3p+k);
           idw = ind - dwe;
           ide = ind + dwe;
           ids = ind - dsn;
           idn = ind + dsn;
           idb = ind - 1;
           idt = ind + 1;

           idws = idw - dsn;
           idwb = idw - 1;
           idsb = ids - 1;

           idnt = idn + 1;
           idet = ide + 1;
           iden = ide + dsn;

           a1=u[ind]-u[idw];
           a2=u[ind]-u[ide];
           a3=u[ind]-u[ids];
           a4=u[ind]-u[idn];
           a5=u[ind]-u[idws];
           a6=u[ind]-u[ide - dsn];
           a7=u[ind]-u[iden];
           a8=u[ind]-u[idw + dsn];

           a9=u[ind]-u[idwb];
           a10=u[ind]-u[ide - 1];
           a11=u[ind]-u[idsb];
           a12=u[ind]-u[idn - 1];
           a13=u[ind]-u[idws - 1];
           a14=u[ind]-u[idsb + dwe];
           a15=u[ind]-u[iden - 1];
           a16=u[ind]-u[idwb + dsn];
           a17=u[ind]-u[idb];

           a18=u[ind]-u[idw + 1];
           a19=u[ind]-u[idet];
           a20=u[ind]-u[ids + 1];
           a21=u[ind]-u[idnt];
           a22=u[ind]-u[idws + 1];
           a23=u[ind]-u[idet - dsn];
           a24=u[ind]-u[iden + 1];
           a25=u[ind]-u[idnt - dwe];
           a26=u[ind]-u[idt];

 if((a1>0)&&(a2>0)&&(a3>0)&&(a4>0)&&(a5>0)&&(a6>0)&&(a7>0)&&(a8>0)&&
   (a9>0)&&(a10>0)&&(a11>0)&&(a12>0)&&(a13>0)&&(a14>0)&&(a15>0)&&(a16>0)&&
   (a17>0)&&(a18>0)&&(a19>0)&&(a20>0)&&(a21>0)&&(a22>0)&&(a23>0)&&(a24>0)&&
   (a25>0)&&(a26>0)&&(u[ind]>treshold))
              {
               citac=citac+1;
               jadrox[citac]=k;
               jadroy[citac]=j;
               jadroz[citac]=n1*myid+i;
              }
            }
printf("222 \n");
//printf("som v jadra %d, %d\n",kk, pocetjadier[0]);

 MPI_Allreduce(&citac,&spolu,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);

 	printf("Procesor %d - number of nuclei in time step %d = %d\n",myid,kk,citac);

//printf("som v jadra %d, %d\n",kk, pocetjadier[0]);
 if (myid==0)
   {
 	strcpy(name,namezac0);
 	sprintf(namekoniec,"_evolutionofcenters");
 	strcat(name,namekoniec);
 	vystup2=fopen(name,"a");

        printf("Total number of nuclei in time step %d = %d\n",kk,spolu);
 	fprintf(vystup2,"number of nuclei in time step %d = %d\n",kk,spolu);

 	fclose(vystup2);
    }
 lok_pocetjadier[kk]=citac;
 pocetjadier[kk]=spolu;
}

/*--------------------------------------------------------------------*/

void ReadingImage()
{int ll,i,j,k,n;


 vystup=fopen(namezac0,"rb");

for (n=0;n<nprocs;n++)
   {
     if (myid==n)
     {

     if(myid==0)
       {
       printf("reading image on processor %d\n",myid);
       fseek(vystup,0,SEEK_SET);
       for(i=p;i<=n1+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
       }

     if((myid>0)&&(myid<nprocs-1))
       {
       printf("reading image on processor %d\n",myid);
       fseek(vystup,(n1*myid-1)*(n2+p)*(n3+p),SEEK_SET);
       for(i=p-1;i<=n1+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
       }

     if (myid==nprocs-1 && myid!=0)
       {
       printf("reading image on processor %d\n",myid);
       fseek(vystup,(n1*myid-1)*(n2+p)*(n3+p),SEEK_SET);
       for(i=p-1;i<=n1last+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
        }
     }
   }
/*MPI_Barrier(MPI_COMM_WORLD);*/
 fclose(vystup);

}

/*--------------------------------------------------------------------*/

/*
 * Write the centres found on this rank in time step kk to the text file
 * "<namezac>part_centers_<kk>_<myid>", one "x y z" triple per line.
 * Entries 1..lok_pocetjadier[kk] of jadrox/jadroy/jadroz are written.
 * If the file cannot be created a message is printed and nothing is written.
 */
void WritingPoints(void)
{
 int l;

 printf("writing points %d on processor %d\n",kk,myid);

 strcpy(name,namezac);
 sprintf(namekoniec,"part_centers_%d_%d",kk,myid);
 strcat(name,namekoniec);

 vystup=fopen(name,"w");
 if (vystup==NULL)
   {
    fprintf(stderr,"processor %d: cannot open %s for writing\n",myid,name);
    return;
   }

 for (l=1;l<=lok_pocetjadier[kk];l++)
   fprintf(vystup,"%d %d %d\n",jadrox[l],jadroy[l],jadroz[l]);

 fclose(vystup);
}

/*--------------------------------------------------------------------*/

void WritingPointsU()
{int l,i,j,k;

 printf("writing pointsU %d on processor %d\n",kk,myid);

 strcpy(name,namezac);
 sprintf(namekoniec,"part_centers_intensity_%d_%d",kk,myid);
 strcat(name,namekoniec);
 vystup=fopen(name,"w");
 for (l=1;l<=lok_pocetjadier[kk];l++)
 {
  i=jadrox[l];
  j=jadroy[l];
  k=jadroz[l];
  ind = ((i*n2p+j)*n3p+k);
  fprintf(vystup,"%d %d %d %lf\n",jadrox[l],jadroy[l],jadroz[l],u[ind]);
 }
 fclose(vystup);
}

/*--------------------------------------------------------------------*/

/*
 * Dump the interior of this rank's slab of u as raw bytes to
 * "<namezac>3D_part_<kk>_<myid>".  Every value is written as
 * (int)(u*255 + 0.5) through fputc.
 *
 * The stream is opened in binary mode ("wb") because the output is raw byte
 * data; in text mode some platforms translate newline bytes.
 * `name` is reset to `namezac` before returning, also when the file could
 * not be created.
 */
void Writing3D(void)
{
 int ll,i,j,k;

 strcpy(name,namezac);
 printf("writing image on processor %d\n",myid);
 sprintf(namekoniec,"3D_part_%d_%d",kk,myid);
 strcat(name,namekoniec);

 vystup=fopen(name,"wb");
 if (vystup==NULL)
   {
    fprintf(stderr,"processor %d: cannot open %s for writing\n",myid,name);
    strcpy(name,namezac);
    return;
   }

 printf("myid=%d: zac=%d, kon=%d\n",myid,p,po);
 for (i=p;i<=po;i++)
   for (j=p;j<=n2+p;j++)
     for (k=p;k<=n3+p;k++)
       {
        ind = ((i*n2p+j)*n3p+k);
        ll=(int) (u[ind]*255 +0.5);
        fputc(ll,vystup);
       }

 fclose(vystup);
 strcpy(name,namezac);
}

/*--------------------------------------------------------------------*/

/*
 * Build the linear system of the current time step from the current u.
 *
 *  1. Ghost cells of u are filled by copying the adjacent boundary value.
 *  2. s[ind] is set to the mean of u over the cell and its seven neighbours
 *     towards lower indices (west, south, bottom and their combinations).
 *  3. For every interior cell, four squared-gradient estimates are formed on
 *     each of the six faces (w, e, s, n, b, t) from differences of u and s
 *     scaled by ih.  From them:
 *       gp      = ginv(mean of all 24 estimates)
 *       gw..gt  = mean of g() over the four estimates of a face
 *       aw..at  = tau*D*g_face*gp
 *       ap      = h2 + aw+ae+as+an+ab+at
 *       b       = h2*u + tau * sum over faces of cin_face*(u[ind]-u[neighbour])
 *     where cin_face = min(0, F*der_face*gF_face).
 *     The cout* values are computed but not used afterwards in this function.
 */
void Coefficients(void)
{int  i,j,k;
 double g1w,g2w,g3w,g4w,g1e,g2e,g3e,g4e,g1s,g2s,g3s,g4s;
 double g1n,g2n,g3n,g4n,g1b,g2b,g3b,g4b,g1t,g2t,g3t,g4t,gp;
 double gw,ge,gs,gn,gb,gt,gw1,ge1,gs1,gn1,gb1,gt1,derw,dere,ders,dern,derb,dert;
 double cinw,coutw,cine,coute,cins,couts,cinn,coutn,cinb,coutb,cint,coutt;


 printf("computing coefficients %d\n",kk);
/* reflexions */


 /* ghost cells along the third axis (includes the plane po+1) */
 for (i=p;i<=po+1;i++)
  for (j=p;j<=n2+p;j++)
  {
    pi = (i*n2p + j)*n3p + p;
    u[pi-1]=u[pi];
    u[pi+n3+1]=u[pi+n3];
  }

 /* ghost plane below the first interior plane, rank 0 only */
 if (myid==0)
  {
    pp = p*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
        {
    	pi = (pp + j)*n3p + k;
    	u[pi-dwe]=u[pi];
  	}
  }

 /* ghost plane above the last interior plane, last rank only */
 if (myid==nprocs-1)
  {
    ps = po*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
     	{
    	pi = (ps + j)*n3p + k;
    	u[pi+dwe]=u[pi];
  	}
  }

 /* ghost rows along the second axis, for all planes 0..po+1 */
 for (i=0;i<=po+1;i++)
  for (k=0;k<=n3+2*p;k++)
  {

    pi = (i*n2p + p)*n3p + k;
    u[pi-n3p]=u[pi];
    pi += n2*n3p;
    u[pi+n3p]=u[pi];
  }


/* computing of coefficients */

 /* s: average of u over the 2x2x2 block ending at (i,j,k); the loop runs one
    index past the interior in every direction because s[ident] is read below */
 for (i=p;i<=po+1;i++)
   for (j=p;j<=n2+p+1;j++)
     for (k=p;k<=n3+p+1;k++)
     {

           ind = ((i*n2p+j)*n3p+k);
           idw = ind - dwe;
           ids = ind - dsn;
           idb = ind - 1;

           idws = idw - dsn;
           idwb = idw - 1;
           idsb = ids - 1;

     s[ind]=0.125*(u[ind]+u[idw]+u[idws]+u[ids]+
                   u[idb]+u[idwb]+u[idws-1]+u[idsb]);
     }

 for (i=p;i<=po;i++)
  for (j=p;j<=n2+p;j++)
   for (k=p;k<=n3+p;k++)
   {
           /* linear indices of the cell and of the neighbours used below */
           ind = ((i*n2p+j)*n3p+k);
           idw = ind - dwe;
           ide = ind + dwe;
           ids = ind - dsn;
           idn = ind + dsn;
           idb = ind - 1;
           idt = ind + 1;

           idnt = idn + 1;
           idet = ide + 1;
           iden = ide + dsn;

           ident = iden + 1;

   /* west face: difference across the face and four gradient estimates */
   derw=-(u[ind]-u[idw]);

   g1w=sqr((u[ind]-u[idw])*ih) +
       sqr(((u[ind]+u[idw])-(s[idt]+s[ind]))*ih)+
       sqr((s[idt]-s[ind])*ih);

   g2w=sqr((u[ind]-u[idw])*ih) +
       sqr((-(u[ind]+u[idw])+(s[idnt]+s[idt]))*ih)+
       sqr((s[idnt]-s[idt])*ih);

   g3w=sqr((u[ind]-u[idw])*ih) +
       sqr((-(u[ind]+u[idw])+(s[idnt]+s[idn]))*ih)+
       sqr((s[idnt]-s[idn])*ih);

   g4w=sqr((u[ind]-u[idw])*ih) +
       sqr(((u[ind]+u[idw])-(s[idn]+s[ind]))*ih)+
       sqr((s[idn]-s[ind])*ih);

   /* east face */
   dere=u[ide]-u[ind];

   g1e=sqr((u[ide]-u[ind])*ih) +
       sqr(((u[ide]+u[ind])-(s[idet]+s[ide]))*ih)+
       sqr((s[idet]-s[ide])*ih);

   g2e=sqr((u[ide]-u[ind])*ih) +
       sqr((-(u[ide]+u[ind])+(s[ident]+s[idet]))*ih)+
       sqr((s[ident]-s[idet])*ih);

   g3e=sqr((u[ide]-u[ind])*ih) +
       sqr((-(u[ide]+u[ind])+(s[ident]+s[iden]))*ih)+
       sqr((s[ident]-s[iden])*ih);

   g4e=sqr((u[ide]-u[ind])*ih) +
       sqr(((u[ide]+u[ind])-(s[iden]+s[ide]))*ih)+
       sqr((s[iden]-s[ide])*ih);

   /* south face */
   ders=-(u[ind]-u[ids]);

   g1s=sqr((u[ind]-u[ids])*ih) +
       sqr(((u[ind]+u[ids])-(s[ide]+s[ind]))*ih)+
       sqr((s[ide]-s[ind])*ih);

   g2s=sqr((u[ind]-u[ids])*ih) +
       sqr((-(u[ind]+u[ids])+(s[idet]+s[ide]))*ih)+
       sqr((s[idet]-s[ide])*ih);

   g3s=sqr((u[ind]-u[ids])*ih) +
       sqr((-(u[ind]+u[ids])+(s[idet]+s[idt]))*ih)+
       sqr((s[idet]-s[idt])*ih);

   g4s=sqr((u[ind]-u[ids])*ih) +
       sqr(((u[ind]+u[ids])-(s[idt]+s[ind]))*ih)+
       sqr((s[idt]-s[ind])*ih);

   /* north face */
   dern=u[idn]-u[ind];

   g1n=sqr((u[idn]-u[ind])*ih) +
       sqr(((u[idn]+u[ind])-(s[iden]+s[idn]))*ih)+
       sqr((s[iden]-s[idn])*ih);

   g2n=sqr((u[idn]-u[ind])*ih) +
       sqr((-(u[idn]+u[ind])+(s[ident]+s[iden]))*ih)+
       sqr((s[ident]-s[iden])*ih);

   g3n=sqr((u[idn]-u[ind])*ih) +
       sqr((-(u[idn]+u[ind])+(s[ident]+s[idnt]))*ih)+
       sqr((s[ident]-s[idnt])*ih);

   g4n=sqr((u[idn]-u[ind])*ih) +
       sqr(((u[idn]+u[ind])-(s[idnt]+s[idn]))*ih)+
       sqr((s[idnt]-s[idn])*ih);

   /* bottom face */
   derb=-(u[ind]-u[idb]);

   g1b=sqr((u[ind]-u[idb])*ih) +
       sqr(((u[ind]+u[idb])-(s[ide]+s[ind]))*ih)+
       sqr((s[ide]-s[ind])*ih);

   g2b=sqr((u[ind]-u[idb])*ih) +
       sqr((-(u[ind]+u[idb])+(s[iden]+s[ide]))*ih)+
       sqr((s[iden]-s[ide])*ih);

   g3b=sqr((u[ind]-u[idb])*ih) +
       sqr((-(u[ind]+u[idb])+(s[iden]+s[idn]))*ih)+
       sqr((s[iden]-s[idn])*ih);

   g4b=sqr((u[ind]-u[idb])*ih) +
       sqr(((u[ind]+u[idb])-(s[idn]+s[ind]))*ih)+
       sqr((s[idn]-s[ind])*ih);

   /* top face */
   dert=u[idt]-u[ind];

   g1t=sqr((u[idt]-u[ind])*ih) +
       sqr(((u[idt]+u[ind])-(s[idet]+s[idt]))*ih)+
       sqr((s[idet]-s[idt])*ih);

   g2t=sqr((u[idt]-u[ind])*ih) +
       sqr((-(u[idt]+u[ind])+(s[ident]+s[idet]))*ih)+
       sqr((s[ident]-s[idet])*ih);

   g3t=sqr((u[idt]-u[ind])*ih) +
       sqr((-(u[idt]+u[ind])+(s[ident]+s[idnt]))*ih)+
       sqr((s[ident]-s[idnt])*ih);

   g4t=sqr((u[idt]-u[ind])*ih) +
       sqr(((u[idt]+u[ind])-(s[idnt]+s[idt]))*ih)+
       sqr((s[idnt]-s[idt])*ih);

   /* cell value: ginv of the mean of all 24 face estimates */
   gp=ginv((g1w+g2w+g3w+g4w+g1e+g2e+g3e+g4e+
       g1s+g2s+g3s+g4s+g1n+g2n+g3n+g4n+
       g1b+g2b+g3b+g4b+g1t+g2t+g3t+g4t)/24.);
/*
   gp=24./(g(g1w)+g(g2w)+g(g3w)+g(g4w)+g(g1e)+g(g2e)+g(g3e)+g(g4e)+
       g(g1s)+g(g2s)+g(g3s)+g(g4s)+g(g1n)+g(g2n)+g(g3n)+g(g4n)+
       g(g1b)+g(g2b)+g(g3b)+g(g4b)+g(g1t)+g(g2t)+g(g3t)+g(g4t));
*/
   /* face weights for the D-term (epsilonD regularisation) */
   gw=0.25*(g(g1w)+g(g2w)+g(g3w)+g(g4w));
   ge=0.25*(g(g1e)+g(g2e)+g(g3e)+g(g4e));
   gs=0.25*(g(g1s)+g(g2s)+g(g3s)+g(g4s));
   gn=0.25*(g(g1n)+g(g2n)+g(g3n)+g(g4n));
   gb=0.25*(g(g1b)+g(g2b)+g(g3b)+g(g4b));
   gt=0.25*(g(g1t)+g(g2t)+g(g3t)+g(g4t));

   /* off-diagonal coefficients of the linear system */
   aw[ind]=tau*D*gw*gp;
   ae[ind]=tau*D*ge*gp;
   as[ind]=tau*D*gs*gp;
   an[ind]=tau*D*gn*gp;
   ab[ind]=tau*D*gb*gp;
   at[ind]=tau*D*gt*gp;

   /* diagonal coefficient */
   ap[ind]=h2+aw[ind]+ae[ind]+as[ind]+an[ind]+
               ab[ind]+at[ind];

   /* face weights for the F-term (epsilonF regularisation) */
   gw1=0.25*(gF(g1w)+gF(g2w)+gF(g3w)+gF(g4w));
   ge1=0.25*(gF(g1e)+gF(g2e)+gF(g3e)+gF(g4e));
   gs1=0.25*(gF(g1s)+gF(g2s)+gF(g3s)+gF(g4s));
   gn1=0.25*(gF(g1n)+gF(g2n)+gF(g3n)+gF(g4n));
   gb1=0.25*(gF(g1b)+gF(g2b)+gF(g3b)+gF(g4b));
   gt1=0.25*(gF(g1t)+gF(g2t)+gF(g3t)+gF(g4t));

   /* split F*der*gF into its non-positive (cin) and non-negative (cout)
      part; only the cin parts enter the right-hand side below */
   cinw=rmin(0.,F*derw*gw1);
   coutw=rmax(0.,F*derw*gw1);
   cine=rmin(0.,F*dere*ge1);
   coute=rmax(0.,F*dere*ge1);
   cins=rmin(0.,F*ders*gs1);
   couts=rmax(0.,F*ders*gs1);
   cinn=rmin(0.,F*dern*gn1);
   coutn=rmax(0.,F*dern*gn1);
   cinb=rmin(0.,F*derb*gb1);
   coutb=rmax(0.,F*derb*gb1);
   cint=rmin(0.,F*dert*gt1);
   coutt=rmax(0.,F*dert*gt1);

   /* right-hand side */
   b[ind]=h2*u[ind]+tau*cinw*(u[ind]-u[idw])+
                    tau*cine*(u[ind]-u[ide])+
                    tau*cins*(u[ind]-u[ids])+
                    tau*cinn*(u[ind]-u[idn])+
                    tau*cinb*(u[ind]-u[idb])+
                    tau*cint*(u[ind]-u[idt]);
   }
}

/*--------------------------------------------------------------------*/

/*
 * Solve, in place in u, the system
 *     ap*u - aw*u_w - ae*u_e - as*u_s - an*u_n - ab*u_b - at*u_t = b
 * assembled by Coefficients().
 *
 * Each iteration updates the cells with even parity of (n1*myid+i+j+k)
 * ("RED"), exchanges halo planes with the neighbouring ranks, updates the
 * odd-parity cells ("BLACK"), exchanges halos again and refreshes the ghost
 * cells.  Every update is u += omega*(z-u), where z is the value that
 * satisfies the cell's own equation.
 *
 * The squared residual summed over all ranks (delta) is evaluated every
 * `pertol` iterations; iteration stops when delta <= tol*deltain (deltain is
 * the initial squared residual) or after maxpgsi iterations.
 */
void LinearSystemSolving(void)
{int  i,j,k,pgsi=0;
 double delta=1.0e+10,deltain=0.,z,tmp;
 MPI_Status status;
 MPI_Request req1, req2, req3, req4;

 if (myid==0)
   printf("solving system %d\n",kk);

  /* initial squared residual on this rank */
  for (i=p;i<=po;i++)
   for (j=p;j<=n2+p;j++)
    for (k=p;k<=n3+p;k++)
    {

    ind = ((i*n2p+j)*n3p+k);
    idw = ind - dwe;
    ide = ind + dwe;
    ids = ind - dsn;
    idn = ind + dsn;
    idb = ind - 1;
    idt = ind + 1;

    pom = (-aw[ind]*u[idw]-ae[ind]*u[ide]-
    as[ind]*u[ids]-an[ind]*u[idn]-
    ab[ind]*u[idb]-at[ind]*u[idt]+
    ap[ind]*u[ind]-b[ind]);

    deltain=deltain+pom*pom;
    }

 /* sum the residual over all ranks */
 MPI_Allreduce(&deltain,&tmp,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
 deltain=tmp;

 if (myid==0)
   printf("%d: deltain=%30.25lf\n",pgsi,deltain);

 do
  {
   pgsi=pgsi+1;

   /*Iterations for RED elements */

     /*    printf("Iteration for RED elements on processor %d\n",myid);*/
  for (i=p;i<=po;i++)
   for (j=p;j<=n2+p;j++)
    for (k=p;k<=n3+p;k++)
     {
     /* n1*myid+i makes the parity refer to the global plane number */
     if ((n1*myid+i+j+k) % 2 ==0)
     {

     ind = ((i*n2p+j)*n3p+k);
     idw = ind - dwe;
     ide = ind + dwe;
     ids = ind - dsn;
     idn = ind + dsn;
     idb = ind - 1;
     idt = ind + 1;

     men = ap[ind];

     z=(b[ind]+aw[ind]*u[idw]+ae[ind]*u[ide]+
        as[ind]*u[ids]+an[ind]*u[idn]+
        ab[ind]*u[idb]+at[ind]*u[idt])/men;

     u[ind]=u[ind]+omega*(z-u[ind]);
     }
     }

   /* Communication*/

   /* send plane n1 to the next rank and plane p to the previous one; receive
      their planes into n1+p and p-1 (dwe doubles per plane).  At the ends
      inext/iprev are MPI_PROC_NULL. */
   MPI_Isend(&(u[n1*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req1);
	MPI_Isend(&(u[p*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req2);
	MPI_Irecv(&(u[(n1+p)*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req3);
	MPI_Irecv(&(u[(p-1)*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req4);

	MPI_Wait(&req1, &status);
	MPI_Wait(&req2, &status);
	MPI_Wait(&req3, &status);
	MPI_Wait(&req4, &status);


   /* Iterations for BLACK elements */

/*         printf("Iteration for BLACK elements on processor %d\n",myid);
*/
    for (i=p;i<=po;i++)
    for (j=p;j<=n2+p;j++)
    for (k=p;k<=n3+p;k++)
     {
     if ((n1*myid+i+j+k) % 2 ==1)
     {

     ind = ((i*n2p+j)*n3p+k);
     idw = ind - dwe;
     ide = ind + dwe;
     ids = ind - dsn;
     idn = ind + dsn;
     idb = ind - 1;
     idt = ind + 1;

     men = ap[ind];

     z=(b[ind]+aw[ind]*u[idw]+ae[ind]*u[ide]+
        as[ind]*u[ids]+an[ind]*u[idn]+
        ab[ind]*u[idb]+at[ind]*u[idt])/men;

     u[ind]=u[ind]+omega*(z-u[ind]);
     }
     }

   /* Communication*/

   /* same halo exchange as after the RED sweep */
   MPI_Isend(&(u[n1*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req1);
	MPI_Isend(&(u[p*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req2);
	MPI_Irecv(&(u[(n1+p)*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req3);
	MPI_Irecv(&(u[(p-1)*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req4);

	MPI_Wait(&req1, &status);
	MPI_Wait(&req2, &status);
	MPI_Wait(&req3, &status);
	MPI_Wait(&req4, &status);

 /* refresh ghost cells: copy the adjacent boundary value outwards */
 for (i=p;i<=po;i++)
  for (j=p;j<=n2+p;j++)
  {
    pi = (i*n2p + j)*n3p + p;
    u[pi-1]=u[pi];
    u[pi+n3+1]=u[pi+n3];
  }

 if (myid==0)
  {
    pp = p*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
        {
        pi = (pp + j)*n3p + k;
        u[pi-dwe]=u[pi];
        }
  }

 if (myid==nprocs-1)
  {
    ps = po*n2p;
    for (j=p;j<=n2+p;j++)
      for (k=0;k<=n3+2*p;k++)
        {
        pi = (ps + j)*n3p + k;
        u[pi+dwe]=u[pi];
        }
  }

 for (i=0;i<=po+1;i++)
  for (k=0;k<=n3+2*p;k++)
  {

    pi = (i*n2p + p)*n3p + k;
    u[pi-n3p]=u[pi];
    pi += n2*n3p;
    u[pi+n3p]=u[pi];
  }

   /* Test of convergence */

   if (pgsi % pertol==0)
     {
      delta=0.;

      for (i=p;i<=po;i++)
      for (j=p;j<=n2+p;j++)
      for (k=p;k<=n3+p;k++)
      {

      ind = ((i*n2p+j)*n3p+k);
      idw = ind - dwe;
      ide = ind + dwe;
      ids = ind - dsn;
      idn = ind + dsn;
      idb = ind - 1;
      idt = ind + 1;

      delta=delta+sqr(-aw[ind]*u[idw]-ae[ind]*u[ide]-
      as[ind]*u[ids]-an[ind]*u[idn]-
      ab[ind]*u[idb]-at[ind]*u[idt]+
      ap[ind]*u[ind]-b[ind]);
      }

      MPI_Allreduce(&delta,&tmp,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
      delta=tmp;

      if (myid==0)
        printf("%d: delta=%30.25lf\n",pgsi,delta);

     }

  /* stop on relative residual reduction or on the iteration cap */
  } while ((delta>tol*deltain) && (pgsi<maxpgsi));

 if (myid==0)
  printf("%d system solved with %d gs iterations\n",kk,pgsi);
}

/*--------------------------------------------------------------------*/

void EllipticStep(void)
{ int  i,j,k;
 double cput1,cput2;

 cput1=clock()/(double)(CLOCKS_PER_SEC);
 Coefficients();
 cput2=clock()/(double)(CLOCKS_PER_SEC);
 printf("Processor %d - CPU time coeff in time step %d: %e secs\n",myid,kk,cput2-cput1);

 cput1=clock()/(double)(CLOCKS_PER_SEC);
 LinearSystemSolving();
 cput2=clock()/(double)(CLOCKS_PER_SEC);
 printf("Processor %d - CPU time system in time step %d: %e secs\n",myid,kk,cput2-cput1);
}

/*--------------------------------------------------------------------*/

int main(int argc, char *argv[])
{
 int i,j,k,testkoniec;
 double CPUT1,CPUT2,cput1,cput2;
 
 MPI_Init(&argc, &argv);
 
 CPUT1=MPI_Wtime();

 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
 MPI_Comm_rank(MPI_COMM_WORLD, &myid);

 name=(char *)malloc(200);
 namekoniec=(char *)malloc(50);
 namezac1=(char *)malloc(200);
 namezac0=(char *)malloc(200);
 namezac=(char *)malloc(200);

 if (myid==0)
   {
 	scanf("%s %s",namezac0,namezac1);
	pp = (int) strlen(namezac0);
	pi = (int) strlen(namezac1);
 	scanf("%d %d %d",&N1,&n2,&n3);
	scanf("%lf %lf",&F,&D);
 	scanf("%lf %lf %lf",&treshold,&epsilonD,&epsilonF);
 	scanf("%lf %d %d",&tau,&pck,&iz);
 	scanf("%d",&zapis);
   }
    inext=myid+1;
	iprev=myid-1;
	if(myid==0)iprev=MPI_PROC_NULL;
	if(myid==nprocs-1)inext=MPI_PROC_NULL;
  MPI_Bcast(&N1, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&n2, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&n3, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&F, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&D, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&treshold, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&epsilonD, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&epsilonF, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&tau, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&pck, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&iz, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&zapis, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&pp, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&pi, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(namezac0, pp+2, MPI_CHAR,0,MPI_COMM_WORLD);
  MPI_Bcast(namezac1, pi+2, MPI_CHAR,0,MPI_COMM_WORLD);

  printf("After on myid=%d:\t%s\t%s\n%d %d %d\t\t%.1lf\t%.5lf\t\t%.2lf\t%.1lf %.7lf\t\t%.4lf %d %d\n",myid,namezac0, namezac1, N1, n2, n3, F, D, treshold, epsilonD, epsilonF, tau, pck, iz);

 strcpy(namezac,namezac0);
 strcpy(name,namezac);
 strcat(namezac0,namezac1);
 printf("%s",namezac0);
 N1 -= 1;
 n2 -= 1;
 n3 -= 1;

 n1 = ceil((int) N1 / nprocs)+1;
 n1p = n1 + 2*p + 1;
 n2p = n2 + 2*p + 1;
 n3p = n3 + 2*p + 1;
 n1last = N1-(nprocs-1)*n1;

 dwe = n2p*n3p;
 dsn = n3p;

 u = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 s = (double *) malloc(n1p * n2p * n3p * sizeof(double));

 aw = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 ae = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 as = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 an = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 ab = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 at = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 ap = (double *) malloc(n1p * n2p * n3p * sizeof(double));
 b  = (double *) malloc(n1p * n2p * n3p * sizeof(double));

 h=0.01;
 h2=h*h;
 h4=h2*h2;
 r=h2/tau;
 scale=0.0;
 kk=0;
 ih=1./h;

 if (myid==0)
 	printf("tau=%lf, h=%lf, h^2/2=%lf\n",tau,h,h*h/2.);


  if (myid==0)
    od = p;
  else
    od = p-1;

  if (myid==nprocs-1)
    po = n1last+p;
  else
    po = n1+p-1;

if(myid==0)
 {
 strcpy(name,namezac0);
 sprintf(namekoniec,"_evolutionofcenters");
 strcat(name,namekoniec);
 vystup2=fopen(name,"w");
 fclose(vystup2);
 }

ReadingImage();

 Jadra();

 WritingPoints();

//  WritingPointsU();

 testkoniec=1;

 for (kk=1;kk<=pck;kk++)
   {
    scale=scale+tau;

    cput1=MPI_Wtime();

    EllipticStep();

   Jadra();

    cput2=MPI_Wtime();;

    printf("Processor %d - CPU time elliptic step in time step %d: %e secs\n",myid,kk,cput2-cput1);

    if (kk % iz ==0)
     {
      cput1=MPI_Wtime();;

    WritingPoints();
  //     WritingPointsU();
      if(zapis == 1)
         Writing3D();


      cput2=MPI_Wtime();
      printf("Processor %d - CPU time writing: %e secs\n",myid,cput2-cput1);
     }

 if (myid==0)
  {
   printf("%3d .time step finished\n",kk);

   printf("%d, %d\n",kk,pocetjadier[kk]);
   printf("%d, %d\n",kk-1,pocetjadier[kk-1]);
  }

   if (pocetjadier[kk]<pocetjadier[kk-1])
    testkoniec=1;

   if ((pocetjadier[kk]>=pocetjadier[kk-1])&&(kk>=pckmin))
    {
     pocetjadier[kk]=pocetjadier[kk-1];
     testkoniec=testkoniec+1;
     if (myid==0)
          printf("same number of nuclei repeats %d -times\n",testkoniec);
    }

   if (testkoniec>=pocetopakovani)
    {
   WritingPoints();
 //    WritingPointsU();
    if(zapis == 1)
       Writing3D();
    break;}
 
  }

 CPUT2=MPI_Wtime();
 printf("Processor %d - total time: %e secs \n",myid,CPUT2-CPUT1);

 free(u);
 free(s);

 free(aw);
 free(ae);
 free(as);
 free(an);
 free(ab);
 free(at);
 free(ap);
 free(b);

 free(name);

MPI_Finalize();
}

	
