/* Geodesic Mean Curvature Flow - Geodesic Mean Curvature Flow Method for 3D image filtering 
 * Written in 2015 by Karol Mikula mikula@math.sk
 * Robert Cunderlik cunderli@svf.stuba.sk
 * Robert Spir spir.robert@gmail.com
 * Alessandro Sarti alessandro.sarti@ehess.fr
 * Zuzana Kriva kriva@math.sk
 * 
 * To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
 * You should have received a copy of the CC BY-NC-SA 4.0 Dedication along with this software. If not, see <https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode>.
 */


/* Program for multiscale analysis of 3D images by
Perona - Malik equation (Osher - Rudin model and heat equation)
including convolution realized by one step of heat equation

semi-implicit approximation in scale
"diamond cell" finite volume method in space
*/

#include <fcntl.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <mpi.h>


/* Compile-time settings of the grid indexing and of the iterative solver. */

/* Index of the first interior cell along every axis: all grid loops start
   at p, and the reflections write the ghost cells at index p-1. */
#define  p 1
/* The solver re-evaluates the residual every pertol iterations. */
#define  pertol 10
/* Relaxation factor applied in the red/black sweeps of LinearSystemSolving. */
#define  omega 1.85
/* Upper bound on the number of solver iterations per linear system. */
#define  maxpgsi 10000
/* Relative tolerance: iteration stops once the squared residual drops to
   tol times the initial squared residual. */
#define  tol 1.0e-4
/* When 1, Coefficients() runs SubCoefficients() and LinearSystemSolving()
   before the pm() weights are evaluated. */
#define  convolution 1


/* Image dimensions read on rank 0; main() decrements each of them by one. */
int    N1,n2,n3;
/* skk/kk: first and current scale-step counter, pck: number of scale steps,
   iz: period (in steps) of image output, myid/nprocs: MPI rank and size,
   n1last: slice count parameter of the last rank,
   inext/iprev: neighbouring ranks (MPI_PROC_NULL at the ends). */
int    skk, kk, pck, iz, myid, nprocs, n1last,inext,iprev;
/* od/po: first and last slice index of the loop currently being set up,
   n1: slices per rank, n1p/n2p/n3p: padded array extents (n + 2*p + 1). */
int    od, po, n1, n1p, n2p, n3p;
/* Scratch linear indices: current cell and its six face neighbours. */
int    ind, idw, ide, ids, idn, idb, idt;
/* Scratch linear indices of diagonal neighbours used by Coefficients(). */
int    idws, idwb, idsb, idnt, idet, iden;
/* dwe, dsn: linear-index strides of the first and second grid axis
   (n2p*n3p and n3p); pp, pi: scratch integers. */
int    dwe, dsn, pp, pi;

/* Model parameters read from input: K is used by pm(), epsilon by g()/ginv(). */
double K,epsilon;
/* tau, sigma: step sizes read from input, h: grid spacing, scale: accumulated
   tau, koef = tau/h^2, koefconv = sigma/h^2, men: divisor scratch of the
   solver. h2 is not used in this file. */
double tau, sigma, h, h2, scale, koef, koefconv, men;
/* Scratch values; ih = 1/h. */
double pom, pom1, pom2, pom3, ih;

/* u: current image, uprev: image at the start of the scale step,
   stred1..3: four-cell averages of u computed in Coefficients(). */
double *u, *uprev, *stred1, *stred2, *stred3;
/* Off-diagonal coefficients (aw..at), diagonal (ap) and right-hand side (b)
   of the linear system solved by LinearSystemSolving(). */
double  *aw, *ae, *as, *an, *ab, *at, *ap, *b;
/* Coefficients from the first pass of Coefficients(), before the pm() weights. */
double  *aw1, *ae1, *as1, *an1, *ab1, *at1;


/* vystup is the stream used for image input and output; vystup2 and vystup3
   are not used in this file. */
FILE   *vystup, *vystup2, *vystup3;
/* File-name buffers allocated in main(): namezac0 becomes the input path,
   namezac the output prefix, name the output path being assembled,
   namekoniec its suffix. */
char   *name, *namezac, *namekoniec, *namezac0, *namezac1, *namezac2;



/* Returns the square of x. */
double sqr(double x)
{
  const double squared = x * x;

  return squared;
}
 
/* Mean curvature flow weight: reciprocal of sqrt(epsilon + v), where epsilon
   is the file-scope regularisation parameter. */
double g(double v)
{
    const double regularised = epsilon + v;

    return 1.0 / sqrt(regularised);
}

/* Reciprocal of g(): sqrt(epsilon + v), with epsilon taken from file scope. */
double ginv(double v)
{
    const double regularised = epsilon + v;

    return sqrt(regularised);
}

/* Edge-stopping weight of the geodesic model: 1 / (1 + K*v), with K taken
   from file scope. A constant weight of 1.0 would instead give plain mean
   curvature flow. */
double pm(double v)
{
    const double denominator = 1.0 + K*v;

    return 1.0 / denominator;
}

/*--------------------------------------------------------------------*/

void ReadingImage()
{int ll,i,j,k,n;

     vystup=fopen(namezac0,"rb");

     if(myid==0)
       {
       printf("reading image on processor %d\n",myid);
       fseek(vystup,0,SEEK_SET);
       for(i=p;i<=n1+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
       }

     if((myid>0)&&(myid<nprocs-1))
       {
       printf("reading image on processor %d\n",myid); 
       fseek(vystup,(n1*myid-1)*(n2+p)*(n3+p),SEEK_SET); 
       for(i=p-1;i<=n1+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
       }

     if (myid==nprocs-1 && myid!=0) 
       {
       printf("reading image on processor %d\n",myid); 
       fseek(vystup,(n1*myid-1)*(n2+p)*(n3+p),SEEK_SET); 
       for(i=p-1;i<=n1last+p;i++)
         for(j=p;j<=n2+p;j++)
           for(k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=getc(vystup);
                u[ind]=ll/255.;
             }
        }

/*MPI_Barrier(MPI_COMM_WORLD);*/
       fclose(vystup);

       printf("%d procd:\tM[20][250][250] = %.9lf\n",myid,u[(n2p+1)*n3p+1]); 

}

/*--------------------------------------------------------------------*/

void WritingImage()
{int ll,i,j,k,n;

     if (myid==0)
       {
//       printf("writing image on processor %d\n",myid);
       sprintf(namekoniec,"part_%d_%d",kk,myid);
       strcat(name,namekoniec);
       vystup=fopen(name,"w");
       printf("myid=%d: zac=%d, kon=%d\n",myid,p,n1);
       for (i=p;i<=n1;i++)
         for (j=p;j<=n2+p;j++)
           for (k=p;k<=n3+p;k++)
	     {
                ind = ((i*n2p+j)*n3p+k);
                ll=(int) (u[ind]*255 +0.5);
                /*fprintf(vystup,"%d\n",ll);*/
                fputc(ll,vystup); 
	     }
       fclose(vystup);
       strcpy(name,namezac);
       }

     if ((myid>0)&&(myid<nprocs-1))
       {
//       printf("writing image on processor %d\n",myid);
       sprintf(namekoniec,"part_%d_%d",kk,myid);
       strcat(name,namekoniec);
       vystup=fopen(name,"w");
//       printf("myid=%d: zac=%d, kon=%d\n",myid,p,n1);
       for (i=p;i<=n1;i++)
         for (j=p;j<=n2+p;j++)
           for (k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=(int) (u[ind]*255 +0.5);
                /*fprintf(vystup,"%d\n",ll);*/
                fputc(ll,vystup); 
                                
             }
       fclose(vystup);
       strcpy(name,namezac);
       }

     if (myid==nprocs-1 && myid!=0)
       {
//       printf("writing image on processor %d\n",myid);
       sprintf(namekoniec,"part_%d_%d",kk,myid);
       strcat(name,namekoniec);
       vystup=fopen(name,"w");
       printf("myid=%d: zac=%d, kon=%d\n",myid,p,n1last+p);
       for (i=p;i<=n1last+p;i++)
         for (j=p;j<=n2+p;j++)
           for (k=p;k<=n3+p;k++)
             {
                ind = ((i*n2p+j)*n3p+k);
                ll=(int) (u[ind]*255 +0.5);
                /*fprintf(vystup,"%d\n",ll);*/
                fputc(ll,vystup);
             }
       fclose(vystup);
       strcpy(name,namezac);
       }

}

/*--------------------------------------------------------------------*/

/* Sum over the owned cells (slices p .. ilast) of the squared residual of
   the 7-point system  ap*u - sum(a_nb*u_nb) - b.  Uses local indices only. */
static double SolverResidualSquared(int ilast)
{
  double sum = 0.;
  int    i, j, k;

  for (i = p; i <= ilast; i++)
    for (j = p; j <= n2+p; j++)
      for (k = p; k <= n3+p; k++)
        {
          const int    c = (i*n2p+j)*n3p+k;
          const double r = -aw[c]*u[c-dwe]-ae[c]*u[c+dwe]-
                           as[c]*u[c-dsn]-an[c]*u[c+dsn]-
                           ab[c]*u[c-1]-at[c]*u[c+1]+
                           ap[c]*u[c]-b[c];

          sum = sum + r*r;
        }

  return sum;
}

/* One relaxation sweep over the owned cells whose global parity
   (n1*myid+i+j+k) % 2 equals colour (0 = red, 1 = black). */
static void SolverSorSweep(int ilast, int colour)
{
  int i, j, k;

  for (i = p; i <= ilast; i++)
    for (j = p; j <= n2+p; j++)
      for (k = p; k <= n3+p; k++)
        {
          if ((n1*myid+i+j+k) % 2 == colour)
            {
              const int    c = (i*n2p+j)*n3p+k;
              const double z = (b[c]+aw[c]*u[c-dwe]+ae[c]*u[c+dwe]+
                                as[c]*u[c-dsn]+an[c]*u[c+dsn]+
                                ab[c]*u[c-1]+at[c]*u[c+1])/ap[c];

              u[c] = u[c]+omega*(z-u[c]);
            }
        }
}

/* Exchanges one slice of u with each neighbouring rank: slice n1 goes to
   inext and slice p to iprev, while slices n1+p and p-1 are received from
   them.  Transfers addressed to MPI_PROC_NULL complete immediately. */
static void SolverExchangeHalo(void)
{
  MPI_Request req[4];

  MPI_Isend(&(u[n1*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req[0]);
  MPI_Isend(&(u[p*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req[1]);
  MPI_Irecv(&(u[(n1+p)*dwe]),dwe,MPI_DOUBLE,inext,7,MPI_COMM_WORLD,&req[2]);
  MPI_Irecv(&(u[(p-1)*dwe]),dwe,MPI_DOUBLE,iprev,7,MPI_COMM_WORLD,&req[3]);

  MPI_Waitall(4, req, MPI_STATUSES_IGNORE);
}

/*
 * Solves the linear system held in aw..at, ap, b for u by red/black
 * successive over-relaxation with factor omega.
 *
 * Each iteration performs a red sweep, a halo exchange, a black sweep and
 * another halo exchange.  Every pertol iterations the globally summed
 * squared residual is recomputed; iteration stops when it is no larger than
 * tol times the initial value or after maxpgsi iterations.  Rank 0 prints
 * the progress.
 *
 * The sweeps and the residual work on local index variables, so the
 * file-scope scratch variables ind, idw, ..., pom and men are left
 * untouched; po is still set to the last owned slice.
 */
void LinearSystemSolving(void)
{
  int    pgsi = 0;
  double delta = 1.0e+10, deltain, tmp;

  if (myid==0)
    printf("solving system %d\n",kk);

  /* Last slice this rank updates. */
  po = (myid==nprocs-1) ? n1last+p : n1+p-1;

  deltain = SolverResidualSquared(po);
  MPI_Allreduce(&deltain,&tmp,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  deltain = tmp;

  if (myid==0)
    printf("%d: deltain=%30.25lf\n",pgsi,deltain);

  do
    {
      pgsi = pgsi+1;

      SolverSorSweep(po, 0);
      SolverExchangeHalo();

      SolverSorSweep(po, 1);
      SolverExchangeHalo();

      /* Test of convergence */
      if (pgsi % pertol==0)
        {
          delta = SolverResidualSquared(po);
          MPI_Allreduce(&delta,&tmp,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
          delta = tmp;

          if (myid==0)
            printf("%d: delta=%30.25lf\n",pgsi,delta);
        }
    }
  while ((delta>tol*deltain) && (pgsi<maxpgsi));

  if (myid==0)
    printf("%d system solved with %d gs iterations\n",kk,pgsi);
}

/*--------------------------------------------------------------------*/

/*
 * Builds the linear system of the smoothing step: every off-diagonal
 * coefficient equals koefconv, the right-hand side is the current u.
 *
 * On each outer face of the domain the coefficient pointing outwards is set
 * to zero and the opposite one is doubled.  The first-axis faces are treated
 * only on the first and on the last rank.
 */
void SubCoefficients(void)
{
  int i, j, k;

  po = (myid == nprocs-1) ? (n1last+p+1) : (n1+p);

  /* Uniform coefficients, including one extra layer in every direction. */
  for (i = p; i <= po; ++i)
    {
      for (j = p; j <= n2+p+1; ++j)
        {
          for (k = p; k <= n3+p+1; ++k)
            {
              ind = (i*n2p + j)*n3p + k;
              aw[ind] = koefconv;
              ae[ind] = koefconv;
              as[ind] = koefconv;
              an[ind] = koefconv;
              ab[ind] = koefconv;
              at[ind] = koefconv;
            }
        }
    }

  /* Low face of the first axis. */
  if (myid == 0)
    {
      pp = p*n2p;
      for (j = p; j <= n2+p; ++j)
        {
          for (k = p; k <= n3+p; ++k)
            {
              pi = (pp + j)*n3p + k;
              aw[pi] = 0.0;
              ae[pi] *= 2.0;
            }
        }
    }

  /* High face of the first axis. */
  if (myid == nprocs-1)
    {
      pp = (n1last + p)*n2p;
      for (j = p; j <= n2+p; ++j)
        {
          for (k = p; k <= n3+p; ++k)
            {
              pi = (pp + j)*n3p + k;
              ae[pi] = 0.0;
              aw[pi] *= 2.0;
            }
        }
    }

  /* Both faces of the second axis. */
  for (i = p; i <= po-1; ++i)
    {
      for (k = p; k <= n3+p; ++k)
        {
          pi = (i*n2p + p)*n3p + k;
          as[pi] = 0.0;
          an[pi] *= 2.0;

          pi = pi + n2*n3p;
          an[pi] = 0.0;
          as[pi] *= 2.0;
        }
    }

  /* Both faces of the third axis. */
  for (i = p; i <= po-1; ++i)
    {
      for (j = p; j <= n2+p; ++j)
        {
          pi = (i*n2p + j)*n3p + p;
          ab[pi] = 0.0;
          at[pi] *= 2.0;

          pi = pi + n3;
          at[pi] = 0.0;
          ab[pi] *= 2.0;
        }
    }

  /* Diagonal and right-hand side. */
  for (i = p; i <= po-1; ++i)
    {
      for (j = p; j <= n2+p; ++j)
        {
          for (k = p; k <= n3+p; ++k)
            {
              ind = (i*n2p + j)*n3p + k;
              ap[ind] = 1.0+aw[ind]+ae[ind]+as[ind]+an[ind]+
                            ab[ind]+at[ind];
              b[ind] = u[ind];
            }
        }
    }
}

/*--------------------------------------------------------------------*/

void Coefficients(void)
{int  i,j,k;
double z1,z2,z3,z4,z5,z6,z7;

/* reflexions */

  if (myid==0)
    od = p;
  else
    od = p-1;

  if (myid==nprocs-1)
    po = n1last+p;
  else
    po = n1+p;


  for (i=od;i<=po;i++)
   for (j=p;j<=n2+p;j++)
   {
     pi = (i*n2p + j)*n3p + p;
     u[pi-1]=u[pi];
     u[pi+n3+1]=u[pi+n3];
   }

  for (i=od;i<=po;i++)
   for (k=0;k<=n3+p+1;k++)
   {
     pi = (i*n2p + p)*n3p + k;
     u[pi-n3p]=u[pi];
     pi += n2*n3p;
     u[pi+n3p]=u[pi];
   }


  if (myid==0)
   {
     
     pp = p*n2p;
     for (j=0;j<=n2+p+1;j++)
       for (k=0;k<=n3+p+1;k++)
         {
          pi = (pp + j)*n3p + k;
          u[pi-dwe]=u[pi];
         }
   }

  if (myid==nprocs-1)
   {
     pp = (n1last + p)*n2p;
     for (j=0;j<=n2+p+1;j++)
       for (k=0;k<=n3+p+1;k++)
         {
          pi = (pp + j)*n3p + k;
          u[pi+dwe]=u[pi];
         }
   }

/* computing of coefficients */

  if (myid==nprocs-1)
    po = n1last+p+1;
  else
    po = n1+p;

  for (i=p;i<=po;i++)
   for (j=p;j<=n2+p+1;j++)
    for (k=p;k<=n3+p+1;k++)
     {
     
      ind = ((i*n2p+j)*n3p+k);
      idw = ind - dwe;
      ids = ind - dsn;
      idb = ind - 1;
      idws = idw - dsn;
      idwb = idw - 1;
      idsb = ids - 1;

     stred1[ind]=0.25*(u[idws]+u[ids]+u[ind]+u[idw]);
     stred2[ind]=0.25*(u[idwb]+u[idb]+u[ind]+u[idw]);
     stred3[ind]=0.25*(u[idsb]+u[idb]+u[ind]+u[ids]);
     }

  if (myid==nprocs-1)
    po = n1last+p;
  else
    po = n1+p-1;

 for (i=p;i<=po ;i++)
  for (j=p;j<=n2+p;j++)
   for (k=p;k<=n3+p;k++)
   {

    ind = ((i*n2p+j)*n3p+k);
    idw = ind - dwe;
    ide = ind + dwe;
    ids = ind - dsn;
    idn = ind + dsn;
    idb = ind - 1;
    idt = ind + 1;

    idnt = idn + 1;
    idet = ide + 1;
    iden = ide + dsn;

    pom1 = ih*(u[ind]-u[idw]);
    pom2 = ih*(stred1[idn]-stred1[ind]);
    pom3 = ih*(stred2[idt]-stred2[ind]);

    z1 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ide]-u[ind]);
    pom2 = ih*(stred1[iden]-stred1[ide]);
    pom3 = ih*(stred2[idet]-stred2[ide]);

    z2 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ind]-u[ids]);
    pom2 = ih*(stred1[ide]-stred1[ind]);
    pom3 = ih*(stred3[idt]-stred3[ind]);

    z3 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[idn]-u[ind]);
    pom2 = ih*(stred1[iden]-stred1[idn]);
    pom3 = ih*(stred3[idnt]-stred3[idn]);

    z4 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ind]-u[idb]);
    pom2 = ih*(stred2[ide]-stred2[ind]);
    pom3 = ih*(stred3[idn]-stred3[ind]);

    z5 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[idt]-u[ind]);
    pom2 = ih*(stred2[idet]-stred2[idt]);
    pom3 = ih*(stred3[idnt]-stred3[idt]);

    z6 = pom1*pom1+pom2*pom2+pom3*pom3;

    z7=(z1+z2+z3+z4+z5+z6)/6.;

    pom=koef*ginv(z7);

    aw1[ind]=pom*g(z1);
    ae1[ind]=pom*g(z2);
    as1[ind]=pom*g(z3);
    an1[ind]=pom*g(z4);
    ab1[ind]=pom*g(z5);
    at1[ind]=pom*g(z6);
    }

    if (convolution==1)
    {
     SubCoefficients();
     LinearSystemSolving();
    }

/* reflexions */

  if (myid==0)
    od = p;
  else
    od = p-1;

  if (myid==nprocs-1)
    po = n1last+p;
  else
    po = n1+p;


  for (i=od;i<=po;i++)
    for (j=p;j<=n2+p;j++)
     {
      pi = (i*n2p + j)*n3p + p;
      u[pi-1]=u[pi];
      u[pi+n3+1]=u[pi+n3];
     }

  for (i=od;i<=po;i++)
    for (k=0;k<=n3+p+1;k++)
     {
       pi = (i*n2p + p)*n3p + k;
       u[pi-n3p]=u[pi];
       pi += n2*n3p;
       u[pi+n3p]=u[pi];
     }

  if (myid==0)
   {

     pp = p*n2p;
     for (j=0;j<=n2+p+1;j++)
       for (k=0;k<=n3+p+1;k++)
         {
          pi = (pp + j)*n3p + k;
          u[pi-dwe]=u[pi];
         }
   }

  if (myid==nprocs-1)
   {
     pp = (n1last + p)*n2p;
     for (j=0;j<=n2+p+1;j++)
       for (k=0;k<=n3+p+1;k++)
         {
          pi = (pp + j)*n3p + k;
          u[pi+dwe]=u[pi];
         }
   }


/* computing of coefficients */

  if (myid==nprocs-1)
    po = n1last+p+1;
  else
    po = n1+p;

  for (i=p;i<=po;i++)
   for (j=p;j<=n2+p+1;j++)
    for (k=p;k<=n3+p+1;k++)
     {
     
      ind = ((i*n2p+j)*n3p+k);
      idw = ind - dwe;
      ids = ind - dsn;
      idb = ind - 1;
      idws = idw - dsn;
      idwb = idw - 1;
      idsb = ids - 1;

     stred1[ind]=0.25*(u[idws]+u[ids]+u[ind]+u[idw]);
     stred2[ind]=0.25*(u[idwb]+u[idb]+u[ind]+u[idw]);
     stred3[ind]=0.25*(u[idsb]+u[idb]+u[ind]+u[ids]);
     }

  if (myid==nprocs-1)
    po = n1last+p;
  else
    po = n1+p-1;

 for (i=p;i<=po ;i++)
  for (j=p;j<=n2+p;j++)
   for (k=p;k<=n3+p;k++)
   {

    ind = ((i*n2p+j)*n3p+k);
    idw = ind - dwe;
    ide = ind + dwe;
    ids = ind - dsn;
    idn = ind + dsn;
    idb = ind - 1;
    idt = ind + 1;

    idnt = idn + 1;
    idet = ide + 1;
    iden = ide + dsn;

    pom1 = ih*(u[ind]-u[idw]);
    pom2 = ih*(stred1[idn]-stred1[ind]);
    pom3 = ih*(stred2[idt]-stred2[ind]);

    z1 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ide]-u[ind]);
    pom2 = ih*(stred1[iden]-stred1[ide]);
    pom3 = ih*(stred2[idet]-stred2[ide]);

    z2 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ind]-u[ids]);
    pom2 = ih*(stred1[ide]-stred1[ind]);
    pom3 = ih*(stred3[idt]-stred3[ind]);

    z3 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[idn]-u[ind]);
    pom2 = ih*(stred1[iden]-stred1[idn]);
    pom3 = ih*(stred3[idnt]-stred3[idn]);

    z4 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[ind]-u[idb]);
    pom2 = ih*(stred2[ide]-stred2[ind]);
    pom3 = ih*(stred3[idn]-stred3[ind]);

    z5 = pom1*pom1+pom2*pom2+pom3*pom3;

    pom1 = ih*(u[idt]-u[ind]);
    pom2 = ih*(stred2[idet]-stred2[idt]);
    pom3 = ih*(stred3[idnt]-stred3[idt]);

    z6 = pom1*pom1+pom2*pom2+pom3*pom3;

    aw[ind] = aw1[ind]*pm(z1);
    ae[ind] = ae1[ind]*pm(z2);
    as[ind] = as1[ind]*pm(z3);
    an[ind] = an1[ind]*pm(z4);
    ab[ind] = ab1[ind]*pm(z5);
    at[ind] = at1[ind]*pm(z6);

    ap[ind]=1.0+aw[ind]+ae[ind]+as[ind]+an[ind]+
                ab[ind]+at[ind];
    b[ind]=uprev[ind];
    }
}

/*--------------------------------------------------------------------*/

/*
 * Performs one scale step: saves u into uprev, assembles the system with
 * Coefficients(), solves it with LinearSystemSolving(), and reports on
 * rank 0 the elapsed times together with the discrete L2 norm and the
 * summed absolute value of the change u - uprev, reduced over all ranks.
 */
void EllipticStep(void)
{
  int    i, j, k;
  double t_begin, t_end, reduced;
  double delta = 0., masa = 0.;

  /* Keep a copy of the image, including the upper neighbour slice. */
  po = (myid == nprocs-1) ? (n1last+p) : (n1+p);

  for (i = p; i <= po; ++i)
    {
      for (j = p; j <= n2+p; ++j)
        {
          for (k = p; k <= n3+p; ++k)
            {
              ind = (i*n2p + j)*n3p + k;
              uprev[ind] = u[ind];
            }
        }
    }

  t_begin = MPI_Wtime();
  Coefficients();
  t_end = MPI_Wtime();

  if (myid == 0)
    {
      printf("Processor %d - CPU time coeff in time step %d: %e secs\n",myid,kk,t_end-t_begin);
    }

  t_begin = MPI_Wtime();
  LinearSystemSolving();
  t_end = MPI_Wtime();

  if (myid == 0)
    {
      printf("Processor %d - CPU time system in time step %d: %e secs\n",myid,kk,t_end-t_begin);
    }

  /* Accumulate the change over the cells owned by this rank. */
  po = (myid == nprocs-1) ? (n1last+p) : (n1+p-1);

  for (i = p; i <= po; ++i)
    {
      for (j = p; j <= n2+p; ++j)
        {
          for (k = p; k <= n3+p; ++k)
            {
              ind = (i*n2p + j)*n3p + k;
              pom = u[ind]-uprev[ind];
              delta += sqr(pom);
              masa += fabs(pom);
            }
        }
    }

  MPI_Allreduce(&delta,&reduced,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  delta = reduced;

  MPI_Allreduce(&masa,&reduced,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  masa = reduced;

  /* Scale by the cell volume h^3. */
  masa = masa*h*h*h;
  delta = sqrt(delta*h*h*h);

  if (myid == 0)
    {
      printf("L_2 norma odchyliek iteracii = %e, rozdiel v mase iteracii = %e\n",delta,masa);
    }
}

/*--------------------------------------------------------------------*/

int main(int argc, char *argv[])
{
  int i,j,k;
  double CPUT1,CPUT2,cput1,cput2,zmena;


  MPI_Init(&argc, &argv);

  CPUT1=MPI_Wtime();

  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   name=(char *)malloc(200);
   namekoniec=(char *)malloc(50);
   namezac1=(char *)malloc(200);
   namezac0=(char *)malloc(200);
   namezac2=(char *)malloc(200);
   namezac=(char *)malloc(200);

  if (myid==0)
    {
    scanf("%s %s",namezac0,namezac1);
    
    pp = (int) strlen(namezac0);
    pi = (int) strlen(namezac1);
//    printf("tau, sigma, number of scale steps and periodicity of writing:\n");
    scanf("%d %d %d",&N1,&n2,&n3);
    scanf("%lf %lf",&K,&epsilon);
    scanf("%lf %lf %d %d",&tau,&sigma,&pck,&iz);
    }
    //printf("%d %d %d\n",myid,pp,pi);
    inext=myid+1;
	iprev=myid-1;
	if(myid==0)iprev=MPI_PROC_NULL;
	if(myid==nprocs-1)inext=MPI_PROC_NULL;
  MPI_Bcast(&N1, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&n2, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&n3, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&K, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&epsilon, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&tau, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&sigma, 1, MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_Bcast(&pck, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&iz, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&pp, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&pi, 1, MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(namezac0, pp+2, MPI_CHAR,0,MPI_COMM_WORLD);
  MPI_Bcast(namezac1, pi+2, MPI_CHAR,0,MPI_COMM_WORLD);
  
    
//  printf("After on myid=%d:\t%s\t%lf\t%lf\t%d %d\n",myid,namezac1,sigma,tau,pck,iz);
  //printf("%d %s %s\n",myid,namezac0,namezac1);
  strcpy(namezac,namezac0);
  strcpy(name,namezac);
  strcat(namezac0,namezac1); 

  N1 -= 1;
  n2 -= 1;
  n3 -= 1;
  
  n1 = ceil((int) N1 / nprocs)+1;
  n1p = n1 + 2*p + 1;
  n2p = n2 + 2*p + 1;
  n3p = n3 + 2*p + 1;
  n1last = N1-(nprocs-1)*n1;
  u = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  uprev = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  stred1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  stred2 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  stred3 = (double *) calloc(n1p * n2p * n3p , sizeof(double));

  aw = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  ae = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  as = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  an = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  ab = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  at = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  ap = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  b  = (double *) calloc(n1p * n2p * n3p , sizeof(double));

  aw1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  ae1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  as1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  an1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  ab1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));
  at1 = (double *) calloc(n1p * n2p * n3p , sizeof(double));

  h=0.01;
  ih = 1./h;
  koef=tau/(h*h);
  koefconv=sigma/(h*h);
  scale=0.0;
  skk = 0;
  kk = skk;

  dwe = n2p*n3p;
  dsn = n3p;

  ReadingImage();
  
  for (kk=skk+1;kk<=pck;kk++)
    {
     scale=scale+tau;

     cput1=MPI_Wtime();                                    

     EllipticStep();

     cput2=MPI_Wtime();    
     if (myid==0)
       printf("Processor %d - CPU time elliptic step in time step %d: %e secs\n",myid,kk,cput2-cput1);                                 
  
     if (kk % iz ==0)
      { 
       cput1=MPI_Wtime(); 
       WritingImage();  
       cput2=MPI_Wtime();
      if (myid==0)
         printf("Processor %d - CPU time writing: %e secs\n",myid,cput2-cput1);
      } 
     if (myid==0)
       printf("%3d .time step finished\n",kk);

    }    

  free(name);

  free(u);
  free(uprev);
  free(stred1);
  free(stred2);
  free(stred3);

  free(aw);
  free(ae);
  free(as);
  free(an);
  free(ab);
  free(at);
  free(ap);
  free(b);

  free(aw1);
  free(ae1);
  free(as1);
  free(an1);
  free(ab1);
  free(at1);

CPUT2=MPI_Wtime();
printf("Processor %d - total time: %e secs \n",myid,CPUT2-CPUT1);

MPI_Finalize();
}
