c----------------------------------------------------------------------- subroutine PRVDIST32_RANDOM(part,qm,edges,npp,nps,vtx,vty,vtz,vdx, 1vdy,vdz,npx,npy,npz,nx,ny,nz,ipbc,idimp,npmax,mblok,nblok,idps,sig 1x,sigy,sigz,x0,y0,z0,cx,cy,lquiet,ierr) c old quiet start c keep 1 + p^2 = gamma c for 3d code, this subroutine calculates initial particle co-ordinates c and velocities with tri-gaussian density and maxwellian velocity with c drift for distributed data with 2D spatial decomposition c part(1,n,m) = position x of particle n in partition m c part(2,n,m) = position y of particle n in partition m c part(3,n,m) = position z of particle n in partition m c part(4,n,m) = velocity vx of particle n in partition m c part(5,n,m) = velocity vy of particle n in partition m c part(6,n,m) = velocity vz of particle n in partition m c edges(1,m) = lower boundary in y of particle partition m c edges(2,m) = upper boundary in y of particle partition m c edges(3,m) = lower boundary in z of particle partition m c edges(4,m) = upper boundary in z of particle partition m c npp(m) = number of particles in partition m c nps(m) = starting address of particles in partition m c vtx/vty/vtz = thermal velocity of electrons in x/y/z direction c vdx/vdy/vdz = drift velocity of beam electrons in x/y/z direction c npx/npy/npz = initial number of particles distributed in x/y/z c direction c nx/ny/nz = system length in x/y/z direction c idimp = size of phase space = 6 c npmax = maximum number of particles in each partition c mblok/nblok = number of particle partitions in y/z c idps = number of partition boundaries c ipbc = particle boundary condition = (0,1,2,3) = c (none,xy periodic,xy reflecting, x reflecting/y periodic) c ierr = (0,1) = (no,yes) error condition exists c ranorm = gaussian random number with zero mean and unit variance implicit none integer nps,npp,npmax,nblok,npx,npy,npz,idimp,nx,ny,nz,idps,ierr integer mblok,ipbc real qm,sigx,sigy,sigz,x0,y0,z0 real cx,cy double precision random,ranorm real part,edges,vtx,vty,vtz,vdx,vdy,vdz dimension part(idimp,npmax,nblok) dimension edges(idps,nblok), npp(nblok), nps(nblok) dimension cx(0:2),cy(0:2) logical lquiet c local variables integer j,k,l,m,my,mz,moff,mnblok,k1,npt real tempx,tempy,tempxx,tempyy,x2,y2,tempz,tvtx,tvty,tvtz c borderlx(yz), lower bound; borderx(yz), upper bound. real borderlx,borderly,borderlz, borderx, bordery, borderz ierr = 0 npt = 1 x2 = 2.0 * x0 y2 = 2.0 * y0 borderlx = max((x0-5.0*sigx),1.0) borderly = max((y0-5.0*sigy),1.0) borderlz = max((z0-5.0*sigz),1.0) borderx = min((x0+5.0*sigx),float(nx-1)) bordery = min((y0+5.0*sigy),float(ny-1)) borderz = min((z0+5.0*sigz),float(nz-1)) j = 0 l = npz do while (j<npx) k = 0 do while (k<npy) l = l - npz do while (l<npz) 10 tempz = z0+sigz*ranorm() if (tempz>=(borderz) .or. tempz<=borderlz) goto 10 20 tempx = x0+sigx*ranorm() if (tempx>=(borderx) .or. tempx<=borderlx) goto 20 30 tempy = y0+sigy*ranorm() if (tempy>=(bordery) .or. tempy<=borderly) goto 30 c generate velocity tvtx = vtx*ranorm() + vdx tvty = vty*ranorm() + vdy tvtz = vtz*ranorm() + vdz tvtz = sqrt(tvtz*tvtz-1-tvtx*tvtx-tvty*tvty) do 110 mz = 1, nblok moff = mblok*(mz - 1) do 100 my = 1, mblok m = my + moff c check if particle belongs to this partition if ((tempy.ge.edges(1,m)) .and. (tempy.lt.edges(2,m)) .and. & & (tempz.ge.edges(3,m)) .and. (tempz.lt.edges(4,m)) ) then if (npt.lt.npmax) then npt = npp(m) + 1 part(3,npt,m) = tempz c calculate offset in x tempxx = -cx(0)*(part(3,npt,m)-z0)**2-cx(1)*(part(3,npt,m) & & - z0)-cx(2) part(1,npt,m) = tempx + tempxx c calculate offset in y tempyy = -cy(2)*(part(3,npt,m)-z0)**2-cy(1)*(part(3,npt,m) & & - z0)-cy(2) part(2,npt,m) = tempy + tempyy part(4,npt,m) = tvtx part(5,npt,m) = tvty part(6,npt,m) = tvtz part(7,npt,m) = qm npp(m) = npt else ierr = ierr + 1 endif c quiet start if (lquiet) then if (npt.lt.npmax) then npt = npp(m) + 1 part(3,npt,m) = tempz part(1,npt,m) = x2 - tempx + tempxx part(2,npt,m) = y2 - tempy + tempyy part(4,npt,m) = -tvtx part(5,npt,m) = -tvty part(6,npt,m) = tvtz part(7,npt,m) = qm npp(m) = npt else ierr = ierr + 1 endif endif endif 100 continue 110 continue l = l + 1 if (lquiet) l = l + 1 enddo k = k + 1 enddo j = j + 1 enddo return end c----------------------------------------------------------------------- subroutine PGPOST32L(part,q,npp,noff,idimp,npmax,mnblok,nxv,nypmx, 1nzpmx,idds) c for 3d code, this subroutine calculates particle charge density c using first-order linear interpolation, and distributed data c with 2D spatial decomposition c scalar version using guard cells, for distributed data c 33 flops/particle, 11 loads, 8 stores c input: all, output: q c charge density is approximated by values at the nearest grid points c q(n,m,l)=qm*(1.-dx)*(1.-dy)*(1.-dz) c q(n+1,m,l)=qm*dx*(1.-dy)*(1.-dz) c q(n,m+1,l)=qm*(1.-dx)*dy*(1.-dz) c q(n+1,m+1,l)=qm*dx*dy*(1.-dz) c q(n,m,l+1)=qm*(1.-dx)*(1.-dy)*dz c q(n+1,m,l+1)=qm*dx*(1.-dy)*dz c q(n,m+1,l+1)=qm*(1.-dx)*dy*dz c q(n+1,m+1,l+1)=qm*dx*dy*dz c where n,m,l = nearest grid points and dx = x-n, dy = y-m, dz = z-l c part(1,n,m) = position x of particle n in partition m c part(2,n,m) = position y of particle n in partition m c part(3,n,m) = position z of particle n in partition m c q(j,k,l,m) = charge density at grid point (j,kk,ll), c where kk = k + noff(1,m) - 1, and ll = l + noff(2,m) - 1 c npp(m) = number of particles in partition m c noff(1,m) = lowermost global gridpoint in y in particle partition m c noff(2,m) = backmost global gridpoint in z in particle partition m c qm = charge on particle, in units of e c idimp = size of phase space = 6 c npmax = maximum number of particles in each partition c mnblok = number of particle partitions. c nxv = first dimension of charge array, must be >= nx+1 c nypmx = maximum size of particle partition in y, including guard cells c nzpmx = maximum size of particle partition in z, including guard cells c idds = dimensionality of domain decomposition dimension part(idimp,npmax,mnblok), q(nxv,nypmx,nzpmx,mnblok) dimension npp(mnblok), noff(idds,mnblok) real qm do 20 m = 1, mnblok mnoff = noff(1,m) - 1 lnoff = noff(2,m) - 1 c find interpolation weights do 10 j = 1, npp(m) nn = part(1,j,m) mm = part(2,j,m) ll = part(3,j,m) qm = part(7,j,m) dxp = qm*(part(1,j,m) - float(nn)) dyp = part(2,j,m) - float(mm) dzp = part(3,j,m) - float(ll) nn = nn + 1 amx = qm - dxp amy = 1. - dyp np = nn + 1 mm = mm - mnoff dx1 = dxp*dyp dyp = amx*dyp mp = mm + 1 amx = amx*amy amz = 1. - dzp ll = ll - lnoff amy = dxp*amy lp = ll + 1 c deposit charge q(nn,mm,ll,m) = q(nn,mm,ll,m) + amx*amz q(np,mm,ll,m) = q(np,mm,ll,m) + amy*amz q(nn,mp,ll,m) = q(nn,mp,ll,m) + dyp*amz q(np,mp,ll,m) = q(np,mp,ll,m) + dx1*amz q(nn,mm,lp,m) = q(nn,mm,lp,m) + amx*dzp q(np,mm,lp,m) = q(np,mm,lp,m) + amy*dzp q(nn,mp,lp,m) = q(nn,mp,lp,m) + dyp*dzp q(np,mp,lp,m) = q(np,mp,lp,m) + dx1*dzp 10 continue 20 continue return end c----------------------------------------------------------------------- subroutine PGBPUSH32L_QP(part,fxyz,bxyz,npp,noff,qbm,dt,dtc,ek,nx, 1ny,nz,idimp,npmax,mnblok,nxv,nypmx,nzpmx,idds,ipbc,deltax,deltaz , 2cofd) double precision sum1 dimension part(idimp,npmax,mnblok) dimension fxyz(3,nxv,nypmx,nzpmx,mnblok) dimension bxyz(3,nxv,nypmx,nzpmx,mnblok) dimension npp(mnblok), noff(idds,mnblok) real qtmg,dtgx,dtgy,cofd real delx, dely, delz, ngamma real p2t,p2l, cofd1 real dtc_over_deltax, dtc_over_deltaz real one_minus_vz0, one_minus_vz qtmh = qbm*dt sum1 = 0.0d0 dtc_over_deltax = dtc/deltax dtc_over_deltaz = dtc/deltaz one_minus_vz0 = 0. cofd1 = cofd/dt c set boundary values if (ipbc.eq.1) then edgelx = 1. edgely = 1. edgelz = 1. edgerx = float(nx-1) edgery = float(ny-1) edgerz = float(nz-1) endif do 20 m = 1, mnblok mnoff = noff(1,m) - 1 lnoff = noff(2,m) - 1 c find interpolation weights inpp = npp(m) do 10 j = 1, npp(m) if (j.gt.inpp) exit 11 nn = part(1,j,m) mm = part(2,j,m) ll = part(3,j,m) dxp = part(1,j,m) - float(nn) dyp = part(2,j,m) - float(mm) dzp = part(3,j,m) - float(ll) nn = nn + 1 amx = 1. - dxp amy = 1. - dyp np = nn + 1 mm = mm - mnoff dx1 = dxp*dyp dyp = amx*dyp mp = mm + 1 amx = amx*amy amz = 1. - dzp ll = ll - lnoff amy = dxp*amy lp = ll + 1 c find electric field dx = amz*(amx*fxyz(1,nn,mm,ll,m) + amy*fxyz(1,np,mm,ll,m) + dyp*fx 1yz(1,nn,mp,ll,m) + dx1*fxyz(1,np,mp,ll,m)) + dzp*(amx*fxyz(1,nn,mm 2,lp,m) + amy*fxyz(1,np,mm,lp,m) + dyp*fxyz(1,nn,mp,lp,m) + dx1*fxy 3z(1,np,mp,lp,m)) dy = amz*(amx*fxyz(2,nn,mm,ll,m) + amy*fxyz(2,np,mm,ll,m) + dyp*fx 1yz(2,nn,mp,ll,m) + dx1*fxyz(2,np,mp,ll,m)) + dzp*(amx*fxyz(2,nn,mm 2,lp,m) + amy*fxyz(2,np,mm,lp,m) + dyp*fxyz(2,nn,mp,lp,m) + dx1*fxy 3z(2,np,mp,lp,m)) dz = amz*(amx*fxyz(3,nn,mm,ll,m) + amy*fxyz(3,np,mm,ll,m) + dyp*fx 1yz(3,nn,mp,ll,m) + dx1*fxyz(3,np,mp,ll,m)) + dzp*(amx*fxyz(3,nn,mm 2,lp,m) + amy*fxyz(3,np,mm,lp,m) + dyp*fxyz(3,nn,mp,lp,m) + dx1*fxy 3z(3,np,mp,lp,m)) c find magnetic field ox = amz*(amx*bxyz(1,nn,mm,ll,m) + amy*bxyz(1,np,mm,ll,m) + dyp*bx 1yz(1,nn,mp,ll,m) + dx1*bxyz(1,np,mp,ll,m)) + dzp*(amx*bxyz(1,nn,mm 2,lp,m) + amy*bxyz(1,np,mm,lp,m) + dyp*bxyz(1,nn,mp,lp,m) + dx1*bxy 3z(1,np,mp,lp,m)) oy = amz*(amx*bxyz(2,nn,mm,ll,m) + amy*bxyz(2,np,mm,ll,m) + dyp*bx 1yz(2,nn,mp,ll,m) + dx1*bxyz(2,np,mp,ll,m)) + dzp*(amx*bxyz(2,nn,mm 2,lp,m) + amy*bxyz(2,np,mm,lp,m) + dyp*bxyz(2,nn,mp,lp,m) + dx1*bxy 3z(2,np,mp,lp,m)) dx = dx - oy dy = dy + ox delx = qtmh*dx dely = qtmh*dy delz = qtmh*dz c half acceleration c momentums are normalized to c temporarily dx = part(4,j,m) + delx dy = part(5,j,m) + dely dz = part(6,j,m) + delz part(4,j,m) = dx part(5,j,m) = dy c update gamma and inverse gamma p2t = dx*dx + dy*dy p2l = dz*dz p2 = p2t + p2l ngamma = sqrt(1.0 + p2) dtgx = dtc_over_deltax/ngamma dtgy = dtc_over_deltax/ngamma one_minus_vz = (1.0+p2t)/(dz*(dz+ngamma)) c include radiation damping in the z direction c note: this is not time-centered c print *,"cofd1,ngamma,dz,delx,dely=",cofd1,ngamma, dz, delx, dely,& c &cofd1*ngamma*((delx*delx+dely*dely)*dz-delz*(delx*dx+dely*dy)) dz = dz - cofd1*ngamma*((delx*delx+dely*dely)*dz-delz*(delx*dx+del& &y*dy)) c comment this line to shut off longitudinal push part(6,j,m) = dz c new position in grid unit dx = part(1,j,m) + dx*dtgx dy = part(2,j,m) + dy*dtgy c dz = part(3,j,m) + (one_minus_vz - one_minus_vz0)*dtc_over_deltaz dz = part(3,j,m) + one_minus_vz*dtc_over_deltaz c dropping boundary conditions in x and y if (ipbc.eq.1) then if ((dx.lt.edgelx).or.(dx.ge.edgerx)) then if (j.eq.inpp) then inpp = inpp - 1 exit end if part(:,j,m) = part(:,inpp,m) inpp = inpp -1 goto 11 endif if ((dy.lt.edgely).or.(dy.ge.edgery)) then if (j.eq.inpp) then inpp = inpp - 1 exit end if part(:,j,m) = part(:,inpp,m) inpp = inpp -1 goto 11 endif if ((dz.lt.edgelz).or.(dz.ge.edgerz)) then if (j.eq.inpp) then inpp = inpp - 1 exit end if part(:,j,m) = part(:,inpp,m) inpp = inpp -1 goto 11 endif endif c set new position part(1,j,m) = dx part(2,j,m) = dy c comment this line to shut off longitudinal push part(3,j,m) = dz 10 continue npp(m) = inpp 20 continue return end c----------------------------------------------------------------------- subroutine PMOVE32(part,edges,npp,sbufr,sbufl,rbufr,rbufl,ihole,pb 1uff,jsr,jsl,jss,ny,nz,kstrt,nvpy,nvpz,idimp,npmax,mblok,nblok,idps 2,nbmax,idds,ntmax,tag1,tag2,id,info) c modified version with particle moving in y only c this subroutine moves particles into appropriate spatial regions c periodic boundary conditions with 2D spatial decomposition c part(1,n,m) = position x of particle n in partition m c part(2,n,m) = position y of particle n in partition m c part(3,n,m) = position z of particle n in partition m c part(4,n,m) = velocity vx of particle n in partition m c part(5,n,m) = velocity vy of particle n in partition m c part(6,n,m) = velocity vz of particle n in partition m c edges(1,m) = lower boundary in y of particle partition m c edges(2,m) = upper boundary in y of particle partition m c edges(3,m) = back boundary in z of particle partition m c edges(4,m) = front boundary in z of particle partition m c npp(m) = number of particles in partition m c sbufl = buffer for particles being sent to back processor c sbufr = buffer for particles being sent to front processor c rbufl = buffer for particles being received from back processor c rbufr = buffer for particles being received from front processor c pbuff = buffer for particles being sent to next pipeline stage c ihole = location of holes left in particle arrays c jsl(idds,m) = number of particles going back in particle partition m c jsr(idds,m) = number of particles going front in particle partition m c jss(idds,m) = scratch array for particle partition m c ny/nz = system length in y/z direction c kstrt = starting data block number c nvpy/nvpz = number of real or virtual processors in y/z c idimp = size of phase space = 6 c npmax = maximum number of particles in each partition c mblok/nblok = number of particle partitions in y/z c idps = number of particle partition boundaries c nbmax = size of buffers for passing particles between processors c idds = dimensionality of domain decomposition c ntmax = size of hole array for particles leaving processors c info = status information c info(1) = ierr = (0,N) = (no,yes) error condition exists c info(2) = maximum number of particles per processor c info(3) = minimum number of particles per processor c info(4) = maximum number of buffer overflows in y c info(5) = maximum number of buffer overflows in z c info(6) = maximum number of particle passes required in y c info(7) = maximum number of particle passes required in z c info(8) = total number of particles on entry c info(9) = difference of total number of particles on exit implicit none include "mpif.h" real part, edges, sbufr, sbufl, rbufr, rbufl, pbuff integer npp, ihole, jsr, jsl, jss, info integer ny, nz, kstrt, nvpy, nvpz, idimp, npmax, mblok, nblok integer idps, nbmax, idds, ntmax, tag1, tag2, id dimension part(idimp,npmax,mblok*nblok) dimension edges(idps,mblok*nblok), npp(mblok*nblok) dimension sbufl(idimp,nbmax,mblok*nblok) dimension sbufr(idimp,nbmax,mblok*nblok) dimension rbufl(idimp,nbmax,mblok*nblok) dimension rbufr(idimp,nbmax,mblok*nblok) dimension pbuff(idimp,nbmax) dimension jsl(idds,mblok*nblok), jsr(idds,mblok*nblok) dimension jss(idds,mblok*nblok) dimension ihole(ntmax,mblok*nblok) dimension info(9) c common block for parallel processing integer nproc, lgrp, lstat, mreal, mint, mcplx, mdouble, lworld c lstat = length of status array parameter(lstat=10) c lgrp = current communicator c mint = default datatype for integers c mreal = default datatype for reals common /PPARMS/ nproc, lgrp, mreal, mint, mcplx, mdouble, lworld c local data integer iy, iz parameter(iy=2,iz=3) integer ierr, ic, js, ks, mnblok, i, n, m, my, mz, moff, nvp, iter integer npr, nps, npt, kb, kl, kr, j, j1, j2, nbsize, nter, mter integer itermax integer msid, istatus integer ibflg, iwork double precision bflg, work real an, xt dimension msid(4), istatus(lstat) dimension ibflg(4), iwork(4) dimension bflg(2), work(2) dimension kb(2) ks = (kstrt - 1)/nvpy js = kstrt - nvpy*ks - 2 ks = ks - 1 mnblok = mblok*nblok nbsize = idimp*nbmax do 5 j = 1, 9 info(j) = 0 5 continue c buffer outgoing particles, first in y then in z direction ic = iz nvp = nvpy*nvpz an = float(nz) n = 2 kl = kstrt - nvpy if (kl.lt.1) go to 21 call MPI_IRECV(rbufl,nbsize,mreal,kl-1,tag1,lworld,msid(1),ierr) call MPI_WAIT(msid(1),istatus,ierr) call MPI_GET_COUNT(istatus,mreal,nps,ierr) jsl(2,1) = nps/idimp jss(2,1) = npp(1) + jsl(2,1) if (jss(2,1).le.npmax) then do 11 j = 1,jsl(2,1) do 8 i = 1, idimp part(i,j+npp(1),1) = rbufl(i,j,1) 8 continue 11 continue npp(1) = jss(2,1) else write (2,*) 'particle overflow', jss(2,1) info(1) = jss(2,1) return endif 21 iter = 2 nter = 0 mter = 0 do 61 mz = 1, nblok moff = mblok*(mz - 1) do 51 my = 1, mblok m = my + moff jsl(1,m) = 0 jsr(1,m) = 0 jss(2,m) = 0 do 31 j = 1, npp(m) xt = part(ic,j,m) c particles going backward, not going to happen if (xt.lt.edges(2*n-1,m)) then jss(2,m) = 1 write (2,*) 'Error: particles move to the previous stage' go to 41 c particles going forward else if (xt.ge.edges(2*n,m)) then if (jsr(1,m).lt.nbmax) then jsr(1,m) = jsr(1,m) + 1 do 28 i = 1, idimp pbuff(i,jsr(1,m)) = part(i,j,m) 28 continue ihole(jsr(1,m),m) = j else jss(2,m) = 1 go to 41 endif endif 31 continue 41 jss(1,m) = jsl(1,m) + jsr(1,m) 51 continue 61 continue c check for full buffer condition nps = 0 do 101 m = 1, mnblok nps = max0(nps,jss(2,m)) 101 continue if (nps.gt.0) then info(1) = nps write (2,*) 'particle buffer full' return endif c copy particle buffers 111 iter = iter + 2 mter = mter + 1 do 151 mz = 1, nblok moff = mblok*(mz - 1) do 141 my = 1, mblok m = my + moff kr = kstrt + nvpy if (kr.gt.nvp) go to 155 c send particles call MPI_ISEND(pbuff,idimp*jsr(1,m),mreal,kr-1,tag2,lworld,id,ierr 1) 141 continue 151 continue c fill the holes 155 npt = npp(1) do 291 j = 1, jsr(1,1) j1 = ihole((jsr(1,1)-j+1),1) if (j1.lt.npt) then do 274 i = 1, idimp part(i,j1,1) = part(i,npt,m) 274 continue npt = npt - 1 else npt = npt - 1 endif 291 continue npp(1) = npt itermax = 20000 c buffer outgoing particles, first in y then in z direction do 300 n = 1, 1 if (n.eq.1) then ic = iy nvp = nvpy an = float(ny) elseif (n.eq.2) then ic = iz nvp = nvpz an = float(nz) endif iter = 2 nter = 0 20 mter = 0 do 60 mz = 1, nblok moff = mblok*(mz - 1) kb(2) = mz + ks do 50 my = 1, mblok m = my + moff kb(1) = my + js jsl(1,m) = 0 jsr(1,m) = 0 jss(2,m) = 0 do 30 j = 1, npp(m) xt = part(ic,j,m) c particles going down or backward if (xt.lt.edges(2*n-1,m)) then if (jsl(1,m).lt.nbmax) then jsl(1,m) = jsl(1,m) + 1 if (kb(n).eq.0) xt = xt + an do 23 i = 1, idimp sbufl(i,jsl(1,m),m) = part(i,j,m) 23 continue sbufl(ic,jsl(1,m),m) = xt ihole(jsl(1,m)+jsr(1,m),m) = j else jss(2,m) = 1 go to 40 endif c particles going up or forward else if (xt.ge.edges(2*n,m)) then if (jsr(1,m).lt.nbmax) then jsr(1,m) = jsr(1,m) + 1 if ((kb(n)+1).eq.nvp) xt = xt - an do 27 i = 1, idimp sbufr(i,jsr(1,m),m) = part(i,j,m) 27 continue sbufr(ic,jsr(1,m),m) = xt ihole(jsl(1,m)+jsr(1,m),m) = j else jss(2,m) = 1 go to 40 endif endif 30 continue 40 jss(1,m) = jsl(1,m) + jsr(1,m) 50 continue 60 continue c check for full buffer condition nps = 0 do 100 m = 1, mnblok nps = max0(nps,jss(2,m)) 100 continue ibflg(3) = nps c copy particle buffers 110 iter = iter + 2 mter = mter + 1 do 150 mz = 1, nblok moff = mblok*(mz - 1) do 140 my = 1, mblok m = my + moff kb(1) = my + js kb(2) = mz + ks c get particles from below and above or back and front kl = kb(n) kb(n) = kl + 1 if (kb(n).ge.nvp) kb(n) = kb(n) - nvp kr = kb(1) + nvpy*kb(2) + 1 kb(n) = kl - 1 if (kb(n).lt.0) kb(n) = kb(n) + nvp kl = kb(1) + nvpy*kb(2) + 1 c this segment is used for shared memory computers c jsl(2,m) = jsr(1,kl) c do 120 j = 1, jsl(2,m) c do 115 i = 1, idimp c rbufl(i,j,m) = sbufr(i,j,kl) c 115 continue c 120 continue c jsr(2,m) = jsl(1,kr) c do 130 j = 1, jsr(2,m) c do 125 i = 1, idimp c rbufr(i,j,m) = sbufl(i,j,kr) c 125 continue c 130 continue c this segment is used for mpi computers c post receive call MPI_IRECV(rbufl,nbsize,mreal,kl-1,iter-1,lworld,msid(1),ierr) call MPI_IRECV(rbufr,nbsize,mreal,kr-1,iter,lworld,msid(2),ierr) c send particles call MPI_ISEND(sbufr,idimp*jsr(1,m),mreal,kr-1,iter-1,lworld,msid( 13),ierr) call MPI_ISEND(sbufl,idimp*jsl(1,m),mreal,kl-1,iter,lworld,msid(4) 1,ierr) c wait for particles to arrive call MPI_WAIT(msid(1),istatus,ierr) call MPI_GET_COUNT(istatus,mreal,nps,ierr) jsl(2,m) = nps/idimp call MPI_WAIT(msid(2),istatus,ierr) call MPI_GET_COUNT(istatus,mreal,nps,ierr) jsr(2,m) = nps/idimp 140 continue 150 continue c check if particles must be passed further nps = 0 do 180 m = 1, mnblok c check if any particles coming from above or front belong here jsl(1,m) = 0 jsr(1,m) = 0 jss(2,m) = 0 do 160 j = 1, jsr(2,m) if (rbufr(ic,j,m).lt.edges(2*n-1,m)) jsl(1,m) = jsl(1,m) + 1 if (rbufr(ic,j,m).ge.edges(2*n,m)) jsr(1,m) = jsr(1,m) + 1 160 continue if (jsr(1,m).ne.0) then if (n.eq.1) then write (2,*) 'Info:',jsr(1,m),' particles returning above' elseif (n.eq.2) then write (2,*) 'Info:',jsr(1,m),' particles returning front' endif endif c check if any particles coming from below or back belong here do 170 j = 1, jsl(2,m) if (rbufl(ic,j,m).ge.edges(2*n,m)) jsr(1,m) = jsr(1,m) + 1 if (rbufl(ic,j,m).lt.edges(2*n-1,m)) jss(2,m) = jss(2,m) + 1 170 continue if (jss(2,m).ne.0) then if (n.eq.1) then write (2,*) 'Info:',jss(2,m),' particles returning below' elseif (n.eq.2) then write (2,*) 'Info:',jss(2,m),' particles returning back' endif endif jsl(1,m) = jsl(1,m) + jss(2,m) nps = max0(nps,jsl(1,m)+jsr(1,m)) 180 continue ibflg(2) = nps c make sure sbufr and sbufl have been sent call MPI_WAIT(msid(3),istatus,ierr) call MPI_WAIT(msid(4),istatus,ierr) if (nps.eq.0) go to 240 c remove particles which do not belong here do 230 mz = 1, nblok moff = mblok*(mz - 1) kb(2) = mz + ks do 220 my = 1, mblok m = my + moff kb(1) = my + js c first check particles coming from above or front jsl(1,m) = 0 jsr(1,m) = 0 jss(2,m) = 0 do 190 j = 1, jsr(2,m) xt = rbufr(ic,j,m) c particles going down or back if (xt.lt.edges(2*n-1,m)) then jsl(1,m) = jsl(1,m) + 1 if (kb(n).eq.0) xt = xt + an rbufr(ic,j,m) = xt do 183 i = 1, idimp sbufl(i,jsl(1,m),m) = rbufr(i,j,m) 183 continue c particles going up or front, should not happen elseif (xt.ge.edges(2*n,m)) then jsr(1,m) = jsr(1,m) + 1 if ((kb(n)+1).eq.nvp) xt = xt - an rbufr(ic,j,m) = xt do 185 i = 1, idimp sbufr(i,jsr(1,m),m) = rbufr(i,j,m) 185 continue c particles staying here else jss(2,m) = jss(2,m) + 1 do 187 i = 1, idimp rbufr(i,jss(2,m),m) = rbufr(i,j,m) 187 continue endif 190 continue jsr(2,m) = jss(2,m) c next check particles coming from below or back jss(2,m) = 0 do 200 j = 1, jsl(2,m) xt = rbufl(ic,j,m) c particles going up or front if (xt.ge.edges(2*n,m)) then if (jsr(1,m).lt.nbmax) then jsr(1,m) = jsr(1,m) + 1 if ((kb(n)+1).eq.nvp) xt = xt - an rbufl(ic,j,m) = xt do 193 i = 1, idimp sbufr(i,jsr(1,m),m) = rbufl(i,j,m) 193 continue else jss(2,m) = 2*npmax go to 210 endif c particles going down back, should not happen elseif (xt.lt.edges(2*n-1,m)) then if (jsl(1,m).lt.nbmax) then jsl(1,m) = jsl(1,m) + 1 if (kb(n).eq.0) xt = xt + an rbufl(ic,j,m) = xt do 195 i = 1, idimp sbufl(i,jsl(1,m),m) = rbufl(i,j,m) 195 continue else jss(2,m) = 2*npmax go to 210 endif c particles staying here else jss(2,m) = jss(2,m) + 1 do 197 i = 1, idimp rbufl(i,jss(2,m),m) = rbufl(i,j,m) 197 continue endif 200 continue 210 jsl(2,m) = jss(2,m) 220 continue 230 continue c check if move would overflow particle array 240 nps = 0 npt = npmax do 250 m = 1, mnblok jss(2,m) = npp(m) + jsl(2,m) + jsr(2,m) - jss(1,m) nps = max0(nps,jss(2,m)) npt = min0(npt,jss(2,m)) 250 continue ibflg(1) = nps ibflg(4) = -npt iwork = ibflg call MPI_ALLREDUCE(iwork,ibflg,4,mint,MPI_MAX,lgrp,ierr) info(2) = ibflg(1) info(3) = -ibflg(4) ierr = ibflg(1) - npmax if (ierr.gt.0) then write (2,*) 'particle overflow error, ierr = ', ierr info(1) = ierr return endif c distribute incoming particles from buffers do 290 m = 1, mnblok c distribute particles coming from below or back into holes jss(2,m) = min0(jss(1,m),jsl(2,m)) do 260 j = 1, jss(2,m) do 255 i = 1, idimp part(i,ihole(j,m),m) = rbufl(i,j,m) 255 continue 260 continue if (jss(1,m).gt.jsl(2,m)) then jss(2,m) = min0(jss(1,m)-jsl(2,m),jsr(2,m)) else jss(2,m) = jsl(2,m) - jss(1,m) endif do 270 j = 1, jss(2,m) c no more particles coming from below or back c distribute particles coming from above or front into holes if (jss(1,m).gt.jsl(2,m)) then do 263 i = 1, idimp part(i,ihole(j+jsl(2,m),m),m) = rbufr(i,j,m) 263 continue else c no more holes c distribute remaining particles from below or back into bottom do 267 i = 1, idimp part(i,j+npp(m),m) = rbufl(i,j+jss(1,m),m) 267 continue endif 270 continue if (jss(1,m).le.jsl(2,m)) then npp(m) = npp(m) + (jsl(2,m) - jss(1,m)) jss(1,m) = jsl(2,m) endif jss(2,m) = jss(1,m) - (jsl(2,m) + jsr(2,m)) if (jss(2,m).gt.0) then jss(1,m) = (jsl(2,m) + jsr(2,m)) jsr(2,m) = jss(2,m) else jss(1,m) = jss(1,m) - jsl(2,m) jsr(2,m) = -jss(2,m) endif do 280 j = 1, jsr(2,m) c holes left over c fill up remaining holes in particle array with particles from bottom if (jss(2,m).gt.0) then j1 = npp(m) - j + 1 j2 = jss(1,m) + jss(2,m) - j + 1 if (j1.gt.ihole(j2,m)) then c move particle only if it is below current hole do 273 i = 1, idimp part(i,ihole(j2,m),m) = part(i,j1,m) 273 continue endif else c no more holes c distribute remaining particles from above or front into bottom do 277 i = 1, idimp part(i,j+npp(m),m) = rbufr(i,j+jss(1,m),m) 277 continue endif 280 continue if (jss(2,m).gt.0) then npp(m) = npp(m) - jsr(2,m) else npp(m) = npp(m) + jsr(2,m) endif jss(1,m) = 0 290 continue c check if any particles have to be passed further info(5+n) = max0(info(5+n),mter) if (ibflg(2).gt.0) then write (2,*) 'Info: particles being passed further = ', ibflg(2) if (ibflg(3).gt.0) ibflg(3) = 1 if (iter.lt.itermax) go to 110 ierr = -((iter-2)/2) write (2,*) 'Iteration overflow, iter = ', ierr info(1) = ierr go to 320 endif c check if buffer overflowed and more particles remain to be checked if (ibflg(3).gt.0) then nter = nter + 1 info(3+n) = nter write (2,*) "new loop, nter=", nter go to 20 endif 300 continue c information 320 if (nter.gt.0) then write (2,*) 'Info: ', nter, ' buffer overflows, nbmax=', nbmax endif return end c----------------------------------------------------------------------- function ranorm() c this program calculates a random number y from a gaussian distribution c with zero mean and unit variance, according to the method of c mueller and box: c y(k) = (-2*ln(x(k)))**1/2*sin(2*pi*x(k+1)) c y(k+1) = (-2*ln(x(k)))**1/2*cos(2*pi*x(k+1)), c where x is a random number uniformly distributed on (0,1). c written for the ibm by viktor k. decyk, ucla integer r1,r2,r4,r5 double precision ranorm,h1l,h1u,h2l,r0,r3,asc,bsc,temp save iflg,r1,r2,r4,r5,h1l,h1u,h2l,r0 data r1,r2,r4,r5 /885098780,1824280461,1396483093,55318673/ data h1l,h1u,h2l /65531.0d0,32767.0d0,65525.0d0/ data iflg,r0 /0,0.0d0/ if (iflg.eq.0) go to 10 ranorm = r0 r0 = 0.0d0 iflg = 0 return 10 isc = 65536 asc = dble(isc) bsc = asc*asc i1 = r1 - (r1/isc)*isc r3 = h1l*dble(r1) + asc*h1u*dble(i1) i1 = r3/bsc r3 = r3 - dble(i1)*bsc bsc = 0.5d0*bsc i1 = r2/isc isc = r2 - i1*isc r0 = h1l*dble(r2) + asc*h1u*dble(isc) asc = 1.0d0/bsc isc = r0*asc r2 = r0 - dble(isc)*bsc r3 = r3 + (dble(isc) + 2.0d0*h1u*dble(i1)) isc = r3*asc r1 = r3 - dble(isc)*bsc temp = dsqrt(-2.0d0*dlog((dble(r1) + dble(r2)*asc)*asc)) isc = 65536 asc = dble(isc) bsc = asc*asc i1 = r4 - (r4/isc)*isc r3 = h2l*dble(r4) + asc*h1u*dble(i1) i1 = r3/bsc r3 = r3 - dble(i1)*bsc bsc = 0.5d0*bsc i1 = r5/isc isc = r5 - i1*isc r0 = h2l*dble(r5) + asc*h1u*dble(isc) asc = 1.0d0/bsc isc = r0*asc r5 = r0 - dble(isc)*bsc r3 = r3 + (dble(isc) + 2.0d0*h1u*dble(i1)) isc = r3*asc r4 = r3 - dble(isc)*bsc r0 = 6.28318530717959d0*((dble(r4) + dble(r5)*asc)*asc) ranorm = temp*dsin(r0) r0 = temp*dcos(r0) iflg = 1 return end c-----------------------------------------------------------------------