Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- program Parallel_Bratu_2D
- implicit none
- include 'mpif.h'
- ! 2-dimensional non linear problem
- ! biquadratic finite element basis functions
- integer,parameter::nemax=5635,nnmax=23011,m=20
- integer*8:: nzeros
- integer:: nex=0,ney=0,nnx=0,nny=0,ne=0,np=0,nop(nemax,9),nell=0
- real:: xorigin=0,yorigin=0,xlast=0,ylast=0,deltax=0,deltay=0,xpt(nnmax),ypt(nnmax)
- real*8:: lamda=0,alfa=0,wtime_i=0,wtime_f=0,wtime=0
- real*8:: r1(nnmax),u(nnmax),d(nnmax),rest
- real*8,allocatable::AA(:)
- integer,allocatable:: JA(:)
- integer:: i,j,N=0,Nz=0,ncod(nnmax),p,q,iterat,dim=0,dim_buf=0
- integer:: k=0,l=0,indexAA(2),init_el=0,fin_el=0
- integer,allocatable:: IA(:),inner(:)
- real*8:: value=0,time=0
- !MPI variables
- integer:: nump,rank,ierr,tag1=0,tag2=0,right=0,left=0
- integer:: stats(MPI_status_size,8),reqs(8)
- real*8,allocatable:: buffer1(:),buffer2(:),buffer3(:),buffer4(:)
- call MPI_init(ierr)
- if (ierr .ne. MPI_success) then
- write(*,*) 'Error starting MPI program'
- call MPI_finalize(ierr)
- endif
- wtime_i=MPI_wtime()
- call MPI_comm_rank(MPI_comm_world,rank,ierr)
- call MPI_comm_size(MPI_comm_world,nump,ierr)
- !ORISMOS alfa,lamda
- alfa=0
- lamda=2
- !Mesh construction
- call xydiscr(nex,ney,xorigin,yorigin,xlast,ylast,deltax,deltay)
- call nodnumb(nemax,nnmax,ne,nnx,nny,nex,ney,np,nop)
- call xycoord(nemax,nnmax,xpt,ypt,xorigin,yorigin,nnx,nny,deltax,deltay)
- if (nex<nump) then
- write(*,*) 'Condition nex>nump not met.Parallel solving not possible'
- call MPI_finalize(ierr)
- endif
- !orismos N,m gia GMRES,dim_buf gia size_of buffer
- N=np
- nzeros=0.008*N*(N/nump)
- dim_buf=nny+2
- allocate(AA(nzeros))
- allocate(JA(nzeros))
- allocate(inner(np))
- allocate(buffer1(dim_buf))
- allocate(buffer2(dim_buf))
- allocate(buffer3(dim_buf))
- allocate(buffer4(dim_buf))
- !orismos diastasis IA gia CSR
- dim=np+1
- allocate(IA(dim))
- !Prepare for Dirichlet Boundary conditions
- do i=1,np
- inner(i)=-1
- ncod(i)=0
- enddo
- !markarisma aristera
- do i=1,nny
- ncod(i)=1
- enddo
- !markarisma katw
- do i=1,(np-nny+1),nny
- ncod(i)=1
- enddo
- !markarisma panw
- do i=nny,np,nny
- ncod(i)=1
- enddo
- !markarisma dexia
- do i=(np-nny+1),np
- ncod(i)=1
- enddo
- iterat=1
- do while (iterat<=4)
- iterat=iterat+1
- !arxikopoiisi
- do i=1,dim
- IA(i)=i
- enddo
- do i=1,nzeros
- AA(i)=0
- JA(i)=0
- enddo
- do i=1,np
- r1(i)=0
- d(i)=0
- enddo
- Nz=0
- !KSEKINIMA PARALLEL
- !kathe process sarwnei ena kommati tou mesh->sugekrimena elements
- if(mod(ne,nump)==0) then
- init_el=(rank*int(ne/(nump))+1)
- fin_el=(rank+1)*int(ne/nump)
- do nell=init_el,fin_el
- call abfind(nemax,nnmax,nell,r1,nop,xpt,ypt,u,lamda,alfa,value,AA,JA,IA,Nz,ncod,np,dim,nzeros)
- enddo
- else
- if(rank/=(nump-1)) then
- init_el=(rank*int(ne/nump)+rank+1)
- fin_el=((rank+1)*(int(ne/nump)+1))
- do nell=init_el,((rank+1)*(int(ne/nump)+1))
- call abfind(nemax,nnmax,nell,r1,nop,xpt,ypt,u,lamda,alfa,value,AA,JA,IA,Nz,ncod,np,dim,nzeros)
- enddo
- else if(rank==(nump-1)) then
- init_el=(rank*int(ne/nump)+rank+1)
- fin_el=((rank+1)*(int(ne/nump))+mod(ne,nump))
- do nell=init_el,fin_el
- call abfind(nemax,nnmax,nell,r1,nop,xpt,ypt,u,lamda,alfa,value,AA,JA,IA,Nz,ncod,np,dim,nzeros)
- enddo
- endif
- endif
- !indexAA periexei tin prwti kai teleutaia mi mideniki grammi gia ton AAlocal
- if(iterat==2) then
- i=0
- if (AA(1)/=0) then
- indexAA(1)=1
- else
- do while (AA(IA(i))==0)
- i=i+1
- enddo
- indexAA(1)=i
- endif
- do i=indexAA(1),np
- if (AA(IA(i))/=0) then
- indexAA(2)=i
- endif
- enddo
- !Markarisma sunoriakwn nodes
- !INNER NODES->0
- !LEFT BOUNDARY NODES->1
- !RIGHT BOUNDARY NODES->2
- if(rank==0) then
- do i=1,nop(fin_el,6)
- inner(i)=0
- enddo
- else if(rank==nump-1) then
- do i=nop(init_el,4),np
- inner(i)=0
- enddo
- else
- do i=nop(init_el,4),nop(fin_el,6)
- inner(i)=0
- enddo
- endif
- if(rank/=0) then
- !markarisma sunoriakwn aristera nodes,inner(boundary_node)=1
- do i=0,2*(ney-mod((init_el-1),ney))
- inner(nop(init_el,1)+i)=1
- enddo
- if(mod((init_el-1),ney)/=0) then
- inner(nop(init_el,4))=1
- do i=0,2*mod((init_el-1),ney)
- inner(nop(init_el+ney-mod((init_el-1),ney),1)+i)=1
- enddo
- endif
- endif
- if(rank/=(nump-1)) then
- !markarisma sunoriakwn dexia nodes,inner(boundary_node)=2
- if (mod(fin_el,ney)==0) then
- do i=0,2*(ney)
- inner(nop(fin_el,9)-i)=2
- enddo
- endif
- if(mod(fin_el,ney)/=0) then
- do i=0,2*(mod(fin_el,ney))
- inner(nop(fin_el,9)-i)=2
- enddo
- inner(nop(fin_el,6))=2
- do i=0,2*(ney-mod(fin_el,ney))
- inner(nop(fin_el-mod(fin_el,ney),9)-i)=2
- enddo
- endif
- endif
- endif
- if(nump>1) then
- do i=1,dim_buf
- buffer1=0
- buffer2=0
- buffer3=0
- buffer4=0
- enddo
- tag1=10
- tag2=20
- left=rank-1
- right=rank+1
- !to kathe process exei to kommati tou r1 pou upologise+olokliro sta sunoriaka
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- buffer2(k)=r1(i)
- k=k+1
- else if(inner(i)==2) then
- buffer1(l)=r1(i)
- l=l+1
- endif
- enddo
- if (rank==0) then
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(2),ierr)
- else if(rank==nump-1) then
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- else
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(3),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(4),ierr)
- endif
- do i=1,2
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- if ((rank>0) .and. (rank<(nump-1))) then
- do i=3,4
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- endif
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- r1(i)=r1(i)+buffer4(k)
- k=k+1
- else if(inner(i)==2) then
- r1(i)=r1(i)+buffer3(l)
- l=l+1
- endif
- enddo
- endif
- !impose essential boundary conditions gia ton pinaka r1 / gia ton sk einai stin CSR mes
- do i=1,np
- if (ncod(i).eq.1) then
- r1(i)=0
- r1(i)=-(u(i)-0)
- endif
- enddo
- call GMRES_m(nemax,nnmax,r1,N,m,d,AA,JA,IA,dim,nzeros,indexAA,rank,nump,inner,reqs,stats,dim_buf)
- !KATHE PROCESS exei to kommati tis lusis tou+olo to sunoriako
- do i=indexAA(1),indexAA(2)
- u(i)=u(i)+d(i)
- enddo
- enddo
- ! open(unit=1,file='./results1.txt',status='unknown')
- ! do i=1,np+1
- ! write(1,5618) xpt(i),ypt(i),u(i)
- !5618 format(3(e12.5,2x))
- ! enddo
- ! close(1)
- wtime_f=MPI_wtime()
- wtime=wtime_f-wtime_i
- if (rank==0) then
- write(*,*) ' ###############################################################'
- write(6,1050)
- 1050 format(/,15x,'Parallel solving of 2-D non linear Bratu Problem')
- write(*,1051)
- 1051 format(8x,'Solver: GMRES iterative Matrix format: CSR Parallel Library: MPI',/)
- write(*,231) rank,nump
- 231 format('Writing process:',3x,I2,3x,'out of',3x,I2,3x,'processes!',//)
- write(6,3002) nex,ney,ne,np
- 3002 format(5x,'nex=',i5,3x,'ney=',i5,3x,'ne=',i6,3x,'np=',i10,/)
- write(*,*) ''
- write(*,612)
- 612 format(/,'Problem Solved, Newton Converged',/)
- endif
- call MPI_barrier(mpi_comm_world,ierr)
- if (u(int(0.5*np+0.5))/=0) then
- write(*,*) 'u=',u(int(0.5*np+0.5)),'λ=',lamda,'rank=',rank
- endif
- call MPI_barrier(mpi_comm_world,ierr)
- call MPI_allreduce(wtime,time,1,MPI_double_precision,MPI_max,MPI_comm_world,ierr)
- if (rank==0) then
- write(*,222) time
- 222 format(/,'Maximum Time=',f30.5,'s',//)
- endif
- if(rank==0) then
- write(*,*) 'NUMP=',nump,'nzeros=',nzeros
- endif
- !telos MPI world
- call MPI_finalize(ierr)
- end
- subroutine xydiscr(nex,ney,xorigin,yorigin,xlast,ylast,deltax,deltay)
- implicit none
- !orismos arithmou elements-suntetagmenwn
- integer,intent(OUT):: nex,ney
- real,intent(OUT):: xorigin,yorigin,xlast,ylast,deltax,deltay
- nex=25
- ney=25
- xorigin=0.
- yorigin=0.
- xlast=1
- ylast=1
- deltax=(xlast-xorigin)/nex
- deltay=(ylast-yorigin)/ney
- return
- end
- subroutine nodnumb(nemax,nnmax,ne,nnx,nny,nex,ney,np,nop)
- implicit none
- !orismos arithmou nodes-node numbering
- integer,intent(IN)::nemax,nnmax,nex,ney
- integer,intent(OUT)::ne,nnx,nny,np,nop(nemax,9)
- integer:: nel=0,l=0,k=0,i=0,j=0
- ne=nex*ney
- nnx=2*nex+1
- nny=2*ney+1
- np=nnx*nny
- ! nodal numbering
- nel=0
- do 10 i=1,nex
- do 10 j=1,ney
- nel=nel+1
- do 10 k=1,3
- l=3*k-2
- nop(nel,l)=nny*(2*i+k-3)+2*j-1
- nop(nel,l+1)=nop(nel,l)+1
- 10 nop(nel,l+2)=nop(nel,l)+2
- return
- end
- subroutine xycoord(nemax,nnmax,xpt,ypt,xorigin,yorigin,nnx,nny,deltax,deltay)
- implicit none
- integer,intent(IN)::nemax,nnmax,nnx,nny
- real,intent(OUT):: xpt(nnmax),ypt(nnmax)
- real,intent(IN):: xorigin,yorigin,deltax,deltay
- integer:: nnode=0,i=0,j=0
- xpt(1)=xorigin
- ypt(1)=yorigin
- do 10 i=1,nnx
- nnode=(i-1)*nny+1
- xpt(nnode)=xpt(1)+(i-1)*deltax/2
- ypt(nnode)=ypt(1)
- do 20 j=2,nny
- xpt(nnode+j-1)=xpt(nnode)
- ypt(nnode+j-1)=ypt(nnode)+(j-1)*deltay/2
- 20 continue
- 10 continue
- return
- end
- subroutine tsfun(x,y,phi,phic,phie)
- real*8:: x,y
- real*8,intent(OUT):: phi(9),phic(9),phie(9)
- real*8:: l1,l2,l3,c
- l1(c)=2.*c**2-3.*c+1.
- l2(c)=-4.*c**2+4.*c
- l3(c)=2.*c**2-c
- dl1(c)=4.*c-3.
- dl2(c)=-8.*c+4.
- dl3(c)=4.*c-1.
- phi(1)=l1(x)*l1(y)
- phi(2)=l1(x)*l2(y)
- phi(3)=l1(x)*l3(y)
- phi(4)=l2(x)*l1(y)
- phi(5)=l2(x)*l2(y)
- phi(6)=l2(x)*l3(y)
- phi(7)=l3(x)*l1(y)
- phi(8)=l3(x)*l2(y)
- phi(9)=l3(x)*l3(y)
- phic(1)=l1(y)*dl1(x)
- phic(2)=l2(y)*dl1(x)
- phic(3)=l3(y)*dl1(x)
- phic(4)=l1(y)*dl2(x)
- phic(5)=l2(y)*dl2(x)
- phic(6)=l3(y)*dl2(x)
- phic(7)=l1(y)*dl3(x)
- phic(8)=l2(y)*dl3(x)
- phic(9)=l3(y)*dl3(x)
- phie(1)=l1(x)*dl1(y)
- phie(2)=l1(x)*dl2(y)
- phie(3)=l1(x)*dl3(y)
- phie(4)=l2(x)*dl1(y)
- phie(5)=l2(x)*dl2(y)
- phie(6)=l2(x)*dl3(y)
- phie(7)=l3(x)*dl1(y)
- phie(8)=l3(x)*dl2(y)
- phie(9)=l3(x)*dl3(y)
- return
- end
- subroutine abfind(nemax,nnmax,nell,r1,nop,xpt,ypt,u,lamda,alfa,value,AA,JA,IA,Nz,ncod,np,dim,nzeros)
- implicit none
- integer*8,intent(IN):: nzeros
- integer,intent(IN)::dim,nemax,nnmax
- integer,intent(IN):: nop(nemax,9),ncod(nnmax)
- real*8,intent(OUT)::r1(nnmax),value,AA(nzeros)
- integer,intent(IN):: nell,np
- integer,intent(INOUT)::JA(nzeros),IA(dim),Nz
- real,intent(IN):: xpt(nnmax),ypt(nnmax)
- real*8,intent(IN)::lamda,alfa,u(nnmax)
- real*8:: phi(9),phic(9),phie(9),usol,usol1,usol2,x1,x2,y1,y2,dett
- integer:: ngl(10)
- real*8:: tphx(9),tphy(9)
- real*8:: rest,w(3),gp(3),estifm(9,9)
- integer i,j,k,p,q,t,r,in,l,m,n
- data w,gp/0.27777777777778,0.444444444444,0.27777777777778,0.1127016654,0.5,0.8872983346/
- do i=1,9
- do j=1,9
- estifm(i,j)=0
- enddo
- enddo
- do 5 i=1,9
- 5 ngl(i)=nop(nell,i)
- do 100 j=1,3
- do 100 k=1,3
- call tsfun(gp(j),gp(k),phi,phic,phie)
- ! isoparametric transformation
- x1=0.
- x2=0.
- y1=0.
- y2=0.
- usol=0.
- usol1=0.
- usol2=0.
- do 103 n=1,9
- x1=x1+xpt(ngl(n))*phic(n)
- x2=x2+xpt(ngl(n))*phie(n)
- y1=y1+ypt(ngl(n))*phic(n)
- 103 y2=y2+ypt(ngl(n))*phie(n)
- dett=x1*y2-x2*y1
- do in=1,9
- tphx(in)=(y2*phic(in)-y1*phie(in))/dett
- tphy(in)=(x1*phie(in)-x2*phic(in))/dett
- enddo
- do n=1,9
- !athroisma sto oloklirwma
- usol=usol+u(ngl(n))*phi(n)
- usol1=usol1+u(ngl(n))*tphx(n)
- usol2=usol2+u(ngl(n))*tphy(n)
- enddo
- ! residuals
- do 100 l=1,9
- r1(ngl(l))=r1(ngl(l))+w(j)*w(k)*dett*(usol1*tphx(l)+usol2*tphy(l))
- r1(ngl(l))=r1(ngl(l))-w(j)*w(k)*dett*lamda*phi(l)*exp(usol/(1+alfa*usol))
- do 100 m=1,9
- estifm(l,m)=estifm(l,m)-w(j)*w(k)*dett*((tphx(l)*tphx(m))+(tphy(l)*tphy(m)))
- estifm(l,m)=estifm(l,m)+w(j)*w(k)*dett*lamda*phi(l)*phi(m)*exp(usol/(1+(alfa*usol)))*(1/(1+alfa*usol)**2)
- 100 continue
- do t=1,9
- do r=1,9
- value=estifm(t,r)
- p=ngl(t)
- q=ngl(r)
- call CSR(nemax,nnmax,AA,JA,IA,p,q,value,Nz,ncod,np,dim,nzeros)
- enddo
- enddo
- end
- subroutine GMRES_m(nemax,nnmax,r1,N,m,d,AA,JA,IA,dim,nzeros,indexAA,rank,nump,inner,reqs,stats,dim_buf)
- implicit none
- include 'mpif.h'
- integer*8,intent(IN):: nzeros
- integer,intent(IN)::N,m,dim,rank,nump,dim_buf,nemax,nnmax
- integer,intent(INOUT)::reqs(8),stats(MPI_status_size,8)
- integer,intent(IN)::IA(dim),JA(nzeros),indexAA(2),inner(dim)
- real*8,intent(IN)::r1(nnmax),AA(nzeros)
- real*8,intent(OUT)::d(nnmax)
- integer:: i=0,j=0,k=0,l=0,k1=0,k2=0,iter=0,tag1=0,tag2=0,right=0,left=0,ierr
- real*8:: tot_rest=0,vita=0,eps=0,buffer1(dim_buf),buffer2(dim_buf),buffer3(dim_buf),buffer4(dim_buf),buff=0
- real*8:: b(N),Hm(m+1,m),x(N),x0(N),r0(N),e(m+1),y(m),u_base(N,m),uj(N),g(m+1),rest(N)
- !arxikopoiisi pinakwn kai eisagwgi dedomenwn
- do i=indexAA(1),indexAA(2)
- b(i)=r1(i)
- enddo
- do i=1,N
- x(i)=0
- enddo
- tot_rest=1
- iter=0
- !epithumiti akriveia
- iter=1
- do while(iter<=30)
- do i=indexAA(1),indexAA(2)
- x0(i)=x(i)
- enddo
- iter=iter+1
- !midenismos pinakwn-anusmatwn Krylov
- do i=1,N
- uj(i)=0
- r0(i)=0
- do j=1,m
- u_base(i,j)=0
- enddo
- enddo
- do i=1,m+1
- e(i)=0
- g(i)=0
- do j=1,m
- Hm(i,j)=0
- y(j)=0
- enddo
- enddo
- e(1)=1
- !upologismos dianusmatos r0=b-Ax0 me r0=b-MATMUL(A,x0) se CSR format
- do i=indexAA(1),indexAA(2)
- k1=IA(i)
- k2=IA(i+1)-1
- do j=k1,k2
- r0(i)=r0(i)+AA(j)*x0(JA(j))
- enddo
- enddo
- !to kathe process exei to kommati tou r1 pou upologise+olokliro sta sunoriaka
- if(nump>1) then
- do i=1,dim_buf
- buffer1(i)=0
- buffer2(i)=0
- buffer3(i)=0
- buffer4(i)=0
- enddo
- tag1=11
- tag2=21
- right=rank+1
- left=rank-1
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- buffer2(k)=r0(i)
- k=k+1
- else if(inner(i)==2) then
- buffer1(l)=r0(i)
- l=l+1
- endif
- enddo
- if (rank==0) then
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(2),ierr)
- else if(rank==nump-1) then
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- else
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(3),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(4),ierr)
- endif
- do i=1,2
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- if ((rank>0) .and. (rank<(nump-1))) then
- do i=3,4
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- endif
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- r0(i)=r0(i)+buffer4(k)
- k=k+1
- else if(inner(i)==2) then
- r0(i)=r0(i)+buffer3(l)
- l=l+1
- endif
- enddo
- endif
- do i=1,N
- r0(i)=-r0(i)+b(i)
- enddo
- !end CSR multi
- vita=0
- !ypologismos normas tou r0 k apothikeusi sto vita
- !prosoxi na min vazei 2 fores ta sunoriaka
- do i=indexAA(1),indexAA(2)
- if((inner(i)==0) .or. (inner(i)==1)) then
- vita=vita+r0(i)**2
- endif
- enddo
- buff=0
- call MPI_allreduce(vita,buff,1,MPI_double_precision,MPI_sum,MPI_comm_world,ierr)
- vita=sqrt(buff)
- !upologismos arxikou dianusmatos uj k apothikeusi ston uj kai u_base(i,1)
- uj=r0/vita
- !apothikeusi tou u1 ston pinaka vasewn tou upoxwrou Krylov(to kathena apothikeuei to kommati tou)
- do i=1,N
- u_base(i,1)=uj(i)
- enddo
- !dianusma g=b*e1
- g=vita*e
- !sunartisi pou paragei to xwro Krylov
- call Krylov(AA,JA,IA,dim,nzeros,Hm,u_base,uj,N,m,indexAA,rank,nump,inner,reqs,stats,dim_buf)
- call Least_sq(Hm,g,m)
- call Back_sub(Hm,y,g,m)
- x=x0+MATMUL(u_base,y)
- enddo
- do i=indexAA(1),indexAA(2)
- d(i)=x(i)
- enddo
- end subroutine GMRES_m
- subroutine Krylov(AA,JA,IA,dim,nzeros,Hm,u_base,uj,N,m,indexAA,rank,nump,inner,reqs,stats,dim_buf)
- implicit none
- include 'mpif.h'
- integer:: i=0,j=0,k=0,k1=0,k2=0,p=0,tag1=0,tag2=0,right=0,left=0,count=0,ierr,l=0
- integer*8,intent(IN):: nzeros
- integer,intent(IN):: N,m,dim,rank,nump,dim_buf
- integer,intent(INOUT)::reqs(8),stats(MPI_status_size,8)
- integer,intent(IN):: JA(nzeros),IA(dim),indexAA(2),inner(dim)
- real*8,intent(OUT)::Hm(m+1,m),u_base(N,m),uj(N)
- real*8,intent(IN):: AA(nzeros)
- real*8:: w(N),buffer1(dim_buf),buffer2(dim_buf),buffer3(dim_buf),buffer4(dim_buf),buff,buff1
- do j=1,m
- !to j einai count gia mas!
- !vazei to kommati tou uj pou antistoixei sto process
- do k=indexAA(1),indexAA(2)
- if(inner(k)/=(-1)) then
- uj(k)=u_base(k,j)
- endif
- enddo
- !MATMUL me CSR format w=MATMUL(A,uj)
- do i=1,N
- w(i)=0
- enddo
- do i=indexAA(1),indexAA(2)
- if(inner(i)/=(-1)) then
- k1=IA(i)
- k2=IA(i+1)-1
- do k=k1,k2
- w(i)=w(i)+AA(k)*uj(JA(k))
- enddo
- endif
- enddo
- if(nump>1) then
- do i=1,dim_buf
- buffer1(i)=0
- buffer2(i)=0
- buffer3(i)=0
- buffer4(i)=0
- enddo
- tag1=31
- tag2=41
- right=rank+1
- left=rank-1
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- buffer2(k)=w(i)
- k=k+1
- else if(inner(i)==2) then
- buffer1(l)=w(i)
- l=l+1
- endif
- enddo
- if (rank==0) then
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(2),ierr)
- else if(rank==nump-1) then
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- else
- call MPI_IRECV(buffer4,dim_buf,MPI_double_precision,left,tag2,MPI_COMM_WORLD,reqs(1),ierr)
- call MPI_ISSEND(buffer2,dim_buf,MPI_double_precision,left,tag1,MPI_COMM_WORLD,reqs(2),ierr)
- call MPI_IRECV(buffer3,dim_buf,MPI_double_precision,right,tag1,MPI_COMM_WORLD,reqs(3),ierr)
- call MPI_ISSEND(buffer1,dim_buf,MPI_double_precision,right,tag2,MPI_COMM_WORLD,reqs(4),ierr)
- endif
- do i=1,2
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- if ((rank>0) .and. (rank<(nump-1))) then
- do i=3,4
- call MPI_wait(reqs(i),stats(1,i),ierr)
- enddo
- endif
- k=1
- l=1
- do i=indexAA(1),indexAA(2)
- if (inner(i)==1) then
- w(i)=w(i)+buffer4(k)
- k=k+1
- else if(inner(i)==2) then
- w(i)=w(i)+buffer3(l)
- l=l+1
- endif
- enddo
- endif
- !end CSR multi
- do i=1,j
- do k=indexAA(1),indexAA(2)
- if(inner(k)/=(-1)) then
- uj(k)=u_base(k,i)
- endif
- enddo
- ! Hm(i,j)=DOT_PRODUCT(w,uj)
- Hm(i,j)=0
- do k=indexAA(1),indexAA(2)
- !einai swsto to IF ALLA THA TO VGALW META ti suglisi wste na sugrinw me to test
- if((inner(k)==0) .or. (inner(k)==1)) then
- Hm(i,j)=Hm(i,j)+w(k)*uj(k)
- endif
- enddo
- buff=0
- buff1=Hm(i,j)
- call MPI_allreduce(buff1,buff,1,MPI_double_precision,MPI_sum,MPI_comm_world,ierr)
- Hm(i,j)=buff
- !END DOT PRODUCT
- do k=indexAA(1),indexAA(2)
- if(inner(k)/=(-1)) then
- w(k)=w(k)-Hm(i,j)*uj(k)
- endif
- enddo
- enddo
- do k=indexAA(1),indexAA(2)
- if((inner(k)==0) .or. (inner(k)==1)) then
- Hm(j+1,j)=Hm(j+1,j)+(w(k))**2
- endif
- enddo
- buff=0
- call MPI_allreduce(Hm(j+1,j),buff,1,MPI_double_precision,MPI_sum,MPI_comm_world,ierr)
- Hm(j+1,j)=sqrt(buff)
- if(j<m) then
- do k=indexAA(1),indexAA(2)
- if(inner(k)/=(-1)) then
- u_base(k,j+1)=w(k)/Hm(j+1,j)
- endif
- enddo
- endif
- enddo
- end subroutine Krylov
- subroutine Least_sq(Hm,g,m)
- implicit none
- integer:: i,j,k
- integer,intent(IN):: m
- real*8,intent(OUT)::Hm(m+1,m),g(m+1)
- real*8:: W(m+1,m+1),si,ci
- !m pollaplasiasmoi pinakwn
- do i=1,m
- !upologismos si kai ci
- si=Hm(i+1,i)/sqrt((Hm(i,i)**2)+(Hm(i+1,i)**2))
- ci=Hm(i,i)/sqrt((Hm(i,i)**2)+(Hm(i+1,i)**2))
- !kataskeui pinaka W
- do j=1,m+1
- do k=1,m+1
- if (j==k) then
- W(j,k)=1
- else
- W(j,k)=0
- endif
- enddo
- enddo
- !eisagwgi timwn pinaka stin i,i+1 grammi
- W(i,i)=ci
- W(i,i+1)=si
- W(i+1,i)=-si
- W(i+1,i+1)=ci
- Hm=MATMUL(W,Hm)
- g=MATMUL(W,g)
- enddo
- end subroutine Least_sq
- subroutine Back_sub(Hm,y,g,m)
- implicit none
- integer:: i,j,k
- integer,intent(IN):: m
- real*8,intent(IN)::Hm(m+1,m),g(m+1)
- real*8,intent(OUT):: y(m+1)
- real*8:: sum=0,help(m+1)
- sum=0
- do i=1,m+1
- help(i)=g(i)
- enddo
- do i=m,1,-1
- sum=help(i)
- if (i.LT.m) then
- do j=i+1,m
- sum=sum-Hm(i,j)*help(j)
- enddo
- endif
- help(i)=sum/Hm(i,i)
- enddo
- do i=1,m
- y(i)=help(i)
- enddo
- end subroutine Back_sub
- subroutine CSR(nemax,nnmax,AA,JA,IA,i,j,value,Nz,ncod,np,dim,nzeros)
- implicit none
- integer*8,intent(IN):: nzeros
- integer,intent(IN):: i,j,ncod(nnmax),np,dim,nemax,nnmax
- real*8,intent(INOUT):: AA(nzeros),value
- real*8:: BB(nzeros)
- integer,intent(INOUT)::JA(nzeros),IA(dim),Nz
- integer:: k=0,l=0,t=0,update=0
- !edw ksekiname
- update=0
- !Dirichlet Boundary Condition
- if (j/=np+1) then
- if ((ncod(i)==1) .and. (i/=j)) then
- value=0
- else if ((ncod(i)==1) .and. (i==j) .and. (AA(IA(i))==0)) then
- value=1
- else if ((ncod(i)==1) .and. (i==j) .and. (AA(IA(i))/=0)) then
- value=0
- endif
- endif
- !Ksekinaei i eisagwgi
- if (abs(value)>1E-10) then
- !to stoixeio IA(i) deixnei ti thesi ston AA pou einai to prwto mi mideniko tou pinaka i
- if(AA(IA(i))==0) then
- AA(IA(i))=value
- JA(IA(i))=j
- Nz=Nz+1
- else
- t=IA(i)
- do while (t<IA(i+1))
- if(JA(t)==j) then
- update=1
- exit
- endif
- t=t+1
- enddo
- endif
- if (update==1 .and. ncod(i)/=1) then
- AA(t)=AA(t)+value
- else if (update==1 .and. (j==np+1)) then
- AA(t)=AA(t)+value
- else
- if ((AA(IA(i))/=0) .and. (JA(IA(i))>j)) then
- do k=(IA(dim)+2),IA(i),-1
- AA(k+1)=AA(k)
- JA(k+1)=JA(k)
- enddo
- AA(IA(i))=value
- JA(IA(i))=j
- do k=i+1,dim
- IA(k)=IA(k)+1
- enddo
- Nz=Nz+1
- else if ((AA(IA(i))/=0) .and. (JA(IA(i))<j)) then
- k=IA(i)
- do while ((JA(k)<j) .and. (k<IA(i+1)))
- k=k+1
- enddo
- do l=(IA(dim)+2),k,-1
- AA(l+1)=AA(l)
- JA(l+1)=JA(l)
- enddo
- AA(k)=value
- JA(k)=j
- do k=i+1,dim
- IA(k)=IA(k)+1
- enddo
- Nz=Nz+1
- endif
- endif
- endif
- l=IA(dim-1)+1
- do while (AA(l)/=0)
- l=l+1
- enddo
- IA(dim)=l
- end subroutine CSR
- real function get_CSR(AA,JA,IA,dim,nzeros,i,j)
- implicit none
- integer,intent(IN):: dim,nzeros,i,j
- integer,intent(IN):: JA(nzeros),IA(dim)
- real*8,intent(IN):: AA(nzeros)
- integer:: k=0,l=0,test=0
- !Default value
- get_CSR=0
- k=IA(i)
- if (JA(IA(i))==j) then
- get_CSR=AA(IA(i))
- else
- do k=IA(i),(IA(i+1)-1)
- if (JA(k)==j) then
- get_CSR=AA(k)
- exit
- else
- get_CSR=0
- endif
- enddo
- endif
- end function get_CSR
Add Comment
Please, Sign In to add comment