123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563 |
- program PCMRH
- !***************************************************************
- ! Parallel CMRH program with MPI
- !
- ! Created by S. Duminil sep. 2012
- !***************************************************************
- use mpi
- external SSWAP,DCOPY,DSCAL,DGEMV,DSWAP
- intrinsic dabs,dsqrt
- !********************************************
- ! VARIABLES
- !********************************************
- integer,parameter ::Nprocs_max=16
- integer ::rang,Nprocs,n,nl,code,i,j,k,choix,solchoice,iprint
- real(kind=8) ::init(5)
- integer,dimension(MPI_STATUS_SIZE) ::statut
- real(kind=8),allocatable,dimension(:,:) ::AL
- real(kind=8) ::btemp,bmax,btemp2
- real(kind=8),allocatable,dimension(:) ::b,sol,rs,temp,x,workl,work,tempn,c,s,xl,cl,temp2,xx,yy,valeurs
- integer,allocatable,dimension(:) ::p
- real*8 ::dnorm2,D_inv,beta,dnorm,dnrm2,tol,alpha,H_k1,dgam,r
- integer ::indice,idamax,k1,n_k,knl,knlp,inl,inlp
- real*8 dtime, delta, delta0, dt1, dt2, dt3
- real*8 etime, eelta, eelta0, et1, et2, et3
- real*8 t1, t2, t3, tm1, tm2, tm3
- real*8 dtime1, delta1, delta01
- real*8 etime1, eelta1, eelta01,l,p1,p2
- integer nb_ligne
- real(kind=8) ::temps,tempsd,tempsf,temps2
- integer*4,dimension(3) ::timearray
- real ::rand
- real*4 da(2),ea(2),da1(2),ea1(2)
- !********************************************
- ! MPI CONFIG
- !********************************************
- call MPI_INIT(code)
- call MPI_COMM_RANK(MPI_COMM_WORLD,rang,code)
- call MPI_COMM_SIZE(MPI_COMM_WORLD,Nprocs,code)
- call MPI_BARRIER(MPI_COMM_WORLD,code)
- call itime(timearray) ! Get the current time
- i = rand ( timearray(1)+timearray(2)+timearray(3) )
- !************************************
- ! Matrix Initialisation
- !************************************
- if (rang==0) then
- open(unit=10,file='inputfile.dat',status='old')
- read(10,*)init
- close(10)
- iprint=int(init(1))
- n=int(init(2))
- choix=int(init(3))
- solchoice=int(init(4))
- tol=init(5)
- end if
- tempsd=MPI_WTIME()
- call MPI_BCAST(n,1,MPI_INTEGER,0,MPI_COMM_WORLD,code)
- call MPI_BCAST(choix,1,MPI_INTEGER,0,MPI_COMM_WORLD,code)
- call MPI_BCAST(tol,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,code)
- if (mod(n,Nprocs)==0)then
- nl=n/Nprocs
- else
- nl=(n/Nprocs)+1
- end if
- allocate(b(n),rs(n),x(n),AL(nl,n),workl(nl),sol(n),temp(n),temp2(n))
- if (choix<3)then
- call matrice(AL,nl,n,2.d0,1.d0,choix,rang)
- else
- allocate(valeurs(n))
- open(unit=30,file='matrixfile.dat',status='old')
- do i=1,n
- call position(i,nl,inl,inlp)
- read(30,*)valeurs
- if (rang.eq.inl)then
- AL(inlp,:)=valeurs(:)
- endif
- enddo
- close(30)
- endif
- !********************************************
- ! End Matrix initialisation
- !********************************************
- !********************************************
- ! Solution initialisation
- !********************************************
- call MPI_BCAST(solchoice,1,MPI_INTEGER,0,MPI_COMM_WORLD,code)
- call solution(AL,workl,sol,nl,n,solchoice)
- call MPI_ALLGATHER(workl,nl,MPI_DOUBLE_PRECISION,b,nl,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,code)
- !********************************************
- ! End solution initialisation
- !********************************************
- !********************************************
- ! Permutation Vector initialisation
- ! and computation of initial residual
- !********************************************
- if (rang==0) then
- allocate(p(n))
- do i=1,n
- p(i)=i
- end do
- end if
- dnorm2=dnrm2(n,b,1)
- if (rang==0)then
- if (iprint==2)then
- print*,''
- print*,'la norme 2 de r0=',dnorm2
- print*,''
- end if
- end if
- !********************************************
- ! Computation of max b(i)
- !********************************************
- indice = idamax(n,b,1)
- beta = b(indice)
- D_inv = 1.d0/beta
- !********************************************
- ! First permutation
- !********************************************
- if (indice .ne.1) then
- call dswap(1,b(indice),1,b(1),1)
- call dswap(nl,AL(1,indice),1,AL(1,1),1)
- call position(indice,nl,inl,inlp)
- if (inl==0) then
- if (rang==inl)then
- call dswap(n,AL(inlp,1),nl,AL(1,1),nl)
- end if
- else
- if (rang==inl) then
- call dcopy(n,AL(inlp,1),nl,temp,1)
- end if
- call MPI_BCAST(temp,n,MPI_DOUBLE_PRECISION,inl,MPI_COMM_WORLD,code)
- if (rang==0)then
- call dcopy(n,AL(1,1),nl,temp2,1)
- end if
- call MPI_BCAST(temp2,n,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,code)
- if (rang==inl)then
- call dcopy(n,temp2,1,AL(inlp,1),nl)
- end if
- if (rang==0)then
- call dcopy(n,temp,1,AL(1,1),nl)
- end if
- end if
- if (rang==0)then
- call sswap(1,p(indice),1,p(1),1)
- end if
- end if
- !********************************************
- ! End permutation
- !********************************************
- ! call dscal(n,0.d0,x,1)
- call dscal(n,D_inv,b,1)
- rs(1)=beta
- dnorm=dabs(beta)
- allocate(tempn(nl),work(n),c(n),s(n),xl(nl),cl(nl))
- call dscal(nl,0.d0,xl,1)
- call MPI_BARRIER(MPI_COMM_WORLD,code)
- k=0
- if (rang==0) then
- if (iprint==2)then
- write(*,*)'iter., pseudo residu '
- end if
- open(unit=30,file='presidual.dat',status='unknown')
- write(30,*)'Iter., Pseudo Residual Norm'
- end if
- !tol=1.d-7
- delta0 = dtime(da)
- eelta0 = etime(ea)
- tempsf=MPI_WTIME()
- !********************************************
- ! Loop
- !********************************************
- do while((dnorm.gt.tol).and.(k.lt.n))
- k=k+1;
- n_k=n-k;
- k1=k+1;
- if (rang==0) then
- if (iprint==2)then
- write(*,10)k-1,dnorm
- end if
- write(30,10)k-1,dnorm
- end if
- 10 format(i5,2x,e12.4)
- !********************************************
- ! workl = A*b
- !********************************************
- call dgemv('N',nl,n_k+1,1.d0,AL(1,k),nl,b(k),1,0.d0,workl,1)
- ! ! call dscal(nl,0.d0,workl,1)
- ! ! do j=k1,n
- ! ! do i=1,nl
- ! ! workl(i)=workl(i)+AL(i,j)*b(j)
- ! ! end do
- ! ! end do
- !********************************************
- ! A(:,k)=b
- !********************************************
- call position(k1,nl,knl,knlp)
- if (rang>knl)then
- call dcopy(nl,b(rang*nl+1),1,AL(1,k),1)
- else
- if (rang==knl) then
- call dcopy(nl-knlp+1,b(k1),1,AL(knlp,k),1)
- end if
- end if
- !********************************************
- ! j Loop
- !********************************************
- do j=1,k
- !********************************************
- ! A(j,k)=workl(j)
- ! workl(j)=0
- !********************************************
- call position(j,nl,knl,knlp)
- if (rang==knl) then
- AL(knlp,k)=workl(knlp)
- workl(knlp)=0.d0
- alpha=-AL(knlp,k)
- end if
- call MPI_BCAST(alpha,1,MPI_DOUBLE_PRECISION,knl,MPI_COMM_WORLD,code)
- !********************************************
- ! workl=workl - alpha A(:,j)
- !********************************************
- call position(j+1,nl,knl,knlp)
- if (rang >knl)then
- call daxpy(nl,alpha,AL(1,j),1,workl,1)
- end if
- if (rang==knl) then
- call daxpy(nl-knlp+1,alpha,AL(knlp,j),1,workl(knlp),1)
- end if
- end do
- !********************************************
- ! End j Loop
- !********************************************
- if (k.lt.n)then
- call MPI_ALLGATHER(workl,nl,MPI_DOUBLE_PRECISION,b,nl,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,code)
- !********************************************
- ! Search indice i_0
- !********************************************
- indice=k+idamax(n_k,b(k1),1)
- H_k1=b(indice)
- !********************************************
- ! Permutation
- !********************************************
- if (indice.ne.k1) then
- if (rang==0) then
- call sswap(1,p(indice),1,p(k1),1)
- end if
- call dswap(1,b(indice),1,b(k1),1)
- call dswap(nl,AL(1,indice),1,AL(1,k1),1)
- call MPI_BARRIER(MPI_COMM_WORLD,code)
- call position(k1,nl,knl,knlp)
- call position(indice,nl,inl,inlp)
- if (inl==knl) then
- if (rang==inl)then
- call dswap(n,AL(inlp,1),nl,AL(knlp,1),nl)
- end if
- else
- if (rang==inl) then
- call dcopy(n,AL(inlp,1),nl,temp,1)
- end if
- call MPI_BCAST(temp,n,MPI_DOUBLE_PRECISION,inl,MPI_COMM_WORLD,code)
- if (rang==knl)then
- call dcopy(n,AL(knlp,1),nl,temp2,1)
- end if
- call MPI_BCAST(temp2,n,MPI_DOUBLE_PRECISION,knl,MPI_COMM_WORLD,code)
- if (rang==inl)then
- call dcopy(n,temp2,1,AL(inlp,1),nl)
- end if
- if (rang==knl)then
- call dcopy(n,temp,1,AL(knlp,1),nl)
- end if
- end if
- end if
- !********************************************
- ! End Permutation
- !********************************************
- if (H_k1.ne.0.d0)then
- !********************************************
- ! b=b/(b)_(i_0)
- !********************************************
- D_inv=1.d0/H_k1
- ! call dcopy(n,work,1,b,1)
- call dscal(n,D_inv,b,1)
- end if
- else
- H_k1=0.d0
- call dscal(n,0.d0,b,1)
- end if
- !********************************************
- ! Application of Givens rotations
- !********************************************
- if (k.gt.1)then
- do i=2,k
- call position(i-1,nl,knl,knlp)
- call position(i,nl,inl,inlp)
- if (inl==knl)then
- if (rang==inl)then
- btemp=AL(knlp,k)
- AL(knlp,k)=c(i-1)*btemp+s(i-1)*AL(inlp,k)
- AL(inlp,k)=-s(i-1)*btemp+c(i-1)*AL(inlp,k)
- end if
- else
- if (rang==inl)then
- btemp=AL(inlp,k)
- end if
- if (rang==knl)then
- btemp2=AL(knlp,k)
- end if
- call MPI_BCAST(btemp,1,MPI_DOUBLE_PRECISION,inl,MPI_COMM_WORLD,code)
- call MPI_BCAST(btemp2,1,MPI_DOUBLE_PRECISION,knl,MPI_COMM_WORLD,code)
- net=net+2
- if (rang==inl)then
- AL(inlp,k)=-s(i-1)*btemp2+c(i-1)*btemp
- end if
- if (rang==knl)then
- AL(knlp,k)=c(i-1)*btemp2+s(i-1)*btemp
- end if
- end if
- end do
- end if
- if (k.le.n)then
- call position(k,nl,knl,knlp)
- if (rang==knl)then
- btemp=AL(knlp,k)
- end if
- call MPI_BCAST(btemp,1,MPI_DOUBLE_PRECISION,knl,MPI_COMM_WORLD,code)
- dgam=dsqrt(btemp**2+H_k1**2)
- c(k)=dabs(btemp)/dgam
- s(k)=(H_k1*dabs(btemp))/(btemp*dgam)
- rs(k1)=-s(k)*rs(k)
- rs(k)=c(k)*rs(k)
- dnorm=dabs(rs(k1))
- if (rang==knl)then
- AL(knlp,k)=c(k)*btemp+s(k)*H_k1
- end if
- end if
- !********************************************
- ! End Givens rotations
- !********************************************
- end do
- !********************************************
- ! Solve H_k d_k = alpha e_1
- !********************************************
- call position(k,nl,knl,knlp)
- do j=k,1,-1
- call position(j,nl,inl,inlp)
- if (rang==inl) then
- rs(j)=rs(j)/AL(inlp,j)
- btemp=rs(j)
- end if
- call MPI_BCAST(btemp,1,MPI_DOUBLE_PRECISION,inl,MPI_COMM_WORLD,code)
- rs(j)=btemp
- work(j)=btemp
- call position(j-1,nl,inl,inlp)
- if (rang==inl)then
- call daxpy(inlp,-btemp,AL(1,j),1,rs(rang*nl+1),1)
- end if
- if (rang.lt.inl)then
- call daxpy(nl,-btemp,AL(1,j),1,rs(rang*nl+1),1)
- end if
- end do
- do j=k,1,-1
- btemp=rs(j)
- call position(k,nl,knl,knlp)
- call position(j+1,nl,inl,inlp)
- if (rang==inl)then
- if (rang==knl)then
- call daxpy((knlp-inlp+1),btemp,AL(inlp,j),1,rs(rang*nl+inlp),1)
- end if
- if (rang.lt.knl)then
- call daxpy(nl-inlp+1,btemp,AL(inlp,j),1,rs(rang*nl+inlp),1)
- end if
- end if
- if (rang.gt.inl)then
- if (rang==knl)then
- call daxpy(knlp,btemp,AL(1,j),1,rs(rang*nl+1),1)
- end if
- if (rang.lt.knl)then
- call daxpy(nl,btemp,AL(1,j),1,rs(rang*nl+1),1)
- end if
- end if
- end do
- !********************************************
- ! End Loop
- !********************************************
- !********************************************
- ! Update x_k=x_0 + L_k d_k
- !********************************************
- call position(k1,nl,knl,knlp)
- if (rang.gt.knl)then
- call dgemv('N',nl,k,1.d0,AL(1,1),nl,work,1,0.d0,c(rang*nl+1),1)
- end if
- if (rang==knl)then
- call dgemv('N',nl-knlp+1,k,1.d0,AL(knlp,1),nl,work,1,0.d0,c(rang*nl+knlp),1)
- end if
- call position(k,nl,knl,knlp)
- if (rang.lt.knl)then
- call daxpy(nl,1.d0,rs(rang*nl+1),1,xl,1)
- end if
- if (rang==knl)then
- call daxpy(knlp,1.d0,rs(rang*nl+1),1,xl,1)
- call daxpy(nl-knlp,1.d0,c(rang*nl+knlp+1),1,xl(knlp+1),1)
- end if
- if (rang.gt.knl)then
- call daxpy(nl,1.d0,c(rang*nl+1),1,xl,1)
- end if
- !********************************************
- ! Reorder the components of x_k
- !********************************************
- call MPI_GATHER(xl,nl,MPI_DOUBLE_PRECISION,x,nl,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,code)
- call MPI_BARRIER(MPI_COMM_WORLD,code)
- if (rang==0)then
- do j=1,n
- b(p(j))=x(j)
- end do
- end if
- tempsf=(MPI_WTIME()-tempsf)
- tempsd=(MPI_WTIME()-tempsd)
- !write(*,*)'rang et temps',rang,tempsf,tempsd
- call MPI_REDUCE(tempsf,temps,1,MPI_DOUBLE_PRECISION,MPI_MAX,0,MPI_COMM_WORLD,code)
- call MPI_REDUCE(tempsd,temps2,1,MPI_DOUBLE_PRECISION,MPI_MAX,0,MPI_COMM_WORLD,code)
- if (rang==0)then
- if (iprint >= 2) then
- print*,'Results : iter, norm residual, time'
- write(*,40)k,dnorm,temps,temps2
- end if
- write(30,10),k,dnorm
- close(30)
- open(unit=10,file='temps.txt',status='unknown',access='append')
- write(unit=10,FMT=*)'Proc. Number :',Nprocs,'Iter. Number:',k,'Time:',temps,temps2
- close(10)
- end if
- if (rang==0)then
- if (iprint==2) then
- write(*,*)
- write(*,*)' la perm. et les solutions CMRH et EXACTE sont : '
- write(*,*)' ______________________________________________ '
- if (n.lt.10)then
- do i=1,n
- write(*,20)p(i), b(i), sol(i)
- end do
- else
- do i=1,10
- write(*,20)p(i), b(i), sol(i)
- end do
- end if
- end if
- end if
- 30 format(A37,4(f14.7,1x))
- 20 format(1x,i5,2(1x,f14.8))
- 40 format(1x,i5,4(1x,e12.4))
- ! deallocate(AL,b,sol,workl,temp,tempn,work,c,s,rs,xl,x,cl)
- if (rang==0) then
- deallocate(p)
- end if
- call MPI_FINALIZE(code)
- end program PCMRH
- !********************************************
- ! End Parallel CMRH
- !********************************************
- !********************************************
- ! Matrix Choice
- !********************************************
- subroutine matrice(A,nl,n,alpha,beta,choix,rang)
- integer ::n,i,j,choix,rang,nl,k
- real(kind=8),dimension(nl,n)::A
- real*8 ::alpha,beta
- if (choix==1) then
- do i = 1, nl
- k=rang*nl+i
- do j = 1, k
- A(i,j) = (2.d0*dble(j)-1.d0)/dble(n-k+j)
- end do
- do j = k+1, n
- A(i,j) = (2.d0*dble(k)-1.d0)/dble(n-k+j)
- end do
- end do
- end if
- if (choix==2)then
- do i=1,nl
- k=rang*nl+i
- do j=1,k
- A(i,j)=dble(k)
- end do
- do j=k+1,n
- A(i,j)=dble(j)
- end do
- end do
- end if
- end subroutine matrice
- !********************************************
- ! Solution Choice
- !********************************************
- subroutine solution(A,b,sol,nl,n,solchoice)
- integer ::n,i,nl,solchoice
- real(kind=8),dimension(n,n) ::A
- real(kind=8),dimension(n) ::sol
- real(kind=8),dimension(nl) ::b
- real ::rand
- do i=1,n
- if (solchoice==1) then
- sol(i)=1.d0
- end if
- if (solchoice==2) then
- sol(i)=dble(n-i+1)
- end if
- if (solchoice==3)then
- sol(i)=rand(0)
- end if
- end do
- call dgemv('N',nl,n,1.d0,A,nl,sol,1,0.d0,b,1)
- end subroutine solution
- !********************************************
- ! Where are we ?
- !********************************************
- subroutine position(k,nl,knl,knlp)
- integer :: k,nl,knl,knlp
- knl=(k-1)/nl
- knlp=k-knl*nl
- end subroutine position
|