Written two modules:
- conjugate gradient method
- band matrix-vector multiplication
they work separately (with machine error difference due to addition nonassociativity being around 1e-14 to 1e-15)
But when combined I cant track the reason for the error. The results between regular matrix vector multiplication and using my module matVecMult differ significantly.
Could one suggest where the issue may be coming from?
Modules, vector, matricies are below:
a) dense banded matrix dnsa
b) sparse matrix a
c) vector of non-zero diagonals indicies
cols = [1 2 3 4 5 6 9]
d) CG solver
module LinSolver
use denseTimesVec
implicit none
contains
subroutine cg(a,dnsa,n,b,cols,x)
implicit none
integer*8, intent (in) :: n
real*8, intent (in) :: a(:,:)
real*8, intent (in) :: dnsa(:,:)
real*8, intent (inout):: b(:)
real*8, intent (out) :: x(:)
integer*8, intent (in) :: cols(:)
! local variables
integer :: i, j, k
real*8, dimension(n) :: r, p, Ap, ApDNS
real*8, parameter :: eps = 1.e-6
real*8 :: rsold, rsnew, alpha, pAp
call matVecMult(n,dnsa,x,cols,ApDNS)
do i = 1,n
Ap(i) = 0
do j = 1,n
Ap(i) = Ap(i) + a(i,j) * x(j)
enddo
enddo
print*,cols
r = b - Ap
p = r
rsold = 0;
do j = 1,n
rsold = rsold + r(j)*r(j);
enddo
do i=1,n
call matVecMult(n,dnsa,p,cols,ApDNS)
do k = 1,n
Ap(k) = 0;
do j = 1,n
Ap(k) = Ap(k) + A(k,j) * p(j);
enddo
enddo
pAp = 0;
do j = 1,n
pAp = pAp + Ap(j) * p(j);
enddo
alpha = rsold / pAp;
x = x + alpha * p;
r = r - alpha * Ap;
rsnew = 0;
do j = 1,n
rsnew = rsnew + r(j)*r(j);
enddo
if (sqrt(rsnew)<eps) exit
p = r + (rsnew / rsold) * p
rsold = rsnew
enddo
end subroutine cg
end module LinSolver
e) multiplication module
module denseTimesVec
implicit none
contains
subroutine matVecMult(n,a,x,cols,b)
implicit none
integer*8, intent (in) :: n
real*8, intent (out) :: b(:)
real*8, intent (in):: a(:,:), x(:)
integer*8, intent (in) :: cols(:)
integer*8 :: i, j, width
real*8, dimension (:), allocatable :: xTimesUpper, xTimesLower
allocate ( xTimesUpper(n), xTimesLower(n))
do j = 1,size(cols)
width = cols(j)-1;
do i = 1,n-width
if (width == 0) then
b(i) = b(i)+a(i,1)*x(i);
else
xTimesUpper = x(1+width:n);
xTimesLower = x(1:n-width);
! multiplying upper/lower part
b(i) = b(i) + a(i,j) * xTimesUpper(i); ! OK if compared to triuA*x-b
b(i+width) = b(i+width) + a(i,j)*xTimesLower(i);
endif
enddo
enddo
end subroutine matVecMult
end module denseTimesVec