mirror of
https://github.com/HongjianFang/DSurfTomo.git
synced 2025-05-08 00:01:14 +08:00

change 1e-2 to 'ftol' in CalSurfG.f90, which may cause problem with small study region (~2 km)
755 lines
28 KiB
Fortran
755 lines
28 KiB
Fortran
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
! File lsmrModule.f90
|
|
!
|
|
! LSMR
|
|
!
|
|
! LSMR solves Ax = b or min ||Ax - b|| with or without damping,
|
|
! using the iterative algorithm of David Fong and Michael Saunders:
|
|
! http://www.stanford.edu/group/SOL/software/lsmr.html
|
|
!
|
|
! Maintained by
|
|
! David Fong <clfong@stanford.edu>
|
|
! Michael Saunders <saunders@stanford.edu>
|
|
! Systems Optimization Laboratory (SOL)
|
|
! Stanford University
|
|
! Stanford, CA 94305-4026, USA
|
|
!
|
|
! 17 Jul 2010: F90 LSMR derived from F90 LSQR and lsqr.m.
|
|
! 07 Sep 2010: Local reorthogonalization now works (localSize > 0).
|
|
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
module lsmrModule
|
|
|
|
use lsmrDataModule, only : dp
|
|
use lsmrblasInterface, only : dnrm2, dscal
|
|
implicit none
|
|
private
|
|
public :: LSMR
|
|
|
|
contains
|
|
|
|
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
! subroutine LSMR ( m, n, Aprod1, Aprod2, b, damp, &
|
|
! atol, btol, conlim, itnlim, localSize, nout, &
|
|
! x, istop, itn, normA, condA, normr, normAr, normx )
|
|
subroutine LSMR ( m, n, leniw, lenrw,iw,rw, b, damp, &
|
|
atol, btol, conlim, itnlim, localSize, nout, &
|
|
x, istop, itn, normA, condA, normr, normAr, normx )
|
|
|
|
integer, intent(in) :: leniw
|
|
integer, intent(in) :: lenrw
|
|
integer, intent(in) :: iw(leniw)
|
|
real, intent(in) :: rw(lenrw)
|
|
|
|
integer, intent(in) :: m, n, itnlim, localSize, nout
|
|
integer, intent(out) :: istop, itn
|
|
real(dp), intent(in) :: b(m)
|
|
real(dp), intent(out) :: x(n)
|
|
real(dp), intent(in) :: atol, btol, conlim, damp
|
|
real(dp), intent(out) :: normA, condA, normr, normAr, normx
|
|
|
|
interface
|
|
subroutine aprod(mode,m,n,x,y,leniw,lenrw,iw,rw) ! y := y + A*x
|
|
use lsmrDataModule, only : dp
|
|
integer, intent(in) :: mode,lenrw
|
|
integer, intent(in) :: leniw
|
|
real, intent(in) :: rw(lenrw)
|
|
integer, intent(in) :: iw(leniw)
|
|
integer, intent(in) :: m,n
|
|
real(dp), intent(inout) :: x(n)
|
|
real(dp), intent(inout) :: y(m)
|
|
end subroutine aprod
|
|
! subroutine Aprod1(m,n,x,y) ! y := y + A*x
|
|
! use lsmrDataModule, only : dp
|
|
! integer, intent(in) :: m,n
|
|
! real(dp), intent(in) :: x(n)
|
|
! real(dp), intent(inout) :: y(m)
|
|
! end subroutine Aprod1
|
|
!
|
|
! subroutine Aprod2(m,n,x,y) ! x := x + A'*y
|
|
! use lsmrDataModule, only : dp
|
|
! integer, intent(in) :: m,n
|
|
! real(dp), intent(inout) :: x(n)
|
|
! real(dp), intent(in) :: y(m)
|
|
! end subroutine Aprod2
|
|
end interface
|
|
|
|
!-------------------------------------------------------------------
|
|
! LSMR finds a solution x to the following problems:
|
|
!
|
|
! 1. Unsymmetric equations: Solve A*x = b
|
|
!
|
|
! 2. Linear least squares: Solve A*x = b
|
|
! in the least-squares sense
|
|
!
|
|
! 3. Damped least squares: Solve ( A )*x = ( b )
|
|
! ( damp*I ) ( 0 )
|
|
! in the least-squares sense
|
|
!
|
|
! where A is a matrix with m rows and n columns, b is an m-vector,
|
|
! and damp is a scalar. (All quantities are real.)
|
|
! The matrix A is treated as a linear operator. It is accessed
|
|
! by means of subroutine calls with the following purpose:
|
|
!
|
|
! call Aprod1(m,n,x,y) must compute y = y + A*x without altering x.
|
|
! call Aprod2(m,n,x,y) must compute x = x + A'*y without altering y.
|
|
!
|
|
! LSMR uses an iterative method to approximate the solution.
|
|
! The number of iterations required to reach a certain accuracy
|
|
! depends strongly on the scaling of the problem. Poor scaling of
|
|
! the rows or columns of A should therefore be avoided where
|
|
! possible.
|
|
!
|
|
! For example, in problem 1 the solution is unaltered by
|
|
! row-scaling. If a row of A is very small or large compared to
|
|
! the other rows of A, the corresponding row of ( A b ) should be
|
|
! scaled up or down.
|
|
!
|
|
! In problems 1 and 2, the solution x is easily recovered
|
|
! following column-scaling. Unless better information is known,
|
|
! the nonzero columns of A should be scaled so that they all have
|
|
! the same Euclidean norm (e.g., 1.0).
|
|
!
|
|
! In problem 3, there is no freedom to re-scale if damp is
|
|
! nonzero. However, the value of damp should be assigned only
|
|
! after attention has been paid to the scaling of A.
|
|
!
|
|
! The parameter damp is intended to help regularize
|
|
! ill-conditioned systems, by preventing the true solution from
|
|
! being very large. Another aid to regularization is provided by
|
|
! the parameter condA, which may be used to terminate iterations
|
|
! before the computed solution becomes very large.
|
|
!
|
|
! Note that x is not an input parameter.
|
|
! If some initial estimate x0 is known and if damp = 0,
|
|
! one could proceed as follows:
|
|
!
|
|
! 1. Compute a residual vector r0 = b - A*x0.
|
|
! 2. Use LSMR to solve the system A*dx = r0.
|
|
! 3. Add the correction dx to obtain a final solution x = x0 + dx.
|
|
!
|
|
! This requires that x0 be available before and after the call
|
|
! to LSMR. To judge the benefits, suppose LSMR takes k1 iterations
|
|
! to solve A*x = b and k2 iterations to solve A*dx = r0.
|
|
! If x0 is "good", norm(r0) will be smaller than norm(b).
|
|
! If the same stopping tolerances atol and btol are used for each
|
|
! system, k1 and k2 will be similar, but the final solution x0 + dx
|
|
! should be more accurate. The only way to reduce the total work
|
|
! is to use a larger stopping tolerance for the second system.
|
|
! If some value btol is suitable for A*x = b, the larger value
|
|
! btol*norm(b)/norm(r0) should be suitable for A*dx = r0.
|
|
!
|
|
! Preconditioning is another way to reduce the number of iterations.
|
|
! If it is possible to solve a related system M*x = b efficiently,
|
|
! where M approximates A in some helpful way
|
|
! (e.g. M - A has low rank or its elements are small relative to
|
|
! those of A), LSMR may converge more rapidly on the system
|
|
! A*M(inverse)*z = b,
|
|
! after which x can be recovered by solving M*x = z.
|
|
!
|
|
! NOTE: If A is symmetric, LSMR should not be used!
|
|
! Alternatives are the symmetric conjugate-gradient method (CG)
|
|
! and/or SYMMLQ.
|
|
! SYMMLQ is an implementation of symmetric CG that applies to
|
|
! any symmetric A and will converge more rapidly than LSMR.
|
|
! If A is positive definite, there are other implementations of
|
|
! symmetric CG that require slightly less work per iteration
|
|
! than SYMMLQ (but will take the same number of iterations).
|
|
!
|
|
!
|
|
! Notation
|
|
! --------
|
|
! The following quantities are used in discussing the subroutine
|
|
! parameters:
|
|
!
|
|
! Abar = ( A ), bbar = (b)
|
|
! (damp*I) (0)
|
|
!
|
|
! r = b - A*x, rbar = bbar - Abar*x
|
|
!
|
|
! normr = sqrt( norm(r)**2 + damp**2 * norm(x)**2 )
|
|
! = norm( rbar )
|
|
!
|
|
! eps = the relative precision of floating-point arithmetic.
|
|
! On most machines, eps is about 1.0e-7 and 1.0e-16
|
|
! in single and double precision respectively.
|
|
! We expect eps to be about 1e-16 always.
|
|
!
|
|
! LSMR minimizes the function normr with respect to x.
|
|
!
|
|
!
|
|
! Parameters
|
|
! ----------
|
|
! m input m, the number of rows in A.
|
|
!
|
|
! n input n, the number of columns in A.
|
|
!
|
|
! Aprod1, Aprod2 See above.
|
|
!
|
|
! damp input The damping parameter for problem 3 above.
|
|
! (damp should be 0.0 for problems 1 and 2.)
|
|
! If the system A*x = b is incompatible, values
|
|
! of damp in the range 0 to sqrt(eps)*norm(A)
|
|
! will probably have a negligible effect.
|
|
! Larger values of damp will tend to decrease
|
|
! the norm of x and reduce the number of
|
|
! iterations required by LSMR.
|
|
!
|
|
! The work per iteration and the storage needed
|
|
! by LSMR are the same for all values of damp.
|
|
!
|
|
! b(m) input The rhs vector b.
|
|
!
|
|
! x(n) output Returns the computed solution x.
|
|
!
|
|
! atol input An estimate of the relative error in the data
|
|
! defining the matrix A. For example, if A is
|
|
! accurate to about 6 digits, set atol = 1.0e-6.
|
|
!
|
|
! btol input An estimate of the relative error in the data
|
|
! defining the rhs b. For example, if b is
|
|
! accurate to about 6 digits, set btol = 1.0e-6.
|
|
!
|
|
! conlim input An upper limit on cond(Abar), the apparent
|
|
! condition number of the matrix Abar.
|
|
! Iterations will be terminated if a computed
|
|
! estimate of cond(Abar) exceeds conlim.
|
|
! This is intended to prevent certain small or
|
|
! zero singular values of A or Abar from
|
|
! coming into effect and causing unwanted growth
|
|
! in the computed solution.
|
|
!
|
|
! conlim and damp may be used separately or
|
|
! together to regularize ill-conditioned systems.
|
|
!
|
|
! Normally, conlim should be in the range
|
|
! 1000 to 1/eps.
|
|
! Suggested value:
|
|
! conlim = 1/(100*eps) for compatible systems,
|
|
! conlim = 1/(10*sqrt(eps)) for least squares.
|
|
!
|
|
! Note: Any or all of atol, btol, conlim may be set to zero.
|
|
! The effect will be the same as the values eps, eps, 1/eps.
|
|
!
|
|
! itnlim input An upper limit on the number of iterations.
|
|
! Suggested value:
|
|
! itnlim = n/2 for well-conditioned systems
|
|
! with clustered singular values,
|
|
! itnlim = 4*n otherwise.
|
|
!
|
|
! localSize input No. of vectors for local reorthogonalization.
|
|
! 0 No reorthogonalization is performed.
|
|
! >0 This many n-vectors "v" (the most recent ones)
|
|
! are saved for reorthogonalizing the next v.
|
|
! localSize need not be more than min(m,n).
|
|
! At most min(m,n) vectors will be allocated.
|
|
!
|
|
! nout input File number for printed output. If positive,
|
|
! a summary will be printed on file nout.
|
|
!
|
|
! istop output An integer giving the reason for termination:
|
|
!
|
|
! 0 x = 0 is the exact solution.
|
|
! No iterations were performed.
|
|
!
|
|
! 1 The equations A*x = b are probably compatible.
|
|
! Norm(A*x - b) is sufficiently small, given the
|
|
! values of atol and btol.
|
|
!
|
|
! 2 damp is zero. The system A*x = b is probably
|
|
! not compatible. A least-squares solution has
|
|
! been obtained that is sufficiently accurate,
|
|
! given the value of atol.
|
|
!
|
|
! 3 damp is nonzero. A damped least-squares
|
|
! solution has been obtained that is sufficiently
|
|
! accurate, given the value of atol.
|
|
!
|
|
! 4 An estimate of cond(Abar) has exceeded conlim.
|
|
! The system A*x = b appears to be ill-conditioned,
|
|
! or there could be an error in Aprod1 or Aprod2.
|
|
!
|
|
! 5 The iteration limit itnlim was reached.
|
|
!
|
|
! itn output The number of iterations performed.
|
|
!
|
|
! normA output An estimate of the Frobenius norm of Abar.
|
|
! This is the square-root of the sum of squares
|
|
! of the elements of Abar.
|
|
! If damp is small and the columns of A
|
|
! have all been scaled to have length 1.0,
|
|
! normA should increase to roughly sqrt(n).
|
|
! A radically different value for normA may
|
|
! indicate an error in Aprod1 or Aprod2.
|
|
!
|
|
! condA output An estimate of cond(Abar), the condition
|
|
! number of Abar. A very high value of condA
|
|
! may again indicate an error in Aprod1 or Aprod2.
|
|
!
|
|
! normr output An estimate of the final value of norm(rbar),
|
|
! the function being minimized (see notation
|
|
! above). This will be small if A*x = b has
|
|
! a solution.
|
|
!
|
|
! normAr output An estimate of the final value of
|
|
! norm( Abar'*rbar ), the norm of
|
|
! the residual for the normal equations.
|
|
! This should be small in all cases. (normAr
|
|
! will often be smaller than the true value
|
|
! computed from the output vector x.)
|
|
!
|
|
! normx output An estimate of norm(x) for the final solution x.
|
|
!
|
|
! Subroutines and functions used
|
|
! ------------------------------
|
|
! BLAS dscal, dnrm2
|
|
! USER Aprod1, Aprod2
|
|
!
|
|
! Precision
|
|
! ---------
|
|
! The number of iterations required by LSMR will decrease
|
|
! if the computation is performed in higher precision.
|
|
! At least 15-digit arithmetic should normally be used.
|
|
! "real(dp)" declarations should normally be 8-byte words.
|
|
! If this ever changes, the BLAS routines dnrm2, dscal
|
|
! (Lawson, et al., 1979) will also need to be changed.
|
|
!
|
|
!
|
|
! Reference
|
|
! ---------
|
|
! http://www.stanford.edu/group/SOL/software/lsmr.html
|
|
! ------------------------------------------------------------------
|
|
!
|
|
! LSMR development:
|
|
! 21 Sep 2007: Fortran 90 version of LSQR implemented.
|
|
! Aprod1, Aprod2 implemented via f90 interface.
|
|
! 17 Jul 2010: LSMR derived from LSQR and lsmr.m.
|
|
! 07 Sep 2010: Local reorthogonalization now working.
|
|
!-------------------------------------------------------------------
|
|
|
|
intrinsic :: abs, dot_product, min, max, sqrt
|
|
|
|
! Local arrays and variables
|
|
real(dp) :: h(n), hbar(n), u(m), v(n), w(n), localV(n,min(localSize,m,n))
|
|
logical :: damped, localOrtho, localVQueueFull, prnt, show
|
|
integer :: i, localOrthoCount, localOrthoLimit, localPointer, localVecs, &
|
|
pcount, pfreq
|
|
real(dp) :: alpha, alphabar, alphahat, &
|
|
beta, betaacute, betacheck, betad, betadd, betahat, &
|
|
normb, c, cbar, chat, ctildeold, ctol, &
|
|
d, maxrbar, minrbar, normA2, &
|
|
rho, rhobar, rhobarold, rhodold, rhoold, rhotemp, &
|
|
rhotildeold, rtol, s, sbar, shat, stildeold, &
|
|
t1, taud, tautildeold, test1, test2, test3, &
|
|
thetabar, thetanew, thetatilde, thetatildeold, &
|
|
zeta, zetabar, zetaold
|
|
|
|
! Local constants
|
|
real(dp), parameter :: zero = 0.0_dp, one = 1.0_dp
|
|
character(len=*), parameter :: enter = ' Enter LSMR. '
|
|
character(len=*), parameter :: exitt = ' Exit LSMR. '
|
|
character(len=*), parameter :: msg(0:7) = &
|
|
(/ 'The exact solution is x = 0 ', &
|
|
'Ax - b is small enough, given atol, btol ', &
|
|
'The least-squares solution is good enough, given atol', &
|
|
'The estimate of cond(Abar) has exceeded conlim ', &
|
|
'Ax - b is small enough for this machine ', &
|
|
'The LS solution is good enough for this machine ', &
|
|
'Cond(Abar) seems to be too large for this machine ', &
|
|
'The iteration limit has been reached ' /)
|
|
!-------------------------------------------------------------------
|
|
|
|
|
|
! Initialize.
|
|
|
|
localVecs = min(localSize,m,n)
|
|
show = nout > 0
|
|
if (show) then
|
|
write(nout, 1000) enter,m,n,damp,atol,conlim,btol,itnlim,localVecs
|
|
end if
|
|
|
|
pfreq = 20 ! print frequency (for repeating the heading)
|
|
pcount = 0 ! print counter
|
|
damped = damp > zero !
|
|
|
|
!-------------------------------------------------------------------
|
|
! Set up the first vectors u and v for the bidiagonalization.
|
|
! These satisfy beta*u = b, alpha*v = A(transpose)*u.
|
|
!-------------------------------------------------------------------
|
|
u(1:m) = b(1:m)
|
|
v(1:n) = zero
|
|
x(1:n) = zero
|
|
|
|
alpha = zero
|
|
beta = dnrm2 (m, u, 1)
|
|
|
|
if (beta > zero) then
|
|
call dscal (m, (one/beta), u, 1)
|
|
! call Aprod2(m, n, v, u) ! v = A'*u
|
|
call aprod(2,m,n,v,u,leniw,lenrw,iw,rw)
|
|
alpha = dnrm2 (n, v, 1)
|
|
end if
|
|
|
|
if (alpha > zero) then
|
|
call dscal (n, (one/alpha), v, 1)
|
|
w = v
|
|
end if
|
|
|
|
normAr = alpha*beta
|
|
if (normAr == zero) go to 800
|
|
|
|
! Initialization for local reorthogonalization.
|
|
|
|
localOrtho = .false.
|
|
if (localVecs > 0) then
|
|
localPointer = 1
|
|
localOrtho = .true.
|
|
localVQueueFull = .false.
|
|
localV(:,1) = v
|
|
end if
|
|
|
|
! Initialize variables for 1st iteration.
|
|
|
|
itn = 0
|
|
zetabar = alpha*beta
|
|
alphabar = alpha
|
|
rho = 1
|
|
rhobar = 1
|
|
cbar = 1
|
|
sbar = 0
|
|
|
|
h = v
|
|
hbar(1:n) = zero
|
|
x(1:n) = zero
|
|
|
|
! Initialize variables for estimation of ||r||.
|
|
|
|
betadd = beta
|
|
betad = 0
|
|
rhodold = 1
|
|
tautildeold = 0
|
|
thetatilde = 0
|
|
zeta = 0
|
|
d = 0
|
|
|
|
! Initialize variables for estimation of ||A|| and cond(A).
|
|
|
|
normA2 = alpha**2
|
|
maxrbar = 0_dp
|
|
minrbar = 1e+30_dp
|
|
|
|
! Items for use in stopping rules.
|
|
normb = beta
|
|
istop = 0
|
|
ctol = zero
|
|
if (conlim > zero) ctol = one/conlim
|
|
normr = beta
|
|
|
|
! Exit if b=0 or A'b = 0.
|
|
|
|
normAr = alpha * beta
|
|
if (normAr == 0) then
|
|
if (show) then
|
|
write(nout,'(a)') msg(1)
|
|
end if
|
|
return
|
|
end if
|
|
|
|
! Heading for iteration log.
|
|
|
|
if (show) then
|
|
if (damped) then
|
|
write(nout,1300)
|
|
else
|
|
write(nout,1200)
|
|
end if
|
|
test1 = one
|
|
test2 = alpha/beta
|
|
write(nout, 1500) itn,x(1),normr,normAr,test1,test2
|
|
end if
|
|
|
|
!===================================================================
|
|
! Main iteration loop.
|
|
!===================================================================
|
|
do
|
|
itn = itn + 1
|
|
|
|
!----------------------------------------------------------------
|
|
! Perform the next step of the bidiagonalization to obtain the
|
|
! next beta, u, alpha, v. These satisfy
|
|
! beta*u = A*v - alpha*u,
|
|
! alpha*v = A'*u - beta*v.
|
|
!----------------------------------------------------------------
|
|
call dscal (m,(- alpha), u, 1)
|
|
! call Aprod1(m, n, v, u) ! u = A*v
|
|
call aprod ( 1,m,n,v,u,leniw,lenrw,iw,rw )
|
|
beta = dnrm2 (m, u, 1)
|
|
|
|
if (beta > zero) then
|
|
call dscal (m, (one/beta), u, 1)
|
|
if (localOrtho) then ! Store v into the circular buffer localV.
|
|
call localVEnqueue ! Store old v for local reorthog'n of new v.
|
|
end if
|
|
call dscal (n, (- beta), v, 1)
|
|
|
|
!call Aprod2(m, n, v, u) ! v = A'*u
|
|
call aprod ( 2,m,n,v,u,leniw,lenrw,iw,rw )
|
|
if (localOrtho) then ! Perform local reorthogonalization of V.
|
|
call localVOrtho ! Local-reorthogonalization of new v.
|
|
end if
|
|
alpha = dnrm2 (n, v, 1)
|
|
if (alpha > zero) then
|
|
call dscal (n, (one/alpha), v, 1)
|
|
end if
|
|
end if
|
|
|
|
! At this point, beta = beta_{k+1}, alpha = alpha_{k+1}.
|
|
|
|
!----------------------------------------------------------------
|
|
! Construct rotation Qhat_{k,2k+1}.
|
|
|
|
alphahat = d2norm(alphabar, damp)
|
|
chat = alphabar/alphahat
|
|
shat = damp/alphahat
|
|
|
|
! Use a plane rotation (Q_i) to turn B_i to R_i.
|
|
|
|
rhoold = rho
|
|
rho = d2norm(alphahat, beta)
|
|
c = alphahat/rho
|
|
s = beta/rho
|
|
thetanew = s*alpha
|
|
alphabar = c*alpha
|
|
|
|
! Use a plane rotation (Qbar_i) to turn R_i^T into R_i^bar.
|
|
|
|
rhobarold = rhobar
|
|
zetaold = zeta
|
|
thetabar = sbar*rho
|
|
rhotemp = cbar*rho
|
|
rhobar = d2norm(cbar*rho, thetanew)
|
|
cbar = cbar*rho/rhobar
|
|
sbar = thetanew/rhobar
|
|
zeta = cbar*zetabar
|
|
zetabar = - sbar*zetabar
|
|
|
|
! Update h, h_hat, x.
|
|
|
|
hbar = h - (thetabar*rho/(rhoold*rhobarold))*hbar
|
|
x = x + (zeta/(rho*rhobar))*hbar
|
|
h = v - (thetanew/rho)*h
|
|
|
|
! Estimate ||r||.
|
|
|
|
! Apply rotation Qhat_{k,2k+1}.
|
|
betaacute = chat* betadd
|
|
betacheck = - shat* betadd
|
|
|
|
! Apply rotation Q_{k,k+1}.
|
|
betahat = c*betaacute
|
|
betadd = - s*betaacute
|
|
|
|
! Apply rotation Qtilde_{k-1}.
|
|
! betad = betad_{k-1} here.
|
|
|
|
thetatildeold = thetatilde
|
|
rhotildeold = d2norm(rhodold, thetabar)
|
|
ctildeold = rhodold/rhotildeold
|
|
stildeold = thetabar/rhotildeold
|
|
thetatilde = stildeold* rhobar
|
|
rhodold = ctildeold* rhobar
|
|
betad = - stildeold*betad + ctildeold*betahat
|
|
|
|
! betad = betad_k here.
|
|
! rhodold = rhod_k here.
|
|
|
|
tautildeold = (zetaold - thetatildeold*tautildeold)/rhotildeold
|
|
taud = (zeta - thetatilde*tautildeold)/rhodold
|
|
d = d + betacheck**2
|
|
normr = sqrt(d + (betad - taud)**2 + betadd**2)
|
|
|
|
! Estimate ||A||.
|
|
normA2 = normA2 + beta**2
|
|
normA = sqrt(normA2)
|
|
normA2 = normA2 + alpha**2
|
|
|
|
! Estimate cond(A).
|
|
maxrbar = max(maxrbar,rhobarold)
|
|
if (itn > 1) then
|
|
minrbar = min(minrbar,rhobarold)
|
|
end if
|
|
condA = max(maxrbar,rhotemp)/min(minrbar,rhotemp)
|
|
|
|
!----------------------------------------------------------------
|
|
! Test for convergence.
|
|
!----------------------------------------------------------------
|
|
|
|
! Compute norms for convergence testing.
|
|
normAr = abs(zetabar)
|
|
normx = dnrm2(n, x, 1)
|
|
|
|
! Now use these norms to estimate certain other quantities,
|
|
! some of which will be small near a solution.
|
|
|
|
test1 = normr /normb
|
|
test2 = normAr/(normA*normr)
|
|
test3 = one/condA
|
|
t1 = test1/(one + normA*normx/normb)
|
|
rtol = btol + atol*normA*normx/normb
|
|
|
|
! The following tests guard against extremely small values of
|
|
! atol, btol or ctol. (The user may have set any or all of
|
|
! the parameters atol, btol, conlim to 0.)
|
|
! The effect is equivalent to the normAl tests using
|
|
! atol = eps, btol = eps, conlim = 1/eps.
|
|
|
|
if (itn >= itnlim) istop = 7
|
|
if (one+test3 <= one) istop = 6
|
|
if (one+test2 <= one) istop = 5
|
|
if (one+t1 <= one) istop = 4
|
|
|
|
! Allow for tolerances set by the user.
|
|
|
|
if ( test3 <= ctol) istop = 3
|
|
if ( test2 <= atol) istop = 2
|
|
if ( test1 <= rtol) istop = 1
|
|
|
|
!----------------------------------------------------------------
|
|
! See if it is time to print something.
|
|
!----------------------------------------------------------------
|
|
prnt = .false.
|
|
if (show) then
|
|
if (n <= 40) prnt = .true.
|
|
if (itn <= 10) prnt = .true.
|
|
if (itn >= itnlim-10) prnt = .true.
|
|
if (mod(itn,10) == 0) prnt = .true.
|
|
if (test3 <= 1.1*ctol) prnt = .true.
|
|
if (test2 <= 1.1*atol) prnt = .true.
|
|
if (test1 <= 1.1*rtol) prnt = .true.
|
|
if (istop /= 0) prnt = .true.
|
|
|
|
if (prnt) then ! Print a line for this iteration
|
|
if (pcount >= pfreq) then ! Print a heading first
|
|
pcount = 0
|
|
if (damped) then
|
|
write(nout,1300)
|
|
else
|
|
write(nout,1200)
|
|
end if
|
|
end if
|
|
pcount = pcount + 1
|
|
write(nout,1500) itn,x(1),normr,normAr,test1,test2,normA,condA
|
|
end if
|
|
end if
|
|
|
|
if (istop /= 0) exit
|
|
end do
|
|
!===================================================================
|
|
! End of iteration loop.
|
|
!===================================================================
|
|
|
|
! Come here if normAr = 0, or if normal exit.
|
|
|
|
800 if (damped .and. istop==2) istop=3 ! Decide if istop = 2 or 3.
|
|
if (show) then ! Print the stopping condition.
|
|
write(nout, 2000) &
|
|
exitt,istop,itn, &
|
|
exitt,normA,condA, &
|
|
exitt,normb, normx, &
|
|
exitt,normr,normAr
|
|
write(nout, 3000) &
|
|
exitt, msg(istop)
|
|
end if
|
|
|
|
return
|
|
|
|
1000 format(// a, ' Least-squares solution of Ax = b' &
|
|
/ ' The matrix A has', i7, ' rows and', i7, ' columns' &
|
|
/ ' damp =', es22.14 &
|
|
/ ' atol =', es10.2, 15x, 'conlim =', es10.2 &
|
|
/ ' btol =', es10.2, 15x, 'itnlim =', i10 &
|
|
/ ' localSize (no. of vectors for local reorthogonalization) =', i7)
|
|
1200 format(/ " Itn x(1) norm r A'r ", &
|
|
' Compatible LS norm A cond A')
|
|
1300 format(/ " Itn x(1) norm rbar Abar'rbar", &
|
|
' Compatible LS norm Abar cond Abar')
|
|
1500 format(i6, 2es17.9, 5es10.2)
|
|
2000 format(/ a, 5x, 'istop =', i2, 15x, 'itn =', i8 &
|
|
/ a, 5x, 'normA =', es12.5, 5x, 'condA =', es12.5 &
|
|
/ a, 5x, 'normb =', es12.5, 5x, 'normx =', es12.5 &
|
|
/ a, 5x, 'normr =', es12.5, 5x, 'normAr =', es12.5)
|
|
3000 format(a, 5x, a)
|
|
|
|
contains
|
|
|
|
function d2norm( a, b )
|
|
|
|
real(dp) :: d2norm
|
|
real(dp), intent(in) :: a, b
|
|
|
|
!-------------------------------------------------------------------
|
|
! d2norm returns sqrt( a**2 + b**2 )
|
|
! with precautions to avoid overflow.
|
|
!
|
|
! 21 Mar 1990: First version.
|
|
! 17 Sep 2007: Fortran 90 version.
|
|
! 24 Oct 2007: User real(dp) instead of compiler option -r8.
|
|
!-------------------------------------------------------------------
|
|
|
|
intrinsic :: abs, sqrt
|
|
real(dp) :: scale
|
|
real(dp), parameter :: zero = 0.0_dp
|
|
|
|
scale = abs(a) + abs(b)
|
|
if (scale == zero) then
|
|
d2norm = zero
|
|
else
|
|
d2norm = scale*sqrt((a/scale)**2 + (b/scale)**2)
|
|
end if
|
|
|
|
end function d2norm
|
|
|
|
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
subroutine localVEnqueue
|
|
|
|
! Store v into the circular buffer localV.
|
|
|
|
if (localPointer < localVecs) then
|
|
localPointer = localPointer + 1
|
|
else
|
|
localPointer = 1
|
|
localVQueueFull = .true.
|
|
end if
|
|
localV(:,localPointer) = v
|
|
|
|
end subroutine localVEnqueue
|
|
|
|
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
subroutine localVOrtho
|
|
|
|
! Perform local reorthogonalization of current v.
|
|
|
|
real(dp) :: d
|
|
|
|
if (localVQueueFull) then
|
|
localOrthoLimit = localVecs
|
|
else
|
|
localOrthoLimit = localPointer
|
|
end if
|
|
|
|
do localOrthoCount = 1, localOrthoLimit
|
|
d = dot_product(v,localV(:,localOrthoCount))
|
|
v = v - d * localV(:,localOrthoCount)
|
|
end do
|
|
|
|
end subroutine localVOrtho
|
|
|
|
end subroutine LSMR
|
|
|
|
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
end module LSMRmodule
|