DSurfTomo/srcsparsity/lsmrModule.f90

755 lines
28 KiB
Fortran
Raw Normal View History

2015-07-05 16:57:48 +08:00
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
! File lsmrModule.f90
!
! LSMR
!
! LSMR solves Ax = b or min ||Ax - b|| with or without damping,
! using the iterative algorithm of David Fong and Michael Saunders:
! http://www.stanford.edu/group/SOL/software/lsmr.html
!
! Maintained by
! David Fong <clfong@stanford.edu>
! Michael Saunders <saunders@stanford.edu>
! Systems Optimization Laboratory (SOL)
! Stanford University
! Stanford, CA 94305-4026, USA
!
! 17 Jul 2010: F90 LSMR derived from F90 LSQR and lsqr.m.
! 07 Sep 2010: Local reorthogonalization now works (localSize > 0).
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
module lsmrModule
use lsmrDataModule, only : dp
use lsmrblasInterface, only : dnrm2, dscal
implicit none
private
public :: LSMR
contains
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
! subroutine LSMR ( m, n, Aprod1, Aprod2, b, damp, &
! atol, btol, conlim, itnlim, localSize, nout, &
! x, istop, itn, normA, condA, normr, normAr, normx )
subroutine LSMR ( m, n, leniw, lenrw,iw,rw, b, damp, &
atol, btol, conlim, itnlim, localSize, nout, &
x, istop, itn, normA, condA, normr, normAr, normx )
integer, intent(in) :: leniw
integer, intent(in) :: lenrw
integer, intent(in) :: iw(leniw)
real, intent(in) :: rw(lenrw)
integer, intent(in) :: m, n, itnlim, localSize, nout
integer, intent(out) :: istop, itn
real(dp), intent(in) :: b(m)
real(dp), intent(out) :: x(n)
real(dp), intent(in) :: atol, btol, conlim, damp
real(dp), intent(out) :: normA, condA, normr, normAr, normx
interface
subroutine aprod(mode,m,n,x,y,leniw,lenrw,iw,rw) ! y := y + A*x
use lsmrDataModule, only : dp
integer, intent(in) :: mode,lenrw
integer, intent(in) :: leniw
real, intent(in) :: rw(lenrw)
integer, intent(in) :: iw(leniw)
integer, intent(in) :: m,n
real(dp), intent(inout) :: x(n)
real(dp), intent(inout) :: y(m)
end subroutine aprod
! subroutine Aprod1(m,n,x,y) ! y := y + A*x
! use lsmrDataModule, only : dp
! integer, intent(in) :: m,n
! real(dp), intent(in) :: x(n)
! real(dp), intent(inout) :: y(m)
! end subroutine Aprod1
!
! subroutine Aprod2(m,n,x,y) ! x := x + A'*y
! use lsmrDataModule, only : dp
! integer, intent(in) :: m,n
! real(dp), intent(inout) :: x(n)
! real(dp), intent(in) :: y(m)
! end subroutine Aprod2
end interface
!-------------------------------------------------------------------
! LSMR finds a solution x to the following problems:
!
! 1. Unsymmetric equations: Solve A*x = b
!
! 2. Linear least squares: Solve A*x = b
! in the least-squares sense
!
! 3. Damped least squares: Solve ( A )*x = ( b )
! ( damp*I ) ( 0 )
! in the least-squares sense
!
! where A is a matrix with m rows and n columns, b is an m-vector,
! and damp is a scalar. (All quantities are real.)
! The matrix A is treated as a linear operator. It is accessed
! by means of subroutine calls with the following purpose:
!
! call Aprod1(m,n,x,y) must compute y = y + A*x without altering x.
! call Aprod2(m,n,x,y) must compute x = x + A'*y without altering y.
!
! LSMR uses an iterative method to approximate the solution.
! The number of iterations required to reach a certain accuracy
! depends strongly on the scaling of the problem. Poor scaling of
! the rows or columns of A should therefore be avoided where
! possible.
!
! For example, in problem 1 the solution is unaltered by
! row-scaling. If a row of A is very small or large compared to
! the other rows of A, the corresponding row of ( A b ) should be
! scaled up or down.
!
! In problems 1 and 2, the solution x is easily recovered
! following column-scaling. Unless better information is known,
! the nonzero columns of A should be scaled so that they all have
! the same Euclidean norm (e.g., 1.0).
!
! In problem 3, there is no freedom to re-scale if damp is
! nonzero. However, the value of damp should be assigned only
! after attention has been paid to the scaling of A.
!
! The parameter damp is intended to help regularize
! ill-conditioned systems, by preventing the true solution from
! being very large. Another aid to regularization is provided by
! the parameter condA, which may be used to terminate iterations
! before the computed solution becomes very large.
!
! Note that x is not an input parameter.
! If some initial estimate x0 is known and if damp = 0,
! one could proceed as follows:
!
! 1. Compute a residual vector r0 = b - A*x0.
! 2. Use LSMR to solve the system A*dx = r0.
! 3. Add the correction dx to obtain a final solution x = x0 + dx.
!
! This requires that x0 be available before and after the call
! to LSMR. To judge the benefits, suppose LSMR takes k1 iterations
! to solve A*x = b and k2 iterations to solve A*dx = r0.
! If x0 is "good", norm(r0) will be smaller than norm(b).
! If the same stopping tolerances atol and btol are used for each
! system, k1 and k2 will be similar, but the final solution x0 + dx
! should be more accurate. The only way to reduce the total work
! is to use a larger stopping tolerance for the second system.
! If some value btol is suitable for A*x = b, the larger value
! btol*norm(b)/norm(r0) should be suitable for A*dx = r0.
!
! Preconditioning is another way to reduce the number of iterations.
! If it is possible to solve a related system M*x = b efficiently,
! where M approximates A in some helpful way
! (e.g. M - A has low rank or its elements are small relative to
! those of A), LSMR may converge more rapidly on the system
! A*M(inverse)*z = b,
! after which x can be recovered by solving M*x = z.
!
! NOTE: If A is symmetric, LSMR should not be used!
! Alternatives are the symmetric conjugate-gradient method (CG)
! and/or SYMMLQ.
! SYMMLQ is an implementation of symmetric CG that applies to
! any symmetric A and will converge more rapidly than LSMR.
! If A is positive definite, there are other implementations of
! symmetric CG that require slightly less work per iteration
! than SYMMLQ (but will take the same number of iterations).
!
!
! Notation
! --------
! The following quantities are used in discussing the subroutine
! parameters:
!
! Abar = ( A ), bbar = (b)
! (damp*I) (0)
!
! r = b - A*x, rbar = bbar - Abar*x
!
! normr = sqrt( norm(r)**2 + damp**2 * norm(x)**2 )
! = norm( rbar )
!
! eps = the relative precision of floating-point arithmetic.
! On most machines, eps is about 1.0e-7 and 1.0e-16
! in single and double precision respectively.
! We expect eps to be about 1e-16 always.
!
! LSMR minimizes the function normr with respect to x.
!
!
! Parameters
! ----------
! m input m, the number of rows in A.
!
! n input n, the number of columns in A.
!
! Aprod1, Aprod2 See above.
!
! damp input The damping parameter for problem 3 above.
! (damp should be 0.0 for problems 1 and 2.)
! If the system A*x = b is incompatible, values
! of damp in the range 0 to sqrt(eps)*norm(A)
! will probably have a negligible effect.
! Larger values of damp will tend to decrease
! the norm of x and reduce the number of
! iterations required by LSMR.
!
! The work per iteration and the storage needed
! by LSMR are the same for all values of damp.
!
! b(m) input The rhs vector b.
!
! x(n) output Returns the computed solution x.
!
! atol input An estimate of the relative error in the data
! defining the matrix A. For example, if A is
! accurate to about 6 digits, set atol = 1.0e-6.
!
! btol input An estimate of the relative error in the data
! defining the rhs b. For example, if b is
! accurate to about 6 digits, set btol = 1.0e-6.
!
! conlim input An upper limit on cond(Abar), the apparent
! condition number of the matrix Abar.
! Iterations will be terminated if a computed
! estimate of cond(Abar) exceeds conlim.
! This is intended to prevent certain small or
! zero singular values of A or Abar from
! coming into effect and causing unwanted growth
! in the computed solution.
!
! conlim and damp may be used separately or
! together to regularize ill-conditioned systems.
!
! Normally, conlim should be in the range
! 1000 to 1/eps.
! Suggested value:
! conlim = 1/(100*eps) for compatible systems,
! conlim = 1/(10*sqrt(eps)) for least squares.
!
! Note: Any or all of atol, btol, conlim may be set to zero.
! The effect will be the same as the values eps, eps, 1/eps.
!
! itnlim input An upper limit on the number of iterations.
! Suggested value:
! itnlim = n/2 for well-conditioned systems
! with clustered singular values,
! itnlim = 4*n otherwise.
!
! localSize input No. of vectors for local reorthogonalization.
! 0 No reorthogonalization is performed.
! >0 This many n-vectors "v" (the most recent ones)
! are saved for reorthogonalizing the next v.
! localSize need not be more than min(m,n).
! At most min(m,n) vectors will be allocated.
!
! nout input File number for printed output. If positive,
! a summary will be printed on file nout.
!
! istop output An integer giving the reason for termination:
!
! 0 x = 0 is the exact solution.
! No iterations were performed.
!
! 1 The equations A*x = b are probably compatible.
! Norm(A*x - b) is sufficiently small, given the
! values of atol and btol.
!
! 2 damp is zero. The system A*x = b is probably
! not compatible. A least-squares solution has
! been obtained that is sufficiently accurate,
! given the value of atol.
!
! 3 damp is nonzero. A damped least-squares
! solution has been obtained that is sufficiently
! accurate, given the value of atol.
!
! 4 An estimate of cond(Abar) has exceeded conlim.
! The system A*x = b appears to be ill-conditioned,
! or there could be an error in Aprod1 or Aprod2.
!
! 5 The iteration limit itnlim was reached.
!
! itn output The number of iterations performed.
!
! normA output An estimate of the Frobenius norm of Abar.
! This is the square-root of the sum of squares
! of the elements of Abar.
! If damp is small and the columns of A
! have all been scaled to have length 1.0,
! normA should increase to roughly sqrt(n).
! A radically different value for normA may
! indicate an error in Aprod1 or Aprod2.
!
! condA output An estimate of cond(Abar), the condition
! number of Abar. A very high value of condA
! may again indicate an error in Aprod1 or Aprod2.
!
! normr output An estimate of the final value of norm(rbar),
! the function being minimized (see notation
! above). This will be small if A*x = b has
! a solution.
!
! normAr output An estimate of the final value of
! norm( Abar'*rbar ), the norm of
! the residual for the normal equations.
! This should be small in all cases. (normAr
! will often be smaller than the true value
! computed from the output vector x.)
!
! normx output An estimate of norm(x) for the final solution x.
!
! Subroutines and functions used
! ------------------------------
! BLAS dscal, dnrm2
! USER Aprod1, Aprod2
!
! Precision
! ---------
! The number of iterations required by LSMR will decrease
! if the computation is performed in higher precision.
! At least 15-digit arithmetic should normally be used.
! "real(dp)" declarations should normally be 8-byte words.
! If this ever changes, the BLAS routines dnrm2, dscal
! (Lawson, et al., 1979) will also need to be changed.
!
!
! Reference
! ---------
! http://www.stanford.edu/group/SOL/software/lsmr.html
! ------------------------------------------------------------------
!
! LSMR development:
! 21 Sep 2007: Fortran 90 version of LSQR implemented.
! Aprod1, Aprod2 implemented via f90 interface.
! 17 Jul 2010: LSMR derived from LSQR and lsmr.m.
! 07 Sep 2010: Local reorthogonalization now working.
!-------------------------------------------------------------------
intrinsic :: abs, dot_product, min, max, sqrt
! Local arrays and variables
real(dp) :: h(n), hbar(n), u(m), v(n), w(n), localV(n,min(localSize,m,n))
logical :: damped, localOrtho, localVQueueFull, prnt, show
integer :: i, localOrthoCount, localOrthoLimit, localPointer, localVecs, &
pcount, pfreq
real(dp) :: alpha, alphabar, alphahat, &
beta, betaacute, betacheck, betad, betadd, betahat, &
normb, c, cbar, chat, ctildeold, ctol, &
d, maxrbar, minrbar, normA2, &
rho, rhobar, rhobarold, rhodold, rhoold, rhotemp, &
rhotildeold, rtol, s, sbar, shat, stildeold, &
t1, taud, tautildeold, test1, test2, test3, &
thetabar, thetanew, thetatilde, thetatildeold, &
zeta, zetabar, zetaold
! Local constants
real(dp), parameter :: zero = 0.0_dp, one = 1.0_dp
character(len=*), parameter :: enter = ' Enter LSMR. '
character(len=*), parameter :: exitt = ' Exit LSMR. '
character(len=*), parameter :: msg(0:7) = &
(/ 'The exact solution is x = 0 ', &
'Ax - b is small enough, given atol, btol ', &
'The least-squares solution is good enough, given atol', &
'The estimate of cond(Abar) has exceeded conlim ', &
'Ax - b is small enough for this machine ', &
'The LS solution is good enough for this machine ', &
'Cond(Abar) seems to be too large for this machine ', &
'The iteration limit has been reached ' /)
!-------------------------------------------------------------------
! Initialize.
localVecs = min(localSize,m,n)
show = nout > 0
if (show) then
write(nout, 1000) enter,m,n,damp,atol,conlim,btol,itnlim,localVecs
end if
pfreq = 20 ! print frequency (for repeating the heading)
pcount = 0 ! print counter
damped = damp > zero !
!-------------------------------------------------------------------
! Set up the first vectors u and v for the bidiagonalization.
! These satisfy beta*u = b, alpha*v = A(transpose)*u.
!-------------------------------------------------------------------
u(1:m) = b(1:m)
v(1:n) = zero
x(1:n) = zero
alpha = zero
beta = dnrm2 (m, u, 1)
if (beta > zero) then
call dscal (m, (one/beta), u, 1)
! call Aprod2(m, n, v, u) ! v = A'*u
call aprod(2,m,n,v,u,leniw,lenrw,iw,rw)
alpha = dnrm2 (n, v, 1)
end if
if (alpha > zero) then
call dscal (n, (one/alpha), v, 1)
w = v
end if
normAr = alpha*beta
if (normAr == zero) go to 800
! Initialization for local reorthogonalization.
localOrtho = .false.
if (localVecs > 0) then
localPointer = 1
localOrtho = .true.
localVQueueFull = .false.
localV(:,1) = v
end if
! Initialize variables for 1st iteration.
itn = 0
zetabar = alpha*beta
alphabar = alpha
rho = 1
rhobar = 1
cbar = 1
sbar = 0
h = v
hbar(1:n) = zero
x(1:n) = zero
! Initialize variables for estimation of ||r||.
betadd = beta
betad = 0
rhodold = 1
tautildeold = 0
thetatilde = 0
zeta = 0
d = 0
! Initialize variables for estimation of ||A|| and cond(A).
normA2 = alpha**2
maxrbar = 0_dp
minrbar = 1e+30_dp
! Items for use in stopping rules.
normb = beta
istop = 0
ctol = zero
if (conlim > zero) ctol = one/conlim
normr = beta
! Exit if b=0 or A'b = 0.
normAr = alpha * beta
if (normAr == 0) then
if (show) then
write(nout,'(a)') msg(1)
end if
return
end if
! Heading for iteration log.
if (show) then
if (damped) then
write(nout,1300)
else
write(nout,1200)
end if
test1 = one
test2 = alpha/beta
write(nout, 1500) itn,x(1),normr,normAr,test1,test2
end if
!===================================================================
! Main iteration loop.
!===================================================================
do
itn = itn + 1
!----------------------------------------------------------------
! Perform the next step of the bidiagonalization to obtain the
! next beta, u, alpha, v. These satisfy
! beta*u = A*v - alpha*u,
! alpha*v = A'*u - beta*v.
!----------------------------------------------------------------
call dscal (m,(- alpha), u, 1)
! call Aprod1(m, n, v, u) ! u = A*v
call aprod ( 1,m,n,v,u,leniw,lenrw,iw,rw )
beta = dnrm2 (m, u, 1)
if (beta > zero) then
call dscal (m, (one/beta), u, 1)
if (localOrtho) then ! Store v into the circular buffer localV.
call localVEnqueue ! Store old v for local reorthog'n of new v.
end if
call dscal (n, (- beta), v, 1)
!call Aprod2(m, n, v, u) ! v = A'*u
call aprod ( 2,m,n,v,u,leniw,lenrw,iw,rw )
if (localOrtho) then ! Perform local reorthogonalization of V.
call localVOrtho ! Local-reorthogonalization of new v.
end if
alpha = dnrm2 (n, v, 1)
if (alpha > zero) then
call dscal (n, (one/alpha), v, 1)
end if
end if
! At this point, beta = beta_{k+1}, alpha = alpha_{k+1}.
!----------------------------------------------------------------
! Construct rotation Qhat_{k,2k+1}.
alphahat = d2norm(alphabar, damp)
chat = alphabar/alphahat
shat = damp/alphahat
! Use a plane rotation (Q_i) to turn B_i to R_i.
rhoold = rho
rho = d2norm(alphahat, beta)
c = alphahat/rho
s = beta/rho
thetanew = s*alpha
alphabar = c*alpha
! Use a plane rotation (Qbar_i) to turn R_i^T into R_i^bar.
rhobarold = rhobar
zetaold = zeta
thetabar = sbar*rho
rhotemp = cbar*rho
rhobar = d2norm(cbar*rho, thetanew)
cbar = cbar*rho/rhobar
sbar = thetanew/rhobar
zeta = cbar*zetabar
zetabar = - sbar*zetabar
! Update h, h_hat, x.
hbar = h - (thetabar*rho/(rhoold*rhobarold))*hbar
x = x + (zeta/(rho*rhobar))*hbar
h = v - (thetanew/rho)*h
! Estimate ||r||.
! Apply rotation Qhat_{k,2k+1}.
betaacute = chat* betadd
betacheck = - shat* betadd
! Apply rotation Q_{k,k+1}.
betahat = c*betaacute
betadd = - s*betaacute
! Apply rotation Qtilde_{k-1}.
! betad = betad_{k-1} here.
thetatildeold = thetatilde
rhotildeold = d2norm(rhodold, thetabar)
ctildeold = rhodold/rhotildeold
stildeold = thetabar/rhotildeold
thetatilde = stildeold* rhobar
rhodold = ctildeold* rhobar
betad = - stildeold*betad + ctildeold*betahat
! betad = betad_k here.
! rhodold = rhod_k here.
tautildeold = (zetaold - thetatildeold*tautildeold)/rhotildeold
taud = (zeta - thetatilde*tautildeold)/rhodold
d = d + betacheck**2
normr = sqrt(d + (betad - taud)**2 + betadd**2)
! Estimate ||A||.
normA2 = normA2 + beta**2
normA = sqrt(normA2)
normA2 = normA2 + alpha**2
! Estimate cond(A).
maxrbar = max(maxrbar,rhobarold)
if (itn > 1) then
minrbar = min(minrbar,rhobarold)
end if
condA = max(maxrbar,rhotemp)/min(minrbar,rhotemp)
!----------------------------------------------------------------
! Test for convergence.
!----------------------------------------------------------------
! Compute norms for convergence testing.
normAr = abs(zetabar)
normx = dnrm2(n, x, 1)
! Now use these norms to estimate certain other quantities,
! some of which will be small near a solution.
test1 = normr /normb
test2 = normAr/(normA*normr)
test3 = one/condA
t1 = test1/(one + normA*normx/normb)
rtol = btol + atol*normA*normx/normb
! The following tests guard against extremely small values of
! atol, btol or ctol. (The user may have set any or all of
! the parameters atol, btol, conlim to 0.)
! The effect is equivalent to the normAl tests using
! atol = eps, btol = eps, conlim = 1/eps.
if (itn >= itnlim) istop = 7
if (one+test3 <= one) istop = 6
if (one+test2 <= one) istop = 5
if (one+t1 <= one) istop = 4
! Allow for tolerances set by the user.
if ( test3 <= ctol) istop = 3
if ( test2 <= atol) istop = 2
if ( test1 <= rtol) istop = 1
!----------------------------------------------------------------
! See if it is time to print something.
!----------------------------------------------------------------
prnt = .false.
if (show) then
if (n <= 40) prnt = .true.
if (itn <= 10) prnt = .true.
if (itn >= itnlim-10) prnt = .true.
if (mod(itn,10) == 0) prnt = .true.
if (test3 <= 1.1*ctol) prnt = .true.
if (test2 <= 1.1*atol) prnt = .true.
if (test1 <= 1.1*rtol) prnt = .true.
if (istop /= 0) prnt = .true.
if (prnt) then ! Print a line for this iteration
if (pcount >= pfreq) then ! Print a heading first
pcount = 0
if (damped) then
write(nout,1300)
else
write(nout,1200)
end if
end if
pcount = pcount + 1
write(nout,1500) itn,x(1),normr,normAr,test1,test2,normA,condA
end if
end if
if (istop /= 0) exit
end do
!===================================================================
! End of iteration loop.
!===================================================================
! Come here if normAr = 0, or if normal exit.
800 if (damped .and. istop==2) istop=3 ! Decide if istop = 2 or 3.
if (show) then ! Print the stopping condition.
write(nout, 2000) &
exitt,istop,itn, &
exitt,normA,condA, &
exitt,normb, normx, &
exitt,normr,normAr
write(nout, 3000) &
exitt, msg(istop)
end if
return
1000 format(// a, ' Least-squares solution of Ax = b' &
/ ' The matrix A has', i7, ' rows and', i7, ' columns' &
/ ' damp =', es22.14 &
/ ' atol =', es10.2, 15x, 'conlim =', es10.2 &
/ ' btol =', es10.2, 15x, 'itnlim =', i10 &
/ ' localSize (no. of vectors for local reorthogonalization) =', i7)
1200 format(/ " Itn x(1) norm r A'r ", &
' Compatible LS norm A cond A')
1300 format(/ " Itn x(1) norm rbar Abar'rbar", &
' Compatible LS norm Abar cond Abar')
1500 format(i6, 2es17.9, 5es10.2)
2000 format(/ a, 5x, 'istop =', i2, 15x, 'itn =', i8 &
/ a, 5x, 'normA =', es12.5, 5x, 'condA =', es12.5 &
/ a, 5x, 'normb =', es12.5, 5x, 'normx =', es12.5 &
/ a, 5x, 'normr =', es12.5, 5x, 'normAr =', es12.5)
3000 format(a, 5x, a)
contains
function d2norm( a, b )
real(dp) :: d2norm
real(dp), intent(in) :: a, b
!-------------------------------------------------------------------
! d2norm returns sqrt( a**2 + b**2 )
! with precautions to avoid overflow.
!
! 21 Mar 1990: First version.
! 17 Sep 2007: Fortran 90 version.
! 24 Oct 2007: User real(dp) instead of compiler option -r8.
!-------------------------------------------------------------------
intrinsic :: abs, sqrt
real(dp) :: scale
real(dp), parameter :: zero = 0.0_dp
scale = abs(a) + abs(b)
if (scale == zero) then
d2norm = zero
else
d2norm = scale*sqrt((a/scale)**2 + (b/scale)**2)
end if
end function d2norm
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
subroutine localVEnqueue
! Store v into the circular buffer localV.
if (localPointer < localVecs) then
localPointer = localPointer + 1
else
localPointer = 1
localVQueueFull = .true.
end if
localV(:,localPointer) = v
end subroutine localVEnqueue
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
subroutine localVOrtho
! Perform local reorthogonalization of current v.
real(dp) :: d
if (localVQueueFull) then
localOrthoLimit = localVecs
else
localOrthoLimit = localPointer
end if
do localOrthoCount = 1, localOrthoLimit
d = dot_product(v,localV(:,localOrthoCount))
v = v - d * localV(:,localOrthoCount)
end do
end subroutine localVOrtho
end subroutine LSMR
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
end module LSMRmodule