initial upload
This commit is contained in:
10
.gitignore
vendored
10
.gitignore
vendored
@@ -1,4 +1,3 @@
|
|||||||
# ---> C++
|
|
||||||
# Prerequisites
|
# Prerequisites
|
||||||
*.d
|
*.d
|
||||||
|
|
||||||
@@ -32,3 +31,12 @@
|
|||||||
*.out
|
*.out
|
||||||
*.app
|
*.app
|
||||||
|
|
||||||
|
# folder preferences and build folder
|
||||||
|
.DS_Store
|
||||||
|
build/
|
||||||
|
pack/
|
||||||
|
.vscode/
|
||||||
|
out/
|
||||||
|
*.sh
|
||||||
|
case_*
|
||||||
|
config.h
|
||||||
30
CMakeLists.txt
Normal file
30
CMakeLists.txt
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.15.2)
|
||||||
|
# 设置工程名称
|
||||||
|
project(LibLCG VERSION 3.1 LANGUAGES CXX)
|
||||||
|
# 添加编写包配置文件所需的辅助函数
|
||||||
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
|
message(STATUS "Platform: " ${CMAKE_HOST_SYSTEM_NAME})
|
||||||
|
# CMake默认的安装路径 Windows下为C:/Program\ Files/${Project_Name} Linux/Unix下为/usr/local
|
||||||
|
message(STATUS "Install prefix: " ${CMAKE_INSTALL_PREFIX})
|
||||||
|
# CMake默认的编译类型为空
|
||||||
|
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||||
|
|
||||||
|
# 添加编译选项
|
||||||
|
option(LibLCG_OPENMP "Use OpenMP" ON) # Set OFF to disable the functionality
|
||||||
|
option(LibLCG_EIGEN "Use Eigen" ON)
|
||||||
|
option(LibLCG_STD_COMPLEX "Use STD complex" ON)
|
||||||
|
option(LibLCG_CUDA "Use CUDA" ON)
|
||||||
|
message(STATUS "Use OpenMP: " ${LibLCG_OPENMP})
|
||||||
|
message(STATUS "Use Eigen: " ${LibLCG_EIGEN})
|
||||||
|
message(STATUS "Use STD complex: " ${LibLCG_STD_COMPLEX})
|
||||||
|
message(STATUS "Use CUDA: " ${LibLCG_CUDA})
|
||||||
|
|
||||||
|
# 加入一个头文件配置,让cmake对源码进行操作
|
||||||
|
configure_file(
|
||||||
|
"${PROJECT_SOURCE_DIR}/config.h.in"
|
||||||
|
"${PROJECT_SOURCE_DIR}/src/lib/config.h"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 添加源文件地址
|
||||||
|
add_subdirectory(src/)
|
||||||
524
LICENSE
Normal file
524
LICENSE
Normal file
@@ -0,0 +1,524 @@
|
|||||||
|
LibLCG License
|
||||||
|
--------------
|
||||||
|
|
||||||
|
LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
General Public License (LGPL) as published by the Free Software
|
||||||
|
Foundation, either version 2 of the License, or (at your option) any
|
||||||
|
later version. A copy of the GNU Lesser General Public License is
|
||||||
|
reproduced below.
|
||||||
|
|
||||||
|
If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
a one time fee. Please send corresponding requests to:
|
||||||
|
yizhang-geo@zju.edu.cn. Please do not forget to include some
|
||||||
|
description of your company and the realm of its activities. Also add
|
||||||
|
information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
=====================================================================
|
||||||
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
|
Version 2.1, February 1999
|
||||||
|
|
||||||
|
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
[This is the first released version of the Lesser GPL. It also counts
|
||||||
|
as the successor of the GNU Library Public License, version 2, hence
|
||||||
|
the version number 2.1.]
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
Licenses are intended to guarantee your freedom to share and change
|
||||||
|
free software--to make sure the software is free for all its users.
|
||||||
|
|
||||||
|
This license, the Lesser General Public License, applies to some
|
||||||
|
specially designated software packages--typically libraries--of the
|
||||||
|
Free Software Foundation and other authors who decide to use it. You
|
||||||
|
can use it too, but we suggest you first think carefully about whether
|
||||||
|
this license or the ordinary General Public License is the better
|
||||||
|
strategy to use in any particular case, based on the explanations below.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom of use,
|
||||||
|
not price. Our General Public Licenses are designed to make sure that
|
||||||
|
you have the freedom to distribute copies of free software (and charge
|
||||||
|
for this service if you wish); that you receive source code or can get
|
||||||
|
it if you want it; that you can change the software and use pieces of
|
||||||
|
it in new free programs; and that you are informed that you can do
|
||||||
|
these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
distributors to deny you these rights or to ask you to surrender these
|
||||||
|
rights. These restrictions translate to certain responsibilities for
|
||||||
|
you if you distribute copies of the library or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of the library, whether gratis
|
||||||
|
or for a fee, you must give the recipients all the rights that we gave
|
||||||
|
you. You must make sure that they, too, receive or can get the source
|
||||||
|
code. If you link other code with the library, you must provide
|
||||||
|
complete object files to the recipients, so that they can relink them
|
||||||
|
with the library after making changes to the library and recompiling
|
||||||
|
it. And you must show them these terms so they know their rights.
|
||||||
|
|
||||||
|
We protect your rights with a two-step method: (1) we copyright the
|
||||||
|
library, and (2) we offer you this license, which gives you legal
|
||||||
|
permission to copy, distribute and/or modify the library.
|
||||||
|
|
||||||
|
To protect each distributor, we want to make it very clear that
|
||||||
|
there is no warranty for the free library. Also, if the library is
|
||||||
|
modified by someone else and passed on, the recipients should know
|
||||||
|
that what they have is not the original version, so that the original
|
||||||
|
author's reputation will not be affected by problems that might be
|
||||||
|
introduced by others.
|
||||||
|
|
||||||
|
Finally, software patents pose a constant threat to the existence of
|
||||||
|
any free program. We wish to make sure that a company cannot
|
||||||
|
effectively restrict the users of a free program by obtaining a
|
||||||
|
restrictive license from a patent holder. Therefore, we insist that
|
||||||
|
any patent license obtained for a version of the library must be
|
||||||
|
consistent with the full freedom of use specified in this license.
|
||||||
|
|
||||||
|
Most GNU software, including some libraries, is covered by the
|
||||||
|
ordinary GNU General Public License. This license, the GNU Lesser
|
||||||
|
General Public License, applies to certain designated libraries, and
|
||||||
|
is quite different from the ordinary General Public License. We use
|
||||||
|
this license for certain libraries in order to permit linking those
|
||||||
|
libraries into non-free programs.
|
||||||
|
|
||||||
|
When a program is linked with a library, whether statically or using
|
||||||
|
a shared library, the combination of the two is legally speaking a
|
||||||
|
combined work, a derivative of the original library. The ordinary
|
||||||
|
General Public License therefore permits such linking only if the
|
||||||
|
entire combination fits its criteria of freedom. The Lesser General
|
||||||
|
Public License permits more lax criteria for linking other code with
|
||||||
|
the library.
|
||||||
|
|
||||||
|
We call this license the "Lesser" General Public License because it
|
||||||
|
does Less to protect the user's freedom than the ordinary General
|
||||||
|
Public License. It also provides other free software developers Less
|
||||||
|
of an advantage over competing non-free programs. These disadvantages
|
||||||
|
are the reason we use the ordinary General Public License for many
|
||||||
|
libraries. However, the Lesser license provides advantages in certain
|
||||||
|
special circumstances.
|
||||||
|
|
||||||
|
For example, on rare occasions, there may be a special need to
|
||||||
|
encourage the widest possible use of a certain library, so that it becomes
|
||||||
|
a de-facto standard. To achieve this, non-free programs must be
|
||||||
|
allowed to use the library. A more frequent case is that a free
|
||||||
|
library does the same job as widely used non-free libraries. In this
|
||||||
|
case, there is little to gain by limiting the free library to free
|
||||||
|
software only, so we use the Lesser General Public License.
|
||||||
|
|
||||||
|
In other cases, permission to use a particular library in non-free
|
||||||
|
programs enables a greater number of people to use a large body of
|
||||||
|
free software. For example, permission to use the GNU C Library in
|
||||||
|
non-free programs enables many more people to use the whole GNU
|
||||||
|
operating system, as well as its variant, the GNU/Linux operating
|
||||||
|
system.
|
||||||
|
|
||||||
|
Although the Lesser General Public License is Less protective of the
|
||||||
|
users' freedom, it does ensure that the user of a program that is
|
||||||
|
linked with the Library has the freedom and the wherewithal to run
|
||||||
|
that program using a modified version of the Library.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow. Pay close attention to the difference between a
|
||||||
|
"work based on the library" and a "work that uses the library". The
|
||||||
|
former contains code derived from the library, whereas the latter must
|
||||||
|
be combined with the library in order to run.
|
||||||
|
|
||||||
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License Agreement applies to any software library or other
|
||||||
|
program which contains a notice placed by the copyright holder or
|
||||||
|
other authorized party saying it may be distributed under the terms of
|
||||||
|
this Lesser General Public License (also called "this License").
|
||||||
|
Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
A "library" means a collection of software functions and/or data
|
||||||
|
prepared so as to be conveniently linked with application programs
|
||||||
|
(which use some of those functions and data) to form executables.
|
||||||
|
|
||||||
|
The "Library", below, refers to any such software library or work
|
||||||
|
which has been distributed under these terms. A "work based on the
|
||||||
|
Library" means either the Library or any derivative work under
|
||||||
|
copyright law: that is to say, a work containing the Library or a
|
||||||
|
portion of it, either verbatim or with modifications and/or translated
|
||||||
|
straightforwardly into another language. (Hereinafter, translation is
|
||||||
|
included without limitation in the term "modification".)
|
||||||
|
|
||||||
|
"Source code" for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For a library, complete source code means
|
||||||
|
all the source code for all modules it contains, plus any associated
|
||||||
|
interface definition files, plus the scripts used to control compilation
|
||||||
|
and installation of the library.
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running a program using the Library is not restricted, and output from
|
||||||
|
such a program is covered only if its contents constitute a work based
|
||||||
|
on the Library (independent of the use of the Library in a tool for
|
||||||
|
writing it). Whether that is true depends on what the Library does
|
||||||
|
and what the program that uses the Library does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Library's
|
||||||
|
complete source code as you receive it, in any medium, provided that
|
||||||
|
you conspicuously and appropriately publish on each copy an
|
||||||
|
appropriate copyright notice and disclaimer of warranty; keep intact
|
||||||
|
all the notices that refer to this License and to the absence of any
|
||||||
|
warranty; and distribute a copy of this License along with the
|
||||||
|
Library.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy,
|
||||||
|
and you may at your option offer warranty protection in exchange for a
|
||||||
|
fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Library or any portion
|
||||||
|
of it, thus forming a work based on the Library, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) The modified work must itself be a software library.
|
||||||
|
|
||||||
|
b) You must cause the files modified to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
c) You must cause the whole of the work to be licensed at no
|
||||||
|
charge to all third parties under the terms of this License.
|
||||||
|
|
||||||
|
d) If a facility in the modified Library refers to a function or a
|
||||||
|
table of data to be supplied by an application program that uses
|
||||||
|
the facility, other than as an argument passed when the facility
|
||||||
|
is invoked, then you must make a good faith effort to ensure that,
|
||||||
|
in the event an application does not supply such function or
|
||||||
|
table, the facility still operates, and performs whatever part of
|
||||||
|
its purpose remains meaningful.
|
||||||
|
|
||||||
|
(For example, a function in a library to compute square roots has
|
||||||
|
a purpose that is entirely well-defined independent of the
|
||||||
|
application. Therefore, Subsection 2d requires that any
|
||||||
|
application-supplied function or table used by this function must
|
||||||
|
be optional: if the application does not supply it, the square
|
||||||
|
root function must still compute square roots.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Library,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Library, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote
|
||||||
|
it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Library.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Library
|
||||||
|
with the Library (or with a work based on the Library) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may opt to apply the terms of the ordinary GNU General Public
|
||||||
|
License instead of this License to a given copy of the Library. To do
|
||||||
|
this, you must alter all the notices that refer to this License, so
|
||||||
|
that they refer to the ordinary GNU General Public License, version 2,
|
||||||
|
instead of to this License. (If a newer version than version 2 of the
|
||||||
|
ordinary GNU General Public License has appeared, then you can specify
|
||||||
|
that version instead if you wish.) Do not make any other change in
|
||||||
|
these notices.
|
||||||
|
|
||||||
|
Once this change is made in a given copy, it is irreversible for
|
||||||
|
that copy, so the ordinary GNU General Public License applies to all
|
||||||
|
subsequent copies and derivative works made from that copy.
|
||||||
|
|
||||||
|
This option is useful when you wish to copy part of the code of
|
||||||
|
the Library into a program that is not a library.
|
||||||
|
|
||||||
|
4. You may copy and distribute the Library (or a portion or
|
||||||
|
derivative of it, under Section 2) in object code or executable form
|
||||||
|
under the terms of Sections 1 and 2 above provided that you accompany
|
||||||
|
it with the complete corresponding machine-readable source code, which
|
||||||
|
must be distributed under the terms of Sections 1 and 2 above on a
|
||||||
|
medium customarily used for software interchange.
|
||||||
|
|
||||||
|
If distribution of object code is made by offering access to copy
|
||||||
|
from a designated place, then offering equivalent access to copy the
|
||||||
|
source code from the same place satisfies the requirement to
|
||||||
|
distribute the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
5. A program that contains no derivative of any portion of the
|
||||||
|
Library, but is designed to work with the Library by being compiled or
|
||||||
|
linked with it, is called a "work that uses the Library". Such a
|
||||||
|
work, in isolation, is not a derivative work of the Library, and
|
||||||
|
therefore falls outside the scope of this License.
|
||||||
|
|
||||||
|
However, linking a "work that uses the Library" with the Library
|
||||||
|
creates an executable that is a derivative of the Library (because it
|
||||||
|
contains portions of the Library), rather than a "work that uses the
|
||||||
|
library". The executable is therefore covered by this License.
|
||||||
|
Section 6 states terms for distribution of such executables.
|
||||||
|
|
||||||
|
When a "work that uses the Library" uses material from a header file
|
||||||
|
that is part of the Library, the object code for the work may be a
|
||||||
|
derivative work of the Library even though the source code is not.
|
||||||
|
Whether this is true is especially significant if the work can be
|
||||||
|
linked without the Library, or if the work is itself a library. The
|
||||||
|
threshold for this to be true is not precisely defined by law.
|
||||||
|
|
||||||
|
If such an object file uses only numerical parameters, data
|
||||||
|
structure layouts and accessors, and small macros and small inline
|
||||||
|
functions (ten lines or less in length), then the use of the object
|
||||||
|
file is unrestricted, regardless of whether it is legally a derivative
|
||||||
|
work. (Executables containing this object code plus portions of the
|
||||||
|
Library will still fall under Section 6.)
|
||||||
|
|
||||||
|
Otherwise, if the work is a derivative of the Library, you may
|
||||||
|
distribute the object code for the work under the terms of Section 6.
|
||||||
|
Any executables containing that work also fall under Section 6,
|
||||||
|
whether or not they are linked directly with the Library itself.
|
||||||
|
|
||||||
|
6. As an exception to the Sections above, you may also combine or
|
||||||
|
link a "work that uses the Library" with the Library to produce a
|
||||||
|
work containing portions of the Library, and distribute that work
|
||||||
|
under terms of your choice, provided that the terms permit
|
||||||
|
modification of the work for the customer's own use and reverse
|
||||||
|
engineering for debugging such modifications.
|
||||||
|
|
||||||
|
You must give prominent notice with each copy of the work that the
|
||||||
|
Library is used in it and that the Library and its use are covered by
|
||||||
|
this License. You must supply a copy of this License. If the work
|
||||||
|
during execution displays copyright notices, you must include the
|
||||||
|
copyright notice for the Library among them, as well as a reference
|
||||||
|
directing the user to the copy of this License. Also, you must do one
|
||||||
|
of these things:
|
||||||
|
|
||||||
|
a) Accompany the work with the complete corresponding
|
||||||
|
machine-readable source code for the Library including whatever
|
||||||
|
changes were used in the work (which must be distributed under
|
||||||
|
Sections 1 and 2 above); and, if the work is an executable linked
|
||||||
|
with the Library, with the complete machine-readable "work that
|
||||||
|
uses the Library", as object code and/or source code, so that the
|
||||||
|
user can modify the Library and then relink to produce a modified
|
||||||
|
executable containing the modified Library. (It is understood
|
||||||
|
that the user who changes the contents of definitions files in the
|
||||||
|
Library will not necessarily be able to recompile the application
|
||||||
|
to use the modified definitions.)
|
||||||
|
|
||||||
|
b) Use a suitable shared library mechanism for linking with the
|
||||||
|
Library. A suitable mechanism is one that (1) uses at run time a
|
||||||
|
copy of the library already present on the user's computer system,
|
||||||
|
rather than copying library functions into the executable, and (2)
|
||||||
|
will operate properly with a modified version of the library, if
|
||||||
|
the user installs one, as long as the modified version is
|
||||||
|
interface-compatible with the version that the work was made with.
|
||||||
|
|
||||||
|
c) Accompany the work with a written offer, valid for at
|
||||||
|
least three years, to give the same user the materials
|
||||||
|
specified in Subsection 6a, above, for a charge no more
|
||||||
|
than the cost of performing this distribution.
|
||||||
|
|
||||||
|
d) If distribution of the work is made by offering access to copy
|
||||||
|
from a designated place, offer equivalent access to copy the above
|
||||||
|
specified materials from the same place.
|
||||||
|
|
||||||
|
e) Verify that the user has already received a copy of these
|
||||||
|
materials or that you have already sent this user a copy.
|
||||||
|
|
||||||
|
For an executable, the required form of the "work that uses the
|
||||||
|
Library" must include any data and utility programs needed for
|
||||||
|
reproducing the executable from it. However, as a special exception,
|
||||||
|
the materials to be distributed need not include anything that is
|
||||||
|
normally distributed (in either source or binary form) with the major
|
||||||
|
components (compiler, kernel, and so on) of the operating system on
|
||||||
|
which the executable runs, unless that component itself accompanies
|
||||||
|
the executable.
|
||||||
|
|
||||||
|
It may happen that this requirement contradicts the license
|
||||||
|
restrictions of other proprietary libraries that do not normally
|
||||||
|
accompany the operating system. Such a contradiction means you cannot
|
||||||
|
use both them and the Library together in an executable that you
|
||||||
|
distribute.
|
||||||
|
|
||||||
|
7. You may place library facilities that are a work based on the
|
||||||
|
Library side-by-side in a single library together with other library
|
||||||
|
facilities not covered by this License, and distribute such a combined
|
||||||
|
library, provided that the separate distribution of the work based on
|
||||||
|
the Library and of the other library facilities is otherwise
|
||||||
|
permitted, and provided that you do these two things:
|
||||||
|
|
||||||
|
a) Accompany the combined library with a copy of the same work
|
||||||
|
based on the Library, uncombined with any other library
|
||||||
|
facilities. This must be distributed under the terms of the
|
||||||
|
Sections above.
|
||||||
|
|
||||||
|
b) Give prominent notice with the combined library of the fact
|
||||||
|
that part of it is a work based on the Library, and explaining
|
||||||
|
where to find the accompanying uncombined form of the same work.
|
||||||
|
|
||||||
|
8. You may not copy, modify, sublicense, link with, or distribute
|
||||||
|
the Library except as expressly provided under this License. Any
|
||||||
|
attempt otherwise to copy, modify, sublicense, link with, or
|
||||||
|
distribute the Library is void, and will automatically terminate your
|
||||||
|
rights under this License. However, parties who have received copies,
|
||||||
|
or rights, from you under this License will not have their licenses
|
||||||
|
terminated so long as such parties remain in full compliance.
|
||||||
|
|
||||||
|
9. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Library or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Library (or any work based on the
|
||||||
|
Library), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Library or works based on it.
|
||||||
|
|
||||||
|
10. Each time you redistribute the Library (or any work based on the
|
||||||
|
Library), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute, link with or modify the Library
|
||||||
|
subject to these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties with
|
||||||
|
this License.
|
||||||
|
|
||||||
|
11. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Library at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Library by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Library.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under any
|
||||||
|
particular circumstance, the balance of the section is intended to apply,
|
||||||
|
and the section as a whole is intended to apply in other circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
12. If the distribution and/or use of the Library is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Library under this License may add
|
||||||
|
an explicit geographical distribution limitation excluding those countries,
|
||||||
|
so that distribution is permitted only in or among countries not thus
|
||||||
|
excluded. In such case, this License incorporates the limitation as if
|
||||||
|
written in the body of this License.
|
||||||
|
|
||||||
|
13. The Free Software Foundation may publish revised and/or new
|
||||||
|
versions of the Lesser General Public License from time to time.
|
||||||
|
Such new versions will be similar in spirit to the present version,
|
||||||
|
but may differ in detail to address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Library
|
||||||
|
specifies a version number of this License which applies to it and
|
||||||
|
"any later version", you have the option of following the terms and
|
||||||
|
conditions either of that version or of any later version published by
|
||||||
|
the Free Software Foundation. If the Library does not specify a
|
||||||
|
license version number, you may choose any version ever published by
|
||||||
|
the Free Software Foundation.
|
||||||
|
|
||||||
|
14. If you wish to incorporate parts of the Library into other free
|
||||||
|
programs whose distribution conditions are incompatible with these,
|
||||||
|
write to the author to ask for permission. For software which is
|
||||||
|
copyrighted by the Free Software Foundation, write to the Free
|
||||||
|
Software Foundation; we sometimes make exceptions for this. Our
|
||||||
|
decision will be guided by the two goals of preserving the free status
|
||||||
|
of all derivatives of our free software and of promoting the sharing
|
||||||
|
and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
|
||||||
|
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
|
||||||
|
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
|
||||||
|
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
|
||||||
|
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
||||||
|
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
|
||||||
|
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
||||||
|
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
||||||
|
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
|
||||||
|
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
||||||
|
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||||
|
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
||||||
|
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
||||||
|
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
|
||||||
|
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Libraries
|
||||||
|
|
||||||
|
If you develop a new library, and you want it to be of the greatest
|
||||||
|
possible use to the public, we recommend making it free software that
|
||||||
|
everyone can redistribute and change. You can do so by permitting
|
||||||
|
redistribution under these terms (or, alternatively, under the terms of the
|
||||||
|
ordinary General Public License).
|
||||||
|
|
||||||
|
To apply these terms, attach the following notices to the library. It is
|
||||||
|
safest to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least the
|
||||||
|
"copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the library's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
|
||||||
|
USA
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the library, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||||
|
library `Frob' (a library for tweaking knobs) written by James Random
|
||||||
|
Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1990
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
That's all there is to it!
|
||||||
20
LibLCGConfig.cmake.in
Normal file
20
LibLCGConfig.cmake.in
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
|
set(@PROJECT_NAME@_Version "@PROJECT_VERSION@")
|
||||||
|
set_and_check(@PROJECT_NAME@_INSTALL_PREFIX "${PACKAGE_PREFIX_DIR}")
|
||||||
|
set_and_check(@PROJECT_NAME@_INC_DIR "${PACKAGE_PREFIX_DIR}/include")
|
||||||
|
set_and_check(@PROJECT_NAME@_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")
|
||||||
|
set_and_check(@PROJECT_NAME@_LIB_DIR "${PACKAGE_PREFIX_DIR}/lib")
|
||||||
|
set_and_check(@PROJECT_NAME@_LIBRARY_DIR "${PACKAGE_PREFIX_DIR}/lib")
|
||||||
|
|
||||||
|
set(@PROJECT_NAME@_LIB lcg)
|
||||||
|
set(@PROJECT_NAME@_LIBRARY lcg)
|
||||||
|
set(@PROJECT_NAME@_FOUND 1)
|
||||||
|
|
||||||
|
set(@PROJECT_NAME@_OPENMP @LibLCG_OPENMP@)
|
||||||
|
set(@PROJECT_NAME@_EIGEN @LibLCG_EIGEN@)
|
||||||
|
set(@PROJECT_NAME@_STD_COMPLEX @LibLCG_STD_COMPLEX@)
|
||||||
|
set(@PROJECT_NAME@_CUDA @LibLCG_CUDA@)
|
||||||
|
|
||||||
|
# include target information
|
||||||
|
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
||||||
225
README.md
225
README.md
@@ -1,2 +1,225 @@
|
|||||||
# liblcg
|
# C++ Library of the Linear Conjugate Gradient Methods (LibLCG) 说明文档
|
||||||
|
|
||||||
|
张壹(yizhang-geo@zju.edu.cn)
|
||||||
|
|
||||||
|
_浙江大学地球科学学院·地球物理研究所_
|
||||||
|
|
||||||
|
**此说明仅覆盖算法库的简单介绍及使用,更详细的内容请查看代码注释。如果还有问题,请发邮件联系我。同时也欢迎有兴趣的同学加入开发团队!**
|
||||||
|
|
||||||
|
## 简介
|
||||||
|
|
||||||
|
liblcg 是一个高效的、可扩展的 C++ 线性共轭梯度算法库,在原生数据结构接口的基础上,同时提供基于Eigen3和CUDA的算法接口,可以方便地实现基于CPU或GPU并行的加速计算,其中基于Eigen3的算法包含了稠密与稀疏矩阵的实现,而基于CUDA的算法主要为稀疏矩阵的实现。liblcg 包含多种实数与复数域的共轭梯度算法与其他一些迭代求解方法。目前已有的方法包括共轭梯度法、预优的共轭梯度算法、共轭梯度平方算法、双稳共轭梯度算法、BB步共轭梯度投影法与SPG共轭梯度投影法;复数域的双共轭梯度法、共轭梯度平方法、预优的共轭梯度法与TFQMR法。共轭梯度法广泛应用于无约束与不等式约束的线性最优化问题,拥有优良的收敛与计算效率。
|
||||||
|
|
||||||
|
共轭梯度算法可用于求解如下形式的线性方程组:
|
||||||
|
|
||||||
|
```
|
||||||
|
Ax = B
|
||||||
|
```
|
||||||
|
|
||||||
|
其中,A 是一个 N 阶的方阵、x 为 N\*1 大小的待求解的模型向量,B 为 N\*1 大小的需拟合的目标向量。需要注意的是,不同种类的共轭梯度算法对A可能有不同的要求,比如必须是正定的,或者对称的。不同算法的具体要求可以查阅其他参考文献或者查看代码中的注释。
|
||||||
|
|
||||||
|
## 安装
|
||||||
|
|
||||||
|
算法库使用 CMake 工具进行构建,可在不同操作平台生成相应的Makefile或工程文件。
|
||||||
|
|
||||||
|
### 编译选项
|
||||||
|
|
||||||
|
算法库目前可用的编译选项有:
|
||||||
|
* LibLCG_OPENMP:是否使用OpenMP进行加速,需要安装OpenMP。默认为ON。
|
||||||
|
* LibLCG_EIGEN:是否编译基于Eigen的算法与接口,需要安装Eigen。默认为ON。
|
||||||
|
* LibLCG_STD_COMPLEX:是否使用std::complex\<double\>作为复数的默认类型。默认为ON。
|
||||||
|
* LibLCG_CUDA:是否编译基于CUDA的算法与接口,需要安装CUDA。默认为ON。
|
||||||
|
|
||||||
|
用户可以使用cmake命令中的-D选项对编译选项进行设置,比如关闭LibLCG_EIGEN:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cmake -DLibLCG_EIGEN=OFF
|
||||||
|
```
|
||||||
|
|
||||||
|
### Linux 与 MacOS
|
||||||
|
|
||||||
|
liblcg的默认安装路径为 /usr/local。头文件与动态库分别安装于 include 与 lib 文件夹。具体的编译与安装步骤如下:
|
||||||
|
|
||||||
|
1. 下载安装CMake软件;
|
||||||
|
2. 下载安装GCC编译器(常见系统已内置);
|
||||||
|
3. 在源文件路径内使用如下命令进行编译与安装:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir build && cd build && cmake .. && make install
|
||||||
|
```
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
|
||||||
|
#### MinGW 和 GCC
|
||||||
|
|
||||||
|
Windows系统不包含GNU编译环境,用户需自行下载并配置。方法如下:
|
||||||
|
|
||||||
|
1. 下载MinGW安装文件,并选择gcc、pthreads与make相关软件包安装;
|
||||||
|
2. 下载安装CMake软件;
|
||||||
|
3. 添加CMake与MinGW可执行文件路径至Windows环境变量;
|
||||||
|
4. 在源文件路径内使用如下命令进行编译与安装:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir build && cd build && cmake .. -G "MinGW Makefiles" && make install
|
||||||
|
```
|
||||||
|
|
||||||
|
默认的安装路径为C:/Program\\ Files。头文件与动态库分别安装于 include 与 lib 文件夹。
|
||||||
|
|
||||||
|
**注意:用户需要手动添加头文件与动态库地址到计算机的环境变量中。**
|
||||||
|
|
||||||
|
#### Visual Studio
|
||||||
|
|
||||||
|
用户可使用CMake工具构建VS工程文件并编译使用动态库。方法如下:
|
||||||
|
|
||||||
|
1. 下载安装 Visual Studio 软件;
|
||||||
|
2. 下载安装CMake软件;
|
||||||
|
3. 在源文件路径内使用如下命令生成VS工程文件:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir build && cd build && cmake .. -G "Visual Studio 16 2019"
|
||||||
|
```
|
||||||
|
|
||||||
|
_注:如需生成其他版本的VS工程文件,请使用-G命令查看相应的识别码。_
|
||||||
|
|
||||||
|
4. 使用 Visual Studio 打开.sln工程文件并编译动态库。
|
||||||
|
|
||||||
|
## 使用与编译
|
||||||
|
|
||||||
|
用户使用库函数时需在源文件中引入相应的头文件,如:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
#include "lcg/lcg.h"
|
||||||
|
```
|
||||||
|
|
||||||
|
编译可执行文件时需链接lcg动态库。以g++为例:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
g++ example.cpp -llcg -o example_out
|
||||||
|
```
|
||||||
|
|
||||||
|
## 快速开始
|
||||||
|
|
||||||
|
要使用liblcg求解线性方程组Ax=B,用户需要定义Ax乘积的计算函数(回调函数),该函数的功能为计算不同的x所对应的乘积Ax。以实数类型的共轭梯度算法为例,其回调函数的接口定义为:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_size);
|
||||||
|
```
|
||||||
|
|
||||||
|
其中,`x`为输入的向量,`prod_Ax`为返回的乘积向量,`n_size`为这两个向量的长度。注意此处参数列表中并不包含矩阵A,这意味着A必须为全局或者类变量。这样设计的主要原因是在某些复杂最优化问题的编程中,计算并存储A并不实际或者划算,此时一般采用的策略是存储相关变量且仅计算Ax的乘积,所以矩阵A并不总是存在。
|
||||||
|
|
||||||
|
用户在定义Ax计算函数后即可调用求解函数 lcg_solver() 对线性方程组进行求解。以无约束的求解函数为例,其声明如下:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||||
|
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
|
||||||
|
```
|
||||||
|
|
||||||
|
其中:
|
||||||
|
1. `lcg_axfunc_ptr Afp` 为正演计算的回调函数;
|
||||||
|
2. `lcg_progress_ptr Pfp` 监控迭代过程的回调函数(非必须,无需监控时使用 nullptr 参数即可);
|
||||||
|
3. `lcg_float* m` 初始解向量,迭代取得的解也保存于此数组;
|
||||||
|
4. `const lcg_float* B` Ax = B 中的 B 项;
|
||||||
|
5. `const int n_size` 解向量的大小;
|
||||||
|
6. `const lcg_para* param` 迭代使用的参数,此参数为 nullptr 即使用默认参数;
|
||||||
|
7. `void* instance` 传入的实例对象, 此函数在类中使用即为类的 this 指针, 在普通函数中使用时即为 nullptr;
|
||||||
|
8. `lcg_solver_enum solver_id` 求解函数使用的求解方法(默认为 `LCG_CGS`),具体的方法代号可查看对应的头文件;
|
||||||
|
|
||||||
|
### 一个简单的例子
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
#include "cmath"
|
||||||
|
#include "iostream"
|
||||||
|
#include "lcg/lcg.h"
|
||||||
|
|
||||||
|
#define M 100
|
||||||
|
#define N 80
|
||||||
|
|
||||||
|
// 返回两个数组元素之间的最大差值
|
||||||
|
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
|
||||||
|
{
|
||||||
|
lcg_float max = -1;
|
||||||
|
for (int i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
max = lcg_max(sqrt((a[i] - b[i])*(a[i] - b[i])), max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
lcg_float **kernel;
|
||||||
|
// 中间结果数组
|
||||||
|
lcg_float *tmp_arr;
|
||||||
|
|
||||||
|
// 计算核矩阵乘向量的乘积 lcg_solver的回调函数
|
||||||
|
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
|
||||||
|
{
|
||||||
|
// 注意核矩阵实际为 kernel^T * kernel,大小为N*N
|
||||||
|
lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal); // tmp_tar = kernel * x
|
||||||
|
lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose); // prod_Ax = kernel^T * tmp_tar
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 定义监控函数 lcg_solver的回调函数
|
||||||
|
// 这个函数显示当前的迭代次数与收敛值
|
||||||
|
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 开辟数组空间
|
||||||
|
kernel = lcg_malloc(M, N);
|
||||||
|
tmp_arr = lcg_malloc(M);
|
||||||
|
|
||||||
|
// 为核矩阵赋初值
|
||||||
|
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
|
||||||
|
|
||||||
|
// 生成一组理论解
|
||||||
|
lcg_float *fm = lcg_malloc(N);
|
||||||
|
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
lcg_float *B = lcg_malloc(N);
|
||||||
|
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
|
||||||
|
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
|
||||||
|
|
||||||
|
// 设置共轭梯度参数
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-5;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
lcg_float *m = lcg_malloc(N);
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
|
||||||
|
// 使用标准共轭梯度方法(LCG_CG)求解线性方程组
|
||||||
|
// 将回调函数传递给solver
|
||||||
|
// 由于回调函数为全局函数,因此instance变量的值为NULL
|
||||||
|
int ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
// 销毁数组
|
||||||
|
lcg_free(kernel, M);
|
||||||
|
lcg_free(tmp_arr);
|
||||||
|
lcg_free(fm);
|
||||||
|
lcg_free(B);
|
||||||
|
lcg_free(m);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**完整的例子储存在[sample](src/sample)文件夹内。**
|
||||||
|
|
||||||
|
## 类模版
|
||||||
|
|
||||||
|
liblcg为不同类型的共轭梯度算法定义了通用的求解类模版,包含了类中函数的指针代理及通用的监控函数实现,用户可直接继承并使用。需要注意的是这些类模版中定义了纯虚的函数接口,用户需要全部实现。其中没用到的定义成空函数就行了。以实数的求解类模版为例,需要实现的接口函数包括:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
|
||||||
|
void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
|
||||||
|
```
|
||||||
|
|
||||||
|
其中`AxProduct`是Ax的计算函数,`MxProduct`是预优过程的计算函数,即M^-1x。
|
||||||
4
config.h.in
Normal file
4
config.h.in
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#cmakedefine LibLCG_OPENMP
|
||||||
|
#cmakedefine LibLCG_EIGEN
|
||||||
|
#cmakedefine LibLCG_STD_COMPLEX
|
||||||
|
#cmakedefine LibLCG_CUDA
|
||||||
11
data/README
Normal file
11
data/README
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
case_*_A: Full symmetric matrix
|
||||||
|
|
||||||
|
[ N (int) | nz (int) ]
|
||||||
|
[ RowIdx (int) | ColIdx (int) | Val (double) ] * nz
|
||||||
|
[ b (double) * N ]
|
||||||
|
[ d (double) * N ] (complex matrix only)
|
||||||
|
|
||||||
|
case_*_B: Vector
|
||||||
|
|
||||||
|
[ N (int) ]
|
||||||
|
[ x (double) * N]
|
||||||
BIN
data/cases.7z
Normal file
BIN
data/cases.7z
Normal file
Binary file not shown.
105
data/get_cdat.cpp
Normal file
105
data/get_cdat.cpp
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#include "../src/lib/lcg_complex.h"
#include "ctime"
#include "fstream"
#include "iostream"
#include "random"
#include "vector"

#include "Eigen/Sparse"
|
||||||
|
|
||||||
|
#define random(x) (rand()%x)
|
||||||
|
|
||||||
|
typedef Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor> spmat_cd; // 注意Eigen默认的稀疏矩阵排序为列优先
|
||||||
|
typedef Eigen::Triplet<lcg_complex> triplt_cd;
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
int N = 1000000;
|
||||||
|
int nz = 1013000;
|
||||||
|
|
||||||
|
lcg_complex *v = new lcg_complex[nz];
|
||||||
|
lcg_complex *x = new lcg_complex[N];
|
||||||
|
lcg_complex *b = new lcg_complex[N];
|
||||||
|
|
||||||
|
lcg_complex one(1.0, 1.0), none(-1.0, -1.0), zero(0.0, 0.0);
|
||||||
|
|
||||||
|
clcg_vecrnd(v, 1.0*one, 10.0*one, nz);
|
||||||
|
clcg_vecrnd(x, 1.0*one, 2.0*one, N);
|
||||||
|
clcg_vecset(b, zero, N);
|
||||||
|
|
||||||
|
std::vector<triplt_cd> val_triplt;
|
||||||
|
val_triplt.reserve(2*(nz-N) + N);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
val_triplt.push_back(triplt_cd(i, i, v[i]));
|
||||||
|
b[i] += v[i]*x[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
srand((int)time(0));
|
||||||
|
|
||||||
|
int r, c;
|
||||||
|
size_t j = N;
|
||||||
|
while (j < nz)
|
||||||
|
{
|
||||||
|
r = random(N);
|
||||||
|
c = random(N);
|
||||||
|
if (r != c)
|
||||||
|
{
|
||||||
|
val_triplt.push_back(triplt_cd(r, c, v[j]));
|
||||||
|
val_triplt.push_back(triplt_cd(c, r, v[j]));
|
||||||
|
|
||||||
|
b[r] += v[j]*x[c];
|
||||||
|
b[c] += v[j]*x[r];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spmat_cd A;
|
||||||
|
A.resize(N, N);
|
||||||
|
A.setZero();
|
||||||
|
|
||||||
|
A.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||||
|
|
||||||
|
std::ofstream Aout, Bout;
|
||||||
|
Aout.open("case_1M_cA", std::ios::binary);
|
||||||
|
Bout.open("case_1M_cB", std::ios::binary);
|
||||||
|
|
||||||
|
lcg_complex tmp;
|
||||||
|
|
||||||
|
nz = A.nonZeros();
|
||||||
|
|
||||||
|
Aout.write((char*)&N, sizeof(int));
|
||||||
|
Aout.write((char*)&nz, sizeof(int));
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
for (Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor>::InnerIterator it(A, i); it; ++it) // 列循环
|
||||||
|
{
|
||||||
|
r = it.row();
|
||||||
|
c = it.col();
|
||||||
|
tmp = it.value();
|
||||||
|
|
||||||
|
Aout.write((char*)&r, sizeof(int));
|
||||||
|
Aout.write((char*)&c, sizeof(int));
|
||||||
|
Aout.write((char*)&tmp, sizeof(lcg_complex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
tmp = b[i];
|
||||||
|
Aout.write((char*)&tmp, sizeof(lcg_complex));
|
||||||
|
}
|
||||||
|
Aout.close();
|
||||||
|
|
||||||
|
Bout.write((char*)&N, sizeof(int));
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
tmp = x[i];
|
||||||
|
Bout.write((char*)&tmp, sizeof(lcg_complex));
|
||||||
|
}
|
||||||
|
Bout.close();
|
||||||
|
|
||||||
|
delete[] v;
|
||||||
|
delete[] x;
|
||||||
|
delete[] b;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
577
doxy/doxygen.sty
Normal file
577
doxy/doxygen.sty
Normal file
@@ -0,0 +1,577 @@
|
|||||||
|
% stylesheet for doxygen 1.8.17
|
||||||
|
\NeedsTeXFormat{LaTeX2e}
|
||||||
|
\ProvidesPackage{doxygen}
|
||||||
|
|
||||||
|
% Packages used by this style file
|
||||||
|
\RequirePackage{alltt}
|
||||||
|
%%\RequirePackage{array} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||||
|
\RequirePackage{calc}
|
||||||
|
\RequirePackage{float}
|
||||||
|
%%\RequirePackage{ifthen} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||||
|
\RequirePackage{verbatim}
|
||||||
|
\RequirePackage[table]{xcolor}
|
||||||
|
\RequirePackage{longtable_doxygen}
|
||||||
|
\RequirePackage{tabu_doxygen}
|
||||||
|
\RequirePackage{fancyvrb}
|
||||||
|
\RequirePackage{tabularx}
|
||||||
|
\RequirePackage{multirow}
|
||||||
|
\RequirePackage{hanging}
|
||||||
|
\RequirePackage{ifpdf}
|
||||||
|
\RequirePackage{adjustbox}
|
||||||
|
\RequirePackage{amssymb}
|
||||||
|
\RequirePackage{stackengine}
|
||||||
|
\RequirePackage[normalem]{ulem} % for strikeout, but don't modify emphasis
|
||||||
|
|
||||||
|
%---------- Internal commands used in this style file ----------------
|
||||||
|
|
||||||
|
\newcommand{\ensurespace}[1]{%
|
||||||
|
\begingroup%
|
||||||
|
\setlength{\dimen@}{#1}%
|
||||||
|
\vskip\z@\@plus\dimen@%
|
||||||
|
\penalty -100\vskip\z@\@plus -\dimen@%
|
||||||
|
\vskip\dimen@%
|
||||||
|
\penalty 9999%
|
||||||
|
\vskip -\dimen@%
|
||||||
|
\vskip\z@skip% hide the previous |\vskip| from |\addvspace|
|
||||||
|
\endgroup%
|
||||||
|
}
|
||||||
|
|
||||||
|
\newcommand{\DoxyHorRuler}[1]{%
|
||||||
|
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
|
||||||
|
\ifthenelse{#1=0}%
|
||||||
|
{%
|
||||||
|
\hrule%
|
||||||
|
}%
|
||||||
|
{%
|
||||||
|
\hrulefilll%
|
||||||
|
}%
|
||||||
|
}
|
||||||
|
\newcommand{\DoxyLabelFont}{}
|
||||||
|
\newcommand{\entrylabel}[1]{%
|
||||||
|
{%
|
||||||
|
\parbox[b]{\labelwidth-4pt}{%
|
||||||
|
\makebox[0pt][l]{\DoxyLabelFont#1}%
|
||||||
|
\vspace{1.5\baselineskip}%
|
||||||
|
}%
|
||||||
|
}%
|
||||||
|
}
|
||||||
|
|
||||||
|
\newenvironment{DoxyDesc}[1]{%
|
||||||
|
\ensurespace{4\baselineskip}%
|
||||||
|
\begin{list}{}{%
|
||||||
|
\settowidth{\labelwidth}{20pt}%
|
||||||
|
%\setlength{\parsep}{0pt}%
|
||||||
|
\setlength{\itemsep}{0pt}%
|
||||||
|
\setlength{\leftmargin}{\labelwidth+\labelsep}%
|
||||||
|
\renewcommand{\makelabel}{\entrylabel}%
|
||||||
|
}%
|
||||||
|
\item[#1]%
|
||||||
|
}{%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
\newsavebox{\xrefbox}
|
||||||
|
\newlength{\xreflength}
|
||||||
|
\newcommand{\xreflabel}[1]{%
|
||||||
|
\sbox{\xrefbox}{#1}%
|
||||||
|
\setlength{\xreflength}{\wd\xrefbox}%
|
||||||
|
\ifthenelse{\xreflength>\labelwidth}{%
|
||||||
|
\begin{minipage}{\textwidth}%
|
||||||
|
\setlength{\parindent}{0pt}%
|
||||||
|
\hangindent=15pt\bfseries #1\vspace{1.2\itemsep}%
|
||||||
|
\end{minipage}%
|
||||||
|
}{%
|
||||||
|
\parbox[b]{\labelwidth}{\makebox[0pt][l]{\textbf{#1}}}%
|
||||||
|
}%
|
||||||
|
}
|
||||||
|
|
||||||
|
%---------- Commands used by doxygen LaTeX output generator ----------
|
||||||
|
|
||||||
|
% Used by <pre> ... </pre>
|
||||||
|
\newenvironment{DoxyPre}{%
|
||||||
|
\small%
|
||||||
|
\begin{alltt}%
|
||||||
|
}{%
|
||||||
|
\end{alltt}%
|
||||||
|
\normalsize%
|
||||||
|
}
|
||||||
|
% Necessary for redefining not defined characters, i.e. "Replacement Character" in tex output.
|
||||||
|
\newlength{\CodeWidthChar}
|
||||||
|
\newlength{\CodeHeightChar}
|
||||||
|
\settowidth{\CodeWidthChar}{?}
|
||||||
|
\settoheight{\CodeHeightChar}{?}
|
||||||
|
% Necessary for hanging indent
|
||||||
|
\newlength{\DoxyCodeWidth}
|
||||||
|
|
||||||
|
\newcommand\DoxyCodeLine[1]{\hangpara{\DoxyCodeWidth}{1}{#1}\par}
|
||||||
|
|
||||||
|
\newcommand\NiceSpace{%
|
||||||
|
\discretionary{}{\kern\fontdimen2\font}{\kern\fontdimen2\font}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @code ... @endcode
|
||||||
|
\newenvironment{DoxyCode}[1]{%
|
||||||
|
\par%
|
||||||
|
\scriptsize%
|
||||||
|
\normalfont\ttfamily%
|
||||||
|
\rightskip0pt plus 1fil%
|
||||||
|
\settowidth{\DoxyCodeWidth}{000000}%
|
||||||
|
\settowidth{\CodeWidthChar}{?}%
|
||||||
|
\settoheight{\CodeHeightChar}{?}%
|
||||||
|
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
|
||||||
|
\ifthenelse{\equal{#1}{0}}
|
||||||
|
{
|
||||||
|
{\lccode`~32 \lowercase{\global\let~}\NiceSpace}\obeyspaces%
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{\lccode`~32 \lowercase{\global\let~}}\obeyspaces%
|
||||||
|
}
|
||||||
|
|
||||||
|
}{%
|
||||||
|
\normalfont%
|
||||||
|
\normalsize%
|
||||||
|
\settowidth{\CodeWidthChar}{?}%
|
||||||
|
\settoheight{\CodeHeightChar}{?}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Redefining not defined characters, i.e. "Replacement Character" in tex output.
|
||||||
|
\def\ucr{\adjustbox{width=\CodeWidthChar,height=\CodeHeightChar}{\stackinset{c}{}{c}{-.2pt}{%
|
||||||
|
\textcolor{white}{\sffamily\bfseries\small ?}}{%
|
||||||
|
\rotatebox{45}{$\blacksquare$}}}}
|
||||||
|
|
||||||
|
% Used by @example, @include, @includelineno and @dontinclude
|
||||||
|
\newenvironment{DoxyCodeInclude}[1]{%
|
||||||
|
\DoxyCode{#1}%
|
||||||
|
}{%
|
||||||
|
\endDoxyCode%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @verbatim ... @endverbatim
|
||||||
|
\newenvironment{DoxyVerb}{%
|
||||||
|
\footnotesize%
|
||||||
|
\verbatim%
|
||||||
|
}{%
|
||||||
|
\endverbatim%
|
||||||
|
\normalsize%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @verbinclude
|
||||||
|
\newenvironment{DoxyVerbInclude}{%
|
||||||
|
\DoxyVerb%
|
||||||
|
}{%
|
||||||
|
\endDoxyVerb%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by numbered lists (using '-#' or <ol> ... </ol>)
|
||||||
|
\newenvironment{DoxyEnumerate}{%
|
||||||
|
\enumerate%
|
||||||
|
}{%
|
||||||
|
\endenumerate%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by bullet lists (using '-', @li, @arg, or <ul> ... </ul>)
|
||||||
|
\newenvironment{DoxyItemize}{%
|
||||||
|
\itemize%
|
||||||
|
}{%
|
||||||
|
\enditemize%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by description lists (using <dl> ... </dl>)
|
||||||
|
\newenvironment{DoxyDescription}{%
|
||||||
|
\description%
|
||||||
|
}{%
|
||||||
|
\enddescription%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
|
||||||
|
% (only if caption is specified)
|
||||||
|
\newenvironment{DoxyImage}{%
|
||||||
|
\begin{figure}[H]%
|
||||||
|
\begin{center}%
|
||||||
|
}{%
|
||||||
|
\end{center}%
|
||||||
|
\end{figure}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
|
||||||
|
% (only if no caption is specified)
|
||||||
|
\newenvironment{DoxyImageNoCaption}{%
|
||||||
|
\begin{center}%
|
||||||
|
}{%
|
||||||
|
\end{center}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @image
|
||||||
|
% (only if inline is specified)
|
||||||
|
\newenvironment{DoxyInlineImage}{%
|
||||||
|
}{%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @attention
|
||||||
|
\newenvironment{DoxyAttention}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @author and @authors
|
||||||
|
\newenvironment{DoxyAuthor}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @date
|
||||||
|
\newenvironment{DoxyDate}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @invariant
|
||||||
|
\newenvironment{DoxyInvariant}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @note
|
||||||
|
\newenvironment{DoxyNote}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @post
|
||||||
|
\newenvironment{DoxyPostcond}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @pre
|
||||||
|
\newenvironment{DoxyPrecond}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @copyright
|
||||||
|
\newenvironment{DoxyCopyright}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @remark
|
||||||
|
\newenvironment{DoxyRemark}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @return and @returns
|
||||||
|
\newenvironment{DoxyReturn}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @since
|
||||||
|
\newenvironment{DoxySince}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @see
|
||||||
|
\newenvironment{DoxySeeAlso}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @version
|
||||||
|
\newenvironment{DoxyVersion}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @warning
|
||||||
|
\newenvironment{DoxyWarning}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @internal
|
||||||
|
\newenvironment{DoxyInternal}[1]{%
|
||||||
|
\paragraph*{#1}%
|
||||||
|
}{%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @par and @paragraph
|
||||||
|
\newenvironment{DoxyParagraph}[1]{%
|
||||||
|
\begin{DoxyDesc}{#1}%
|
||||||
|
}{%
|
||||||
|
\end{DoxyDesc}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by parameter lists
|
||||||
|
\newenvironment{DoxyParams}[2][]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\ifthenelse{\equal{#1}{}}%
|
||||||
|
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|}}% name + description
|
||||||
|
{\ifthenelse{\equal{#1}{1}}%
|
||||||
|
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + name + desc
|
||||||
|
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + type + name + desc
|
||||||
|
}
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for fields of simple structs
|
||||||
|
\newenvironment{DoxyFields}[1]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|X[-1,l]|}%
|
||||||
|
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for fields simple class style enums
|
||||||
|
\newenvironment{DoxyEnumFields}[1]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for parameters within a detailed function description
|
||||||
|
\newenvironment{DoxyParamCaption}{%
|
||||||
|
\renewcommand{\item}[2][]{\\ \hspace*{2.0cm} ##1 {\em ##2}}%
|
||||||
|
}{%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by return value lists
|
||||||
|
\newenvironment{DoxyRetVals}[1]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by exception lists
|
||||||
|
\newenvironment{DoxyExceptions}[1]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by template parameter lists
|
||||||
|
\newenvironment{DoxyTemplParams}[1]{%
|
||||||
|
\tabulinesep=1mm%
|
||||||
|
\par%
|
||||||
|
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endfirsthead%
|
||||||
|
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||||
|
\hline%
|
||||||
|
\endhead%
|
||||||
|
}{%
|
||||||
|
\end{longtabu*}%
|
||||||
|
\vspace{6pt}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for member lists
|
||||||
|
\newenvironment{DoxyCompactItemize}{%
|
||||||
|
\begin{itemize}%
|
||||||
|
\setlength{\itemsep}{-3pt}%
|
||||||
|
\setlength{\parsep}{0pt}%
|
||||||
|
\setlength{\topsep}{0pt}%
|
||||||
|
\setlength{\partopsep}{0pt}%
|
||||||
|
}{%
|
||||||
|
\end{itemize}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for member descriptions
|
||||||
|
\newenvironment{DoxyCompactList}{%
|
||||||
|
\begin{list}{}{%
|
||||||
|
\setlength{\leftmargin}{0.5cm}%
|
||||||
|
\setlength{\itemsep}{0pt}%
|
||||||
|
\setlength{\parsep}{0pt}%
|
||||||
|
\setlength{\topsep}{0pt}%
|
||||||
|
\renewcommand{\makelabel}{\hfill}%
|
||||||
|
}%
|
||||||
|
}{%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used for reference lists (@bug, @deprecated, @todo, etc.)
|
||||||
|
\newenvironment{DoxyRefList}{%
|
||||||
|
\begin{list}{}{%
|
||||||
|
\setlength{\labelwidth}{10pt}%
|
||||||
|
\setlength{\leftmargin}{\labelwidth}%
|
||||||
|
\addtolength{\leftmargin}{\labelsep}%
|
||||||
|
\renewcommand{\makelabel}{\xreflabel}%
|
||||||
|
}%
|
||||||
|
}{%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @bug, @deprecated, @todo, etc.
|
||||||
|
\newenvironment{DoxyRefDesc}[1]{%
|
||||||
|
\begin{list}{}{%
|
||||||
|
\renewcommand\makelabel[1]{\textbf{##1}}%
|
||||||
|
\settowidth\labelwidth{\makelabel{#1}}%
|
||||||
|
\setlength\leftmargin{\labelwidth+\labelsep}%
|
||||||
|
}%
|
||||||
|
}{%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by parameter lists and simple sections
|
||||||
|
\newenvironment{Desc}
|
||||||
|
{\begin{list}{}{%
|
||||||
|
\settowidth{\labelwidth}{20pt}%
|
||||||
|
\setlength{\parsep}{0pt}%
|
||||||
|
\setlength{\itemsep}{0pt}%
|
||||||
|
\setlength{\leftmargin}{\labelwidth+\labelsep}%
|
||||||
|
\renewcommand{\makelabel}{\entrylabel}%
|
||||||
|
}
|
||||||
|
}{%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by tables
|
||||||
|
\newcommand{\PBS}[1]{\let\temp=\\#1\let\\=\temp}%
|
||||||
|
\newenvironment{TabularC}[1]%
|
||||||
|
{\tabulinesep=1mm
|
||||||
|
\begin{longtabu*}spread 0pt [c]{*#1{|X[-1]}|}}%
|
||||||
|
{\end{longtabu*}\par}%
|
||||||
|
|
||||||
|
\newenvironment{TabularNC}[1]%
|
||||||
|
{\begin{tabu}spread 0pt [l]{*#1{|X[-1]}|}}%
|
||||||
|
{\end{tabu}\par}%
|
||||||
|
|
||||||
|
% Used for member group headers
|
||||||
|
\newenvironment{Indent}{%
|
||||||
|
\begin{list}{}{%
|
||||||
|
\setlength{\leftmargin}{0.5cm}%
|
||||||
|
}%
|
||||||
|
\item[]\ignorespaces%
|
||||||
|
}{%
|
||||||
|
\unskip%
|
||||||
|
\end{list}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used when hyperlinks are turned off
|
||||||
|
\newcommand{\doxyref}[3]{%
|
||||||
|
\textbf{#1} (\textnormal{#2}\,\pageref{#3})%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used to link to a table when hyperlinks are turned on
|
||||||
|
\newcommand{\doxytablelink}[2]{%
|
||||||
|
\ref{#1}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used to link to a table when hyperlinks are turned off
|
||||||
|
\newcommand{\doxytableref}[3]{%
|
||||||
|
\ref{#3}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Used by @addindex
|
||||||
|
\newcommand{\lcurly}{\{}
|
||||||
|
\newcommand{\rcurly}{\}}
|
||||||
|
|
||||||
|
% Colors used for syntax highlighting
|
||||||
|
\definecolor{comment}{rgb}{0.5,0.0,0.0}
|
||||||
|
\definecolor{keyword}{rgb}{0.0,0.5,0.0}
|
||||||
|
\definecolor{keywordtype}{rgb}{0.38,0.25,0.125}
|
||||||
|
\definecolor{keywordflow}{rgb}{0.88,0.5,0.0}
|
||||||
|
\definecolor{preprocessor}{rgb}{0.5,0.38,0.125}
|
||||||
|
\definecolor{stringliteral}{rgb}{0.0,0.125,0.25}
|
||||||
|
\definecolor{charliteral}{rgb}{0.0,0.5,0.5}
|
||||||
|
\definecolor{vhdldigit}{rgb}{1.0,0.0,1.0}
|
||||||
|
\definecolor{vhdlkeyword}{rgb}{0.43,0.0,0.43}
|
||||||
|
\definecolor{vhdllogic}{rgb}{1.0,0.0,0.0}
|
||||||
|
\definecolor{vhdlchar}{rgb}{0.0,0.0,0.0}
|
||||||
|
|
||||||
|
% Color used for table heading
|
||||||
|
\newcommand{\tableheadbgcolor}{lightgray}%
|
||||||
|
|
||||||
|
% Version of hypertarget with correct landing location
|
||||||
|
\newcommand{\Hypertarget}[1]{\Hy@raisedlink{\hypertarget{#1}{}}}
|
||||||
|
|
||||||
|
% possibility to have sections etc. be within the margins
|
||||||
|
% unfortunately had to copy part of book.cls and add \raggedright
|
||||||
|
\makeatletter
|
||||||
|
\newcommand\doxysection{\@startsection {section}{1}{\z@}%
|
||||||
|
{-3.5ex \@plus -1ex \@minus -.2ex}%
|
||||||
|
{2.3ex \@plus.2ex}%
|
||||||
|
{\raggedright\normalfont\Large\bfseries}}
|
||||||
|
\newcommand\doxysubsection{\@startsection{subsection}{2}{\z@}%
|
||||||
|
{-3.25ex\@plus -1ex \@minus -.2ex}%
|
||||||
|
{1.5ex \@plus .2ex}%
|
||||||
|
{\raggedright\normalfont\large\bfseries}}
|
||||||
|
\newcommand\doxysubsubsection{\@startsection{subsubsection}{3}{\z@}%
|
||||||
|
{-3.25ex\@plus -1ex \@minus -.2ex}%
|
||||||
|
{1.5ex \@plus .2ex}%
|
||||||
|
{\raggedright\normalfont\normalsize\bfseries}}
|
||||||
|
\newcommand\doxyparagraph{\@startsection{paragraph}{4}{\z@}%
|
||||||
|
{3.25ex \@plus1ex \@minus.2ex}%
|
||||||
|
{-1em}%
|
||||||
|
{\raggedright\normalfont\normalsize\bfseries}}
|
||||||
|
\newcommand\doxysubparagraph{\@startsection{subparagraph}{5}{\parindent}%
|
||||||
|
{3.25ex \@plus1ex \@minus .2ex}%
|
||||||
|
{-1em}%
|
||||||
|
{\raggedright\normalfont\normalsize\bfseries}}
|
||||||
|
\makeatother
|
||||||
|
% Define caption that is also suitable in a table
|
||||||
|
\makeatletter
|
||||||
|
\def\doxyfigcaption{%
|
||||||
|
\refstepcounter{figure}%
|
||||||
|
\@dblarg{\@caption{figure}}}
|
||||||
|
\makeatother
|
||||||
12
doxy/footer.tex
Normal file
12
doxy/footer.tex
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
% Latex footer for doxygen 1.8.17
|
||||||
|
%--- End generated contents ---
|
||||||
|
|
||||||
|
% Index
|
||||||
|
\backmatter
|
||||||
|
\newpage
|
||||||
|
\phantomsection
|
||||||
|
\clearemptydoublepage
|
||||||
|
\addcontentsline{toc}{chapter}{\indexname}
|
||||||
|
\printindex
|
||||||
|
|
||||||
|
\end{document}
|
||||||
174
doxy/header.tex
Normal file
174
doxy/header.tex
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
% Latex header for doxygen 1.8.17
|
||||||
|
\let\mypdfximage\pdfximage\def\pdfximage{\immediate\mypdfximage}\documentclass[twoside]{book}
|
||||||
|
|
||||||
|
%% moved from doxygen.sty due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||||
|
\usepackage{ifthen}
|
||||||
|
\ifx\requestedLaTeXdate\undefined
|
||||||
|
\usepackage{array}
|
||||||
|
\else
|
||||||
|
\usepackage{array}[=2016-10-06]
|
||||||
|
\fi
|
||||||
|
%%
|
||||||
|
% Packages required by doxygen
|
||||||
|
\usepackage{fixltx2e}
|
||||||
|
\usepackage{calc}
|
||||||
|
\usepackage{doxygen}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage{makeidx}
|
||||||
|
\usepackage{multicol}
|
||||||
|
\usepackage{multirow}
|
||||||
|
\PassOptionsToPackage{warn}{textcomp}
|
||||||
|
\usepackage{textcomp}
|
||||||
|
\usepackage[nointegrals]{wasysym}
|
||||||
|
\usepackage[table]{xcolor}
|
||||||
|
\usepackage{ifpdf,ifxetex}
|
||||||
|
|
||||||
|
% Font selection
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage[scaled=.90]{helvet}
|
||||||
|
\usepackage{courier}
|
||||||
|
\usepackage{amssymb}
|
||||||
|
\usepackage{sectsty}
|
||||||
|
\renewcommand{\familydefault}{\sfdefault}
|
||||||
|
\allsectionsfont{%
|
||||||
|
\fontseries{bc}\selectfont%
|
||||||
|
\color{darkgray}%
|
||||||
|
}
|
||||||
|
\renewcommand{\DoxyLabelFont}{%
|
||||||
|
\fontseries{bc}\selectfont%
|
||||||
|
\color{darkgray}%
|
||||||
|
}
|
||||||
|
\newcommand{\+}{\discretionary{\mbox{\scriptsize$\hookleftarrow$}}{}{}}
|
||||||
|
|
||||||
|
% Arguments of doxygenemoji:
|
||||||
|
% 1) ':<text>:' form of the emoji, already "LaTeX"-escaped
|
||||||
|
% 2) file with the name of the emoji without the .png extension
|
||||||
|
% in case image exist use this otherwise use the ':<text>:' form
|
||||||
|
\newcommand{\doxygenemoji}[2]{%
|
||||||
|
\IfFileExists{./#2.png}{\raisebox{-0.1em}{\includegraphics[height=0.9em]{./#2.png}}}{#1}%
|
||||||
|
}
|
||||||
|
% Page & text layout
|
||||||
|
\usepackage{geometry}
|
||||||
|
\geometry{%
|
||||||
|
a4paper,%
|
||||||
|
top=2.5cm,%
|
||||||
|
bottom=2.5cm,%
|
||||||
|
left=2.5cm,%
|
||||||
|
right=2.5cm%
|
||||||
|
}
|
||||||
|
\tolerance=750
|
||||||
|
\hfuzz=15pt
|
||||||
|
\hbadness=750
|
||||||
|
\setlength{\emergencystretch}{15pt}
|
||||||
|
\setlength{\parindent}{0cm}
|
||||||
|
\newcommand{\doxynormalparskip}{\setlength{\parskip}{3ex plus 2ex minus 2ex}}
|
||||||
|
\newcommand{\doxytocparskip}{\setlength{\parskip}{1ex plus 0ex minus 0ex}}
|
||||||
|
\doxynormalparskip
|
||||||
|
\makeatletter
|
||||||
|
\renewcommand{\paragraph}{%
|
||||||
|
\@startsection{paragraph}{4}{0ex}{-1.0ex}{1.0ex}{%
|
||||||
|
\normalfont\normalsize\bfseries\SS@parafont%
|
||||||
|
}%
|
||||||
|
}
|
||||||
|
\renewcommand{\subparagraph}{%
|
||||||
|
\@startsection{subparagraph}{5}{0ex}{-1.0ex}{1.0ex}{%
|
||||||
|
\normalfont\normalsize\bfseries\SS@subparafont%
|
||||||
|
}%
|
||||||
|
}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
\makeatletter
|
||||||
|
\newcommand\hrulefilll{\leavevmode\leaders\hrule\hskip 0pt plus 1filll\kern\z@}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
% Headers & footers
|
||||||
|
\usepackage{fancyhdr}
|
||||||
|
\pagestyle{fancyplain}
|
||||||
|
\fancyhead[LE]{\fancyplain{}{\bfseries\thepage}}
|
||||||
|
\fancyhead[CE]{\fancyplain{}{}}
|
||||||
|
\fancyhead[RE]{\fancyplain{}{\bfseries\leftmark}}
|
||||||
|
\fancyhead[LO]{\fancyplain{}{\bfseries\rightmark}}
|
||||||
|
\fancyhead[CO]{\fancyplain{}{}}
|
||||||
|
\fancyhead[RO]{\fancyplain{}{\bfseries\thepage}}
|
||||||
|
\fancyfoot[LE]{\fancyplain{}{}}
|
||||||
|
\fancyfoot[CE]{\fancyplain{}{}}
|
||||||
|
\fancyfoot[RE]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
|
||||||
|
\fancyfoot[LO]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
|
||||||
|
\fancyfoot[CO]{\fancyplain{}{}}
|
||||||
|
\fancyfoot[RO]{\fancyplain{}{}}
|
||||||
|
\renewcommand{\footrulewidth}{0.4pt}
|
||||||
|
\renewcommand{\chaptermark}[1]{%
|
||||||
|
\markboth{#1}{}%
|
||||||
|
}
|
||||||
|
\renewcommand{\sectionmark}[1]{%
|
||||||
|
\markright{\thesection\ #1}%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Indices & bibliography
|
||||||
|
\usepackage{natbib}
|
||||||
|
\usepackage[titles]{tocloft}
|
||||||
|
\setcounter{tocdepth}{3}
|
||||||
|
\setcounter{secnumdepth}{5}
|
||||||
|
\makeindex
|
||||||
|
|
||||||
|
\usepackage{newunicodechar}
|
||||||
|
\newunicodechar{⁻}{${}^{-}$}% Superscript minus
|
||||||
|
\newunicodechar{²}{${}^{2}$}% Superscript two
|
||||||
|
\newunicodechar{³}{${}^{3}$}% Superscript three
|
||||||
|
|
||||||
|
% Hyperlinks (required, but should be loaded last)
|
||||||
|
\ifpdf
|
||||||
|
\usepackage[pdftex,pagebackref=true]{hyperref}
|
||||||
|
\else
|
||||||
|
\ifxetex
|
||||||
|
\usepackage[pagebackref=true]{hyperref}
|
||||||
|
\else
|
||||||
|
\usepackage[ps2pdf,pagebackref=true]{hyperref}
|
||||||
|
\fi
|
||||||
|
\fi
|
||||||
|
|
||||||
|
\hypersetup{%
|
||||||
|
colorlinks=true,%
|
||||||
|
linkcolor=blue,%
|
||||||
|
citecolor=blue,%
|
||||||
|
unicode%
|
||||||
|
}
|
||||||
|
|
||||||
|
% Custom commands
|
||||||
|
\newcommand{\clearemptydoublepage}{%
|
||||||
|
\newpage{\pagestyle{empty}\cleardoublepage}%
|
||||||
|
}
|
||||||
|
|
||||||
|
\usepackage{caption}
|
||||||
|
\captionsetup{labelsep=space,justification=centering,font={bf},singlelinecheck=off,skip=4pt,position=top}
|
||||||
|
|
||||||
|
\usepackage{etoc}
|
||||||
|
\etocsettocstyle{\doxytocparskip}{\doxynormalparskip}
|
||||||
|
\renewcommand{\numberline}[1]{#1~}
|
||||||
|
%===== C O N T E N T S =====
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
% Titlepage & ToC
|
||||||
|
\hypersetup{pageanchor=false,
|
||||||
|
bookmarksnumbered=true,
|
||||||
|
pdfencoding=unicode
|
||||||
|
}
|
||||||
|
\pagenumbering{alph}
|
||||||
|
\begin{titlepage}
|
||||||
|
\vspace*{7cm}
|
||||||
|
\begin{center}%
|
||||||
|
{\Large C++ Library of the Linear Conjugate Gradient Methods (LibLCG)}\\
|
||||||
|
\vspace*{1cm}
|
||||||
|
{\large Yi Zhang}\\
|
||||||
|
\end{center}
|
||||||
|
\end{titlepage}
|
||||||
|
\clearemptydoublepage
|
||||||
|
\pagenumbering{roman}
|
||||||
|
\tableofcontents
|
||||||
|
\clearemptydoublepage
|
||||||
|
\pagenumbering{arabic}
|
||||||
|
\hypersetup{pageanchor=true}
|
||||||
|
|
||||||
|
%--- Begin generated contents ---
|
||||||
BIN
refman.pdf
Normal file
BIN
refman.pdf
Normal file
Binary file not shown.
181
src/CMakeLists.txt
Normal file
181
src/CMakeLists.txt
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# Collect every source file found in the lib directory
aux_source_directory(lib LCGLIB_SRC)

# Leave the Eigen-based sources out when Eigen support is switched off
if(NOT LibLCG_EIGEN)
    list(REMOVE_ITEM LCGLIB_SRC
        "lib/algebra_eigen.cpp"
        "lib/lcg_eigen.cpp"
        "lib/clcg_eigen.cpp"
        "lib/solver_eigen.cpp"
        "lib/preconditioner_eigen.cpp")
endif()

# Leave the CUDA sources out when CUDA support is switched off
if(NOT LibLCG_CUDA)
    list(REMOVE_ITEM LCGLIB_SRC
        "lib/algebra_cuda.cu"
        "lib/lcg_complex_cuda.cu"
        "lib/lcg_cuda.cu"
        "lib/clcg_cuda.cu"
        "lib/clcg_cuda_f.cu"
        "lib/solver_cuda.cu"
        "lib/preconditioner_cuda.cu")
endif()
|
||||||
|
|
||||||
|
# Compile options.
# CMAKE_CXX_STANDARD only initialises the CXX_STANDARD property of targets
# that are created AFTER it is set, so it has to come before add_library()
# for the requested standard to reach the lcg and lcg_static targets.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")

# Build of the library.
# Target names must be unique, so the shared and the static library cannot
# both be created under the same target name.
# No "lib" prefix or file suffix is needed here; CMake adds them itself.
add_library(lcg SHARED ${LCGLIB_SRC})
# Static flavour of the library
add_library(lcg_static STATIC ${LCGLIB_SRC})
# Give the static library the same output name as the shared one
set_target_properties(lcg_static PROPERTIES OUTPUT_NAME "lcg")
# Target properties that allow both flavours to be produced side by side
set_target_properties(lcg PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set_target_properties(lcg_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(LibLCG_CUDA)
    set_target_properties(lcg PROPERTIES CUDA_ARCHITECTURES 70)
    set_target_properties(lcg_static PROPERTIES CUDA_ARCHITECTURES 70)
endif()
# Version number of the shared library
set_target_properties(lcg PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
# Output directory of the library files
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
|
||||||
|
# Optional dependency: Eigen3 (headers only)
if(LibLCG_EIGEN)
    find_package(Eigen3 REQUIRED)
    if(EIGEN3_FOUND)
        message(STATUS "Eigen3 Found.")
        include_directories(${EIGEN3_INCLUDE_DIR})
    endif()
endif()

# Optional dependency: CUDA toolkit with cuBLAS, cuSPARSE and cuSOLVER
if(LibLCG_CUDA)
    enable_language(CUDA)
    find_package(CUDA REQUIRED)
    if(CUDA_FOUND)
        message(STATUS "CUDA Found.")
        include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
        find_library(CUBLAS_LIBRARY cublas ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        find_library(CUSPARSE_LIBRARY cusparse ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        find_library(CUSOLVER_LIBRARY cusolver ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        # Same libraries, same order, for both flavours of the library
        target_link_libraries(lcg PUBLIC
            ${CUBLAS_LIBRARY} ${CUSPARSE_LIBRARY} ${CUSOLVER_LIBRARY})
        target_link_libraries(lcg_static
            ${CUBLAS_LIBRARY} ${CUSPARSE_LIBRARY} ${CUSOLVER_LIBRARY})
    endif()
endif()

# Optional dependency: OpenMP (compile and link flags)
if(LibLCG_OPENMP)
    find_package(OpenMP REQUIRED)
    if(OpenMP_CXX_FOUND)
        message(STATUS "OpenMP Found.")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
        target_link_libraries(lcg PUBLIC OpenMP::OpenMP_CXX)
        target_link_libraries(lcg_static OpenMP::OpenMP_CXX)
    endif()
endif()
|
||||||
|
|
||||||
|
# Install location of the CMake package files, relative to the install prefix
set(CONFIG_FILE_PATH lib/cmake/${PROJECT_NAME})

# Generate <Project>Config.cmake from its template
configure_package_config_file(
    ${PROJECT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
    ${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
    INSTALL_DESTINATION ${CONFIG_FILE_PATH}
    NO_CHECK_REQUIRED_COMPONENTS_MACRO)

# Generate the matching <Project>ConfigVersion.cmake
write_basic_package_version_file(
    ${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
    VERSION ${PROJECT_VERSION}
    COMPATIBILITY SameMajorVersion)

# Installation of the libraries
if(NOT WIN32)
    install(TARGETS lcg lcg_static
        EXPORT ${PROJECT_NAME}Targets
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib)
    install(EXPORT ${PROJECT_NAME}Targets
        DESTINATION ${CONFIG_FILE_PATH})
    install(FILES
        ${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
        ${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
        DESTINATION ${CONFIG_FILE_PATH})
else()
    # On Windows only the library files are installed (no package export)
    install(TARGETS lcg lcg_static DESTINATION lib)
endif()

# Installation of the headers that are always available
install(FILES
    lib/config.h
    lib/algebra.h
    lib/lcg_complex.h
    lib/util.h
    lib/lcg.h
    lib/clcg.h
    lib/solver.h
    lib/preconditioner.h
    DESTINATION include/lcg)

# Headers of the CUDA back end
if(LibLCG_CUDA)
    install(FILES
        lib/algebra_cuda.h
        lib/lcg_complex_cuda.h
        lib/lcg_cuda.h
        lib/clcg_cuda.h
        lib/clcg_cudaf.h
        lib/solver_cuda.h
        lib/preconditioner_cuda.h
        DESTINATION include/lcg)
endif()

# Headers of the Eigen back end
if(LibLCG_EIGEN)
    install(FILES
        lib/algebra_eigen.h
        lib/lcg_eigen.h
        lib/clcg_eigen.h
        lib/solver_eigen.h
        lib/preconditioner_eigen.h
        DESTINATION include/lcg)
endif()
|
||||||
|
|
||||||
|
# Build of the example programs.
# Output directory of the executables
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

# add_sample(<target name> <source file inside sample/>)
# Creates one example executable and links it against the shared library.
macro(add_sample name file)
    # Command line executable built from the given sample source
    add_executable(${name} sample/${file})
    # Search path of the shared library for the installed executable
    # (has no effect on Windows)
    set_property(TARGET ${name} PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib)
    # Link against the shared library
    target_link_libraries(${name} PUBLIC lcg)
    # CUDA settings
    if(LibLCG_CUDA)
        set_property(TARGET ${name} PROPERTY CUDA_ARCHITECTURES 70)
    endif()
endmacro()
|
||||||
|
|
||||||
|
# Samples that only need the core library
foreach(sample_id 1 2 3 4)
    add_sample(lcg_sample${sample_id} sample${sample_id}.cpp)
endforeach()

# Samples that need Eigen (sample6 additionally needs std::complex support)
if(LibLCG_EIGEN)
    foreach(sample_id 5 7)
        add_sample(lcg_sample${sample_id} sample${sample_id}.cpp)
    endforeach()
    if(LibLCG_STD_COMPLEX)
        add_sample(lcg_sample6 sample6.cpp)
    endif()
endif()

# Samples that need CUDA
if(LibLCG_CUDA)
    # The following samples do not work for now due to CUDA 12+ compatibility issues. To be checked later.
    #add_sample(lcg_sample8 sample8.cu)
    #add_sample(lcg_sample9 sample9.cu)
    #add_sample(lcg_sample10 sample10.cu)
    #add_sample(lcg_sample11 sample11.cu)
    #add_sample(lcg_sample12 sample12.cu)
    #add_sample(lcg_sample13 sample13.cu)
    #add_sample(lcg_sample14 sample14.cu)
    add_sample(lcg_sample15 sample15.cu)
endif()
|
||||||
222
src/lib/algebra.cpp
Normal file
222
src/lib/algebra.cpp
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
#include "ctime"
#include "random"

#include "algebra.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * @brief Absolute value of a real number.
 *
 * @param[in] a Input value.
 *
 * @return a itself when a >= 0.0, otherwise -1.0*a.
 */
lcg_float lcg_abs(lcg_float a)
{
    return (a >= 0.0) ? a : -1.0*a;
}
|
||||||
|
|
||||||
|
/**
 * @brief Larger of two real numbers.
 *
 * @param[in] a First value.
 * @param[in] b Second value.
 *
 * @return a when a >= b, otherwise b.
 */
lcg_float lcg_max(lcg_float a, lcg_float b)
{
    return (a >= b) ? a : b;
}
|
||||||
|
|
||||||
|
/**
 * @brief Smaller of two real numbers.
 *
 * @param[in] a First value.
 * @param[in] b Second value.
 *
 * @return a when a <= b, otherwise b.
 */
lcg_float lcg_min(lcg_float a, lcg_float b)
{
    return (a <= b) ? a : b;
}
|
||||||
|
|
||||||
|
/**
 * @brief Constrain a value to the box [low, hig].
 *
 * The upper bound is tested first. A value that reaches an inclusive bound
 * is replaced by the bound itself. A value that reaches an exclusive bound
 * is replaced by a value just inside the box: the bound shifted by 1e-16,
 * or, when that shift is lost to rounding (it is smaller than the spacing
 * of doubles around the bound), the representable neighbour of the bound.
 * This guarantees that an excluded bound is never returned.
 *
 * @param[in] low       Lower boundary.
 * @param[in] hig       Upper boundary.
 * @param[in] a         Input value.
 * @param[in] low_bound Whether the lower boundary value itself is allowed.
 * @param[in] hig_bound Whether the upper boundary value itself is allowed.
 *
 * @return The box-constrained value; a unchanged when it lies strictly inside.
 */
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a,
    bool low_bound, bool hig_bound)
{
    if (a >= hig)
    {
        if (hig_bound) return hig;
        const lcg_float inside = hig - 1e-16;
        // hig - 1e-16 == hig for large enough |hig|: step to the next value below instead
        if (inside < hig) return inside;
        return std::nextafter(hig, static_cast<lcg_float>(-HUGE_VAL));
    }

    if (a <= low)
    {
        if (low_bound) return low;
        const lcg_float inside = low + 1e-16;
        // low + 1e-16 == low for large enough |low|: step to the next value above instead
        if (inside > low) return inside;
        return std::nextafter(low, static_cast<lcg_float>(HUGE_VAL));
    }

    return a;
}
|
||||||
|
|
||||||
|
/**
 * @brief Allocate an array of n lcg_float values with new[].
 *
 * The elements are not initialized.
 *
 * @param[in] n Number of elements.
 *
 * @return Pointer to the new array.
 */
lcg_float* lcg_malloc(int n)
{
    return new lcg_float [n];
}
|
||||||
|
|
||||||
|
/**
 * @brief Allocate a two-dimensional array as m separately allocated rows
 * of n lcg_float values each.
 *
 * The elements are not initialized.
 *
 * @param[in] m Number of rows.
 * @param[in] n Number of elements in each row.
 *
 * @return Pointer to the array of row pointers.
 */
lcg_float** lcg_malloc(int m, int n)
{
    lcg_float **rows = new lcg_float* [m];
    int r = 0;
    while (r < m)
    {
        rows[r] = new lcg_float [n];
        ++r;
    }
    return rows;
}
|
||||||
|
|
||||||
|
/**
 * @brief Release an array with delete[].
 *
 * The pointer is received by value, so the caller's own pointer variable
 * is not changed by this call.
 *
 * @param x Pointer of the array. May be null.
 */
void lcg_free(lcg_float* x)
{
    // delete[] on a null pointer does nothing, so no explicit check is needed
    delete[] x;
}
|
||||||
|
|
||||||
|
/**
 * @brief Release a two-dimensional array: each of the m rows first, then
 * the array of row pointers.
 *
 * The pointer is received by value, so the caller's own pointer variable
 * is not changed by this call.
 *
 * @param x     Pointer of the array of row pointers. May be null.
 * @param[in] m Number of rows.
 */
void lcg_free(lcg_float **x, int m)
{
    if (x == nullptr) return;

    for (int r = 0; r < m; ++r)
    {
        delete[] x[r];
    }
    delete[] x;
}
|
||||||
|
|
||||||
|
/**
 * @brief Assign the same value to the first size elements of a vector.
 *
 * @param a        Pointer of the vector.
 * @param[in] b    Value to assign.
 * @param[in] size Number of elements to set.
 */
void lcg_vecset(lcg_float *a, lcg_float b, int size)
{
    int k = 0;
    while (k < size)
    {
        a[k] = b;
        ++k;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Assign the same value to every element of an m by n array stored
 * as an array of row pointers.
 *
 * @param a     Pointer of the array of row pointers.
 * @param[in] b Value to assign.
 * @param[in] m Number of rows.
 * @param[in] n Number of elements in each row.
 */
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n)
{
    int r = 0;
    while (r < m)
    {
        int c = 0;
        while (c < n)
        {
            a[r][c] = b;
            ++c;
        }
        ++r;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill a vector with pseudo-random values between l and h.
 *
 * Each value is (h-l)*rand()/RAND_MAX + l. Every call reseeds the C library
 * generator with the current time.
 * NOTE(review): calls made within the same clock second presumably reuse
 * the same seed and so produce the same sequence.
 *
 * @param a        Pointer of the vector.
 * @param[in] l    Lower bound of the random values.
 * @param[in] h    Upper bound of the random values.
 * @param[in] size Number of elements to fill.
 */
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size)
{
    srand(time(nullptr));

    const lcg_float width = h - l;
    int k = 0;
    while (k < size)
    {
        a[k] = width*rand()*1.0/RAND_MAX + l;
        ++k;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill an m by n array (array of row pointers) with pseudo-random
 * values between l and h.
 *
 * Each value is (h-l)*rand()/RAND_MAX + l, generated row by row. Every call
 * reseeds the C library generator with the current time.
 * NOTE(review): calls made within the same clock second presumably reuse
 * the same seed and so produce the same sequence.
 *
 * @param a     Pointer of the array of row pointers.
 * @param[in] l Lower bound of the random values.
 * @param[in] h Upper bound of the random values.
 * @param[in] m Number of rows.
 * @param[in] n Number of elements in each row.
 */
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n)
{
    srand(time(nullptr));

    const lcg_float width = h - l;
    int r = 0;
    while (r < m)
    {
        int c = 0;
        while (c < n)
        {
            a[r][c] = width*rand()*1.0/RAND_MAX + l;
            ++c;
        }
        ++r;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Squared L2 norm of a vector, i.e. the sum of its squared elements.
 *
 * @param a Pointer of the vector.
 * @param n Number of elements. A value of zero or less yields 0.
 *
 * @return The squared L2 norm.
 */
double lcg_squaredl2norm(lcg_float *a, int n)
{
    lcg_float sum = 0;
    // Signed index on purpose: comparing an unsigned index with a negative n
    // would convert n to a huge unsigned value and run far past the array.
    for (int i = 0; i < n; i++)
    {
        sum += a[i]*a[i];
    }
    return sum;
}
|
||||||
|
|
||||||
|
/**
 * @brief Dot product of two real vectors.
 *
 * @param[out] ret Receives the sum of a[i]*b[i]; it is reset to 0.0 first.
 * @param[in] a    Pointer of the vector a.
 * @param[in] b    Pointer of the vector b.
 * @param[in] size Number of elements in both vectors.
 */
void lcg_dot(lcg_float &ret, const lcg_float *a,
    const lcg_float *b, int size)
{
    ret = 0.0;

    int k = 0;
    while (k < size)
    {
        ret += a[k]*b[k];
        ++k;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Product of a dense real matrix (array of row pointers) and a vector.
 *
 * With layout == MatNormal the result is A*x: x is read at n_size entries
 * and Ax receives m_size entries. With any other layout the result is
 * A^T*x: x is read at m_size entries and Ax receives n_size entries.
 * The outer loop is an OpenMP parallel for.
 *
 * @param A          Matrix with m_size rows and n_size columns.
 * @param[in] x      Vector to multiply.
 * @param Ax         Output vector.
 * @param[in] m_size Row size of A.
 * @param[in] n_size Column size of A.
 * @param[in] layout MatNormal for A*x, otherwise A^T*x.
 */
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax,
    int m_size, int n_size, lcg_matrix_e layout)
{
    int r, c;

    if (layout != MatNormal)
    {
        // Transposed product: one output entry per column of A
#pragma omp parallel for private (r, c) schedule(guided)
        for (c = 0; c < n_size; c++)
        {
            Ax[c] = 0.0;
            for (r = 0; r < m_size; r++)
            {
                Ax[c] += A[r][c]*x[r];
            }
        }
    }
    else
    {
        // Plain product: one output entry per row of A
#pragma omp parallel for private (r, c) schedule(guided)
        for (r = 0; r < m_size; r++)
        {
            Ax[r] = 0.0;
            for (c = 0; c < n_size; c++)
            {
                Ax[r] += A[r][c]*x[c];
            }
        }
    }
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Product of a sparse matrix in COO format and a vector.
 *
 * With pre_position == false the output has M entries and accumulates
 * p[row[i]] += Mat[i]*V[col[i]] (matrix times column vector).
 * With pre_position == true the output has N entries and accumulates
 * p[col[i]] += Mat[i]*V[row[i]] (row vector times matrix).
 * The output is zeroed before accumulation. Indices in row and col are not
 * range-checked.
 *
 * @param row          Row index of every non-zero entry.
 * @param col          Column index of every non-zero entry.
 * @param Mat          Value of every non-zero entry.
 * @param V            Multiplier vector.
 * @param p            Output product.
 * @param M            Row number of the sparse matrix.
 * @param N            Column number of the sparse matrix.
 * @param nz_size      Number of non-zero entries.
 * @param pre_position If true, the multiplier is treated as a row vector.
 */
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position)
{
    // Signed loop indices on purpose: comparing an unsigned index with a
    // negative size would convert the size to a huge unsigned value and run
    // far past the arrays.
    if (!pre_position)
    {
        for (int i = 0; i < M; i++)
        {
            p[i] = 0.0;
        }

        for (int i = 0; i < nz_size; i++)
        {
            p[row[i]] += Mat[i]*V[col[i]];
        }
    }
    else
    {
        for (int i = 0; i < N; i++)
        {
            p[i] = 0.0;
        }

        for (int i = 0; i < nz_size; i++)
        {
            p[col[i]] += Mat[i]*V[row[i]];
        }
    }
    return;
}
|
||||||
219
src/lib/algebra.h
Normal file
219
src/lib/algebra.h
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _ALGEBRA_H
|
||||||
|
#define _ALGEBRA_H
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Matrix layouts.
|
||||||
|
*/
|
||||||
|
enum lcg_matrix_e
|
||||||
|
{
|
||||||
|
MatNormal,
|
||||||
|
MatTranspose,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Conjugate types for a complex number.
|
||||||
|
*/
|
||||||
|
enum clcg_complex_e
|
||||||
|
{
|
||||||
|
NonConjugate,
|
||||||
|
Conjugate,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A simple definition of the float type we use here.
|
||||||
|
* Easy to change in the future. Right now it is just an alias of double
|
||||||
|
*/
|
||||||
|
typedef double lcg_float;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return absolute value
|
||||||
|
*
|
||||||
|
* @param[in] a input value
|
||||||
|
*
|
||||||
|
* @return The absolute value
|
||||||
|
*/
|
||||||
|
lcg_float lcg_abs(lcg_float a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the bigger value
|
||||||
|
*
|
||||||
|
* @param[in] a input value
|
||||||
|
* @param[in] b input value
|
||||||
|
*
|
||||||
|
* @return The bigger value
|
||||||
|
*/
|
||||||
|
lcg_float lcg_max(lcg_float a, lcg_float b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the smaller value
|
||||||
|
*
|
||||||
|
* @param[in] a input value
|
||||||
|
* @param[in] b input value
|
||||||
|
*
|
||||||
|
* @return The smaller value
|
||||||
|
*/
|
||||||
|
lcg_float lcg_min(lcg_float a, lcg_float b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the input value within a box constraint
|
||||||
|
*
|
||||||
|
* @param low low boundary
|
||||||
|
* @param hig high boundary
|
||||||
|
* @param a input value
|
||||||
|
* @param low_bound Whether to include the low boundary value
|
||||||
|
* @param hig_bound Whether to include the high boundary value
|
||||||
|
*
|
||||||
|
* @return box constrained value
|
||||||
|
*/
|
||||||
|
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a,
|
||||||
|
bool low_bound = true, bool hig_bound = true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate memory for a one-dimensional lcg_float array.
|
||||||
|
*
|
||||||
|
* @param[in] n Size of the lcg_float array.
|
||||||
|
*
|
||||||
|
* @return Pointer of the array's location.
|
||||||
|
*/
|
||||||
|
lcg_float* lcg_malloc(int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate memory for a two-dimensional lcg_float array (an array of row pointers).
|
||||||
|
*
|
||||||
|
* @param[in] m Number of rows of the lcg_float array.
* @param[in] n Number of elements in each row of the lcg_float array.
|
||||||
|
*
|
||||||
|
* @return Pointer of the array's location.
|
||||||
|
*/
|
||||||
|
lcg_float** lcg_malloc(int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy memory used by the lcg_float type array.
|
||||||
|
*
|
||||||
|
* @param x Pointer of the array.
|
||||||
|
*/
|
||||||
|
void lcg_free(lcg_float* x);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy memory used by the 2D lcg_float type array.
|
||||||
|
*
|
||||||
|
* @param x Pointer of the array.
* @param[in] m Number of rows of the array.
|
||||||
|
*/
|
||||||
|
void lcg_free(lcg_float **x, int m);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a vector's value
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] b initial value
|
||||||
|
* @param[in] size vector size
|
||||||
|
*/
|
||||||
|
void lcg_vecset(lcg_float *a, lcg_float b, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a 2d vector's value
|
||||||
|
*
|
||||||
|
* @param a pointer of the matrix
|
||||||
|
* @param[in] b initial value
|
||||||
|
* @param[in] m row size of the matrix
|
||||||
|
* @param[in] n column size of the matrix
|
||||||
|
*/
|
||||||
|
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a vector using random values
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] l the lower bound of random values
|
||||||
|
* @param[in] h the higher bound of random values
|
||||||
|
* @param[in] size size of the vector
|
||||||
|
*/
|
||||||
|
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a 2D vector using random values
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] l the lower bound of random values
|
||||||
|
* @param[in] h the higher bound of random values
|
||||||
|
* @param[in] m row size of the vector
|
||||||
|
* @param[in] n column size of the vector
|
||||||
|
*/
|
||||||
|
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate the squared L2 norm of the input vector
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param n size of the vector
|
||||||
|
* @return double squared L2 norm
|
||||||
|
*/
|
||||||
|
double lcg_squaredl2norm(lcg_float *a, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate dot product of two real vectors
|
||||||
|
*
|
||||||
|
* @param[out] ret the dot product
* @param[in] a pointer of the vector a
|
||||||
|
* @param[in] b pointer of the vector b
|
||||||
|
* @param[in] size size of the vector
|
||||||
|
*
|
||||||
|
* @note The function itself returns nothing; the dot product is delivered through the first parameter.
|
||||||
|
*/
|
||||||
|
void lcg_dot(lcg_float &ret, const lcg_float *a, const lcg_float *b, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate product of a real matrix and a vector
|
||||||
|
*
|
||||||
|
* Different configurations:
|
||||||
|
* layout=MatNormal -> A
|
||||||
|
* layout=MatTranspose -> A^T
|
||||||
|
*
|
||||||
|
* @param A matrix A
|
||||||
|
* @param[in] x vector x
|
||||||
|
* @param Ax product of Ax
|
||||||
|
* @param[in] m_size row size of A
|
||||||
|
* @param[in] n_size column size of A
|
||||||
|
* @param[in] layout layout of A used for multiplication. Must be MatNormal or MatTranspose
|
||||||
|
*/
|
||||||
|
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax, int m_size, int n_size,
|
||||||
|
lcg_matrix_e layout = MatNormal);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the product of a sparse matrix multiplied by a vector. The matrix is stored in the COO format.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param Mat Non-zero values of the input sparse matrix.
|
||||||
|
* @param V Multiplier vector
|
||||||
|
* @param p Output product
|
||||||
|
* @param M Row number of the sparse matrix
|
||||||
|
* @param N Column number of the sparse matrix
|
||||||
|
* @param nz_size Non-zero size of the matrix
|
||||||
|
* @param pre_position If true, the multiplier is seen as a row vector. Otherwise, it is treated as a column vector.
|
||||||
|
*/
|
||||||
|
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position = false);
|
||||||
|
|
||||||
|
#endif //_ALGEBRA_H
|
||||||
110
src/lib/algebra_cuda.cu
Normal file
110
src/lib/algebra_cuda.cu
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "algebra_cuda.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Kernel that constrains every element of a to its own box
 * [low[i], hig[i]]. The upper bound is applied first, then the lower one.
 *
 * Each thread handles the element with index
 * blockIdx.x*blockDim.x + threadIdx.x; threads whose index is not below n
 * do nothing.
 *
 * NOTE(review): the flags only select >= versus > (or <= versus <) in the
 * comparison, and the element is set to the bound itself in both cases, so
 * as written they do not appear to change the stored result. Confirm
 * whether an exclusive bound was meant to be handled differently.
 *
 * @param[in] low       Lower boundary of every element.
 * @param[in] hig       Upper boundary of every element.
 * @param a             Values to constrain, modified in place.
 * @param[in] n         Number of elements.
 * @param[in] low_bound Selects the <= comparison against low (otherwise <).
 * @param[in] hig_bound Selects the >= comparison against hig (otherwise >).
 */
__global__ void lcg_set2box_cuda_device(const lcg_float *low, const lcg_float *hig, lcg_float *a,
    int n, bool low_bound, bool hig_bound)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    const bool above = hig_bound ? (a[idx] >= hig[idx]) : (a[idx] > hig[idx]);
    if (above) a[idx] = hig[idx];

    const bool below = low_bound ? (a[idx] <= low[idx]) : (a[idx] < low[idx]);
    if (below) a[idx] = low[idx];
}
|
||||||
|
|
||||||
|
/**
 * Kernel: copy the diagonal entry of every row of a CSR matrix into A_diag,
 * one row per thread.
 *
 * Row `row` occupies the index range [A_ptr[row], A_ptr[row + 1]) of A_col and
 * A_val. The first entry whose column index equals the row index is stored.
 * If a row holds no such entry, A_diag[row] is left untouched.
 */
__global__ void lcg_smDcsr_get_diagonal_device(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    const int row_end = A_ptr[row + 1];
    for (int k = A_ptr[row]; k < row_end; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            break;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Kernel: element-wise product c[i] = a[i] * b[i] for i in [0, n), one element per thread.
 */
__global__ void lcg_vecMvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // Surplus threads of the last block fall outside the arrays.
    if (idx >= n) return;

    c[idx] = a[idx] * b[idx];
}
|
||||||
|
|
||||||
|
/**
 * Kernel: element-wise quotient c[i] = a[i] / b[i] for i in [0, n), one element per thread.
 * No check is made for zero entries in b.
 */
__global__ void lcg_vecDvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // Surplus threads of the last block fall outside the arrays.
    if (idx >= n) return;

    c[idx] = a[idx] / b[idx];
}
|
||||||
|
|
||||||
|
/**
 * Launch lcg_set2box_cuda_device over n elements to limit a[i] to the range
 * given by low[i] and hig[i].
 *
 * The pointers are handed to the kernel unchanged. The launch uses a fixed
 * block size of 1024 threads and is not followed by a synchronization or an
 * error query in this function.
 *
 * @param low       Array of lower limits
 * @param hig       Array of upper limits
 * @param a         Array to be limited (modified by the kernel)
 * @param n         Number of elements; nothing is launched when n <= 0
 * @param low_bound Selects the inclusive (<=) or strict (<) lower comparison
 * @param hig_bound Selects the inclusive (>=) or strict (>) upper comparison
 */
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a,
    int n, bool low_bound, bool hig_bound)
{
    // An empty input would produce a grid of zero blocks, which is not a
    // valid launch configuration.
    if (n <= 0) return;

    const int blockSize = 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize; // ceil(n / blockSize)
    lcg_set2box_cuda_device<<<numBlocks, blockSize>>>(low, hig, a, n, low_bound, hig_bound);
}
|
||||||
|
|
||||||
|
/**
 * Launch lcg_smDcsr_get_diagonal_device to extract the diagonal of a square
 * CSR matrix, one row per thread.
 *
 * The pointers are handed to the kernel unchanged. The launch is not followed
 * by a synchronization or an error query in this function.
 *
 * @param A_ptr   Row pointer array of the CSR matrix
 * @param A_col   Column index array
 * @param A_val   Non-zero values
 * @param A_len   Number of rows; nothing is launched when A_len <= 0
 * @param A_diag  Output array receiving the diagonal entries
 * @param bk_size Threads per block; non-positive values fall back to 1024
 */
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size)
{
    // An empty matrix would produce a grid of zero blocks, which is not a
    // valid launch configuration.
    if (A_len <= 0) return;

    // A non-positive block size would divide by zero (or yield a negative
    // grid) below.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (A_len + blockSize - 1) / blockSize; // ceil(A_len / blockSize)
    lcg_smDcsr_get_diagonal_device<<<numBlocks, blockSize>>>(A_ptr, A_col, A_val, A_len, A_diag);
}
|
||||||
|
|
||||||
|
/**
 * Launch lcg_vecMvecD_element_wise_device to compute c[i] = a[i] * b[i].
 *
 * The pointers are handed to the kernel unchanged. The launch is not followed
 * by a synchronization or an error query in this function.
 *
 * @param a       First input array
 * @param b       Second input array
 * @param c       Output array
 * @param n       Number of elements; nothing is launched when n <= 0
 * @param bk_size Threads per block; non-positive values fall back to 1024
 */
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    // An empty input would produce a grid of zero blocks, which is not a
    // valid launch configuration.
    if (n <= 0) return;

    // A non-positive block size would divide by zero (or yield a negative
    // grid) below.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize; // ceil(n / blockSize)
    lcg_vecMvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
}
|
||||||
|
|
||||||
|
/**
 * Launch lcg_vecDvecD_element_wise_device to compute c[i] = a[i] / b[i].
 *
 * The pointers are handed to the kernel unchanged. The launch is not followed
 * by a synchronization or an error query in this function.
 *
 * @param a       Array of numerators
 * @param b       Array of denominators (not checked for zeros)
 * @param c       Output array
 * @param n       Number of elements; nothing is launched when n <= 0
 * @param bk_size Threads per block; non-positive values fall back to 1024
 */
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    // An empty input would produce a grid of zero blocks, which is not a
    // valid launch configuration.
    if (n <= 0) return;

    // A non-positive block size would divide by zero (or yield a negative
    // grid) below.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize; // ceil(n / blockSize)
    lcg_vecDvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
}
|
||||||
88
src/lib/algebra_cuda.h
Normal file
88
src/lib/algebra_cuda.h
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _ALGEBRA_CUDA_H
|
||||||
|
#define _ALGEBRA_CUDA_H
|
||||||
|
|
||||||
|
#include "algebra.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
/**
 * @brief Set every element of a CUDA array within a box constraint
 *
 * @note This is a device side function. All memories must be allocated on the GPU device.
 *
 * @param[in] low Pointer of the low boundary array
 * @param[in] hig Pointer of the high boundary array
 * @param a Pointer of the array to be constrained. Its elements are modified in place
 * @param[in] n Length of the arrays
 * @param[in] low_bound Whether to include the low boundary value
 * @param[in] hig_bound Whether to include the high boundary value
 */
|
||||||
|
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a,
|
||||||
|
int n, bool low_bound = true, bool hig_bound = true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] A_ptr Row index pointer
|
||||||
|
* @param[in] A_col Column index
|
||||||
|
* @param[in] A_val Non-zero values of the matrix
|
||||||
|
* @param[in] A_len Dimension of the matrix
|
||||||
|
* @param A_diag Output diagonal elements
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise multiplication between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise division between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif //_ALGEBRA_CUDA_H
|
||||||
32
src/lib/algebra_eigen.cpp
Normal file
32
src/lib/algebra_eigen.cpp
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "algebra_eigen.h"
|
||||||
|
|
||||||
|
/**
 * Apply lcg_set2box() to every element of m, using low[i] and hig[i] as the
 * limits of element i.
 *
 * NOTE(review): m is received by value, so the results are written to a local
 * copy and are not visible to the caller. A reference parameter was presumably
 * intended; changing it requires updating the declaration in algebra_eigen.h
 * at the same time.
 */
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m)
{
    const auto count = m.size();
    for (int idx = 0; idx < count; ++idx)
    {
        m[idx] = lcg_set2box(low[idx], hig[idx], m[idx]);
    }
}
|
||||||
43
src/lib/algebra_eigen.h
Normal file
43
src/lib/algebra_eigen.h
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _ALGEBRA_EIGEN_H
|
||||||
|
#define _ALGEBRA_EIGEN_H
|
||||||
|
|
||||||
|
#include "algebra.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_EIGEN
|
||||||
|
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
|
||||||
|
/**
 * @brief Set the input values within a box constraint
 *
 * @param low Low boundary values, one per element of m
 * @param hig High boundary values, one per element of m
 * @param m Values to be constrained. NOTE(review): the vector is passed by value,
 * so the caller's vector is not modified; confirm whether a reference was intended.
 */
|
||||||
|
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m);
|
||||||
|
|
||||||
|
#endif // LibLCG_EIGEN
|
||||||
|
|
||||||
|
#endif // _ALGEBRA_EIGEN_H
|
||||||
837
src/lib/clcg.cpp
Normal file
837
src/lib/clcg.cpp
Normal file
@@ -0,0 +1,837 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "clcg.h"
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef int (*clcg_solver_ptr)(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
|
||||||
|
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||||
|
const int n_size, const clcg_para* param, void* instance);
|
||||||
|
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||||
|
const int n_size, const clcg_para* param, void* instance);
|
||||||
|
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||||
|
const int n_size, const clcg_para* param, void* instance);
|
||||||
|
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||||
|
const int n_size, const clcg_para* param, void* instance);
|
||||||
|
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||||
|
const int n_size, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
/**
 * Dispatch to the solver selected by solver_id and return its status code.
 *
 * All arguments except solver_id are forwarded unchanged. Any solver_id that
 * is not listed explicitly falls back to clcgs().
 */
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
    const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
    clcg_solver_enum solver_id)
{
    switch (solver_id)
    {
        case CLCG_BICG:
            return clbicg(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_BICG_SYM:
            return clbicg_symmetric(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_BICGSTAB:
            return clbicgstab(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_TFQMR:
            return cltfqmr(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_CGS:
        default:
            return clcgs(Afp, Pfp, m, B, n_size, param, instance);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Biconjugate gradient solver for a complex linear system whose matrix is
 * only available through the product callback Afp.
 *
 * @param Afp      Product callback. Called with MatNormal/NonConjugate for the
 *                 forward product and with MatTranspose/Conjugate for the
 *                 shadow system.
 * @param Pfp      Optional progress callback (may be nullptr). A non-zero
 *                 return value stops the iteration.
 * @param m        Initial guess on entry, updated in place (length n_size).
 * @param B        Right-hand side (length n_size).
 * @param n_size   Number of unknowns.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User pointer forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, CLCG_NAN_VALUE, or one of the
 *         CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected input.
 *
 * NOTE(review): two of the returned codes carry the LCG_ prefix instead of
 * CLCG_; confirm against the enums that the numeric values are the intended ones.
 */
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set BiCG parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // check parameters before anything is allocated
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *r1k = clcg_malloc(n_size); // residual
    lcg_complex *r2k = clcg_malloc(n_size); // shadow residual
    lcg_complex *d1k = clcg_malloc(n_size); // search direction
    lcg_complex *d2k = clcg_malloc(n_size); // shadow search direction
    lcg_complex *Ax  = clcg_malloc(n_size); // scratch for the callback output

    lcg_complex ak, ak_conj, betak, betak_conj;
    lcg_complex Ad1d2, r1r2, r1r2_next, rk_mod;

    // Initial residual r = B - A*m; the shadow residual starts as its conjugate.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        d1k[i] = r1k[i] = B[i] - Ax[i];
        d2k[i] = r2k[i] = clcg_conjugate(&r1k[i]);
    }

    clcg_inner(r1r2, r2k, r1k, n_size);

    clcg_inner(rk_mod, r1k, r1k, n_size);
    lcg_float rk_square = clcg_square(&rk_mod);
    // Reference value of the relative criterion; never allowed below one.
    lcg_float r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    int ret;
    int t = 0;

    // The start-up test uses the same criterion (absolute or relative) as the
    // iteration below, so abs_diff is honoured consistently.
    lcg_float residual = para.abs_diff ? sqrt(rk_square)/n_size : rk_square/r0_square;
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, residual, &para, n_size, 0);
        }
    }
    else
    {
        while (true)
        {
            if (para.abs_diff) residual = sqrt(rk_square)/n_size;
            else residual = rk_square/r0_square;

            if (Pfp != nullptr && Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 means no limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // Step length along the current search direction.
            Afp(instance, d1k, Ax, n_size, MatNormal, NonConjugate);
            clcg_inner(Ad1d2, d2k, Ax, n_size);
            ak = r1r2/Ad1d2;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                m[i] = m[i] + ak*d1k[i];
                r1k[i] = r1k[i] - ak*Ax[i];
            }

            clcg_inner(rk_mod, r1k, r1k, n_size);
            rk_square = clcg_square(&rk_mod);

            // Shadow residual update; Ax is reused for the second product.
            Afp(instance, d2k, Ax, n_size, MatTranspose, Conjugate);
            ak_conj = clcg_conjugate(&ak);

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                r2k[i] = r2k[i] - ak_conj*Ax[i];
            }

            // A value that differs from itself is NaN: abort.
            bool found_nan = false;
            for (i = 0; i < n_size; i++)
            {
                if (m[i] != m[i])
                {
                    found_nan = true;
                    break;
                }
            }
            if (found_nan)
            {
                ret = CLCG_NAN_VALUE;
                break;
            }

            clcg_inner(r1r2_next, r2k, r1k, n_size);
            betak = r1r2_next/r1r2;
            r1r2 = r1r2_next;
            betak_conj = clcg_conjugate(&betak);

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                d1k[i] = r1k[i] + betak*d1k[i];
                d2k[i] = r2k[i] + betak_conj*d2k[i];
            }
        }
    }

    // Single exit point: release every work array.
    clcg_free(r1k);
    clcg_free(r2k);
    clcg_free(d1k);
    clcg_free(d2k);
    clcg_free(Ax);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * Variant of the biconjugate gradient solver that keeps a single residual and
 * a single search direction and uses clcg_dot() for the recurrence
 * coefficients. Judging by its name it is meant for symmetric matrices; the
 * matrix is only accessed through Afp with MatNormal/NonConjugate.
 *
 * @param Afp      Product callback.
 * @param Pfp      Optional progress callback (may be nullptr). A non-zero
 *                 return value stops the iteration.
 * @param m        Initial guess on entry, updated in place (length n_size).
 * @param B        Right-hand side (length n_size).
 * @param n_size   Number of unknowns.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User pointer forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, CLCG_NAN_VALUE, or one of the
 *         CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected input.
 *
 * NOTE(review): two of the returned codes carry the LCG_ prefix instead of
 * CLCG_; confirm against the enums that the numeric values are the intended ones.
 */
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set solver parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // check parameters before anything is allocated
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk = clcg_malloc(n_size); // residual
    lcg_complex *dk = clcg_malloc(n_size); // search direction
    lcg_complex *Ax = clcg_malloc(n_size); // scratch for the callback output

    lcg_complex ak, betak, dkAx, rkrk, rkrk2, rk_mod;

    // Initial residual r = B - A*m, which is also the first search direction.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        dk[i] = rk[i] = B[i] - Ax[i];
    }

    clcg_dot(rkrk, rk, rk, n_size);

    clcg_inner(rk_mod, rk, rk, n_size);
    lcg_float rk_square = clcg_square(&rk_mod);
    // Reference value of the relative criterion; never allowed below one.
    lcg_float r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    int ret;
    int t = 0;

    // The start-up test uses the same criterion (absolute or relative) as the
    // iteration below, so abs_diff is honoured consistently.
    lcg_float residual = para.abs_diff ? sqrt(rk_square)/n_size : rk_square/r0_square;
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, residual, &para, n_size, 0);
        }
    }
    else
    {
        while (true)
        {
            if (para.abs_diff) residual = sqrt(rk_square)/n_size;
            else residual = rk_square/r0_square;

            if (Pfp != nullptr && Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 means no limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // Step length along the current search direction.
            Afp(instance, dk, Ax, n_size, MatNormal, NonConjugate);
            clcg_dot(dkAx, dk, Ax, n_size);
            ak = rkrk/dkAx;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                m[i] = m[i] + ak*dk[i];
                rk[i] = rk[i] - ak*Ax[i];
            }

            clcg_inner(rk_mod, rk, rk, n_size);
            rk_square = clcg_square(&rk_mod);

            // A value that differs from itself is NaN: abort.
            bool found_nan = false;
            for (i = 0; i < n_size; i++)
            {
                if (m[i] != m[i])
                {
                    found_nan = true;
                    break;
                }
            }
            if (found_nan)
            {
                ret = CLCG_NAN_VALUE;
                break;
            }

            clcg_dot(rkrk2, rk, rk, n_size);
            betak = rkrk2/rkrk;
            rkrk = rkrk2;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                dk[i] = rk[i] + betak*dk[i];
            }
        }
    }

    // Single exit point: release every work array.
    clcg_free(rk);
    clcg_free(dk);
    clcg_free(Ax);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * Conjugate gradient squared (CGS) solver for a complex linear system whose
 * matrix is only available through the product callback Afp.
 *
 * @param Afp      Product callback; always called with MatNormal/NonConjugate.
 * @param Pfp      Optional progress callback (may be nullptr). A non-zero
 *                 return value stops the iteration.
 * @param m        Initial guess on entry, updated in place (length n_size).
 * @param B        Right-hand side (length n_size).
 * @param n_size   Number of unknowns.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User pointer forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, CLCG_NAN_VALUE, or one of the
 *         CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected input.
 *
 * NOTE(review): two of the returned codes carry the LCG_ prefix instead of
 * CLCG_; confirm against the enums that the numeric values are the intended ones.
 */
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set CGS parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // check parameters before anything is allocated
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk    = clcg_malloc(n_size); // residual
    lcg_complex *rbar0 = clcg_malloc(n_size); // fixed shadow vector
    lcg_complex *pk    = clcg_malloc(n_size);
    lcg_complex *Ax    = clcg_malloc(n_size); // scratch for the callback output
    lcg_complex *uk    = clcg_malloc(n_size);
    lcg_complex *qk    = clcg_malloc(n_size);
    lcg_complex *wk    = clcg_malloc(n_size); // w_k = u_{k-1} + q_k

    lcg_complex ak, betak, sigma, rhok, rhok2, rk_mod;

    // Initial residual r = B - A*m.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
    }

    clcg_inner(rk_mod, rk, rk, n_size);
    lcg_float rk_square = clcg_square(&rk_mod);
    // Reference value of the relative criterion; never allowed below one.
    lcg_float r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    int ret;
    int t = 0;

    // The start-up test runs BEFORE the shadow vector is drawn. With a
    // vanishing residual the inner product in the retry loop below stays under
    // its threshold on every attempt, so testing afterwards would never return.
    // It also uses the same criterion (absolute or relative) as the iteration.
    lcg_float residual = para.abs_diff ? sqrt(rk_square)/n_size : rk_square/r0_square;
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, residual, &para, n_size, 0);
        }
    }
    else
    {
        // Draw a random shadow vector until it is not (nearly) orthogonal to
        // the initial residual.
        do
        {
            clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
            clcg_inner(rhok, rbar0, rk, n_size);
        } while (clcg_module(&rhok) < 1e-8);

        while (true)
        {
            if (para.abs_diff) residual = sqrt(rk_square)/n_size;
            else residual = rk_square/r0_square;

            if (Pfp != nullptr && Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 means no limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            Afp(instance, pk, Ax, n_size, MatNormal, NonConjugate); // vk = Apk
            clcg_inner(sigma, rbar0, Ax, n_size);
            ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                qk[i] = uk[i] - ak*Ax[i];
                wk[i] = uk[i] + qk[i];
            }

            // Ax is reused for the second product of the iteration.
            Afp(instance, wk, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                m[i] = m[i] + ak*wk[i];
                rk[i] = rk[i] - ak*Ax[i];
            }

            clcg_inner(rk_mod, rk, rk, n_size);
            rk_square = clcg_square(&rk_mod);

            // A value that differs from itself is NaN: abort.
            bool found_nan = false;
            for (i = 0; i < n_size; i++)
            {
                if (m[i] != m[i])
                {
                    found_nan = true;
                    break;
                }
            }
            if (found_nan)
            {
                ret = CLCG_NAN_VALUE;
                break;
            }

            clcg_inner(rhok2, rbar0, rk, n_size);
            betak = rhok2/rhok;
            rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                uk[i] = rk[i] + betak*qk[i];
                pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
            }
        }
    }

    // Single exit point: release every work array.
    clcg_free(rk);
    clcg_free(rbar0);
    clcg_free(pk);
    clcg_free(Ax);
    clcg_free(uk);
    clcg_free(qk);
    clcg_free(wk);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * Stabilized biconjugate gradient (BiCGSTAB) solver for a complex linear
 * system whose matrix is only available through the product callback Afp.
 *
 * @param Afp      Product callback; always called with MatNormal/NonConjugate.
 * @param Pfp      Optional progress callback (may be nullptr). A non-zero
 *                 return value stops the iteration.
 * @param m        Initial guess on entry, updated in place (length n_size).
 * @param B        Right-hand side (length n_size).
 * @param n_size   Number of unknowns.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User pointer forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, CLCG_NAN_VALUE, or one of the
 *         CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected input.
 *
 * NOTE(review): two of the returned codes carry the LCG_ prefix instead of
 * CLCG_; confirm against the enums that the numeric values are the intended ones.
 */
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set BICGSTAB parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // check parameters before anything is allocated
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk    = clcg_malloc(n_size); // residual
    lcg_complex *rbar0 = clcg_malloc(n_size); // fixed shadow vector
    lcg_complex *pk    = clcg_malloc(n_size); // search direction
    lcg_complex *sk    = clcg_malloc(n_size); // intermediate residual
    lcg_complex *Ap    = clcg_malloc(n_size); // callback output for pk
    lcg_complex *As    = clcg_malloc(n_size); // callback output for sk

    lcg_complex ak, betak, sigma, omega, Ass, AsAs, rhok, rhok2, rk_mod;

    // Initial residual r = B - A*m, which is also the first search direction.
    Afp(instance, m, Ap, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        pk[i] = rk[i] = B[i] - Ap[i];
    }

    clcg_inner(rk_mod, rk, rk, n_size);
    lcg_float rk_square = clcg_square(&rk_mod);
    // Reference value of the relative criterion; never allowed below one.
    lcg_float r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    int ret;
    int t = 0;

    // The start-up test runs BEFORE the shadow vector is drawn. With a
    // vanishing residual the inner product in the retry loop below stays under
    // its threshold on every attempt, so testing afterwards would never return.
    // It also uses the same criterion (absolute or relative) as the iteration.
    lcg_float residual = para.abs_diff ? sqrt(rk_square)/n_size : rk_square/r0_square;
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, residual, &para, n_size, 0);
        }
    }
    else
    {
        // Draw a random shadow vector until it is not (nearly) orthogonal to
        // the initial residual.
        do
        {
            clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
            clcg_inner(rhok, rbar0, rk, n_size);
        } while (clcg_module(&rhok) < 1e-8);

        while (true)
        {
            if (para.abs_diff) residual = sqrt(rk_square)/n_size;
            else residual = rk_square/r0_square;

            if (Pfp != nullptr && Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 means no limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // First half step along pk.
            Afp(instance, pk, Ap, n_size, MatNormal, NonConjugate);
            clcg_inner(sigma, rbar0, Ap, n_size);
            ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                sk[i] = rk[i] - ak*Ap[i];
            }

            // Second half step: omega = <As, s> / <As, As>.
            Afp(instance, sk, As, n_size, MatNormal, NonConjugate);
            clcg_inner(Ass, As, sk, n_size);
            clcg_inner(AsAs, As, As, n_size);
            omega = Ass/AsAs;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                m[i] = m[i] + ak*pk[i] + omega*sk[i];
                rk[i] = sk[i] - omega*As[i];
            }

            clcg_inner(rk_mod, rk, rk, n_size);
            rk_square = clcg_square(&rk_mod);

            // A value that differs from itself is NaN: abort.
            bool found_nan = false;
            for (i = 0; i < n_size; i++)
            {
                if (m[i] != m[i])
                {
                    found_nan = true;
                    break;
                }
            }
            if (found_nan)
            {
                ret = CLCG_NAN_VALUE;
                break;
            }

            clcg_inner(rhok2, rbar0, rk, n_size);
            betak = rhok2*ak/(rhok*omega);
            rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                pk[i] = rk[i] + betak*(pk[i] - omega*Ap[i]);
            }
        }
    }

    // Single exit point: release every work array.
    clcg_free(rk);
    clcg_free(rbar0);
    clcg_free(pk);
    clcg_free(sk);
    clcg_free(Ap);
    clcg_free(As);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief Iterative solver for the complex linear system A*m = B that only needs products
 * with A itself (Afp is always called with MatNormal and NonConjugate). Judging by its
 * name this is a transpose-free QMR scheme -- NOTE(review): confirm against the reference.
 *
 * The convergence measure is sqrt(rk_square)/n_size when para.abs_diff is set and
 * rk_square/r0_square otherwise, where r0_square is the initial value clamped to >= 1.0.
 * The iteration counter handed to Pfp advances once per inner half step, i.e. twice per
 * pair of matrix-vector products.
 *
 * @param[in] Afp      Callback computing the product of A with a vector.
 * @param[in] Pfp      Optional progress callback (may be nullptr). A non-zero return value
 *                     stops the iteration.
 * @param     m        Initial guess on entry; updated in place with the current solution.
 * @param[in] B        Right-hand side vector.
 * @param[in] n_size   Length of m and B; must be positive.
 * @param[in] param    Solver parameters; defparam2 is used when this is nullptr.
 * @param     instance User data forwarded unchanged to Afp and Pfp.
 *
 * @return One of the CLCG_INVILAD_* codes or CLCG_INVALID_POINTER for bad input,
 *         LCG_ALREADY_OPTIMIZIED when the initial guess already satisfies the criterion,
 *         CLCG_CONVERGENCE, CLCG_STOP (requested by Pfp), LCG_REACHED_MAX_ITERATIONS or
 *         CLCG_NAN_VALUE.
 */
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// Use the caller's parameters when given, otherwise the library defaults.
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// Validate the input before anything is allocated, so early returns cannot leak.
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i, j;
	lcg_complex *pk = nullptr, *uk = nullptr;
	lcg_complex *vk = nullptr, *dk = nullptr;
	lcg_complex *rbar0 = nullptr, *rk = nullptr;
	lcg_complex *Ax = nullptr, *qk = nullptr;
	lcg_complex *uqk = nullptr;
	pk = clcg_malloc(n_size); uk = clcg_malloc(n_size);
	vk = clcg_malloc(n_size); dk = clcg_malloc(n_size);
	rbar0 = clcg_malloc(n_size); rk = clcg_malloc(n_size);
	Ax = clcg_malloc(n_size); qk = clcg_malloc(n_size);
	uqk = clcg_malloc(n_size);

	// Initial residual r0 = B - A*m; p0 and u0 start from it, d0 starts at zero.
	Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
		clcg_set(&dk[i], 0.0, 0.0);
	}

	lcg_complex rho, rk_mod, rk_mod2;
	lcg_float r0_square, rk_square;
	clcg_inner(rk_mod, rk, rk, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	// Never normalise by less than one, so tiny initial residuals are not inflated.
	if (r0_square < 1.0) r0_square = 1.0;

	// Every initialised local is declared before the first 'goto func_ends' below;
	// jumping over an initialised declaration would be ill-formed.
	lcg_float theta = 0.0, omega = clcg_module(&rk_mod);
	lcg_float residual, tao = omega;
	lcg_complex sigma, alpha, betak, rho2, sign, eta(0.0, 0.0);

	int ret, t = 0;

	// Test the initial guess with the same criterion the main loop uses. This runs
	// before the shadow vector is drawn: with a zero residual the inner product
	// below is zero for every rbar0 and the do/while would never terminate.
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	// Draw rbar0 until it is not (numerically) orthogonal to the initial residual.
	do
	{
		clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
		clcg_inner(rho, rbar0, rk, n_size);
	} while (clcg_module(&rho) < 1e-8);

	while(1)
	{
		Afp(instance, pk, vk, n_size, MatNormal, NonConjugate);

		clcg_inner(sigma, rbar0, vk, n_size);
		alpha = rho/sigma;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			qk[i] = uk[i] - alpha*vk[i];
			uqk[i] = uk[i] + qk[i];
		}

		Afp(instance, uqk, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			rk[i] = rk[i] - alpha*Ax[i];
		}

		clcg_inner(rk_mod2, rk, rk, n_size);

		// Two half steps per pair of matrix-vector products; j selects uk or qk.
		for (j = 1; j <= 2; j++)
		{
			// rk_square is only refreshed after both half steps, so both report
			// the same residual value.
			if (para.abs_diff) residual = sqrt(rk_square)/n_size;
			else residual = rk_square/r0_square;

			if (Pfp != nullptr)
			{
				if (Pfp(instance, m, residual, &para, n_size, t))
				{
					ret = CLCG_STOP; goto func_ends;
				}
			}

			if (residual <= para.epsilon)
			{
				ret = CLCG_CONVERGENCE; goto func_ends;
			}

			if (para.max_iterations > 0 && t+1 > para.max_iterations)
			{
				ret = LCG_REACHED_MAX_ITERATIONS;
				// A plain 'break' would only leave this for loop and the
				// enclosing while(1) would keep iterating.
				goto func_ends;
			}

			t++;

			sign = theta*theta*(eta/alpha);

			if (j == 1)
			{
				omega = sqrt(clcg_module(&rk_mod)*clcg_module(&rk_mod2));

#pragma omp parallel for private (i) schedule(guided)
				for (i = 0; i < n_size; i++)
				{
					dk[i] = uk[i] + sign*dk[i];
				}
			}
			else
			{
				omega = clcg_module(&rk_mod2);

#pragma omp parallel for private (i) schedule(guided)
				for (i = 0; i < n_size; i++)
				{
					dk[i] = qk[i] + sign*dk[i];
				}
			}

			theta = omega/tao;
			tao = omega/sqrt(1.0+theta*theta);
			eta = (1.0/(1.0+theta*theta))*alpha;

#pragma omp parallel for private (i) schedule(guided)
			for (i = 0; i < n_size; i++)
			{
				m[i] = m[i] + eta*dk[i];
			}

			// A value that compares unequal to itself is treated as NaN.
			for (i = 0; i < n_size; i++)
			{
				if (m[i] != m[i])
				{
					ret = CLCG_NAN_VALUE; goto func_ends;
				}
			}
		}

		rk_mod = rk_mod2;
		rk_square = clcg_square(&rk_mod);

		clcg_inner(rho2, rbar0, rk, n_size);
		betak = rho2/rho;
		rho = rho2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			uk[i] = rk[i] + betak*qk[i];
			pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
		}
	}

	// Single exit point: every path past the allocations releases the work vectors here.
	func_ends:
	{
		clcg_free(pk);
		clcg_free(uk);
		clcg_free(vk);
		clcg_free(dk);
		clcg_free(rbar0);
		clcg_free(rk);
		clcg_free(Ax);
		clcg_free(qk);
		clcg_free(uqk);
	}

	return ret;
}
|
||||||
78
src/lib/clcg.h
Normal file
78
src/lib/clcg.h
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _CLCG_H
|
||||||
|
#define _CLCG_H
|
||||||
|
|
||||||
|
#include "lcg_complex.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the complex product of a N*N matrix 'A' multiplied
|
||||||
|
* by a complex vertical vector 'x'.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the clcg_solver() functions by the client.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param Ax Product of A multiplied by x.
|
||||||
|
* @param x_size Size of x and column/row numbers of A.
|
||||||
|
* @param layout Whether to use the transpose of A for calculation.
|
||||||
|
* @param conjugate Whether to use the conjugate of A for calculation.
|
||||||
|
*/
|
||||||
|
typedef void (*clcg_axfunc_ptr)(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||||
|
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the clcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param n_size The size of the variables
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*clcg_progress_ptr)(void* instance, const lcg_complex* m,
|
||||||
|
const lcg_float converge, const clcg_para* param, const int n_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined complex conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
|
||||||
|
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
|
||||||
|
clcg_solver_enum solver_id = CLCG_BICG);
|
||||||
|
|
||||||
|
#endif // _CLCG_H
|
||||||
529
src/lib/clcg_cuda.cu
Normal file
529
src/lib/clcg_cuda.cu
Normal file
@@ -0,0 +1,529 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "clcg_cuda.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*cuda_solver_ptr)(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
|
||||||
|
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clbicg(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
|
||||||
|
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clbicg_symmetric(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
|
||||||
|
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
/**
 * @brief Entry point of the non-preconditioned CUDA solvers: picks the implementation
 * that matches solver_id and forwards every other argument to it unchanged.
 *
 * @return CLCG_UNKNOWN_SOLVER when solver_id is neither CLCG_BICG nor CLCG_BICG_SYM,
 *         otherwise whatever the selected solver returns.
 */
int clcg_solver_cuda(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m, const cuDoubleComplex* B,
	const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
	cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
	cuda_solver_ptr chosen_solver = nullptr;

	if (solver_id == CLCG_BICG)
	{
		chosen_solver = clbicg;
	}
	else if (solver_id == CLCG_BICG_SYM)
	{
		chosen_solver = clbicg_symmetric;
	}
	else
	{
		// No CUDA implementation is registered for this identifier.
		return CLCG_UNKNOWN_SOLVER;
	}

	return chosen_solver(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||||
|
|
||||||
|
typedef int (*cuda_precondtioned_solver_ptr)(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
|
||||||
|
cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param,
|
||||||
|
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clpcg(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
|
||||||
|
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
/**
 * @brief Entry point of the preconditioned CUDA solvers: forwards all arguments to the
 * implementation that matches solver_id.
 *
 * @return CLCG_UNKNOWN_SOLVER when solver_id is not CLCG_PCG, otherwise whatever the
 *         selected solver returns.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
	cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
	cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
	// CLCG_PCG is the only preconditioned solver handled here.
	if (solver_id != CLCG_PCG)
	{
		return CLCG_UNKNOWN_SOLVER;
	}

	cuda_precondtioned_solver_ptr chosen_solver = clpcg;
	return chosen_solver(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||||
|
|
||||||
|
/**
 * @brief CUDA solver for the complex linear system A*m = B that advances two residual /
 * direction pairs, using products with A and with its conjugate transpose (judging by the
 * name, a bi-conjugate gradient scheme -- NOTE(review): confirm against the reference).
 *
 * m and B are copied to device buffers with cudaMemcpyHostToDevice, so they are expected
 * to be host pointers; the current solution is copied back into m before returning on
 * every path past the argument checks. Pfp receives the device copy of the solution.
 *
 * The convergence measure is rk_mod/n_size when para.abs_diff is set and
 * rk_mod^2/r0_mod^2 otherwise, where rk_mod is the 2-norm of the residual and r0_mod is
 * its initial value clamped to >= 1.0.
 *
 * @param[in] Afp        Callback computing the product of A (or its conjugate transpose,
 *                       selected by the last argument) with a dense vector descriptor.
 * @param[in] Pfp        Optional progress callback (may be nullptr). A non-zero return
 *                       value stops the iteration.
 * @param     m          Initial guess on entry, solution on return.
 * @param[in] B          Right-hand side vector.
 * @param[in] n_size     Length of m and B; must be positive.
 * @param[in] nz_size    Forwarded unchanged to Afp and Pfp.
 * @param[in] param      Solver parameters; defparam2 is used when this is nullptr.
 * @param     instance   User data forwarded unchanged to Afp and Pfp.
 * @param[in] cub_handle cuBLAS handle; must not be null.
 * @param[in] cus_handle cuSPARSE handle; must not be null.
 *
 * @return One of the CLCG_INVILAD_* codes, CLCG_INVALID_POINTER or LCG_INVALID_POINTER
 *         for bad input, LCG_ALREADY_OPTIMIZIED, CLCG_CONVERGENCE, CLCG_STOP or
 *         LCG_REACHED_MAX_ITERATIONS.
 */
int clbicg(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
	const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
	cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
	// Use the caller's parameters when given, otherwise the library defaults.
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// Validate the input before any device memory is allocated.
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;
	// NOTE(review): the handle checks use the LCG_ code while the pointer checks above
	// use the CLCG_ one -- confirm that both enums share the same value.
	if (cub_handle == nullptr) return LCG_INVALID_POINTER;
	if (cus_handle == nullptr) return LCG_INVALID_POINTER;

	cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
	cuDoubleComplex *r1k = nullptr, *r2k = nullptr;
	cuDoubleComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
	cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&r1k, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&r2k, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&d1k, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&d2k, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

	// Copy the initial solution and the right-hand side to the device.
	cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
	cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

	// Dense vector descriptors for the buffers that are handed to Afp.
	cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
	cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
	cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_64F);
	cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_64F);
	cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

	cuDoubleComplex one, none;
	one.x = 1.0; one.y = 0.0;
	none.x = -1.0; none.y = 0.0;
	cuDoubleComplex ak, nak, conj_ak, Ad1d2, r1r2_next, betak, conj_betak;

	Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

	// r0 = B - Ax
	cudaMemcpy(r1k, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
	cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1); // r0 -= Ax
	cudaMemcpy(d1k, r1k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

	// The second residual / direction pair starts from the conjugate of the first.
	clcg_vecZ_conjugate(r1k, r2k, n_size);
	cudaMemcpy(d2k, r2k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);

	cuDoubleComplex r1r2;
	cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);

	lcg_float rk_mod;
	cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

	// Never normalise by less than one, so tiny initial residuals are not inflated.
	lcg_float r0_mod = rk_mod;
	if (r0_mod < 1.0) r0_mod = 1.0;

	int ret, t = 0;

	// Test the initial guess with the same criterion the main loop uses, so that the
	// relative test cannot end the solve while the absolute criterion is requested.
	lcg_float residual;
	if (para.abs_diff) residual = rk_mod/n_size;
	else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

	if (residual <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
		}
		goto func_ends;
	}

	while(1)
	{
		if (para.abs_diff) residual = rk_mod/n_size;
		else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

		if (Pfp != nullptr)
		{
			if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		// Step length: ak = <r2, r1> / <d2, A*d1>
		Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

		cublasZdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
		ak = cuCdiv(r1r2, Ad1d2);
		nak = cuCmul(none, ak);
		// Despite its name this holds conj(-ak), the factor applied to the second residual.
		conj_ak = cuConj(nak);

		cublasZaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1); // m += ak*d1
		cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1); // r1 -= ak*A*d1

		cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

		// Ax is reused for the product of the conjugate transpose of A with d2.
		Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);

		cublasZaxpy_v2(cub_handle, n_size, &conj_ak, Ax, 1, r2k, 1); // r2 -= conj(ak)*A^H*d2

		cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
		betak = cuCdiv(r1r2_next, r1r2);
		conj_betak = cuConj(betak);
		r1r2 = r1r2_next;

		// d1 = r1 + betak*d1
		cublasZscal_v2(cub_handle, n_size, &betak, d1k, 1);
		cublasZaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);

		// d2 = r2 + conj(betak)*d2
		cublasZscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
		cublasZaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
	}

	// Single exit point: hand the solution back and release all device resources.
	func_ends:
	{
		// Copy to host memories
		cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

		cudaFree(d_m);
		cudaFree(d_B);
		cudaFree(r1k);
		cudaFree(r2k);
		cudaFree(d1k);
		cudaFree(d2k);
		cudaFree(Ax);
		cusparseDestroyDnVec(dvec_m);
		cusparseDestroyDnVec(dvec_d1k);
		cusparseDestroyDnVec(dvec_d2k);
		cusparseDestroyDnVec(dvec_Ax);
	}

	return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief CUDA solver for the complex linear system A*m = B that keeps a single residual /
 * direction pair and uses unconjugated dot products and products with A only. Presumably
 * intended for complex symmetric A, as the name suggests -- NOTE(review): confirm.
 *
 * m and B are copied to device buffers with cudaMemcpyHostToDevice, so they are expected
 * to be host pointers; the current solution is copied back into m before returning on
 * every path past the argument checks. Pfp receives the device copy of the solution.
 *
 * The convergence measure is rk_mod/n_size when para.abs_diff is set and
 * rk_mod^2/r0_mod^2 otherwise, where rk_mod is the 2-norm of the residual and r0_mod is
 * its initial value clamped to >= 1.0.
 *
 * @param[in] Afp        Callback computing the product of A with a dense vector descriptor.
 * @param[in] Pfp        Optional progress callback (may be nullptr). A non-zero return
 *                       value stops the iteration.
 * @param     m          Initial guess on entry, solution on return.
 * @param[in] B          Right-hand side vector.
 * @param[in] n_size     Length of m and B; must be positive.
 * @param[in] nz_size    Forwarded unchanged to Afp and Pfp.
 * @param[in] param      Solver parameters; defparam2 is used when this is nullptr.
 * @param     instance   User data forwarded unchanged to Afp and Pfp.
 * @param[in] cub_handle cuBLAS handle; must not be null.
 * @param[in] cus_handle cuSPARSE handle; must not be null.
 *
 * @return One of the CLCG_INVILAD_* codes, CLCG_INVALID_POINTER or LCG_INVALID_POINTER
 *         for bad input, LCG_ALREADY_OPTIMIZIED, CLCG_CONVERGENCE, CLCG_STOP or
 *         LCG_REACHED_MAX_ITERATIONS.
 */
int clbicg_symmetric(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
	const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
	cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
	// Use the caller's parameters when given, otherwise the library defaults.
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// Validate the input before any device memory is allocated.
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;
	// NOTE(review): the handle checks use the LCG_ code while the pointer checks above
	// use the CLCG_ one -- confirm that both enums share the same value.
	if (cub_handle == nullptr) return LCG_INVALID_POINTER;
	if (cus_handle == nullptr) return LCG_INVALID_POINTER;

	cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
	cuDoubleComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
	cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
	cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

	// Copy the initial solution and the right-hand side to the device.
	cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
	cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

	// Dense vector descriptors for the buffers that are handed to Afp.
	cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
	cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
	cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
	cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

	cuDoubleComplex one, none;
	one.x = 1.0; one.y = 0.0;
	none.x = -1.0; none.y = 0.0;
	cuDoubleComplex ak, nak, rkrk2, betak, dkAx;

	Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

	// r0 = B - Ax
	cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
	cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
	cudaMemcpy(dk, rk, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

	// Unconjugated product r.r drives the recurrences; the 2-norm below drives the stop test.
	cuDoubleComplex rkrk;
	cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

	lcg_float rk_mod;
	cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

	// Never normalise by less than one, so tiny initial residuals are not inflated.
	lcg_float r0_mod = rk_mod;
	if (r0_mod < 1.0) r0_mod = 1.0;

	int ret, t = 0;

	// Test the initial guess with the same criterion the main loop uses, so that the
	// relative test cannot end the solve while the absolute criterion is requested.
	lcg_float residual;
	if (para.abs_diff) residual = rk_mod/n_size;
	else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

	if (residual <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
		}
		goto func_ends;
	}

	while(1)
	{
		if (para.abs_diff) residual = rk_mod/n_size;
		else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

		if (Pfp != nullptr)
		{
			if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		// Step length: ak = (r.r) / (d.A*d), both products unconjugated.
		Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

		cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
		ak = cuCdiv(rkrk, dkAx);
		nak = cuCmul(none, ak);

		cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*d
		cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1); // r -= ak*A*d

		cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

		cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
		betak = cuCdiv(rkrk2, rkrk);
		rkrk = rkrk2;

		// d = r + betak*d
		cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
		cublasZaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
	}

	// Single exit point: hand the solution back and release all device resources.
	func_ends:
	{
		// Copy to host memories
		cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

		cudaFree(d_m);
		cudaFree(d_B);
		cudaFree(rk);
		cudaFree(dk);
		cudaFree(Ax);
		cusparseDestroyDnVec(dvec_m);
		cusparseDestroyDnVec(dvec_dk);
		cusparseDestroyDnVec(dvec_Ax);
	}

	return ret;
}
|
||||||
|
|
||||||
|
int clpcg(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
|
||||||
|
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||||
|
{
|
||||||
|
// set CGS parameters
|
||||||
|
clcg_para para = (param != nullptr) ? (*param) : defparam2;
|
||||||
|
|
||||||
|
//check parameters
|
||||||
|
if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
|
||||||
|
if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
|
||||||
|
if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
|
||||||
|
|
||||||
|
if (m == nullptr) return CLCG_INVALID_POINTER;
|
||||||
|
if (B == nullptr) return CLCG_INVALID_POINTER;
|
||||||
|
if (cub_handle == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (cus_handle == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
|
||||||
|
cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
|
||||||
|
cuDoubleComplex *rk = nullptr, *dk = nullptr, *sk = nullptr, *Ax = nullptr;
|
||||||
|
cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&sk, n_size * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
// Copy initial solutions
|
||||||
|
cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_dk, dvec_sk, dvec_Ax;
|
||||||
|
cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_C_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_sk, n_size, sk, CUDA_C_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);
|
||||||
|
|
||||||
|
cuDoubleComplex one, none;
|
||||||
|
one.x = 1.0; one.y = 0.0;
|
||||||
|
none.x = -1.0; none.y = 0.0;
|
||||||
|
cuDoubleComplex ak, nak, d_old, betak, dkAx;
|
||||||
|
|
||||||
|
Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||||
|
|
||||||
|
// r0 = B - Ax
|
||||||
|
cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
|
||||||
|
cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
|
||||||
|
|
||||||
|
Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_dk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||||
|
|
||||||
|
cuDoubleComplex d_new;
|
||||||
|
cublasZdotu_v2(cub_handle, n_size, rk, 1, dk, 1, &d_new);
|
||||||
|
|
||||||
|
lcg_float rk_mod;
|
||||||
|
cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);
|
||||||
|
|
||||||
|
lcg_float r0_mod = rk_mod;
|
||||||
|
if (r0_mod < 1.0) r0_mod = 1.0;
|
||||||
|
|
||||||
|
int ret, t = 0;
|
||||||
|
if (para.abs_diff && rk_mod/n_size <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = LCG_ALREADY_OPTIMIZIED;
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
Pfp(instance, d_m, rk_mod/n_size, ¶, n_size, nz_size, 0);
|
||||||
|
}
|
||||||
|
goto func_ends;
|
||||||
|
}
|
||||||
|
else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = LCG_ALREADY_OPTIMIZIED;
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), ¶, n_size, nz_size, 0);
|
||||||
|
}
|
||||||
|
goto func_ends;
|
||||||
|
}
|
||||||
|
|
||||||
|
lcg_float residual;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
if (para.abs_diff) residual = rk_mod/n_size;
|
||||||
|
else residual = rk_mod*rk_mod/(r0_mod*r0_mod);
|
||||||
|
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
if (Pfp(instance, d_m, residual, ¶, n_size, nz_size, t))
|
||||||
|
{
|
||||||
|
ret = CLCG_STOP; goto func_ends;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (residual <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = CLCG_CONVERGENCE; goto func_ends;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (para.max_iterations > 0 && t+1 > para.max_iterations)
|
||||||
|
{
|
||||||
|
ret = LCG_REACHED_MAX_ITERATIONS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
t++;
|
||||||
|
|
||||||
|
Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||||
|
cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
|
||||||
|
ak = cuCdiv(d_new, dkAx);
|
||||||
|
nak = cuCmul(none, ak);
|
||||||
|
|
||||||
|
cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);
|
||||||
|
cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);
|
||||||
|
|
||||||
|
cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);
|
||||||
|
|
||||||
|
Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_sk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||||
|
|
||||||
|
d_old = d_new;
|
||||||
|
cublasZdotu_v2(cub_handle, n_size, rk, 1, sk, 1, &d_new);
|
||||||
|
|
||||||
|
betak = cuCdiv(d_new, d_old);
|
||||||
|
|
||||||
|
cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
|
||||||
|
cublasZaxpy_v2(cub_handle, n_size, &one, sk, 1, dk, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
func_ends:
|
||||||
|
{
|
||||||
|
// Copy to host memories
|
||||||
|
cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
cudaFree(d_m);
|
||||||
|
cudaFree(d_B);
|
||||||
|
cudaFree(rk);
|
||||||
|
cudaFree(dk);
|
||||||
|
cudaFree(sk);
|
||||||
|
cudaFree(Ax);
|
||||||
|
cusparseDestroyDnVec(dvec_m);
|
||||||
|
cusparseDestroyDnVec(dvec_rk);
|
||||||
|
cusparseDestroyDnVec(dvec_dk);
|
||||||
|
cusparseDestroyDnVec(dvec_sk);
|
||||||
|
cusparseDestroyDnVec(dvec_Ax);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
109
src/lib/clcg_cuda.h
Normal file
109
src/lib/clcg_cuda.h
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _CLCG_CUDA_H
|
||||||
|
#define _CLCG_CUDA_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "lcg_complex_cuda.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
#include <cublas_v2.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param Ax Product of A multiplied by x.
|
||||||
|
* @param n_size Size of x and column/row numbers of A.
|
||||||
|
*/
|
||||||
|
typedef void (*clcg_axfunc_cuda_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size, cusparseOperation_t oper_t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary. Note that m is hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param n_size The size of the variables
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*clcg_progress_cuda_ptr)(void* instance, const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_BICG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_cuda(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m, const cuDoubleComplex* B,
|
||||||
|
const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
|
||||||
|
cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_BICG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_preconditioned_cuda(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
|
||||||
|
cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_PCG);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _CLCG_CUDA_H
|
||||||
529
src/lib/clcg_cudaf.cu
Normal file
529
src/lib/clcg_cudaf.cu
Normal file
@@ -0,0 +1,529 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "clcg_cudaf.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*cuda_solver_ptr)(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
|
||||||
|
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clbicg(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
|
||||||
|
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clbicg_symmetric(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
|
||||||
|
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
/**
 * @brief Entry point of the single-precision complex solvers without
 * preconditioning: picks the solver named by solver_id and forwards every
 * argument to it unchanged.
 *
 * @param solver_id  CLCG_BICG selects clbicg(), CLCG_BICG_SYM selects
 *                   clbicg_symmetric().
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id matches neither of the two values above.
 */
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
    const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
    cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    cuda_solver_ptr chosen = nullptr;

    if (solver_id == CLCG_BICG)
    {
        chosen = clbicg;
    }
    else if (solver_id == CLCG_BICG_SYM)
    {
        chosen = clbicg_symmetric;
    }

    // No solver matched the requested id.
    if (chosen == nullptr)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return chosen(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||||
|
|
||||||
|
typedef int (*cuda_precondtioned_solver_ptr)(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
|
||||||
|
cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param,
|
||||||
|
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int clpcg(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
|
||||||
|
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
/**
 * @brief Entry point of the single-precision complex solvers with
 * preconditioning: picks the solver named by solver_id and forwards every
 * argument to it unchanged.
 *
 * @param solver_id  CLCG_PCG selects clpcg(); no other value is accepted.
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id is not CLCG_PCG.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
    cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    cuda_precondtioned_solver_ptr chosen = nullptr;

    if (solver_id == CLCG_PCG)
    {
        chosen = clpcg;
    }

    // No solver matched the requested id.
    if (chosen == nullptr)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return chosen(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||||
|
|
||||||
|
/**
 * @brief Bi-conjugate gradient solver for the single-precision complex system A*m = B.
 *
 * m and B are copied to freshly allocated device buffers with
 * cudaMemcpyHostToDevice, the iteration runs on the device through cuBLAS and
 * the user callbacks, and the device solution is copied back into m before
 * returning.
 *
 * @param Afp         Callback computing the product of A and a vector. It is called with
 *                    CUSPARSE_OPERATION_NON_TRANSPOSE for the search direction and with
 *                    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE for the shadow direction.
 * @param Pfp         Optional progress callback (may be nullptr). It receives the device
 *                    copy of the solution; a non-zero return stops the iteration.
 * @param m           Initial solution on entry, solution on return.
 * @param B           Right-hand side of the system.
 * @param n_size      Number of unknowns; must be positive.
 * @param nz_size     Passed through to Afp and Pfp without being used here.
 * @param param       Solver parameters; defparam2 is used when nullptr.
 * @param instance    User data passed through to the callbacks.
 * @param cub_handle  cuBLAS handle.
 * @param cus_handle  cuSPARSE handle (only forwarded to Afp).
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_REACHED_MAX_ITERATIONS or
 *         LCG_ALREADY_OPTIMIZIED after running, or one of the CLCG_INVILAD_* /
 *         *_INVALID_POINTER codes when an argument is rejected.
 *
 * NOTE(review): LCG_* and CLCG_* codes are mixed below exactly as in the
 * original; they are kept because callers may compare against them -- confirm
 * which family is intended. CUDA/cuBLAS return codes are not checked.
 */
int clbicg(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Reject invalid arguments before any device resource is created.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;
    // Afp is called unconditionally below, so a null callback cannot be accepted.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;

    const auto vec_bytes = n_size * sizeof(cuComplex);

    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *r1k = nullptr, *r2k = nullptr;
    cuComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&r1k, vec_bytes);
    cudaMalloc(&r2k, vec_bytes);
    cudaMalloc(&d1k, vec_bytes);
    cudaMalloc(&d2k, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Descriptors for the vectors that are handed to the Afp callback.
    cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuComplex ak, nak, neg_conj_ak, Ad1d2, r1r2_next, betak, conj_betak;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r1 = B - A*m and d1 = r1
    cudaMemcpy(r1k, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1);
    cudaMemcpy(d1k, r1k, vec_bytes, cudaMemcpyDeviceToDevice);

    // Shadow residual and direction; clcg_vecC_conjugate presumably stores the
    // element-wise conjugate of r1k in r2k -- TODO confirm.
    clcg_vecC_conjugate(r1k, r2k, n_size);
    cudaMemcpy(d2k, r2k, vec_bytes, cudaMemcpyDeviceToDevice);

    cuComplex r1r2;
    cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

    // The relative criterion is normalised by the initial residual norm, but never by less than one.
    float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret;
    int t = 0;
    // NOTE(review): as in the original, the relative test below is also tried
    // when para.abs_diff is set and the absolute test failed -- confirm intent.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
    }
    else
    {
        while (true)
        {
            float residual;
            if (para.abs_diff) residual = rk_mod/n_size;
            else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

            // The progress callback can stop the iteration by returning non-zero.
            if (Pfp != nullptr && Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 disables the iteration limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // ak = (r2, r1) / (d2, A*d1)
            Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

            cublasCdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
            ak = cuCdivf(r1r2, Ad1d2);
            nak = cuCmulf(none, ak);
            neg_conj_ak = cuConjf(nak);

            // m += ak*d1 and r1 -= ak*A*d1
            cublasCaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1);
            cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1);

            cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

            // r2 -= conj(ak) * A^H * d2
            Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);

            cublasCaxpy_v2(cub_handle, n_size, &neg_conj_ak, Ax, 1, r2k, 1);

            cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
            betak = cuCdivf(r1r2_next, r1r2);
            conj_betak = cuConjf(betak);
            r1r2 = r1r2_next;

            // d1 = r1 + betak*d1
            cublasCscal_v2(cub_handle, n_size, &betak, d1k, 1);
            cublasCaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);

            // d2 = r2 + conj(betak)*d2
            cublasCscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
            cublasCaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
        }
    }

    // Copy the solution back to the caller's buffer.
    cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

    // Release the descriptors before the buffers they refer to.
    cusparseDestroyDnVec(dvec_m);
    cusparseDestroyDnVec(dvec_d1k);
    cusparseDestroyDnVec(dvec_d2k);
    cusparseDestroyDnVec(dvec_Ax);

    cudaFree(d_m);
    cudaFree(d_B);
    cudaFree(r1k);
    cudaFree(r2k);
    cudaFree(d1k);
    cudaFree(d2k);
    cudaFree(Ax);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief Conjugate-gradient style solver for the single-precision complex system
 * A*m = B that uses unconjugated dot products (cublasCdotu) throughout.
 *
 * Only non-transposed products with A are requested from Afp; presumably this
 * relies on A being complex symmetric -- TODO confirm. m and B are copied to
 * freshly allocated device buffers with cudaMemcpyHostToDevice, and the device
 * solution is copied back into m before returning.
 *
 * @param Afp         Callback computing the product of A and a vector.
 * @param Pfp         Optional progress callback (may be nullptr). It receives the device
 *                    copy of the solution; a non-zero return stops the iteration.
 * @param m           Initial solution on entry, solution on return.
 * @param B           Right-hand side of the system.
 * @param n_size      Number of unknowns; must be positive.
 * @param nz_size     Passed through to Afp and Pfp without being used here.
 * @param param       Solver parameters; defparam2 is used when nullptr.
 * @param instance    User data passed through to the callbacks.
 * @param cub_handle  cuBLAS handle.
 * @param cus_handle  cuSPARSE handle (only forwarded to Afp).
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_REACHED_MAX_ITERATIONS or
 *         LCG_ALREADY_OPTIMIZIED after running, or one of the CLCG_INVILAD_* /
 *         *_INVALID_POINTER codes when an argument is rejected.
 *
 * NOTE(review): LCG_* and CLCG_* codes are mixed below exactly as in the
 * original; they are kept because callers may compare against them -- confirm
 * which family is intended. CUDA/cuBLAS return codes are not checked.
 */
int clbicg_symmetric(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Reject invalid arguments before any device resource is created.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;
    // Afp is called unconditionally below, so a null callback cannot be accepted.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;

    // Every buffer holds n_size single-precision complex values. The original
    // sized d_m and d_B with sizeof(cuDoubleComplex), which allocated twice the
    // memory that is ever copied or used.
    const auto vec_bytes = n_size * sizeof(cuComplex);

    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&rk, vec_bytes);
    cudaMalloc(&dk, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Descriptors for the vectors that are handed to the Afp callback.
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuComplex ak, nak, rkrk2, betak, dkAx;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r = B - A*m and d = r
    cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1);
    cudaMemcpy(dk, rk, vec_bytes, cudaMemcpyDeviceToDevice);

    cuComplex rkrk;
    cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative criterion is normalised by the initial residual norm, but never by less than one.
    float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret;
    int t = 0;
    // NOTE(review): as in the original, the relative test below is also tried
    // when para.abs_diff is set and the absolute test failed -- confirm intent.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
    }
    else
    {
        while (true)
        {
            float residual;
            if (para.abs_diff) residual = rk_mod/n_size;
            else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

            // The progress callback can stop the iteration by returning non-zero.
            if (Pfp != nullptr && Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 disables the iteration limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // ak = (r . r) / (d . A*d), both without conjugation
            Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

            cublasCdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
            ak = cuCdivf(rkrk, dkAx);
            nak = cuCmulf(none, ak);

            // m += ak*d and r -= ak*A*d
            cublasCaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);
            cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);

            cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

            cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
            betak = cuCdivf(rkrk2, rkrk);
            rkrk = rkrk2;

            // d = r + betak*d
            cublasCscal_v2(cub_handle, n_size, &betak, dk, 1);
            cublasCaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
        }
    }

    // Copy the solution back to the caller's buffer.
    cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

    // Release the descriptors before the buffers they refer to.
    cusparseDestroyDnVec(dvec_m);
    cusparseDestroyDnVec(dvec_dk);
    cusparseDestroyDnVec(dvec_Ax);

    cudaFree(d_m);
    cudaFree(d_B);
    cudaFree(rk);
    cudaFree(dk);
    cudaFree(Ax);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief Preconditioned conjugate-gradient style solver for the single-precision
 * complex system A*m = B, using unconjugated dot products (cublasCdotu).
 *
 * The preconditioner is applied through Mfp to the residual at start-up and once
 * per iteration. m and B are copied to freshly allocated device buffers with
 * cudaMemcpyHostToDevice, and the device solution is copied back into m before
 * returning.
 *
 * @param Afp         Callback computing the product of A and a vector.
 * @param Mfp         Callback applying the preconditioner to a vector.
 * @param Pfp         Optional progress callback (may be nullptr). It receives the device
 *                    copy of the solution; a non-zero return stops the iteration.
 * @param m           Initial solution on entry, solution on return.
 * @param B           Right-hand side of the system.
 * @param n_size      Number of unknowns; must be positive.
 * @param nz_size     Passed through to the callbacks without being used here.
 * @param param       Solver parameters; defparam2 is used when nullptr.
 * @param instance    User data passed through to the callbacks.
 * @param cub_handle  cuBLAS handle.
 * @param cus_handle  cuSPARSE handle (only forwarded to the callbacks).
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_REACHED_MAX_ITERATIONS or
 *         LCG_ALREADY_OPTIMIZIED after running, or one of the CLCG_INVILAD_* /
 *         *_INVALID_POINTER codes when an argument is rejected.
 *
 * NOTE(review): LCG_* and CLCG_* codes are mixed below exactly as in the
 * original; they are kept because callers may compare against them -- confirm
 * which family is intended. CUDA/cuBLAS return codes are not checked.
 */
int clpcg(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Reject invalid arguments before any device resource is created.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;
    // Both callbacks are called unconditionally below, so null cannot be accepted.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (Mfp == nullptr) return CLCG_INVALID_POINTER;

    const auto vec_bytes = n_size * sizeof(cuComplex);

    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *rk = nullptr, *dk = nullptr, *sk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&rk, vec_bytes);
    cudaMalloc(&dk, vec_bytes);
    cudaMalloc(&sk, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Descriptors for the vectors that are handed to the callbacks.
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_dk, dvec_sk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_sk, n_size, sk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuComplex ak, nak, d_old, betak, dkAx;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r = B - A*m
    cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1);

    // d = M*r (preconditioned residual as first search direction)
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_dk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    cuComplex d_new;
    cublasCdotu_v2(cub_handle, n_size, rk, 1, dk, 1, &d_new);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative criterion is normalised by the initial residual norm, but never by less than one.
    float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret;
    int t = 0;
    // NOTE(review): as in the original, the relative test below is also tried
    // when para.abs_diff is set and the absolute test failed -- confirm intent.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
    }
    else
    {
        while (true)
        {
            float residual;
            if (para.abs_diff) residual = rk_mod/n_size;
            else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

            // The progress callback can stop the iteration by returning non-zero.
            if (Pfp != nullptr && Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // max_iterations == 0 disables the iteration limit.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            t++;

            // ak = (r . M*r) / (d . A*d), both without conjugation
            Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
            cublasCdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
            ak = cuCdivf(d_new, dkAx);
            nak = cuCmulf(none, ak);

            // m += ak*d and r -= ak*A*d
            cublasCaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);
            cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);

            cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

            // s = M*r
            Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_sk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

            d_old = d_new;
            cublasCdotu_v2(cub_handle, n_size, rk, 1, sk, 1, &d_new);

            betak = cuCdivf(d_new, d_old);

            // d = s + betak*d
            cublasCscal_v2(cub_handle, n_size, &betak, dk, 1);
            cublasCaxpy_v2(cub_handle, n_size, &one, sk, 1, dk, 1);
        }
    }

    // Copy the solution back to the caller's buffer.
    cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

    // Release the descriptors before the buffers they refer to.
    cusparseDestroyDnVec(dvec_m);
    cusparseDestroyDnVec(dvec_rk);
    cusparseDestroyDnVec(dvec_dk);
    cusparseDestroyDnVec(dvec_sk);
    cusparseDestroyDnVec(dvec_Ax);

    cudaFree(d_m);
    cudaFree(d_B);
    cudaFree(rk);
    cudaFree(dk);
    cudaFree(sk);
    cudaFree(Ax);

    return ret;
}
|
||||||
109
src/lib/clcg_cudaf.h
Normal file
109
src/lib/clcg_cudaf.h
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _CLCG_CUDA_FLOAT_H
|
||||||
|
#define _CLCG_CUDA_FLOAT_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "lcg_complex_cuda.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
#include <cublas_v2.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param Ax Product of A multiplied by x.
|
||||||
|
* @param n_size Size of x and column/row numbers of A.
|
||||||
|
*/
|
||||||
|
typedef void (*clcg_axfunc_cudaf_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size, cusparseOperation_t oper_t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary. Note that m is hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param n_size The size of the variables
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*clcg_progress_cudaf_ptr)(void* instance, const cuComplex* m, const float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_BICG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
|
||||||
|
const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
|
||||||
|
cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_BICG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handlee of the cusparse object.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
|
||||||
|
cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_PCG);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _CLCG_CUDA_FLOAT_H
|
||||||
777
src/lib/clcg_eigen.cpp
Normal file
777
src/lib/clcg_eigen.cpp
Normal file
@@ -0,0 +1,777 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "clcg_eigen.h"
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*eigen_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
/**
 * @brief Dispatch a complex linear system to one of the non-preconditioned Eigen solvers.
 *
 * @param Afp        Callback that evaluates the matrix-vector product.
 * @param Pfp        Progress callback, forwarded unchanged to the selected solver.
 * @param m          Solution vector (initial guess on entry).
 * @param B          Objective vector of the linear system.
 * @param param      Solver parameters, forwarded unchanged to the selected solver.
 * @param instance   User data forwarded to the callbacks.
 * @param solver_id  One of CLCG_BICG, CLCG_BICG_SYM, CLCG_CGS or CLCG_TFQMR.
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id names none of the solvers listed above.
 */
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
{
    switch (solver_id)
    {
        case CLCG_BICG:
            return clbicg(Afp, Pfp, m, B, param, instance);
        case CLCG_BICG_SYM:
            return clbicg_symmetric(Afp, Pfp, m, B, param, instance);
        case CLCG_CGS:
            return clcgs(Afp, Pfp, m, B, param, instance);
        case CLCG_TFQMR:
            return cltfqmr(Afp, Pfp, m, B, param, instance);
        default:
            return CLCG_UNKNOWN_SOLVER;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*eigen_preconditioned_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||||
|
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||||
|
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||||
|
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||||
|
|
||||||
|
/**
 * @brief Dispatch a complex linear system to one of the preconditioned Eigen solvers.
 *
 * @param Afp        Callback that evaluates the matrix-vector product.
 * @param Mfp        Callback that applies the preconditioner.
 * @param Pfp        Progress callback, forwarded unchanged to the selected solver.
 * @param m          Solution vector (initial guess on entry).
 * @param B          Objective vector of the linear system.
 * @param param      Solver parameters, forwarded unchanged to the selected solver.
 * @param instance   User data forwarded to the callbacks.
 * @param solver_id  Either CLCG_PCG or CLCG_PBICG.
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id names neither of the two solvers.
 */
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
{
    switch (solver_id)
    {
        case CLCG_PCG:
            return clpcg(Afp, Mfp, Pfp, m, B, param, instance);
        case CLCG_PBICG:
            return clpbicg(Afp, Mfp, Pfp, m, B, param, instance);
        default:
            return CLCG_UNKNOWN_SOLVER;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Bi-conjugate gradient solver for complex linear systems (Eigen vectors).
 *
 * Two residual/direction pairs are iterated: (r1k, d1k) with products requested as
 * (MatNormal, NonConjugate) and the shadow pair (r2k, d2k) with products requested as
 * (MatTranspose, Conjugate). The convergence measure is built from
 * rk_mod = std::norm(r1k.dot(r1k)): sqrt(rk_mod)/n_size when para.abs_diff is set,
 * otherwise rk_mod/r0_mod, where r0_mod is the initial rk_mod clamped to at least 1.0.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, Ad1d2, r1r2_next, betak;
    Eigen::VectorXcd r1k(n_size), r2k(n_size), d1k(n_size), d2k(n_size);
    Eigen::VectorXcd Ax(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    // Initial residual and its conjugated shadow; both direction vectors start at the residuals.
    d1k = r1k = B - Ax;
    d2k = r2k = r1k.conjugate();

    // Eigen's dot() is the inner product (it conjugates its first argument).
    std::complex<lcg_float> r1r2 = r2k.dot(r1k);

    lcg_float rk_mod = std::norm(r1k.dot(r1k));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    // The initial guess may already be good enough. The relative test is also
    // tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (rk_mod/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    int t = 0;
    lcg_float residual;
    while (true)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the final residual.
        if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
        {
            return CLCG_STOP;
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, d1k, Ax, MatNormal, NonConjugate);
        Ad1d2 = d2k.dot(Ax);
        ak = r1r2/Ad1d2;

        m = m + ak*d1k;
        r1k = r1k - ak*Ax;

        rk_mod = std::norm(r1k.dot(r1k));

        // Ax is reused for the shadow product.
        Afp(instance, d2k, Ax, MatTranspose, Conjugate);

        r2k = r2k - std::conj(ak)*Ax;

        r1r2_next = r2k.dot(r1k);
        betak = r1r2_next/r1r2;
        r1r2 = r1r2_next;

        d1k = r1k + betak*d1k;
        d2k = r2k + std::conj(betak)*d2k;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Bi-conjugate gradient solver variant that needs a single matrix product per
 * iteration (Eigen vectors).
 *
 * Only products requested as (MatNormal, NonConjugate) are used. The step sizes are
 * built from rk.conjugate().dot(rk) and dk.conjugate().dot(Ax); because Eigen's dot()
 * conjugates its first argument, these are the unconjugated products.
 * NOTE(review): judging by the name this is meant for symmetric system matrices - confirm.
 *
 * The convergence measure is built from rk_mod = std::norm(rk.dot(rk)):
 * sqrt(rk_mod)/n_size when para.abs_diff is set, otherwise rk_mod/r0_mod, where r0_mod
 * is the initial rk_mod clamped to at least 1.0.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, rkrk2, betak, dkAx;
    Eigen::VectorXcd rk(n_size), dk(n_size), Ax(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    // Initial residual doubles as the first search direction.
    dk = rk = (B - Ax);

    std::complex<lcg_float> rkrk = rk.conjugate().dot(rk);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    // The initial guess may already be good enough. The relative test is also
    // tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (rk_mod/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    int t = 0;
    lcg_float residual;
    while (true)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the final residual.
        if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
        {
            return CLCG_STOP;
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, dk, Ax, MatNormal, NonConjugate);
        dkAx = dk.conjugate().dot(Ax);
        ak = rkrk/dkAx;

        m += ak*dk;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        rkrk2 = rk.conjugate().dot(rk);
        betak = rkrk2/rkrk;
        rkrk = rkrk2;

        dk = rk + betak*dk;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Conjugate gradient squared solver for complex linear systems (Eigen vectors).
 *
 * Only products requested as (MatNormal, NonConjugate) are used. A random shadow
 * vector s0 is drawn until |s0.conjugate().dot(rk)| is at least 1e-8. The
 * already-converged test runs before that draw: with a zero initial residual the
 * draw can never succeed, so testing afterwards would loop forever.
 *
 * The convergence measure is built from rk_mod = std::norm(rk.dot(rk)):
 * sqrt(rk_mod)/n_size when para.abs_diff is set, otherwise rk_mod/r0_mod, where r0_mod
 * is the initial rk_mod clamped to at least 1.0.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, rhok2, sigma, betak;
    Eigen::VectorXcd rk(n_size), s0, pk(n_size);
    Eigen::VectorXcd Ax(n_size), uk(n_size), qk(n_size), wk(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    pk = uk = rk = (B - Ax);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    // Test the initial guess BEFORE drawing the shadow vector (see the header note).
    // The relative test is also tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (rk_mod/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    // Draw a random shadow vector that is not (numerically) orthogonal to the residual.
    std::complex<lcg_float> rhok;
    do
    {
        s0 = Eigen::VectorXcd::Random(n_size);
        rhok = s0.conjugate().dot(rk);
    } while (std::sqrt(std::norm(rhok)) < 1e-8);

    int t = 0;
    lcg_float residual;
    while (true)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the final residual.
        if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
        {
            return CLCG_STOP;
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, pk, Ax, MatNormal, NonConjugate);
        sigma = s0.conjugate().dot(Ax);
        ak = rhok/sigma;

        qk = uk - ak*Ax;
        wk = uk + qk;

        // Ax is reused for the product with wk.
        Afp(instance, wk, Ax, MatNormal, NonConjugate);

        m += ak*wk;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        rhok2 = s0.conjugate().dot(rk);
        betak = rhok2/rhok;
        rhok = rhok2;

        uk = rk + betak*qk;
        pk = uk + betak*(qk + betak*pk);
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Transpose-free quasi-minimal residual solver for complex linear systems (Eigen vectors).
 *
 * Each pass of the outer loop performs two matrix products and then two half steps
 * (j = 1, 2). The progress callback, the convergence test and the iteration limit
 * are evaluated at the start of every half step, and t counts half steps.
 *
 * The convergence measure is built from rk_mod = rk.dot(rk) as carried over from the
 * previous outer pass: sqrt(std::norm(rk_mod))/n_size when para.abs_diff is set,
 * otherwise std::norm(rk_mod)/r0_mod, where r0_mod is the initial std::norm(rk_mod)
 * clamped to at least 1.0.
 *
 * Hitting the iteration limit returns from the function. A `break` at that point would
 * only leave the inner half-step loop, and the outer loop would then carry on without
 * updating m.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd pk(n_size), uk(n_size), vk(n_size), dk(n_size);
    Eigen::VectorXcd r0(n_size), rk(n_size), Ax(n_size), qk(n_size);
    Eigen::VectorXcd uqk(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    pk = uk = r0 = rk = (B - Ax);
    dk.setZero();

    std::complex<lcg_float> rk_mod = rk.dot(rk);
    lcg_float r0_mod = std::norm(rk_mod);
    if (r0_mod < 1.0) r0_mod = 1.0;

    lcg_float theta = 0.0, omega = sqrt(rk_mod.real());
    lcg_float residual, tao = omega;
    std::complex<lcg_float> rk_mod2, sigma, alpha, betak, rho, rho2, sign, eta(0.0, 0.0);

    rho = r0.dot(r0);

    // The initial guess may already be good enough. The relative test is also
    // tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(std::norm(rk_mod))/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(std::norm(rk_mod))/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (std::norm(rk_mod)/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, std::norm(rk_mod)/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    int t = 0;
    while (true)
    {
        Afp(instance, pk, vk, MatNormal, NonConjugate);

        sigma = r0.dot(vk);
        alpha = rho/sigma;

        qk = uk - alpha*vk;
        uqk = uk + qk;

        Afp(instance, uqk, Ax, MatNormal, NonConjugate);

        rk -= alpha*Ax;
        rk_mod2 = rk.dot(rk);

        // Two half steps per outer pass.
        for (int j = 1; j <= 2; j++)
        {
            if (para.abs_diff) residual = std::sqrt(std::norm(rk_mod))/n_size;
            else residual = std::norm(rk_mod)/r0_mod;

            // Report progress first so the callback also sees the final residual.
            if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
            {
                return CLCG_STOP;
            }

            if (residual <= para.epsilon)
            {
                return CLCG_CONVERGENCE;
            }

            // max_iterations == 0 disables the limit. Return (not break) so that
            // the outer loop is left as well.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                return LCG_REACHED_MAX_ITERATIONS;
            }

            t++;

            sign = theta*theta*(eta/alpha);

            if (j == 1)
            {
                omega = sqrt(sqrt(rk_mod.real())*sqrt(rk_mod2.real()));
                dk = uk + sign*dk;
            }
            else
            {
                omega = sqrt(rk_mod2.real());
                dk = qk + sign*dk;
            }

            theta = omega/tao;
            tao = omega/sqrt(1.0+theta*theta);
            eta = (1.0/(1.0+theta*theta))*alpha;

            m += eta*dk;
        }
        rk_mod = rk_mod2;

        rho2 = r0.dot(rk);
        betak = rho2/rho;
        rho = rho2;

        uk = rk + betak*qk;
        pk = uk + betak*(qk + betak*pk);
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Preconditioned conjugate gradient solver for complex linear systems (Eigen vectors).
 *
 * Afp and Mfp are both called with (MatNormal, NonConjugate). The step sizes are built
 * from rk.conjugate().dot(...) and dk.conjugate().dot(Ax); because Eigen's dot()
 * conjugates its first argument, these are the unconjugated products.
 *
 * The convergence measure is built from rk_mod = std::norm(rk.dot(rk)):
 * sqrt(rk_mod)/n_size when para.abs_diff is set, otherwise rk_mod/r0_mod, where r0_mod
 * is the initial rk_mod clamped to at least 1.0.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Mfp       Callback that applies the preconditioner to a vector.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd rk(n_size), dk(n_size), sk(n_size), Ax(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    // Initial residual; the first search direction is the preconditioned residual.
    rk = (B - Ax);
    Mfp(instance, rk, dk, MatNormal, NonConjugate);

    std::complex<lcg_float> ak, d_old, betak, dkAx;
    std::complex<lcg_float> d_new = rk.conjugate().dot(dk);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    // The initial guess may already be good enough. The relative test is also
    // tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (rk_mod/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    int t = 0;
    lcg_float residual;
    while (true)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the final residual.
        if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
        {
            return CLCG_STOP;
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, dk, Ax, MatNormal, NonConjugate);
        dkAx = dk.conjugate().dot(Ax);
        ak = d_new/dkAx;

        m += ak*dk;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        Mfp(instance, rk, sk, MatNormal, NonConjugate);

        d_old = d_new;
        d_new = rk.conjugate().dot(sk);

        betak = d_new/d_old;

        dk = sk + betak*dk;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Preconditioned bi-conjugate gradient solver for complex linear systems (Eigen vectors).
 *
 * Besides the residual rk and direction pk, conjugated shadow vectors rsk and psk are
 * maintained. Each iteration requests one product as (MatNormal, NonConjugate) for pk
 * and one as (MatNormal, Conjugate) for psk; Mfp is called with (MatNormal, NonConjugate).
 *
 * The convergence measure is built from rk_mod = std::norm(rk.dot(rk)):
 * sqrt(rk_mod)/n_size when para.abs_diff is set, otherwise rk_mod/r0_mod, where r0_mod
 * is the initial rk_mod clamped to at least 1.0.
 *
 * @param Afp       Callback that evaluates the matrix-vector product.
 * @param Mfp       Callback that applies the preconditioner to a vector.
 * @param Pfp       Optional progress callback (may be nullptr); a non-zero return stops the solver.
 * @param m         Solution vector; holds the initial guess on entry and is updated in place.
 * @param B         Objective vector of the linear system.
 * @param param     Solver parameters; defparam2 is used when nullptr.
 * @param instance  User data forwarded to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON for rejected input; LCG_ALREADY_OPTIMIZIED when the initial
 *         guess already satisfies the tolerance; CLCG_STOP when Pfp asks to stop;
 *         CLCG_CONVERGENCE on convergence; LCG_REACHED_MAX_ITERATIONS when the iteration
 *         limit is hit.
 *
 * NOTE(review): two of the status codes carry the LCG_ prefix while the rest are CLCG_;
 * they are kept as found - confirm whether CLCG_ equivalents were intended.
 */
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Reject inconsistent sizes and out-of-range parameters before doing any work.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, betak, pkAx, rhok2;
    Eigen::VectorXcd rk(n_size), rsk(n_size), zk(n_size), pk(n_size), psk(n_size), Ax(n_size), Asx(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    // Initial residual and its preconditioned counterpart.
    rk = (B - Ax);
    Mfp(instance, rk, zk, MatNormal, NonConjugate);

    pk = zk;
    rsk = rk.conjugate();
    psk = pk.conjugate();

    std::complex<lcg_float> rhok = rsk.dot(zk);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    // The initial guess may already be good enough. The relative test is also
    // tried when abs_diff is set but the absolute test did not pass.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }
    if (rk_mod/r0_mod <= para.epsilon)
    {
        if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        return LCG_ALREADY_OPTIMIZIED;
    }

    int t = 0;
    lcg_float residual;
    while (true)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the final residual.
        if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
        {
            return CLCG_STOP;
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, pk, Ax, MatNormal, NonConjugate);
        Afp(instance, psk, Asx, MatNormal, Conjugate);

        pkAx = psk.dot(Ax);
        ak = rhok/pkAx;

        m += ak*pk;
        // The shadow residual must be formed from rk BEFORE rk itself is updated.
        rsk = rk.conjugate() - std::conj(ak)*Asx;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        Mfp(instance, rk, zk, MatNormal, NonConjugate);

        rhok2 = rsk.dot(zk);
        betak = rhok2/rhok;
        rhok = rhok2;

        pk = zk + betak*pk;
        psk = zk.conjugate() + std::conj(betak)*psk;
    }
}
|
||||||
94
src/lib/clcg_eigen.h
Normal file
94
src/lib/clcg_eigen.h
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _CLCG_EIGEN_H
|
||||||
|
#define _CLCG_EIGEN_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "complex"
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the solver functions by the client.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param prod_Ax   Product of A multiplied by x.
|
||||||
|
* @param layout layout information of the matrix A passed by the solver functions.
|
||||||
|
* @param conjugate Conjugation information of the matrix A passed by the solver functions.
|
||||||
|
*/
|
||||||
|
typedef void (*clcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the solver functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param param The parameter object passed by the solver functions.
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*clcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
|
||||||
|
const clcg_para *param, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the solver function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is CLCG_CGS.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||||
|
const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_CGS);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the solver function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||||
|
* @param solver_id    Solver type used to solve the linear system. The value must be CLCG_PBICG (default) or CLCG_PCG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||||
|
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_PBICG);
|
||||||
|
|
||||||
|
#endif // _CLCG_EIGEN_H
|
||||||
1419
src/lib/lcg.cpp
Normal file
1419
src/lib/lcg.cpp
Normal file
File diff suppressed because it is too large
Load Diff
171
src/lib/lcg.h
Normal file
171
src/lib/lcg.h
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_H
|
||||||
|
#define _LCG_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param prod_Ax   Product of A multiplied by x.
|
||||||
|
* @param n_size Size of x and column/row numbers of A.
|
||||||
|
*/
|
||||||
|
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax,
|
||||||
|
const int n_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param param     The parameter object passed by the solver functions.
* @param n_size    The size of the variables.
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*lcg_progress_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||||
|
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_preconditioned(lcg_axfunc_ptr Afp, lcg_axfunc_ptr Mfp, lcg_progress_ptr Pfp, lcg_float* m,
|
||||||
|
const lcg_float* B, const int n_size, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function with inequality constraints.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] low The lower boundary of the acceptable solution.
|
||||||
|
* @param[in] hig The higher boundary of the acceptable solution.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id    Solver type used to solve the linear system. The default value is LCG_PG.
|
||||||
|
* @note The declaration below has no precondition vector 'P' parameter; the earlier entry describing it (optional except for the LCG_PCG method, default NULL) does not match this signature.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_constrained(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const lcg_float* low, const lcg_float *hig, const int n_size, const lcg_para* param,
|
||||||
|
void* instance, lcg_solver_enum solver_id = LCG_PG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Standalone function of the Linear Conjugate Gradient algorithm
|
||||||
|
*
|
||||||
|
* @note To use the lcg() function for massive inversions, it is better to provide
|
||||||
|
* external vectors Gk, Dk and ADk to avoid allocating and destroying temporary vectors.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector of the size n_size
|
||||||
|
* @param[in] B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param[in] param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param Gk Conjugate gradient vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param Dk Directional gradient vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param ADk Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||||
|
const lcg_para* param, void* instance, lcg_float* Gk = nullptr, lcg_float* Dk = nullptr,
|
||||||
|
lcg_float* ADk = nullptr);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Standalone function of the Conjugate Gradient Squared algorithm.
|
||||||
|
*
|
||||||
|
* @note Algorithm 2 in "Generalized conjugate gradient method" by Fokkema et al. (1996).
|
||||||
|
*
|
||||||
|
* @note To use the lcgs() function for massive inversions, it is better to provide
|
||||||
|
* external vectors RK, R0T, PK, AX, UK, QK, and WK to avoid allocating and destroying temporary vectors.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||||
|
* @param RK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param R0T Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param PK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param AX Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param UK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param QK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
* @param WK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcgs(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||||
|
const lcg_para* param, void* instance, lcg_float* RK = nullptr, lcg_float* R0T = nullptr,
|
||||||
|
lcg_float* PK = nullptr, lcg_float* AX = nullptr, lcg_float* UK = nullptr, lcg_float* QK = nullptr,
|
||||||
|
lcg_float* WK = nullptr);
|
||||||
|
|
||||||
|
#endif // _LCG_H
|
||||||
496
src/lib/lcg_complex.cpp
Normal file
496
src/lib/lcg_complex.cpp
Normal file
@@ -0,0 +1,496 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "random"
|
||||||
|
|
||||||
|
#include "lcg_complex.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * @brief Allocate a one-dimensional array of complex numbers with new[].
 *
 * @param n Number of elements to allocate.
 *
 * @return Pointer to the first element of the new array. The array must be
 * released with delete[] (see clcg_free()).
 */
lcg_complex* clcg_malloc(int n)
{
	return new lcg_complex [n];
}
|
||||||
|
|
||||||
|
/**
 * @brief Allocate a two-dimensional array of complex numbers as m separately
 * allocated rows of n elements each.
 *
 * @param m Number of rows (length of the array of row pointers).
 * @param n Number of elements in every row.
 *
 * @return Pointer to the array of row pointers.
 */
lcg_complex** clcg_malloc(int m, int n)
{
	lcg_complex **rows = new lcg_complex* [m];

	int r = 0;
	while (r < m)
	{
		rows[r] = new lcg_complex [n];
		++r;
	}

	return rows;
}
|
||||||
|
|
||||||
|
/**
 * @brief Release a one-dimensional array obtained from new[].
 *
 * @note The pointer is received by value, so the caller's copy of the pointer
 * is not modified by this function.
 *
 * @param x Array to release; a null pointer is accepted and ignored.
 */
void clcg_free(lcg_complex* x)
{
	// delete[] applied to a null pointer does nothing, so no explicit test is needed.
	delete[] x;
}
|
||||||
|
|
||||||
|
/**
 * @brief Release a two-dimensional array made of m separately allocated rows.
 *
 * @note The pointer is received by value, so the caller's copy of the pointer
 * is not modified by this function.
 *
 * @param x Array of row pointers; a null pointer is accepted and ignored.
 * @param m Number of rows stored in x.
 */
void clcg_free(lcg_complex **x, int m)
{
	if (x == nullptr)
	{
		return;
	}

	// Rows first, then the array of row pointers itself.
	int r = 0;
	while (r < m)
	{
		delete[] x[r];
		++r;
	}
	delete[] x;
}
|
||||||
|
|
||||||
|
/**
 * @brief Assign the same complex value to every element of a vector.
 *
 * @param a    Vector to fill.
 * @param b    Value copied into each element.
 * @param size Number of elements of a to overwrite.
 */
void clcg_vecset(lcg_complex *a, lcg_complex b, int size)
{
	int k = 0;
	while (k < size)
	{
		a[k] = b;
		++k;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Assign the same complex value to every element of a two-dimensional array.
 *
 * @param a Array of row pointers to fill.
 * @param b Value copied into each element.
 * @param m Number of rows.
 * @param n Number of elements per row.
 */
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n)
{
	int r = 0;
	while (r < m)
	{
		int c = 0;
		while (c < n)
		{
			a[r][c] = b;
			++c;
		}
		++r;
	}
}
|
||||||
|
|
||||||
|
#ifdef LibLCG_STD_COMPLEX
|
||||||
|
|
||||||
|
/**
 * @brief Overwrite the real and imaginary parts of a complex number.
 *
 * @param a Complex number to modify.
 * @param r New real part.
 * @param i New imaginary part.
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
	lcg_complex &target = *a;
	target.real(r);
	target.imag(i);
}
|
||||||
|
|
||||||
|
/**
 * @brief Value of std::norm() for a complex number.
 *
 * @param a Complex number to evaluate.
 *
 * @return The result of std::norm(*a).
 */
lcg_float clcg_square(const lcg_complex *a)
{
	const lcg_complex &z = *a;
	return std::norm(z);
}
|
||||||
|
|
||||||
|
/**
 * @brief Square root of std::norm() of a complex number.
 *
 * @param a Complex number to evaluate.
 *
 * @return sqrt(std::norm(*a)).
 */
lcg_float clcg_module(const lcg_complex *a)
{
	const lcg_complex &z = *a;
	return sqrt(std::norm(z));
}
|
||||||
|
|
||||||
|
/**
 * @brief Complex conjugate of a number.
 *
 * @param a Complex number to conjugate.
 *
 * @return The result of std::conj(*a).
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
	return std::conj(*a);
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill a vector with pseudo-random complex numbers drawn with rand().
 *
 * The real part of each element is scaled between the real parts of l and h,
 * and the imaginary part between their imaginary parts.
 *
 * @note The generator is re-seeded with the current time() on every call.
 *
 * @param a    Vector to fill.
 * @param l    Lower bound (real and imaginary parts are used separately).
 * @param h    Upper bound (real and imaginary parts are used separately).
 * @param size Number of elements of a to overwrite.
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
	srand(time(0));

	int k = 0;
	while (k < size)
	{
		lcg_complex &z = a[k];
		// The real part is drawn before the imaginary part.
		z.real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
		z.imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
		++k;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill a two-dimensional array with pseudo-random complex numbers drawn with rand().
 *
 * The real part of each element is scaled between the real parts of l and h,
 * and the imaginary part between their imaginary parts. Elements are visited
 * row by row.
 *
 * @note The generator is re-seeded with the current time() on every call.
 *
 * @param a Array of row pointers to fill.
 * @param l Lower bound (real and imaginary parts are used separately).
 * @param h Upper bound (real and imaginary parts are used separately).
 * @param m Number of rows.
 * @param n Number of elements per row.
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
	srand(time(0));

	int r = 0;
	while (r < m)
	{
		int c = 0;
		while (c < n)
		{
			// The real part is drawn before the imaginary part.
			a[r][c].real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
			a[r][c].imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
			++c;
		}
		++r;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Unconjugated product sum of two complex vectors: sum_i a_i * b_i.
 *
 * @param ret  Receives the result; it is written only after the whole sum is formed.
 * @param a    First vector.
 * @param b    Second vector.
 * @param size Number of elements used from a and b.
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	lcg_float sum_re = 0.0;
	lcg_float sum_im = 0.0;

	int k = 0;
	while (k < size)
	{
		const lcg_complex &u = a[k];
		const lcg_complex &v = b[k];
		// (ur + i*ui)*(vr + i*vi)
		sum_re += (u.real()*v.real() - u.imag()*v.imag());
		sum_im += (u.real()*v.imag() + u.imag()*v.real());
		++k;
	}

	ret.real(sum_re);
	ret.imag(sum_im);
}
|
||||||
|
|
||||||
|
/**
 * @brief Inner product of two complex vectors with the first one conjugated:
 * sum_i conj(a_i) * b_i.
 *
 * @param ret  Receives the result; it is written only after the whole sum is formed.
 * @param a    First vector (conjugated in the product).
 * @param b    Second vector.
 * @param size Number of elements used from a and b.
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	lcg_float sum_re = 0.0;
	lcg_float sum_im = 0.0;

	int k = 0;
	while (k < size)
	{
		const lcg_complex &u = a[k];
		const lcg_complex &v = b[k];
		// (ur - i*ui)*(vr + i*vi)
		sum_re += (u.real()*v.real() + u.imag()*v.imag());
		sum_im += (u.real()*v.imag() - u.imag()*v.real());
		++k;
	}

	ret.real(sum_re);
	ret.imag(sum_im);
}
|
||||||
|
|
||||||
|
/**
 * @brief Product of a dense complex matrix, stored as an array of row
 * pointers, and a complex vector.
 *
 * The two flags select one of four variants:
 * - conjugate == Conjugate: every element of A is conjugated before it is
 *   multiplied; any other value uses the elements of A unchanged.
 * - layout == MatNormal: Ax[i] = sum_j A[i][j]*x[j] for i in [0, m_size), so x
 *   is read over n_size elements and Ax written over m_size elements. Any
 *   other value uses the transposed matrix, Ax[j] = sum_i A[i][j]*x[i] for
 *   j in [0, n_size), so x is read over m_size elements and Ax written over
 *   n_size elements.
 *
 * @param A         Matrix with m_size rows and n_size columns.
 * @param x         Vector to multiply.
 * @param Ax        Receives the product.
 * @param m_size    Number of rows of A.
 * @param n_size    Number of columns of A.
 * @param layout    Whether A is applied as stored or transposed.
 * @param conjugate Whether the elements of A are conjugated.
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
	int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
	// The indices are signed on purpose. The sizes are 'int', so an unsigned
	// index would turn a negative size into a huge loop bound, and unsigned
	// loop variables are not accepted by OpenMP 2.0 compilers. This also
	// matches the implementation used when LibLCG_STD_COMPLEX is not defined.
	int i, j;
	lcg_float re, im;

	if (conjugate == Conjugate)
	{
		if (layout == MatNormal)
		{
			// conj(A) * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
			for (i = 0; i < m_size; i++)
			{
				re = 0.0; im = 0.0;
				for (j = 0; j < n_size; j++)
				{
					re += (A[i][j].real()*x[j].real() + A[i][j].imag()*x[j].imag());
					im += (A[i][j].real()*x[j].imag() - A[i][j].imag()*x[j].real());
				}
				Ax[i].real(re); Ax[i].imag(im);
			}
			return;
		}

		// conj(A)^T * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (j = 0; j < n_size; j++)
		{
			re = 0.0; im = 0.0;
			for (i = 0; i < m_size; i++)
			{
				re += (A[i][j].real()*x[i].real() + A[i][j].imag()*x[i].imag());
				im += (A[i][j].real()*x[i].imag() - A[i][j].imag()*x[i].real());
			}
			Ax[j].real(re); Ax[j].imag(im);
		}
		return;
	}

	if (layout == MatNormal)
	{
		// A * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (i = 0; i < m_size; i++)
		{
			re = 0.0; im = 0.0;
			for (j = 0; j < n_size; j++)
			{
				re += (A[i][j].real()*x[j].real() - A[i][j].imag()*x[j].imag());
				im += (A[i][j].real()*x[j].imag() + A[i][j].imag()*x[j].real());
			}
			Ax[i].real(re); Ax[i].imag(im);
		}
		return;
	}

	// A^T * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
	for (j = 0; j < n_size; j++)
	{
		re = 0.0; im = 0.0;
		for (i = 0; i < m_size; i++)
		{
			re += (A[i][j].real()*x[i].real() - A[i][j].imag()*x[i].imag());
			im += (A[i][j].real()*x[i].imag() + A[i][j].imag()*x[i].real());
		}
		Ax[j].real(re); Ax[j].imag(im);
	}
	return;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Default constructor: both the real and the imaginary part start at zero.
lcg_complex::lcg_complex()
{
	img = 0.0;
	rel = img;
}
|
||||||
|
|
||||||
|
// Construct a complex number from its real part r and imaginary part i.
lcg_complex::lcg_complex(lcg_float r, lcg_float i)
{
	rel = r;
	img = i;
}
|
||||||
|
|
||||||
|
// Destructor with an empty body.
lcg_complex::~lcg_complex()
{
}
|
||||||
|
|
||||||
|
// Setter: replace the real part with a.
void lcg_complex::real(lcg_float a)
{
	this->rel = a;
}
|
||||||
|
|
||||||
|
// Setter: replace the imaginary part with a.
void lcg_complex::imag(lcg_float a)
{
	this->img = a;
}
|
||||||
|
|
||||||
|
// Getter: current real part.
lcg_float lcg_complex::real()
{
	return this->rel;
}
|
||||||
|
|
||||||
|
// Getter: current imaginary part.
lcg_float lcg_complex::imag()
{
	return this->img;
}
|
||||||
|
|
||||||
|
bool operator==(const lcg_complex &a, const lcg_complex &b)
|
||||||
|
{
|
||||||
|
if (a.rel == b.rel && a.img == b.img)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator!=(const lcg_complex &a, const lcg_complex &b)
|
||||||
|
{
|
||||||
|
if (a.rel != b.rel || a.img != b.img)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Component-wise sum of two complex numbers.
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b)
{
	return lcg_complex(a.rel + b.rel, a.img + b.img);
}
|
||||||
|
|
||||||
|
// Component-wise difference of two complex numbers (a minus b).
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b)
{
	return lcg_complex(a.rel - b.rel, a.img - b.img);
}
|
||||||
|
|
||||||
|
// Product of two complex numbers:
// (ar + i*ai)*(br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br).
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b)
{
	return lcg_complex(a.rel*b.rel - a.img*b.img,
		a.rel*b.img + a.img*b.rel);
}
|
||||||
|
|
||||||
|
// Product of a real scalar a and a complex number b: both parts of b are
// scaled by a.
lcg_complex operator*(const lcg_float &a, const lcg_complex &b)
{
	return lcg_complex(a*b.rel, a*b.img);
}
|
||||||
|
|
||||||
|
// Quotient a/b of two complex numbers, computed as a*conj(b)/|b|^2.
// When both parts of b are zero, both parts of the result are NAN.
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b)
{
	if (b.rel == 0 && b.img == 0)
	{
		return lcg_complex(NAN, NAN);
	}

	// Both parts of the quotient share the denominator |b|^2.
	const lcg_float denom = b.rel*b.rel + b.img*b.img;
	return lcg_complex((a.rel*b.rel + a.img*b.img)/denom,
		(a.img*b.rel - a.rel*b.img)/denom);
}
|
||||||
|
|
||||||
|
// Quotient a/b of a real scalar and a complex number, computed as
// a*conj(b)/|b|^2. When both parts of b are zero, both parts of the result
// are NAN.
lcg_complex operator/(const lcg_float &a, const lcg_complex &b)
{
	lcg_complex quotient;

	if (b.rel == 0 && b.img == 0)
	{
		quotient.img = NAN;
		quotient.rel = quotient.img;
	}
	else
	{
		quotient.rel = a*b.rel/(b.rel*b.rel + b.img*b.img);
		quotient.img = -1.0*a*b.img/(b.rel*b.rel + b.img*b.img);
	}

	return quotient;
}
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const lcg_complex &a)
|
||||||
|
{
|
||||||
|
if (a.img >= 0)
|
||||||
|
os << a.rel << "+" << a.img << "i";
|
||||||
|
else
|
||||||
|
os << a.rel << a.img << "i";
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Overwrite the real and imaginary parts of a complex number.
 *
 * @param a Complex number to modify.
 * @param r New real part.
 * @param i New imaginary part.
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
	lcg_complex &target = *a;
	target.rel = r;
	target.img = i;
}
|
||||||
|
|
||||||
|
/**
 * @brief Squared magnitude of a complex number: rel*rel + img*img.
 *
 * @param a Complex number to evaluate.
 *
 * @return Sum of the squared real and imaginary parts.
 */
lcg_float clcg_square(const lcg_complex *a)
{
	const lcg_complex &z = *a;
	return z.rel * z.rel + z.img * z.img;
}
|
||||||
|
|
||||||
|
/**
 * @brief Magnitude of a complex number: square root of clcg_square().
 *
 * @param a Complex number to evaluate.
 *
 * @return sqrt(clcg_square(a)).
 */
lcg_float clcg_module(const lcg_complex *a)
{
	return sqrt(clcg_square(&(*a)));
}
|
||||||
|
|
||||||
|
/**
 * @brief Complex conjugate of a number: same real part, imaginary part
 * multiplied by -1.
 *
 * @param a Complex number to conjugate.
 *
 * @return The conjugated value.
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
	return lcg_complex(a->rel, -1.0 * a->img);
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill a vector with pseudo-random complex numbers drawn with rand().
 *
 * The real part of each element is scaled between l.rel and h.rel, and the
 * imaginary part between l.img and h.img.
 *
 * @note The generator is re-seeded with the current time() on every call.
 *
 * @param a    Vector to fill.
 * @param l    Lower bound (real and imaginary parts are used separately).
 * @param h    Upper bound (real and imaginary parts are used separately).
 * @param size Number of elements of a to overwrite.
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
	srand(time(nullptr));

	int k = 0;
	while (k < size)
	{
		lcg_complex &z = a[k];
		// The real part is drawn before the imaginary part.
		z.rel = (h.rel-l.rel)*rand()*1.0/RAND_MAX + l.rel;
		z.img = (h.img-l.img)*rand()*1.0/RAND_MAX + l.img;
		++k;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Fill a two-dimensional array with pseudo-random complex numbers drawn with rand().
 *
 * The real part of each element is scaled between l.rel and h.rel, and the
 * imaginary part between l.img and h.img. Elements are visited row by row.
 *
 * @note The generator is re-seeded with the current time() on every call.
 *
 * @param a Array of row pointers to fill.
 * @param l Lower bound (real and imaginary parts are used separately).
 * @param h Upper bound (real and imaginary parts are used separately).
 * @param m Number of rows.
 * @param n Number of elements per row.
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
	srand(time(nullptr));

	int r = 0;
	while (r < m)
	{
		int c = 0;
		while (c < n)
		{
			// The real part is drawn before the imaginary part.
			a[r][c].rel = (h.rel-l.rel)*rand()*1.0/RAND_MAX + l.rel;
			a[r][c].img = (h.img-l.img)*rand()*1.0/RAND_MAX + l.img;
			++c;
		}
		++r;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Unconjugated product sum of two complex vectors: sum_i a_i * b_i.
 *
 * @note The sum is accumulated directly in ret, which is reset to zero first.
 *
 * @param ret  Receives the result.
 * @param a    First vector.
 * @param b    Second vector.
 * @param size Number of elements used from a and b.
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	ret.rel = 0.0;
	ret.img = 0.0;

	int k = 0;
	while (k < size)
	{
		// (ar + i*ai)*(br + i*bi)
		ret.rel += (a[k].rel*b[k].rel - a[k].img*b[k].img);
		ret.img += (a[k].rel*b[k].img + a[k].img*b[k].rel);
		++k;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Inner product of two complex vectors with the first one conjugated:
 * sum_i conj(a_i) * b_i.
 *
 * @note The sum is accumulated directly in ret, which is reset to zero first.
 *
 * @param ret  Receives the result.
 * @param a    First vector (conjugated in the product).
 * @param b    Second vector.
 * @param size Number of elements used from a and b.
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	ret.rel = 0.0;
	ret.img = 0.0;

	int k = 0;
	while (k < size)
	{
		// (ar - i*ai)*(br + i*bi)
		ret.rel += (a[k].rel*b[k].rel + a[k].img*b[k].img);
		ret.img += (a[k].rel*b[k].img - a[k].img*b[k].rel);
		++k;
	}
}
|
||||||
|
|
||||||
|
/**
 * @brief Product of a dense complex matrix, stored as an array of row
 * pointers, and a complex vector.
 *
 * The two flags select one of four variants:
 * - conjugate == Conjugate: every element of A is conjugated before it is
 *   multiplied; any other value uses the elements of A unchanged.
 * - layout == MatNormal: Ax[i] = sum_j A[i][j]*x[j] for i in [0, m_size).
 *   Any other value uses the transposed matrix,
 *   Ax[j] = sum_i A[i][j]*x[i] for j in [0, n_size).
 *
 * @param A         Matrix with m_size rows and n_size columns.
 * @param x         Vector to multiply.
 * @param Ax        Receives the product.
 * @param m_size    Number of rows of A.
 * @param n_size    Number of columns of A.
 * @param layout    Whether A is applied as stored or transposed.
 * @param conjugate Whether the elements of A are conjugated.
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
	int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
	int i, j;
	lcg_float re, im;

	const bool conj_A = (conjugate == Conjugate);
	const bool as_stored = (layout == MatNormal);

	if (conj_A && as_stored)
	{
		// conj(A) * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (i = 0; i < m_size; i++)
		{
			re = 0.0; im = 0.0;
			for (j = 0; j < n_size; j++)
			{
				re += (A[i][j].rel*x[j].rel + A[i][j].img*x[j].img);
				im += (A[i][j].rel*x[j].img - A[i][j].img*x[j].rel);
			}
			Ax[i].rel = re;
			Ax[i].img = im;
		}
	}
	else if (conj_A)
	{
		// conj(A)^T * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (j = 0; j < n_size; j++)
		{
			re = 0.0; im = 0.0;
			for (i = 0; i < m_size; i++)
			{
				re += (A[i][j].rel*x[i].rel + A[i][j].img*x[i].img);
				im += (A[i][j].rel*x[i].img - A[i][j].img*x[i].rel);
			}
			Ax[j].rel = re;
			Ax[j].img = im;
		}
	}
	else if (as_stored)
	{
		// A * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (i = 0; i < m_size; i++)
		{
			re = 0.0; im = 0.0;
			for (j = 0; j < n_size; j++)
			{
				re += (A[i][j].rel*x[j].rel - A[i][j].img*x[j].img);
				im += (A[i][j].rel*x[j].img + A[i][j].img*x[j].rel);
			}
			Ax[i].rel = re;
			Ax[i].img = im;
		}
	}
	else
	{
		// A^T * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
		for (j = 0; j < n_size; j++)
		{
			re = 0.0; im = 0.0;
			for (i = 0; i < m_size; i++)
			{
				re += (A[i][j].rel*x[i].rel - A[i][j].img*x[i].img);
				im += (A[i][j].rel*x[i].img + A[i][j].img*x[i].rel);
			}
			Ax[j].rel = re;
			Ax[j].img = im;
		}
	}
}
|
||||||
|
|
||||||
|
#endif // LibLCG_STD_COMPLEX
|
||||||
329
src/lib/lcg_complex.h
Normal file
329
src/lib/lcg_complex.h
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_COMPLEX_H
|
||||||
|
#define _LCG_COMPLEX_H
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "algebra.h"
|
||||||
|
#ifdef LibLCG_STD_COMPLEX
|
||||||
|
|
||||||
|
#include "complex"
|
||||||
|
|
||||||
|
typedef std::complex<lcg_float> lcg_complex;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A simple definition of the complex number type.
|
||||||
|
* Easy to change in the future. Right now it is just two double variables
|
||||||
|
*/
|
||||||
|
struct lcg_complex
|
||||||
|
{
|
||||||
|
lcg_float rel; ///< The real part
|
||||||
|
lcg_float img; ///< The imaginary part
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Constructs a new instance.
|
||||||
|
*/
|
||||||
|
lcg_complex();
|
||||||
|
/**
|
||||||
|
* @brief Constructs a new instance.
|
||||||
|
*
|
||||||
|
* @param[in] r The real part of the complex number
|
||||||
|
* @param[in] i The imaginary part of the complex number
|
||||||
|
*/
|
||||||
|
lcg_complex(lcg_float r, lcg_float i);
|
||||||
|
/**
|
||||||
|
* @brief Destructor
|
||||||
|
*/
|
||||||
|
virtual ~lcg_complex();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set real part of a complex number
|
||||||
|
*
|
||||||
|
* @param a Input value
|
||||||
|
*/
|
||||||
|
void real(lcg_float a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set image part of a complex number
|
||||||
|
*
|
||||||
|
* @param a Input value
|
||||||
|
*/
|
||||||
|
void imag(lcg_float a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get real part of a complex number
|
||||||
|
*
|
||||||
|
* @return lcg_float Real component
|
||||||
|
*/
|
||||||
|
lcg_float real();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get image part of a complex number
|
||||||
|
*
|
||||||
|
* @return lcg_float Image component
|
||||||
|
*/
|
||||||
|
lcg_float imag();
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded equality operator.
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return equal or not
|
||||||
|
*/
|
||||||
|
bool operator==(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded inequality operator.
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return unequal or not
|
||||||
|
*/
|
||||||
|
bool operator!=(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded addition operator.
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return sum
|
||||||
|
*/
|
||||||
|
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded subtraction operator.
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return subtraction
|
||||||
|
*/
|
||||||
|
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded multiplication operator (complex times complex).
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return product
|
||||||
|
*/
|
||||||
|
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded multiplication operator (real times complex).
|
||||||
|
*
|
||||||
|
* @param[in] a real number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return product
|
||||||
|
*/
|
||||||
|
lcg_complex operator*(const lcg_float &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded division operator (complex divided by complex).
|
||||||
|
*
|
||||||
|
* @param[in] a complex number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return quotient
|
||||||
|
*/
|
||||||
|
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded division operator (real divided by complex).
|
||||||
|
*
|
||||||
|
* @param[in] a real number a
|
||||||
|
* @param[in] b complex number b
|
||||||
|
*
|
||||||
|
* @return quotient
|
||||||
|
*/
|
||||||
|
lcg_complex operator/(const lcg_float &a, const lcg_complex &b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Overloaded ostream insertion operator.
|
||||||
|
*
|
||||||
|
* @param os The ostream
|
||||||
|
* @param[in] a complex number a
|
||||||
|
*
|
||||||
|
* @return The ostream
|
||||||
|
*/
|
||||||
|
std::ostream &operator<<(std::ostream &os, const lcg_complex &a);
|
||||||
|
|
||||||
|
#endif // LibLCG_STD_COMPLEX
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate memory for a lcg_complex array.
|
||||||
|
*
|
||||||
|
* @param[in] n Size of the lcg_complex array.
|
||||||
|
*
|
||||||
|
* @return Pointer of the array's location.
|
||||||
|
*/
|
||||||
|
lcg_complex* clcg_malloc(int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate memory for a 2D lcg_complex array (pointer to pointer).
|
||||||
|
*
|
||||||
|
* @param[in] m Row size of the 2D lcg_complex array.
* @param[in] n Column size of the 2D lcg_complex array.
|
||||||
|
*
|
||||||
|
* @return Pointer of the array's location.
|
||||||
|
*/
|
||||||
|
lcg_complex** clcg_malloc(int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy memory used by the lcg_complex type array.
|
||||||
|
*
|
||||||
|
* @param x Pointer of the array.
|
||||||
|
*/
|
||||||
|
void clcg_free(lcg_complex* x);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy memory used by the 2D lcg_complex type array.
|
||||||
|
*
|
||||||
|
* @param x Pointer of the array.
|
||||||
|
*/
|
||||||
|
void clcg_free(lcg_complex **x, int m);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a complex vector's value
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] b initial value
|
||||||
|
* @param[in] size vector size
|
||||||
|
*/
|
||||||
|
void clcg_vecset(lcg_complex *a, lcg_complex b, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a 2d complex vector's value
|
||||||
|
*
|
||||||
|
* @param a pointer of the matrix
|
||||||
|
* @param[in] b initial value
|
||||||
|
* @param[in] m row size of the matrix
|
||||||
|
* @param[in] n column size of the matrix
|
||||||
|
*/
|
||||||
|
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief setup a complex number
|
||||||
|
*
|
||||||
|
* @param a pointer of the complex number to set
* @param[in] r The real part of the complex number
|
||||||
|
* @param[in] i The imaginary part of the complex number
|
||||||
|
*/
|
||||||
|
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the squared modulus of a complex number
|
||||||
|
*
|
||||||
|
* @return The squared modulus
|
||||||
|
*/
|
||||||
|
lcg_float clcg_square(const lcg_complex *a);
|
||||||
|
/**
|
||||||
|
* @brief Calculate the modulus of a complex number
|
||||||
|
*
|
||||||
|
* @return The modulus
|
||||||
|
*/
|
||||||
|
lcg_float clcg_module(const lcg_complex *a);
|
||||||
|
/**
|
||||||
|
* @brief Calculate the conjugate of a complex number
|
||||||
|
*
|
||||||
|
* @return The complex conjugate.
|
||||||
|
*/
|
||||||
|
lcg_complex clcg_conjugate(const lcg_complex *a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a complex vector using random values
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] l the lower bound of random values
|
||||||
|
* @param[in] h the higher bound of random values
|
||||||
|
* @param[in] size size of the vector
|
||||||
|
*/
|
||||||
|
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a 2D complex vector using random values
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] l the lower bound of random values
|
||||||
|
* @param[in] h the higher bound of random values
|
||||||
|
* @param[in] m row size of the vector
|
||||||
|
* @param[in] n column size of the vector
|
||||||
|
*/
|
||||||
|
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate dot product of two complex vectors
|
||||||
|
*
|
||||||
|
* the product of two complex vectors are defined as <a, b> = \sum{a_i \cdot b_i}
|
||||||
|
*
|
||||||
|
* @param[in] a complex vector a
|
||||||
|
* @param[in] b complex vector b
|
||||||
|
* @param[in] size size of the vector
|
||||||
|
*
|
||||||
|
* @param[out] ret the dot product of a and b (the function itself returns nothing)
|
||||||
|
*/
|
||||||
|
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate inner product of two complex vectors
|
||||||
|
*
|
||||||
|
* the product of two complex vectors are defined as <a, b> = \sum{\bar{a_i} \cdot b_i}
|
||||||
|
*
|
||||||
|
* @param[in] a complex vector a
|
||||||
|
* @param[in] b complex vector b
|
||||||
|
* @param[in] size size of the vector
|
||||||
|
*
|
||||||
|
* @param[out] ret the inner product of a and b (the function itself returns nothing)
|
||||||
|
*/
|
||||||
|
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief calculate product of a complex matrix and a complex vector
|
||||||
|
*
|
||||||
|
* the inner product of two complex vectors is defined as <a, b> = \sum{\bar{a_i} \cdot b_i}.
|
||||||
|
* Different configurations:
|
||||||
|
* layout=Normal,conjugate=false -> A
|
||||||
|
* layout=Transpose,conjugate=false -> A^T
|
||||||
|
* layout=Normal,conjugate=true -> \bar{A}
|
||||||
|
* layout=Transpose,conjugate=true -> A^H
|
||||||
|
*
|
||||||
|
* @param A complex matrix A
|
||||||
|
* @param[in] x complex vector x
|
||||||
|
* @param Ax product of Ax
|
||||||
|
* @param[in] m_size row size of A
|
||||||
|
* @param[in] n_size column size of A
|
||||||
|
* @param[in] layout layout of A used for multiplication. Must be Normal or Transpose
|
||||||
|
* @param[in] conjugate whether to use the complex conjugate of A for calculation
|
||||||
|
*/
|
||||||
|
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax, int m_size, int n_size,
|
||||||
|
lcg_matrix_e layout = MatNormal, clcg_complex_e conjugate = NonConjugate);
|
||||||
|
|
||||||
|
#endif // _LCG_COMPLEX_H
|
||||||
356
src/lib/lcg_complex_cuda.cu
Normal file
356
src/lib/lcg_complex_cuda.cu
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "lcg_complex_cuda.h"
|
||||||
|
#include "complex"
|
||||||
|
#include "map"
|
||||||
|
|
||||||
|
// Kernel: thread `row` scans the CSR entries of row `row` and copies the one
// whose column index equals `row` into A_diag[row]. If the row stores no
// diagonal entry, A_diag[row] is left untouched.
__global__ void smCcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    // Entries of this row occupy [A_row[row], A_row[row + 1]) in A_col / A_val.
    const int first = A_row[row];
    const int past_last = A_row[row + 1];
    for (int k = first; k < past_last; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            return;
        }
    }
    return;
}
|
||||||
|
|
||||||
|
// Kernel: double-precision counterpart of the CSR diagonal extraction.
// Thread `row` copies the entry of row `row` whose column index equals `row`
// into A_diag[row]; A_diag[row] is left untouched if no such entry is stored.
__global__ void smZcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    // Entries of this row occupy [A_row[row], A_row[row + 1]) in A_col / A_val.
    const int first = A_row[row];
    const int past_last = A_row[row + 1];
    for (int k = first; k < past_last; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            return;
        }
    }
    return;
}
|
||||||
|
|
||||||
|
// Kernel: c[k] = a[k] * b[k] for single-precision complex vectors of length n.
__global__ void vecMvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k >= n) return; // threads past the end of the vector do nothing

    c[k] = cuCmulf(a[k], b[k]);
}
|
||||||
|
|
||||||
|
// Kernel: c[k] = a[k] * b[k] for double-precision complex vectors of length n.
__global__ void vecMvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k >= n) return; // threads past the end of the vector do nothing

    c[k] = cuCmul(a[k], b[k]);
}
|
||||||
|
|
||||||
|
// Kernel: c[k] = a[k] / b[k] for single-precision complex vectors of length n.
__global__ void vecDvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k >= n) return; // threads past the end of the vector do nothing

    c[k] = cuCdivf(a[k], b[k]);
}
|
||||||
|
|
||||||
|
// Kernel: c[k] = a[k] / b[k] for double-precision complex vectors of length n.
__global__ void vecDvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k >= n) return; // threads past the end of the vector do nothing

    c[k] = cuCdiv(a[k], b[k]);
}
|
||||||
|
|
||||||
|
// Kernel: ca[k] = conj(a[k]) for a single-precision complex vector of length n.
// cuConjf negates the imaginary part in float arithmetic; the previous
// `ca[k].y *= -1.0` multiplied by a double literal and so promoted the
// operation to double precision inside a single-precision kernel.
__global__ void vecC_conjugate_device(const cuComplex *a, cuComplex *ca, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k < n)
    {
        ca[k] = cuConjf(a[k]);
    }
    return;
}
|
||||||
|
|
||||||
|
// Kernel: ca[k] = conj(a[k]) for a double-precision complex vector of length n.
__global__ void vecZ_conjugate_device(const cuDoubleComplex *a, cuDoubleComplex *ca, int n)
{
    const int k = blockIdx.x * blockDim.x + threadIdx.x;
    if (k >= n) return; // threads past the end of the vector do nothing

    cuDoubleComplex flipped = a[k];
    flipped.y *= -1.0; // negate the imaginary part
    ca[k] = flipped;
}
|
||||||
|
|
||||||
|
// Convert a CUDA double-precision complex number (x = real, y = imaginary)
// into an lcg_complex.
lcg_complex cuda2lcg_complex(cuDoubleComplex a)
{
    lcg_complex converted(a.x, a.y);
    return converted;
}
|
||||||
|
|
||||||
|
#ifdef LibLCG_STD_COMPLEX

// Convert an lcg_complex (std::complex build) into a CUDA double-precision
// complex number.
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex o;
    o.x = a.real(); o.y = a.imag();
    return o;
}

#else

// Convert an lcg_complex (library struct build) into a CUDA double-precision
// complex number.
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex o;
    // rel and img are plain data members of the struct, not member functions,
    // so they are read directly (calling them as a.rel() does not compile).
    o.x = a.rel; o.y = a.img;
    return o;
}

#endif // LibLCG_STD_COMPLEX
|
||||||
|
|
||||||
|
// Allocate an array of n cuDoubleComplex values with new[].
// Release it with clcg_free_cuda().
cuDoubleComplex* clcg_malloc_cuda(size_t n)
{
    return new cuDoubleComplex [n];
}
|
||||||
|
|
||||||
|
// Release an array obtained from clcg_malloc_cuda().
// delete[] on a null pointer is a no-op, so no explicit null check is needed.
// x is passed by value, so the caller's own pointer is not reset here.
void clcg_free_cuda(cuDoubleComplex *x)
{
    delete[] x;
    return;
}
|
||||||
|
|
||||||
|
// Fill the first `size` elements of a with the value b.
// This is an ordinary loop that dereferences a directly.
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size)
{
    cuDoubleComplex *const stop = a + size;
    for (cuDoubleComplex *cur = a; cur != stop; ++cur)
    {
        cur->x = b.x;
        cur->y = b.y;
    }
    return;
}
|
||||||
|
|
||||||
|
// Scale a single-precision CUDA complex number by a real factor.
// The scale factor is taken as lcg_float so that this definition matches the
// prototype in lcg_complex_cuda.h; with `float s` the declared overload had
// no definition whenever lcg_float is not float. The products are narrowed
// back to float explicitly because cuComplex stores float components.
cuComplex clcg_Cscale(lcg_float s, cuComplex a)
{
    cuComplex o;
    o.x = static_cast<float>(s*a.x);
    o.y = static_cast<float>(s*a.y);
    return o;
}
|
||||||
|
|
||||||
|
// Component-wise sum a + b of two single-precision CUDA complex numbers.
cuComplex clcg_Csum(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x + b.x, a.y + b.y);
}
|
||||||
|
|
||||||
|
// Component-wise difference a - b of two single-precision CUDA complex numbers.
cuComplex clcg_Cdiff(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x - b.x, a.y - b.y);
}
|
||||||
|
|
||||||
|
// Square root of a single-precision CUDA complex number, computed with
// std::sqrt on the equivalent std::complex<float>.
cuComplex clcg_Csqrt(cuComplex a)
{
    const std::complex<float> root = std::sqrt(std::complex<float>(a.x, a.y));
    return make_cuFloatComplex(root.real(), root.imag());
}
|
||||||
|
|
||||||
|
// Scale a double-precision CUDA complex number by the real factor s.
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a)
{
    return make_cuDoubleComplex(s*a.x, s*a.y);
}
|
||||||
|
|
||||||
|
// Component-wise sum a + b of two double-precision CUDA complex numbers.
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x + b.x, a.y + b.y);
}
|
||||||
|
|
||||||
|
// Component-wise difference a - b of two double-precision CUDA complex numbers.
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x - b.x, a.y - b.y);
}
|
||||||
|
|
||||||
|
// Square root of a double-precision CUDA complex number, computed with
// std::sqrt on the equivalent std::complex<lcg_float>.
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a)
{
    const std::complex<lcg_float> root = std::sqrt(std::complex<lcg_float>(a.x, a.y));
    return make_cuDoubleComplex(root.real(), root.imag());
}
|
||||||
|
|
||||||
|
// Re-order the entries of a COO sparse matrix (single precision) so that they
// are sorted by column first and by row second, and write them out with the
// row and column indices exchanged:
//   Ac_row[k] = original column index, Ac_col[k] = original row index.
// Each entry is keyed by N*col + row in an ordered map, so iterating the map
// yields the entries in ascending key order. Entries that share the same
// (row, col) collapse onto one key: the last one wins and fewer than nz
// outputs are written in that case.
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val)
{
    // Widen N before multiplying: N*A_col[i] evaluated in int overflows for
    // large matrices before the result is ever stored in a size_t.
    const size_t dim = static_cast<size_t>(N);

    std::map<size_t, cuComplex> sort_map;
    for (int i = 0; i < nz; i++)
    {
        const size_t order = dim*static_cast<size_t>(A_col[i]) + static_cast<size_t>(A_row[i]);
        sort_map[order] = A[i];
    }

    size_t out = 0;
    for (const auto &entry : sort_map)
    {
        // exchange the row and column indices: key / N is the original column
        Ac_row[out] = static_cast<int>(entry.first/dim);
        Ac_col[out] = static_cast<int>(entry.first%dim);
        Ac_val[out] = entry.second;
        ++out;
    }
    return;
}
|
||||||
|
|
||||||
|
// Re-order the entries of a COO sparse matrix (double precision) so that they
// are sorted by column first and by row second, and write them out with the
// row and column indices exchanged:
//   Ac_row[k] = original column index, Ac_col[k] = original row index.
// Each entry is keyed by N*col + row in an ordered map, so iterating the map
// yields the entries in ascending key order. Entries that share the same
// (row, col) collapse onto one key: the last one wins and fewer than nz
// outputs are written in that case.
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val)
{
    // Widen N before multiplying: N*A_col[i] evaluated in int overflows for
    // large matrices before the result is ever stored in a size_t.
    const size_t dim = static_cast<size_t>(N);

    std::map<size_t, cuDoubleComplex> sort_map;
    for (int i = 0; i < nz; i++)
    {
        const size_t order = dim*static_cast<size_t>(A_col[i]) + static_cast<size_t>(A_row[i]);
        sort_map[order] = A[i];
    }

    size_t out = 0;
    for (const auto &entry : sort_map)
    {
        // exchange the row and column indices: key / N is the original column
        Ac_row[out] = static_cast<int>(entry.first/dim);
        Ac_col[out] = static_cast<int>(entry.first%dim);
        Ac_val[out] = entry.second;
        ++out;
    }
    return;
}
|
||||||
|
|
||||||
|
// Launch smCcsr_get_diagonal_device with enough blocks of bk_size threads to
// cover A_len rows. The launch is not followed by any synchronization or
// error check here.
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size)
{
    // An empty matrix needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (A_len <= 0 || bk_size <= 0) return;

    const int numBlocks = (A_len + bk_size - 1) / bk_size;
    smCcsr_get_diagonal_device<<<numBlocks, bk_size>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
|
||||||
|
|
||||||
|
// Launch smZcsr_get_diagonal_device with enough blocks of bk_size threads to
// cover A_len rows. The launch is not followed by any synchronization or
// error check here.
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size)
{
    // An empty matrix needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (A_len <= 0 || bk_size <= 0) return;

    const int numBlocks = (A_len + bk_size - 1) / bk_size;
    smZcsr_get_diagonal_device<<<numBlocks, bk_size>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecMvecC_element_wise_device (c = a .* b, single precision) with
// enough blocks of bk_size threads to cover n elements.
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecMvecC_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecMvecZ_element_wise_device (c = a .* b, double precision) with
// enough blocks of bk_size threads to cover n elements.
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecMvecZ_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecDvecC_element_wise_device (c = a ./ b, single precision) with
// enough blocks of bk_size threads to cover n elements.
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecDvecC_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecDvecZ_element_wise_device (c = a ./ b, double precision) with
// enough blocks of bk_size threads to cover n elements.
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecDvecZ_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecC_conjugate_device (ca = conj(a), single precision) with enough
// blocks of bk_size threads to cover n elements.
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecC_conjugate_device<<<numBlocks, bk_size>>>(a, ca, n);
    return;
}
|
||||||
|
|
||||||
|
// Launch vecZ_conjugate_device (ca = conj(a), double precision) with enough
// blocks of bk_size threads to cover n elements.
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size)
{
    // An empty vector needs no work, and a zero-block launch or a
    // non-positive block size is not a valid launch configuration
    // (bk_size == 0 would also divide by zero below).
    if (n <= 0 || bk_size <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecZ_conjugate_device<<<numBlocks, bk_size>>>(a, ca, n);
    return;
}
|
||||||
278
src/lib/lcg_complex_cuda.h
Normal file
278
src/lib/lcg_complex_cuda.h
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_COMPLEX_CUDA_H
|
||||||
|
#define _LCG_COMPLEX_CUDA_H
|
||||||
|
|
||||||
|
#include "lcg_complex.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cuComplex.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert cuda complex number to lcg complex number
|
||||||
|
*
|
||||||
|
* @param a CUDA complex number
|
||||||
|
* @return lcg_complex lcg complex number
|
||||||
|
*/
|
||||||
|
lcg_complex cuda2lcg_complex(cuDoubleComplex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert lcg complex number to CUDA complex number
|
||||||
|
*
|
||||||
|
* @param a lcg complex number
|
||||||
|
* @return cuDoubleComplex CUDA complex number
|
||||||
|
*/
|
||||||
|
cuDoubleComplex lcg2cuda_complex(lcg_complex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate memory for a cuDoubleComplex array.
|
||||||
|
*
|
||||||
|
* @param[in] n Size of the cuDoubleComplex array.
|
||||||
|
*
|
||||||
|
* @return Pointer of the array's location.
|
||||||
|
*/
|
||||||
|
cuDoubleComplex* clcg_malloc_cuda(size_t n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy memory used by the cuDoubleComplex type array.
|
||||||
|
*
|
||||||
|
* @param x Pointer of the array.
|
||||||
|
*/
|
||||||
|
void clcg_free_cuda(cuDoubleComplex *x);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief set a complex vector's value
|
||||||
|
*
|
||||||
|
* @param a pointer of the vector
|
||||||
|
* @param[in] b initial value
|
||||||
|
* @param[in] size vector size
|
||||||
|
*/
|
||||||
|
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Host side function for scaling a cuComplex object
|
||||||
|
*
|
||||||
|
* @param s scale factor
|
||||||
|
* @param a Complex number
|
||||||
|
* @return cuComplex scaled complex number
|
||||||
|
*/
|
||||||
|
cuComplex clcg_Cscale(lcg_float s, cuComplex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the sum of two cuda complex number. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @param b Complex number
|
||||||
|
* @return cuComplex Sum of the input complex number
|
||||||
|
*/
|
||||||
|
cuComplex clcg_Csum(cuComplex a, cuComplex b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the difference of two cuda complex number. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @param b Complex number
|
||||||
|
* @return cuComplex Difference of the input complex number
|
||||||
|
*/
|
||||||
|
cuComplex clcg_Cdiff(cuComplex a, cuComplex b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the sqrt() of a cuda complex number
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @return cuComplex root value
|
||||||
|
*/
|
||||||
|
cuComplex clcg_Csqrt(cuComplex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Host side function for scale a cuDoubleComplex object
|
||||||
|
*
|
||||||
|
* @param s scale factor
|
||||||
|
* @param a Complex number
|
||||||
|
* @return cuDoubleComplex scaled complex number
|
||||||
|
*/
|
||||||
|
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the sum of two cuda complex number. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @param b Complex number
|
||||||
|
* @return cuDoubleComplex Sum of the input complex number
|
||||||
|
*/
|
||||||
|
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the difference of two cuda complex number. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @param b Complex number
|
||||||
|
* @return cuDoubleComplex Difference of the input complex number
|
||||||
|
*/
|
||||||
|
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the sqrt() of a cuda complex number
|
||||||
|
*
|
||||||
|
* @param a Complex number
|
||||||
|
* @return cuDoubleComplex root value
|
||||||
|
*/
|
||||||
|
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
|
||||||
|
*
|
||||||
|
* @note The sparse matrix is stored in the COO format. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param A_row Row index
|
||||||
|
* @param A_col Column index
|
||||||
|
* @param A Non-zero values of the matrix
|
||||||
|
* @param N Row/column length of A
|
||||||
|
* @param nz Number of the non-zero values in A
|
||||||
|
* @param Ac_row Output row index
|
||||||
|
* @param Ac_col Output column index
|
||||||
|
* @param Ac_val Non-zero values of the output matrix
|
||||||
|
*/
|
||||||
|
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
|
||||||
|
*
|
||||||
|
* @note The sparse matrix is stored in the COO format. This is a host side function.
|
||||||
|
*
|
||||||
|
* @param A_row Row index
|
||||||
|
* @param A_col Column index
|
||||||
|
* @param A Non-zero values of the matrix
|
||||||
|
* @param N Row/column length of A
|
||||||
|
* @param nz Number of the non-zero values in A
|
||||||
|
* @param Ac_row Output row index
|
||||||
|
* @param Ac_col Output column index
|
||||||
|
* @param Ac_val Non-zero values of the output matrix
|
||||||
|
*/
|
||||||
|
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] A_ptr Row index pointer
|
||||||
|
* @param[in] A_col Column index
|
||||||
|
* @param[in] A_val Non-zero values of the matrix
|
||||||
|
* @param[in] A_len Dimension of the matrix
|
||||||
|
* @param A_diag Output diagonal elements
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] A_ptr Row index pointer
|
||||||
|
* @param[in] A_col Column index
|
||||||
|
* @param[in] A_val Non-zero values of the matrix
|
||||||
|
* @param[in] A_len Dimension of the matrix
|
||||||
|
* @param A_diag Output diagonal elements
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise multiplication between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise multiplication between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise division between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Element-wise division between two CUDA arrays.
|
||||||
|
*
|
||||||
|
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] a Pointer of the input array
|
||||||
|
* @param[in] b Pointer of the input array
|
||||||
|
* @param c Pointer of the output array
|
||||||
|
* @param[in] n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return complex conjugates of an input CUDA complex array
|
||||||
|
*
|
||||||
|
* @param a Pointer of the input array
|
||||||
|
* @param ca Pointer of the output array
|
||||||
|
* @param n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return complex conjugates of an input CUDA complex array
|
||||||
|
*
|
||||||
|
* @param a Pointer of the input array
|
||||||
|
* @param ca Pointer of the output array
|
||||||
|
* @param n Length of the arrays
|
||||||
|
* @param[in] bk_size Default CUDA block size.
|
||||||
|
*/
|
||||||
|
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size = 1024);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _LCG_COMPLEX_CUDA_H
|
||||||
685
src/lib/lcg_cuda.cu
Normal file
685
src/lib/lcg_cuda.cu
Normal file
@@ -0,0 +1,685 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "lcg_cuda.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*lcg_solver_cuda_ptr)(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
|
||||||
|
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
|
||||||
|
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Run one of the unconstrained CUDA solvers selected by solver_id.
 *
 * LCG_CGS selects lcgs(); LCG_CG and any other value select lcg().
 * All remaining arguments are forwarded unchanged to the selected solver,
 * and its return code is returned as is.
 */
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
    const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
{
    // Only LCG_CGS deviates from the default choice.
    lcg_solver_cuda_ptr chosen_solver = lcg;
    if (solver_id == LCG_CGS)
    {
        chosen_solver = lcgs;
    }

    return chosen_solver(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||||
|
|
||||||
|
|
||||||
|
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
|
||||||
|
lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
|
||||||
|
{
|
||||||
|
return lpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
|
||||||
|
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
|
||||||
|
{
|
||||||
|
return lpg(Afp, Pfp, m, B, low, hig, n_size, nz_size, param, instance, cub_handle, cus_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Conjugate gradient iteration for A*m = B carried out with cuBLAS/cuSPARSE.
 *
 * @note m and B are read with cudaMemcpyHostToDevice, so both must be host arrays of
 * n_size elements. The solution is copied back into m before returning. The vector
 * descriptors are created with CUDA_R_64F and the cublasD* routines are used, i.e.
 * the code assumes lcg_float is double.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, Ax, n_size, nz_size);
 *                   it is expected to store the product A*x in the second descriptor.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the DEVICE copy of
 *                   the solution; a non-zero return value stops the iteration with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on exit.
 * @param B          Host array: right-hand side.
 * @param n_size     Length of m and B.
 * @param nz_size    Passed through to Afp and Pfp unchanged.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   User data passed through to the callbacks.
 * @param cub_handle cuBLAS handle.
 * @param cus_handle cuSPARSE handle.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_CONVERGENCE, LCG_STOP or LCG_REACHED_MAX_ITERATIONS
 *         after a run, or one of the LCG_INVILAD_* / LCG_INVALID_POINTER codes when the
 *         arguments are rejected (nothing is allocated in that case).
 */
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
    const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    // Afp is called unconditionally below, so reject a missing callback up front
    if (Afp == nullptr) return LCG_INVALID_POINTER;
    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device memory
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *gk = nullptr, *dk = nullptr, *Adk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&gk, n_size * sizeof(lcg_float));
    cudaMalloc(&dk, n_size * sizeof(lcg_float));
    cudaMalloc(&Adk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors wrapping the device buffers handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Adk;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

    // g0 = Ax - B
    cudaMemcpy(gk, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
    cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
    cudaMemset(dk, 0, n_size * sizeof(lcg_float)); // d0 = 0
    cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // d0 = -g0

    lcg_float gk_mod;
    cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = gk^T * gk (squared norm)

    // reference value of the relative criterion, never smaller than 1
    lcg_float g0_mod = gk_mod;
    if (g0_mod < 1.0) g0_mod = 1.0;

    int ret, t = 0;
    // NOTE(review): when abs_diff is set but the absolute test fails, the relative
    // test below is still evaluated, unlike inside the loop - confirm this is intended.
    if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(gk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (gk_mod/g0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, gk_mod/g0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // declared without initializers on purpose: the gotos above jump over these declarations
    lcg_float dTAd, ak, betak, gk1_mod, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
        else residual = gk_mod/g0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd); // dTAd = dk^T * Adk
        ak = gk_mod/dTAd;

        cublasDaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*dk
        cublasDaxpy_v2(cub_handle, n_size, &ak, Adk, 1, gk, 1); // gk += ak*Adk

        cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk1_mod); // gk1_mod = gk^T * gk
        betak = gk1_mod/gk_mod;
        gk_mod = gk1_mod;

        cublasDscal_v2(cub_handle, n_size, &betak, dk, 1); // dk *= betak
        cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // dk -= gk
    }

    func_ends:
    {
        // copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(dk);
        cudaFree(gk);
        cudaFree(Adk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Adk);
    }

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief Second unconstrained CUDA solver of this file (selected by LCG_CGS), solving A*m = B.
 *
 * @note m and B are read with cudaMemcpyHostToDevice, so both must be host arrays of
 * n_size elements. The solution is copied back into m before returning. The vector
 * descriptors are created with CUDA_R_64F and the cublasD* routines are used, i.e.
 * the code assumes lcg_float is double. Afp is called twice per iteration.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, Ax, n_size, nz_size);
 *                   it is expected to store the product A*x in the second descriptor.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the DEVICE copy of
 *                   the solution; a non-zero return value stops the iteration with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on exit.
 * @param B          Host array: right-hand side.
 * @param n_size     Length of m and B.
 * @param nz_size    Passed through to Afp and Pfp unchanged.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   User data passed through to the callbacks.
 * @param cub_handle cuBLAS handle.
 * @param cus_handle cuSPARSE handle.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_CONVERGENCE, LCG_STOP or LCG_REACHED_MAX_ITERATIONS
 *         after a run, or one of the LCG_INVILAD_* / LCG_INVALID_POINTER codes when the
 *         arguments are rejected (nothing is allocated in that case).
 */
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
    const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    // Afp is called unconditionally below, so reject a missing callback up front
    if (Afp == nullptr) return LCG_INVALID_POINTER;
    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device memory
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *rk = nullptr, *r0T = nullptr, *pk = nullptr, *qpk = nullptr;
    lcg_float *Ax = nullptr, *uk = nullptr, *qk = nullptr, *wk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&rk, n_size * sizeof(lcg_float));
    cudaMalloc(&r0T, n_size * sizeof(lcg_float));
    cudaMalloc(&pk, n_size * sizeof(lcg_float));
    cudaMalloc(&qpk, n_size * sizeof(lcg_float));
    cudaMalloc(&Ax, n_size * sizeof(lcg_float));
    cudaMalloc(&uk, n_size * sizeof(lcg_float));
    cudaMalloc(&qk, n_size * sizeof(lcg_float));
    cudaMalloc(&wk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors wrapping the device buffers handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_wk, dvec_pk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_wk, n_size, wk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_pk, n_size, pk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_R_64F);

    lcg_float one = 1.0;
    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // r0 = B
    cublasDaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
    // p0 = u0 = r0T = r0
    cudaMemcpy(pk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
    cudaMemcpy(uk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
    cudaMemcpy(r0T, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);

    lcg_float rkr0T;
    cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rkr0T); // rkr0T = rk^T * r0T

    lcg_float rk_mod;
    cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

    // reference value of the relative criterion, never smaller than 1
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    // NOTE(review): when abs_diff is set but the absolute test fails, the relative
    // test below is still evaluated, unlike inside the loop - confirm this is intended.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(rk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/r0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // declared without initializers on purpose: the gotos above jump over these declarations
    lcg_float ak, nak, rkr0T1, AprT, betak, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_pk, dvec_Ax, n_size, nz_size); // Ax = A*pk

        AprT = 0.0;
        cublasDdot_v2(cub_handle, n_size, r0T, 1, Ax, 1, &AprT); // AprT = r0T^T * (A*pk)
        ak = rkr0T/AprT;
        nak = -1.0*ak;

        cudaMemcpy(qk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cudaMemcpy(wk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &nak, Ax, 1, qk, 1); // qk = uk - ak*A*pk
        cublasDaxpy_v2(cub_handle, n_size, &one, qk, 1, wk, 1); // wk = uk + qk

        Afp(instance, cub_handle, cus_handle, dvec_wk, dvec_Ax, n_size, nz_size); // Ax = A*wk

        cublasDaxpy_v2(cub_handle, n_size, &ak, wk, 1, d_m, 1); // m += ak*wk
        cublasDaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1); // rk -= ak*A*wk

        cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk

        cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rkr0T1);
        betak = rkr0T1/rkr0T;
        rkr0T = rkr0T1;

        cudaMemcpy(uk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, qk, 1, uk, 1); // uk = rk + betak*qk

        cudaMemcpy(qpk, qk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, pk, 1, qpk, 1); // qpk = qk + betak*pk

        cudaMemcpy(pk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, qpk, 1, pk, 1); // pk = uk + betak*qpk
    }

    func_ends:
    {
        // copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(r0T);
        cudaFree(pk);
        cudaFree(qpk);
        cudaFree(Ax);
        cudaFree(uk);
        cudaFree(qk);
        cudaFree(wk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_wk);
        cusparseDestroyDnVec(dvec_pk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * @brief Preconditioned conjugate gradient iteration for A*m = B carried out with cuBLAS/cuSPARSE.
 *
 * @note m and B are read with cudaMemcpyHostToDevice, so both must be host arrays of
 * n_size elements. The solution is copied back into m before returning. The vector
 * descriptors are created with CUDA_R_64F and the cublasD* routines are used, i.e.
 * the code assumes lcg_float is double.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, Ax, n_size, nz_size);
 *                   it is expected to store the product A*x in the second descriptor.
 * @param Mfp        Callback with the same signature, applied to the residual rk to produce zk
 *                   (the preconditioning step).
 * @param Pfp        Optional progress callback (may be nullptr). It receives the DEVICE copy of
 *                   the solution; a non-zero return value stops the iteration with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on exit.
 * @param B          Host array: right-hand side.
 * @param n_size     Length of m and B.
 * @param nz_size    Passed through to the callbacks unchanged.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   User data passed through to the callbacks.
 * @param cub_handle cuBLAS handle.
 * @param cus_handle cuSPARSE handle.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_CONVERGENCE, LCG_STOP or LCG_REACHED_MAX_ITERATIONS
 *         after a run, or one of the LCG_INVILAD_* / LCG_INVALID_POINTER codes when the
 *         arguments are rejected (nothing is allocated in that case).
 */
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
    const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    // Afp and Mfp are called unconditionally below, so reject missing callbacks up front
    if (Afp == nullptr) return LCG_INVALID_POINTER;
    if (Mfp == nullptr) return LCG_INVALID_POINTER;
    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device memory
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *rk = nullptr, *zk = nullptr, *dk = nullptr, *Adk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&rk, n_size * sizeof(lcg_float));
    cudaMalloc(&zk, n_size * sizeof(lcg_float));
    cudaMalloc(&dk, n_size * sizeof(lcg_float));
    cudaMalloc(&Adk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors wrapping the device buffers handed to Afp/Mfp
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_zk, dvec_dk, dvec_Adk;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_zk, n_size, zk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

    lcg_float one = 1.0;
    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // r0 = B
    cublasDaxpy_v2(cub_handle, n_size, &none, Adk, 1, rk, 1); // r0 -= Ax

    // z0 is produced from r0 by the preconditioner callback
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);

    // d0 = z0
    cudaMemcpy(dk, zk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);

    lcg_float rk_mod;
    cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

    // reference value of the relative criterion, never smaller than 1
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    lcg_float zTr;
    cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &zTr); // zTr = zk^T * rk

    int ret, t = 0;
    // NOTE(review): when abs_diff is set but the absolute test fails, the relative
    // test below is still evaluated, unlike inside the loop - confirm this is intended.
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(rk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/r0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // declared without initializers on purpose: the gotos above jump over these declarations
    lcg_float dTAd, ak, nak, betak, zTr1, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd); // dTAd = dk^T * Adk
        ak = zTr/dTAd;
        nak = -1.0*ak;

        cublasDaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*dk
        cublasDaxpy_v2(cub_handle, n_size, &nak, Adk, 1, rk, 1); // rk -= ak*Adk

        Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk

        cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &zTr1);
        betak = zTr1/zTr;
        zTr = zTr1;

        cublasDscal_v2(cub_handle, n_size, &betak, dk, 1); // dk *= betak
        cublasDaxpy_v2(cub_handle, n_size, &one, zk, 1, dk, 1); // dk += zk
    }

    func_ends:
    {
        // copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(zk);
        cudaFree(dk);
        cudaFree(Adk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_rk);
        cusparseDestroyDnVec(dvec_zk);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Adk);
    }

    return ret;
}
|
||||||
|
|
||||||
|
|
||||||
|
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
|
||||||
|
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||||
|
{
|
||||||
|
// set CG parameters
|
||||||
|
lcg_para para = (param != nullptr) ? (*param) : defparam;
|
||||||
|
|
||||||
|
// check parameters
|
||||||
|
if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
|
||||||
|
if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
|
||||||
|
if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;
|
||||||
|
if (para.step <= 0.0) return LCG_INVALID_LAMBDA;
|
||||||
|
|
||||||
|
if (m == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (B == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (low == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (hig == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (cub_handle == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
if (cus_handle == nullptr) return LCG_INVALID_POINTER;
|
||||||
|
|
||||||
|
// locate memory
|
||||||
|
lcg_float *d_m = nullptr, *d_B = nullptr;
|
||||||
|
lcg_float *gk = nullptr, *Adk = nullptr;
|
||||||
|
lcg_float *m_new = nullptr, *gk_new = nullptr;
|
||||||
|
lcg_float *sk = nullptr, *yk = nullptr;
|
||||||
|
cudaMalloc(&d_m, n_size * sizeof(lcg_float));
|
||||||
|
cudaMalloc(&d_B, n_size * sizeof(lcg_float));
|
||||||
|
cudaMalloc(&gk, n_size *sizeof(lcg_float));
|
||||||
|
cudaMalloc(&Adk, n_size *sizeof(lcg_float));
|
||||||
|
cudaMalloc(&m_new, n_size *sizeof(lcg_float));
|
||||||
|
cudaMalloc(&gk_new, n_size *sizeof(lcg_float));
|
||||||
|
cudaMalloc(&sk, n_size *sizeof(lcg_float));
|
||||||
|
cudaMalloc(&yk, n_size *sizeof(lcg_float));
|
||||||
|
|
||||||
|
// Copy initial solutions
|
||||||
|
cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cusparseDnVecDescr_t dvec_m, dvec_mnew, dvec_Adk;
|
||||||
|
cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_mnew, n_size, m_new, CUDA_R_64F);
|
||||||
|
cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);
|
||||||
|
|
||||||
|
lcg_float none = -1.0;
|
||||||
|
lcg_float nalpha_k, alpha_k = para.step;
|
||||||
|
|
||||||
|
lcg_set2box_cuda(low, hig, m, n_size);
|
||||||
|
Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);
|
||||||
|
|
||||||
|
// g0 = Ax - B
|
||||||
|
cudaMemcpy(gk, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
|
||||||
|
cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
|
||||||
|
|
||||||
|
lcg_float gk_mod;
|
||||||
|
cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = ||gk||
|
||||||
|
|
||||||
|
lcg_float g0_mod = gk_mod;
|
||||||
|
if (g0_mod < 1.0) g0_mod = 1.0;
|
||||||
|
|
||||||
|
int ret, t = 0;
|
||||||
|
if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = LCG_ALREADY_OPTIMIZIED;
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
Pfp(instance, d_m, sqrt(gk_mod)/n_size, ¶, n_size, nz_size, 0);
|
||||||
|
}
|
||||||
|
goto func_ends;
|
||||||
|
}
|
||||||
|
else if (gk_mod/g0_mod <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = LCG_ALREADY_OPTIMIZIED;
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
Pfp(instance, d_m, gk_mod/g0_mod, ¶, n_size, nz_size, 0);
|
||||||
|
}
|
||||||
|
goto func_ends;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
lcg_float sk_mod, syk_mod, residual;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
|
||||||
|
else residual = gk_mod/g0_mod;
|
||||||
|
|
||||||
|
if (Pfp != nullptr)
|
||||||
|
{
|
||||||
|
if (Pfp(instance, d_m, residual, ¶, n_size, nz_size, t))
|
||||||
|
{
|
||||||
|
ret = LCG_STOP; goto func_ends;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (residual <= para.epsilon)
|
||||||
|
{
|
||||||
|
ret = LCG_CONVERGENCE; goto func_ends;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (para.max_iterations > 0 && t+1 > para.max_iterations)
|
||||||
|
{
|
||||||
|
ret = LCG_REACHED_MAX_ITERATIONS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
t++;
|
||||||
|
|
||||||
|
nalpha_k = -1.0*alpha_k;
|
||||||
|
cudaMemcpy(m_new, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
cublasDaxpy_v2(cub_handle, n_size, &nalpha_k, gk, 1, m_new, 1);
|
||||||
|
|
||||||
|
lcg_set2box_cuda(low, hig, m_new, n_size);
|
||||||
|
Afp(instance, cub_handle, cus_handle, dvec_mnew, dvec_Adk, n_size, nz_size);
|
||||||
|
|
||||||
|
cudaMemcpy(gk_new, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
|
||||||
|
cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
|
||||||
|
|
||||||
|
cudaMemcpy(sk, m_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
cublasDaxpy_v2(cub_handle, n_size, &none, d_m, 1, sk, 1);
|
||||||
|
|
||||||
|
cudaMemcpy(yk, gk_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, sk, 1);
|
||||||
|
|
||||||
|
cublasDdot_v2(cub_handle, n_size, sk, 1, sk, 1, &sk_mod);
|
||||||
|
cublasDdot_v2(cub_handle, n_size, sk, 1, yk, 1, &syk_mod);
|
||||||
|
alpha_k = sk_mod/syk_mod;
|
||||||
|
|
||||||
|
cudaMemcpy(d_m, m_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
cudaMemcpy(gk, gk_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
|
||||||
|
lcg_float gk_mod;
|
||||||
|
cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = ||gk||
|
||||||
|
}
|
||||||
|
|
||||||
|
func_ends:
|
||||||
|
{
|
||||||
|
// Copy to host memories
|
||||||
|
cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
cudaFree(d_m);
|
||||||
|
cudaFree(d_B);
|
||||||
|
cudaFree(gk);
|
||||||
|
cudaFree(gk_new);
|
||||||
|
cudaFree(m_new);
|
||||||
|
cudaFree(sk);
|
||||||
|
cudaFree(yk);
|
||||||
|
cudaFree(Adk);
|
||||||
|
cusparseDestroyDnVec(dvec_m);
|
||||||
|
cusparseDestroyDnVec(dvec_mnew);
|
||||||
|
cusparseDestroyDnVec(dvec_Adk);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
135
src/lib/lcg_cuda.h
Normal file
135
src/lib/lcg_cuda.h
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_CUDA_H
|
||||||
|
#define _LCG_CUDA_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "algebra_cuda.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
#include <cublas_v2.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handle of the cusparse object.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param prod_Ax Product of A multiplied by x.
|
||||||
|
* @param n_size Size of x and column/row numbers of A.
* @param nz_size Number of non-zero elements of the cusparse object.
|
||||||
|
*/
|
||||||
|
typedef void (*lcg_axfunc_cuda_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary. Note that m is hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param n_size The size of the variables
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*lcg_progress_cuda_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handle of the cusparse object.
|
||||||
|
* Note: 'instance' is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle,
|
||||||
|
cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_CG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handle of the cusparse object.
|
||||||
|
* Note: 'instance' is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
|
||||||
|
lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PCG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param low Lower bound of the acceptable solution.
|
||||||
|
* @param hig Higher bound of the acceptable solution.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] n_size Size of the solution vector and objective vector.
|
||||||
|
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* @param cub_handle Handler of the cublas object.
|
||||||
|
* @param cus_handle Handle of the cusparse object.
|
||||||
|
* Note: 'instance' is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||||
|
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||||
|
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PG);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _LCG_CUDA_H
|
||||||
1128
src/lib/lcg_eigen.cpp
Normal file
1128
src/lib/lcg_eigen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
110
src/lib/lcg_eigen.h
Normal file
110
src/lib/lcg_eigen.h
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_EIGEN_H
|
||||||
|
#define _LCG_EIGEN_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "algebra_eigen.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||||
|
* by a vertical vector 'x'.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param x Multiplier of the Ax product.
|
||||||
|
* @param Ax Product of A multiplied by x.
|
||||||
|
*/
|
||||||
|
typedef void (*lcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||||
|
* if necessary.
|
||||||
|
*
|
||||||
|
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||||
|
* @param m The current solutions.
|
||||||
|
* @param converge The current value evaluating the iteration progress.
|
||||||
|
* @param k The iteration count.
|
||||||
|
*
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will terminate the optimization process.
|
||||||
|
*/
|
||||||
|
typedef int (*lcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
|
||||||
|
const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_preconditioned_eigen(lcg_axfunc_eigen_ptr Afp, lcg_axfunc_eigen_ptr Mfp, lcg_progress_eigen_ptr Pfp,
|
||||||
|
Eigen::VectorXd &m, const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A combined conjugate gradient solver function with inequality constraints.
|
||||||
|
*
|
||||||
|
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||||
|
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||||
|
* @param m Initial solution vector.
|
||||||
|
* @param B Objective vector of the linear system.
|
||||||
|
* @param[in] low The lower boundary of the acceptable solution.
|
||||||
|
* @param[in] hig The higher boundary of the acceptable solution.
|
||||||
|
* @param param Parameter setup for the conjugate gradient methods.
|
||||||
|
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||||
|
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||||
|
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
|
||||||
|
* @param P Precondition vector (optional expect for the LCG_PCG method). The default value is NULL.
|
||||||
|
*
|
||||||
|
* @return Status of the function.
|
||||||
|
*/
|
||||||
|
int lcg_solver_constrained_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
|
||||||
|
const Eigen::VectorXd &B, const Eigen::VectorXd &low, const Eigen::VectorXd &hig,
|
||||||
|
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PG);
|
||||||
|
|
||||||
|
#endif //_LCG_EIGEN_H
|
||||||
381
src/lib/preconditioner.cpp
Normal file
381
src/lib/preconditioner.cpp
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "preconditioner.h"
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "map"
|
||||||
|
|
||||||
|
/**
 * Count the entries of a COO sparse matrix that lie in its lower triangle
 * (diagonal included), i.e. the entries with row index >= column index.
 * The result is the buffer length needed by lcg_incomplete_Cholesky_half_coo().
 *
 * row       Row indices of the stored entries.
 * col       Column indices of the stored entries.
 * nz_size   Number of stored entries. A non-positive value yields a count of 0.
 * lnz_size  Output: number of stored entries in the lower triangle.
 */
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size)
{
	// The counter and index are signed on purpose: comparing an unsigned index
	// against a negative nz_size would wrap the bound around to a huge value.
	int lower_count = 0;
	for (int i = 0; i < nz_size; i++)
	{
		if (row[i] >= col[i])
		{
			lower_count++;
		}
	}

	*lnz_size = lower_count;
	return;
}
|
||||||
|
|
||||||
|
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size,
|
||||||
|
int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
|
||||||
|
{
|
||||||
|
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||||
|
lcg_float *diagonal = new lcg_float [N];
|
||||||
|
// A temporary row
|
||||||
|
lcg_float *tmp_row = new lcg_float [N];
|
||||||
|
// index of non-zero elements in tmp_row
|
||||||
|
int *filled_idx = new int [N];
|
||||||
|
// Begining index of each row in the input matrix
|
||||||
|
int *row_st_idx = new int [N];
|
||||||
|
|
||||||
|
size_t i, j, f;
|
||||||
|
|
||||||
|
// Set initial values
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diagonal[i] = 0.0;
|
||||||
|
tmp_row[i] = 0.0;
|
||||||
|
filled_idx[i] = -1;
|
||||||
|
row_st_idx[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy elements in the lower triangle to the output matrix
|
||||||
|
j = 0;
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i])
|
||||||
|
{
|
||||||
|
IC_row[j] = row[i];
|
||||||
|
IC_col[j] = col[i];
|
||||||
|
IC_val[j] = val[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the begining index of each row in the matrix
|
||||||
|
j = 1;
|
||||||
|
row_st_idx[0] = IC_row[0];
|
||||||
|
size_t old_row = IC_row[0];
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] > old_row)
|
||||||
|
{
|
||||||
|
row_st_idx[j] = i;
|
||||||
|
old_row = IC_row[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the first element
|
||||||
|
IC_val[0] = sqrt(IC_val[0]);
|
||||||
|
diagonal[0] = IC_val[0];
|
||||||
|
|
||||||
|
lcg_float dia_sum;
|
||||||
|
dia_sum = 0.0;
|
||||||
|
// The first one is already calculated
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
// Calculate the first column if there is one
|
||||||
|
if (IC_col[i] == 0)
|
||||||
|
{
|
||||||
|
IC_val[i] = IC_val[i]/IC_val[0];
|
||||||
|
dia_sum = dia_sum + IC_val[i]*IC_val[i];
|
||||||
|
continue; // Case 1 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate elements in the middle of a row
|
||||||
|
if (IC_row[i] > IC_col[i])
|
||||||
|
{
|
||||||
|
// Find needed values from previous elements
|
||||||
|
f = 0;
|
||||||
|
j = row_st_idx[IC_col[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
tmp_row[IC_col[j]] = IC_val[j];
|
||||||
|
filled_idx[f] = IC_col[j];
|
||||||
|
f++;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = row_st_idx[IC_row[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = IC_val[i] - IC_val[j]*tmp_row[IC_col[j]];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
IC_val[i] = IC_val[i]/diagonal[IC_col[i]];
|
||||||
|
dia_sum = dia_sum + IC_val[i]*IC_val[i];
|
||||||
|
|
||||||
|
// reset tmp variables
|
||||||
|
for (j = 0; j < f; j++)
|
||||||
|
{
|
||||||
|
tmp_row[filled_idx[j]] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue; // Case 2 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have rearched the diagonal position
|
||||||
|
if (IC_row[i] == IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = sqrt(IC_val[i] - dia_sum);
|
||||||
|
diagonal[IC_col[i]] = IC_val[i];
|
||||||
|
dia_sum = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] diagonal;
|
||||||
|
delete[] tmp_row;
|
||||||
|
delete[] row_st_idx;
|
||||||
|
delete[] filled_idx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
|
||||||
|
{
|
||||||
|
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||||
|
lcg_float *diagonal = new lcg_float [N];
|
||||||
|
// A temporary row
|
||||||
|
lcg_float *tmp_row = new lcg_float [N];
|
||||||
|
// index of non-zero elements in tmp_row
|
||||||
|
int *filled_idx = new int [N];
|
||||||
|
// Begining index of each row in the input matrix
|
||||||
|
int *row_st_idx = new int [N];
|
||||||
|
|
||||||
|
size_t i, j, f, l;
|
||||||
|
|
||||||
|
// Set initial values
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diagonal[i] = 0.0;
|
||||||
|
tmp_row[i] = 0.0;
|
||||||
|
filled_idx[i] = -1;
|
||||||
|
row_st_idx[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy elements to the output matrix
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
IC_row[i] = row[i];
|
||||||
|
IC_col[i] = col[i];
|
||||||
|
IC_val[i] = val[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// count element number in the lower triangular part (including the diagonal) and the upper triangular part (excluding the diagonal)
|
||||||
|
// build map from elements' cooridnate to their index in the array
|
||||||
|
size_t order, L_nz = 0;
|
||||||
|
std::map<size_t, size_t> index_map;
|
||||||
|
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i]) // Count number for thr lower triangular part
|
||||||
|
{
|
||||||
|
L_nz++;
|
||||||
|
}
|
||||||
|
else // Only need to build the map for the upper triangular part
|
||||||
|
{
|
||||||
|
order = N*row[i] + col[i];
|
||||||
|
index_map[order] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We use to store element index in the lower triangle
|
||||||
|
j = 0;
|
||||||
|
size_t *low_idx = new size_t [L_nz];
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i])
|
||||||
|
{
|
||||||
|
low_idx[j] = i;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the begining index of each row in the matrix
|
||||||
|
j = 1;
|
||||||
|
row_st_idx[0] = IC_row[0];
|
||||||
|
size_t old_row = IC_row[0];
|
||||||
|
for (i = 1; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] > old_row)
|
||||||
|
{
|
||||||
|
row_st_idx[j] = i;
|
||||||
|
old_row = IC_row[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the first element
|
||||||
|
IC_val[0] = sqrt(IC_val[0]);
|
||||||
|
diagonal[0] = IC_val[0];
|
||||||
|
|
||||||
|
lcg_float dia_sum;
|
||||||
|
dia_sum = 0.0;
|
||||||
|
// The first one is already calculated
|
||||||
|
for (i = 1; i < L_nz; i++)
|
||||||
|
{
|
||||||
|
l = low_idx[i];
|
||||||
|
|
||||||
|
// Calculate the first column if there is one
|
||||||
|
if (IC_col[l] == 0)
|
||||||
|
{
|
||||||
|
IC_val[l] = IC_val[l]/IC_val[0];
|
||||||
|
dia_sum = dia_sum + IC_val[l]*IC_val[l];
|
||||||
|
// Set value at the upper triangle
|
||||||
|
order = IC_row[l];
|
||||||
|
IC_val[index_map[order]] = IC_val[l];
|
||||||
|
continue; // Case 1 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate elements in the middle of a row
|
||||||
|
if (IC_row[l] > IC_col[l])
|
||||||
|
{
|
||||||
|
// Find needed values from previous elements
|
||||||
|
f = 0;
|
||||||
|
j = row_st_idx[IC_col[l]];
|
||||||
|
while (IC_col[j] < IC_col[l])
|
||||||
|
{
|
||||||
|
tmp_row[IC_col[j]] = IC_val[j];
|
||||||
|
filled_idx[f] = IC_col[j];
|
||||||
|
f++;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = row_st_idx[IC_row[l]];
|
||||||
|
while (IC_col[j] < IC_col[l])
|
||||||
|
{
|
||||||
|
IC_val[l] = IC_val[l] - IC_val[j]*tmp_row[IC_col[j]];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
IC_val[l] = IC_val[l]/diagonal[IC_col[l]];
|
||||||
|
dia_sum = dia_sum + IC_val[l]*IC_val[l];
|
||||||
|
|
||||||
|
// Set value at the upper triangle
|
||||||
|
order = N*IC_col[l] + IC_row[l];
|
||||||
|
IC_val[index_map[order]] = IC_val[l];
|
||||||
|
|
||||||
|
// reset tmp variables
|
||||||
|
for (j = 0; j < f; j++)
|
||||||
|
{
|
||||||
|
tmp_row[filled_idx[j]] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue; // Case 2 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have rearched the diagonal position
|
||||||
|
if (IC_row[l] == IC_col[l])
|
||||||
|
{
|
||||||
|
IC_val[l] = sqrt(IC_val[l] - dia_sum);
|
||||||
|
diagonal[IC_col[l]] = IC_val[l];
|
||||||
|
dia_sum = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] diagonal;
|
||||||
|
delete[] tmp_row;
|
||||||
|
delete[] row_st_idx;
|
||||||
|
delete[] filled_idx;
|
||||||
|
delete[] low_idx;
|
||||||
|
index_map.clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Solve U*x = B by backward substitution, where the upper triangular part of the
 * COO matrix (row, col, U) is used. Entries with col < row are skipped, so the
 * arrays may also hold a lower triangle.
 *
 * The entries are scanned from the last one towards the first one, so the code
 * relies on them being ordered row by row with increasing column index.
 * x[i] is left at 0 for a row whose diagonal entry is not found.
 *
 * row, col, U  COO description of the matrix (nz_size entries).
 * B            Right-hand side vector of length N.
 * x            Output: solution vector of length N.
 * N            Row/column size of the matrix.
 * nz_size      Number of stored entries.
 */
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
	for (int i = 0; i < N; i++)
	{
		x[i] = 0.0;
	}

	// Both loops count down to zero, so the indices must be signed: an unsigned
	// index is always >= 0 and wraps around instead of ending the loop.
	int iter = nz_size - 1; // Entry at which the scan for the next row starts
	double sum;
	for (int i = N-1; i >= 0; i--)
	{
		sum = 0.0;
		for (int j = iter; j >= 0; j--)
		{
			if (row[j] != i) continue;

			if (col[j] > i)
			{
				// Contribution of an already solved unknown
				sum += U[j] * x[col[j]];
			}
			else if (col[j] == i)
			{
				x[i] = (B[i] - sum)/U[j];
				// Continue below the diagonal of this row. When j == 0 this becomes -1
				// and the remaining rows are skipped, leaving their x at 0.
				iter = j-1;
				break;
			}
		}
	}
	return;
}
|
||||||
|
|
||||||
|
/**
 * Solve L*x = B by forward substitution, where the lower triangular part of the
 * COO matrix (row, col, L) is used. Entries with col > row are skipped.
 *
 * The entries are scanned from the first one towards the last one; for each row
 * the scan resumes right after the diagonal entry of the previous row.
 * x[i] is left at 0 for a row whose diagonal entry is not found.
 */
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
	size_t r, k;

	// Start from a zero solution vector
	r = 0;
	while (r < N)
	{
		x[r] = 0.0;
		++r;
	}

	size_t scan_from = 0; // Entry at which the scan for the current row starts
	for (r = 0; r < N; ++r)
	{
		double acc = 0.0;
		k = scan_from;
		while (k < nz_size)
		{
			if (row[k] == r)
			{
				if (col[k] < r)
				{
					// Contribution of an already solved unknown
					acc += L[k] * x[col[k]];
				}
				else if (col[k] == r)
				{
					// Diagonal entry: the unknown of this row is determined
					x[r] = (B[r] - acc)/L[k];
					scan_from = k+1;
					break;
				}
			}
			++k;
		}
	}
	return;
}
|
||||||
|
|
||||||
|
/**
 * Report whether the number of stored diagonal entries with a non-zero value
 * equals N.
 *
 * row, col, M  COO description of the matrix (nz_size entries).
 * N            Row/column size of the matrix.
 * nz_size      Number of stored entries.
 */
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size)
{
	size_t diag_count = 0;
	size_t k = 0;
	while (k < nz_size)
	{
		// The value is only inspected for entries on the diagonal
		const bool counted = (row[k] == col[k]) && (M[k] != 0.0);
		if (counted) ++diag_count;
		++k;
	}

	return diag_count == N;
}
|
||||||
110
src/lib/preconditioner.h
Normal file
110
src/lib/preconditioner.h
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _PRECONDITIONER_H
|
||||||
|
#define _PRECONDITIONER_H
|
||||||
|
|
||||||
|
#include "algebra.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the number of non-zero elements in the lower triangular part of the input matrix
|
||||||
|
*
|
||||||
|
* @param row[in] Row index of the input sparse matrix.
|
||||||
|
* @param col[in] Column index of the input sparse matrix.
|
||||||
|
* @param nz_size[in] Length of the non-zero elements.
|
||||||
|
* @param lnz_size[out] Length of the non-zero elements in the lower triangle
|
||||||
|
*/
|
||||||
|
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||||
|
*
|
||||||
|
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param val Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||||
|
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||||
|
*/
|
||||||
|
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||||
|
*
|
||||||
|
* @note The factorized lower and upper triangular matrixes are stored in the lower and upper triangular parts of the output matrix accordingly.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param val Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||||
|
*/
|
||||||
|
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the linear system Ux = B, in which U is an upper triangular matrix.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param U Non-zero values of the input sparse matrix.
|
||||||
|
* @param B Object array.
|
||||||
|
* @param x The returned solution.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
*/
|
||||||
|
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the linear system Lx = B, in which L is a lower triangle matrix.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param L Non-zero values of the input sparse matrix.
|
||||||
|
* @param B Object array.
|
||||||
|
* @param x The returned solution.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
*/
|
||||||
|
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check to see if a square matrix is full ranked or not. The sparse matrix is stored in the COO format.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param M Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @return true The matrix is full ranked.
|
||||||
|
* @return false The matrix is not full ranked.
|
||||||
|
*/
|
||||||
|
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size);
|
||||||
|
|
||||||
|
#endif // _PRECONDITIONER_H
|
||||||
421
src/lib/preconditioner_cuda.cu
Normal file
421
src/lib/preconditioner_cuda.cu
Normal file
@@ -0,0 +1,421 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "preconditioner_cuda.h"
|
||||||
|
#include "map"
|
||||||
|
|
||||||
|
/**
 * Count the stored entries that lie in the lower triangle (diagonal included)
 * of a COO matrix, i.e. entries with row[i] >= col[i].
 *
 * @param row       Row indices of the stored entries.
 * @param col       Column indices of the stored entries.
 * @param nz_size   Number of stored entries; values <= 0 yield a count of 0.
 * @param lnz_size  Output: number of entries with row >= col.
 */
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size)
{
	// Use a signed index so that a negative nz_size simply skips the loop
	// instead of being converted to a very large unsigned bound.
	int lower_count = 0;
	for (int i = 0; i < nz_size; i++)
	{
		if (row[i] >= col[i])
		{
			lower_count++;
		}
	}

	*lnz_size = lower_count;
}
|
||||||
|
|
||||||
|
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size,
|
||||||
|
int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val)
|
||||||
|
{
|
||||||
|
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||||
|
cuComplex *diagonal = new cuComplex [N];
|
||||||
|
// A temporary row
|
||||||
|
cuComplex *tmp_row = new cuComplex [N];
|
||||||
|
// index of non-zero elements in tmp_row
|
||||||
|
int *filled_idx = new int [N];
|
||||||
|
// Begining index of each row in the input matrix
|
||||||
|
int *row_st_idx = new int [N];
|
||||||
|
|
||||||
|
size_t i, j, f;
|
||||||
|
|
||||||
|
// Set initial values
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||||
|
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||||
|
filled_idx[i] = -1;
|
||||||
|
row_st_idx[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy elements in the lower triangle to the output matrix
|
||||||
|
j = 0;
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i])
|
||||||
|
{
|
||||||
|
IC_row[j] = row[i];
|
||||||
|
IC_col[j] = col[i];
|
||||||
|
IC_val[j] = val[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the begining index of each row in the matrix
|
||||||
|
j = 1;
|
||||||
|
row_st_idx[0] = IC_row[0];
|
||||||
|
size_t old_row = IC_row[0];
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] > old_row)
|
||||||
|
{
|
||||||
|
row_st_idx[j] = i;
|
||||||
|
old_row = IC_row[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the first element
|
||||||
|
IC_val[0] = clcg_Csqrt(IC_val[0]);
|
||||||
|
diagonal[0] = IC_val[0];
|
||||||
|
|
||||||
|
cuComplex dia_sum;
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
// The first one is already calculated
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
// Calculate the first column if there is one
|
||||||
|
if (IC_col[i] == 0)
|
||||||
|
{
|
||||||
|
IC_val[i] = cuCdivf(IC_val[i], IC_val[0]);
|
||||||
|
dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));
|
||||||
|
continue; // Case 1 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate elements in the middle of a row
|
||||||
|
if (IC_row[i] > IC_col[i])
|
||||||
|
{
|
||||||
|
// Find needed values from previous elements
|
||||||
|
f = 0;
|
||||||
|
j = row_st_idx[IC_col[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
tmp_row[IC_col[j]] = IC_val[j];
|
||||||
|
filled_idx[f] = IC_col[j];
|
||||||
|
f++;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = row_st_idx[IC_row[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = clcg_Cdiff(IC_val[i], cuCmulf(IC_val[j], tmp_row[IC_col[j]]));
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
IC_val[i] = cuCdivf(IC_val[i], diagonal[IC_col[i]]);
|
||||||
|
dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));
|
||||||
|
|
||||||
|
// reset tmp variables
|
||||||
|
for (j = 0; j < f; j++)
|
||||||
|
{
|
||||||
|
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue; // Case 2 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have rearched the diagonal position
|
||||||
|
if (IC_row[i] == IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = clcg_Csqrt(clcg_Cdiff(IC_val[i], dia_sum));
|
||||||
|
diagonal[IC_col[i]] = IC_val[i];
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] diagonal;
|
||||||
|
delete[] tmp_row;
|
||||||
|
delete[] row_st_idx;
|
||||||
|
delete[] filled_idx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size,
|
||||||
|
int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
|
||||||
|
{
|
||||||
|
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||||
|
cuDoubleComplex *diagonal = new cuDoubleComplex [N];
|
||||||
|
// A temporary row
|
||||||
|
cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
|
||||||
|
// index of non-zero elements in tmp_row
|
||||||
|
int *filled_idx = new int [N];
|
||||||
|
// Begining index of each row in the input matrix
|
||||||
|
int *row_st_idx = new int [N];
|
||||||
|
|
||||||
|
size_t i, j, f;
|
||||||
|
|
||||||
|
// Set initial values
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||||
|
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||||
|
filled_idx[i] = -1;
|
||||||
|
row_st_idx[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy elements in the lower triangle to the output matrix
|
||||||
|
j = 0;
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i])
|
||||||
|
{
|
||||||
|
IC_row[j] = row[i];
|
||||||
|
IC_col[j] = col[i];
|
||||||
|
IC_val[j] = val[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the begining index of each row in the matrix
|
||||||
|
j = 1;
|
||||||
|
row_st_idx[0] = IC_row[0];
|
||||||
|
size_t old_row = IC_row[0];
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] > old_row)
|
||||||
|
{
|
||||||
|
row_st_idx[j] = i;
|
||||||
|
old_row = IC_row[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the first element
|
||||||
|
IC_val[0] = clcg_Zsqrt(IC_val[0]);
|
||||||
|
diagonal[0] = IC_val[0];
|
||||||
|
|
||||||
|
cuDoubleComplex dia_sum;
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
// The first one is already calculated
|
||||||
|
for (i = 1; i < lnz_size; i++)
|
||||||
|
{
|
||||||
|
// Calculate the first column if there is one
|
||||||
|
if (IC_col[i] == 0)
|
||||||
|
{
|
||||||
|
IC_val[i] = cuCdiv(IC_val[i], IC_val[0]);
|
||||||
|
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));
|
||||||
|
continue; // Case 1 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate elements in the middle of a row
|
||||||
|
if (IC_row[i] > IC_col[i])
|
||||||
|
{
|
||||||
|
// Find needed values from previous elements
|
||||||
|
f = 0;
|
||||||
|
j = row_st_idx[IC_col[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
tmp_row[IC_col[j]] = IC_val[j];
|
||||||
|
filled_idx[f] = IC_col[j];
|
||||||
|
f++;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = row_st_idx[IC_row[i]];
|
||||||
|
while (IC_col[j] < IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = clcg_Zdiff(IC_val[i], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
IC_val[i] = cuCdiv(IC_val[i], diagonal[IC_col[i]]);
|
||||||
|
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));
|
||||||
|
|
||||||
|
// reset tmp variables
|
||||||
|
for (j = 0; j < f; j++)
|
||||||
|
{
|
||||||
|
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue; // Case 2 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have rearched the diagonal position
|
||||||
|
if (IC_row[i] == IC_col[i])
|
||||||
|
{
|
||||||
|
IC_val[i] = clcg_Zsqrt(clcg_Zdiff(IC_val[i], dia_sum));
|
||||||
|
diagonal[IC_col[i]] = IC_val[i];
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] diagonal;
|
||||||
|
delete[] tmp_row;
|
||||||
|
delete[] row_st_idx;
|
||||||
|
delete[] filled_idx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
|
||||||
|
{
|
||||||
|
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||||
|
cuDoubleComplex *diagonal = new cuDoubleComplex [N];
|
||||||
|
// A temporary row
|
||||||
|
cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
|
||||||
|
// index of non-zero elements in tmp_row
|
||||||
|
int *filled_idx = new int [N];
|
||||||
|
// Begining index of each row in the input matrix
|
||||||
|
int *row_st_idx = new int [N];
|
||||||
|
|
||||||
|
size_t i, j, f, l;
|
||||||
|
|
||||||
|
// Set initial values
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||||
|
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||||
|
filled_idx[i] = -1;
|
||||||
|
row_st_idx[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy elements to the output matrix
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
IC_row[i] = row[i];
|
||||||
|
IC_col[i] = col[i];
|
||||||
|
IC_val[i] = val[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// count element number in the lower triangular part (including the diagonal) and the upper triangular part (excluding the diagonal)
|
||||||
|
// build map from elements' cooridnate to their index in the array
|
||||||
|
size_t order, L_nz = 0;
|
||||||
|
std::map<size_t, size_t> index_map;
|
||||||
|
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i]) // Count number for thr lower triangular part
|
||||||
|
{
|
||||||
|
L_nz++;
|
||||||
|
}
|
||||||
|
else // Only need to build the map for the upper triangular part
|
||||||
|
{
|
||||||
|
order = N*row[i] + col[i];
|
||||||
|
index_map[order] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We use to store element index in the lower triangle
|
||||||
|
j = 0;
|
||||||
|
size_t *low_idx = new size_t [L_nz];
|
||||||
|
for (i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (row[i] >= col[i])
|
||||||
|
{
|
||||||
|
low_idx[j] = i;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the begining index of each row in the matrix
|
||||||
|
j = 1;
|
||||||
|
row_st_idx[0] = IC_row[0];
|
||||||
|
size_t old_row = IC_row[0];
|
||||||
|
for (i = 1; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] > old_row)
|
||||||
|
{
|
||||||
|
row_st_idx[j] = i;
|
||||||
|
old_row = IC_row[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the first element
|
||||||
|
IC_val[0] = clcg_Zsqrt(IC_val[0]);
|
||||||
|
diagonal[0] = IC_val[0];
|
||||||
|
|
||||||
|
cuDoubleComplex dia_sum;
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
// The first one is already calculated
|
||||||
|
for (i = 1; i < L_nz; i++)
|
||||||
|
{
|
||||||
|
l = low_idx[i];
|
||||||
|
|
||||||
|
// Calculate the first column if there is one
|
||||||
|
if (IC_col[l] == 0)
|
||||||
|
{
|
||||||
|
IC_val[l] = cuCdiv(IC_val[l], IC_val[0]);
|
||||||
|
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));
|
||||||
|
// Set value at the upper triangle
|
||||||
|
order = IC_row[l];
|
||||||
|
IC_val[index_map[order]] = IC_val[l];
|
||||||
|
continue; // Case 1 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate elements in the middle of a row
|
||||||
|
if (IC_row[l] > IC_col[l])
|
||||||
|
{
|
||||||
|
// Find needed values from previous elements
|
||||||
|
f = 0;
|
||||||
|
j = row_st_idx[IC_col[l]];
|
||||||
|
while (IC_col[j] < IC_col[l])
|
||||||
|
{
|
||||||
|
tmp_row[IC_col[j]] = IC_val[j];
|
||||||
|
filled_idx[f] = IC_col[j];
|
||||||
|
f++;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = row_st_idx[IC_row[l]];
|
||||||
|
while (IC_col[j] < IC_col[l])
|
||||||
|
{
|
||||||
|
IC_val[l] = clcg_Zdiff(IC_val[l], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
IC_val[l] = cuCdiv(IC_val[l], diagonal[IC_col[l]]);
|
||||||
|
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));
|
||||||
|
|
||||||
|
// Set value at the upper triangle
|
||||||
|
order = N*IC_col[l] + IC_row[l];
|
||||||
|
IC_val[index_map[order]] = IC_val[l];
|
||||||
|
|
||||||
|
// reset tmp variables
|
||||||
|
for (j = 0; j < f; j++)
|
||||||
|
{
|
||||||
|
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue; // Case 2 break
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have rearched the diagonal position
|
||||||
|
if (IC_row[l] == IC_col[l])
|
||||||
|
{
|
||||||
|
IC_val[l] = clcg_Zsqrt(clcg_Zdiff(IC_val[l], dia_sum));
|
||||||
|
diagonal[IC_col[l]] = IC_val[l];
|
||||||
|
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] diagonal;
|
||||||
|
delete[] tmp_row;
|
||||||
|
delete[] row_st_idx;
|
||||||
|
delete[] filled_idx;
|
||||||
|
delete[] low_idx;
|
||||||
|
index_map.clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
92
src/lib/preconditioner_cuda.h
Normal file
92
src/lib/preconditioner_cuda.h
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _PRECONDITIONER_CUDA_H
|
||||||
|
#define _PRECONDITIONER_CUDA_H
|
||||||
|
|
||||||
|
#include "lcg_complex_cuda.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the number of non-zero elements in the lower triangular part of the input matrix
|
||||||
|
*
|
||||||
|
* @param row[in] Row index of the input sparse matrix.
|
||||||
|
* @param col[in] Column index of the input sparse matrix.
|
||||||
|
* @param nz_size[in] Length of the non-zero elements.
|
||||||
|
* @param lnz_size[out] Length of the non-zero elements in the lower triangle
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||||
|
*
|
||||||
|
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param val Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||||
|
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||||
|
*
|
||||||
|
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param val Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||||
|
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||||
|
*
|
||||||
|
* @note The factorized lower and upper triangular matrixes are stored in the lower and upper triangular parts of the output matrix accordingly.
|
||||||
|
*
|
||||||
|
* @param row Row index of the input sparse matrix.
|
||||||
|
* @param col Column index of the input sparse matrix.
|
||||||
|
* @param val Non-zero values of the input sparse matrix.
|
||||||
|
* @param N Row/Column size of the sparse matrix.
|
||||||
|
* @param nz_size Length of the non-zero elements.
|
||||||
|
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||||
|
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _PRECONDITIONER_CUDA_H
|
||||||
1047
src/lib/preconditioner_eigen.cpp
Normal file
1047
src/lib/preconditioner_eigen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
159
src/lib/preconditioner_eigen.h
Normal file
159
src/lib/preconditioner_eigen.h
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _PRECONDITIONER_EIGEN_H
|
||||||
|
#define _PRECONDITIONER_EIGEN_H
|
||||||
|
|
||||||
|
#include "complex"
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
#include "Eigen/SparseCore"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the Cholesky decomposition and return the lower triangular matrix.
|
||||||
|
*
|
||||||
|
* @note This could serve as a direct solver.
|
||||||
|
*
|
||||||
|
* @param A The input matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||||
|
* @param L The output low triangular matrix
|
||||||
|
*/
|
||||||
|
void lcg_Cholesky(const Eigen::MatrixXd &A, Eigen::MatrixXd &L);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform the Cholesky decomposition and return the lower triangular matrix
|
||||||
|
*
|
||||||
|
* @note This could serve as a direct solver.
|
||||||
|
*
|
||||||
|
* @param[in] A The input matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||||
|
* @param L The output low triangular matrix
|
||||||
|
*/
|
||||||
|
void clcg_Cholesky(const Eigen::MatrixXcd &A, Eigen::MatrixXcd &L);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the inverse of a lower triangular matrix (Full rank only).
|
||||||
|
*
|
||||||
|
* @param L The operating lower triangle matrix
|
||||||
|
* @param Linv The inverted lower triangle matrix
|
||||||
|
*/
|
||||||
|
void lcg_invert_lower_triangle(const Eigen::MatrixXd &L, Eigen::MatrixXd &Linv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the inverse of an upper triangular matrix (Full rank only).
|
||||||
|
*
|
||||||
|
* @param U The operating upper triangle matrix
|
||||||
|
* @param Uinv The inverted upper triangle matrix
|
||||||
|
*/
|
||||||
|
void lcg_invert_upper_triangle(const Eigen::MatrixXd &U, Eigen::MatrixXd &Uinv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the inverse of a lower triangular matrix (Full rank only).
|
||||||
|
*
|
||||||
|
* @param L The operating lower triangle matrix
|
||||||
|
* @param Linv The inverted lower triangle matrix
|
||||||
|
*/
|
||||||
|
void clcg_invert_lower_triangle(const Eigen::MatrixXcd &L, Eigen::MatrixXcd &Linv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the inverse of an upper triangular matrix (Full rank only).
|
||||||
|
*
|
||||||
|
* @param U The operating upper triangle matrix
|
||||||
|
* @param Uinv The inverted upper triangle matrix
|
||||||
|
*/
|
||||||
|
void clcg_invert_upper_triangle(const Eigen::MatrixXcd &U, Eigen::MatrixXcd &Uinv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the incomplete Cholesky decomposition and return the lower triangular matrix
|
||||||
|
*
|
||||||
|
* @param[in] A The input sparse matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||||
|
* @param L The output lower triangular matrix
|
||||||
|
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||||
|
*/
|
||||||
|
void lcg_incomplete_Cholesky(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, size_t fill = 0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the incomplete Cholesky decomposition and return the lower triangular matrix
|
||||||
|
*
|
||||||
|
* @param[in] A The input sparse matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||||
|
* @param L The output lower triangular matrix
|
||||||
|
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_Cholesky(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, size_t fill = 0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the incomplete LU factorizations
|
||||||
|
*
|
||||||
|
* @param A The input sparse matrix. Must be full rank.
|
||||||
|
* @param L The output lower triangular matrix.
|
||||||
|
* @param U The output upper triangular matrix.
|
||||||
|
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||||
|
*/
|
||||||
|
void lcg_incomplete_LU(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, Eigen::SparseMatrix<double, Eigen::RowMajor> &U, size_t fill = 0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Calculate the incomplete LU factorizations
|
||||||
|
*
|
||||||
|
* @param A The input sparse matrix. Must be full rank.
|
||||||
|
* @param L The output lower triangular matrix.
|
||||||
|
* @param U The output upper triangular matrix.
|
||||||
|
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||||
|
*/
|
||||||
|
void clcg_incomplete_LU(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L,
|
||||||
|
Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, size_t fill = 0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the linear system Lx = B, in which L is a lower triangular matrix.
|
||||||
|
*
|
||||||
|
* @param L The input lower triangle matrix
|
||||||
|
* @param B The object vector
|
||||||
|
* @param X The solution vector
|
||||||
|
*/
|
||||||
|
void lcg_solve_lower_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &L, const Eigen::VectorXd &B, Eigen::VectorXd &X);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the linear system Ux = B, in which U is an upper triangular matrix.
|
||||||
|
*
|
||||||
|
* @param U The input upper triangle matrix
|
||||||
|
* @param B The object vector
|
||||||
|
* @param X The solution vector
|
||||||
|
*/
|
||||||
|
void lcg_solve_upper_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &U, const Eigen::VectorXd &B, Eigen::VectorXd &X);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the complex linear system Lx = B, in which L is a lower triangular matrix.
|
||||||
|
*
|
||||||
|
* @param L The input lower triangle matrix
|
||||||
|
* @param B The object vector
|
||||||
|
* @param X The solution vector
|
||||||
|
*/
|
||||||
|
void clcg_solve_lower_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Solve the complex linear system Ux = B, in which U is an upper triangular matrix.
|
||||||
|
*
|
||||||
|
* @param U The input upper triangle matrix
|
||||||
|
* @param B The object vector
|
||||||
|
* @param X The solution vector
|
||||||
|
*/
|
||||||
|
void clcg_solve_upper_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
|
||||||
|
|
||||||
|
|
||||||
|
#endif // _PRECONDITIONER_EIGEN_H
|
||||||
311
src/lib/solver.cpp
Normal file
311
src/lib/solver.cpp
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "solver.h"
|
||||||
|
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * @brief Construct a solver holding the parameters returned by
 * lcg_default_parameters(), a report interval of 1 and silent mode off.
 */
LCG_Solver::LCG_Solver()
    : param_(lcg_default_parameters()),
      inter_(1),
      silent_(false)
{
}
|
||||||
|
|
||||||
|
int LCG_Solver::Progress(const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int n_size, const int k)
|
||||||
|
{
|
||||||
|
if (inter_ > 0 && k%inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (converge <= param->epsilon)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the minimizing process with lcg_solver() and report the outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to lcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed, the solver name and elapsed time are written to
 * std::clog when er_throw is false, and the return code is passed to
 * lcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_Solver::Minimize(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000.0*(omp_get_wtime() - start);
#else
    const lcg_float costime = 1000.0*static_cast<double>(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case LCG_CG:
                std::clog << "Solver: CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_CGS:
                std::clog << "Solver: CGS. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_BICGSTAB:
                std::clog << "Solver: BICGSTAB. Times cost: " << costime << " ms" << std::endl;
                break;
            case LCG_BICGSTAB2:
                std::clog << "Solver: BICGSTAB2. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the preconditioned minimizing process with
 * lcg_solver_preconditioned() and report the outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to lcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed, the solver name and elapsed time are written to
 * std::clog when er_throw is false, and the return code is passed to
 * lcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_Solver::MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, _Progress, m, b, x_size, &param_, this, solver_id);

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000.0*(omp_get_wtime() - start);
#else
    const lcg_float costime = 1000.0*static_cast<double>(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case LCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the constrained minimizing process with lcg_solver_constrained()
 * and report the outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to lcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed, the solver name and elapsed time are written to
 * std::clog when er_throw is false, and the return code is passed to
 * lcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param low       Lower bound of the solution vector
 * @param hig       Higher bound of the solution vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_Solver::MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
    const lcg_float *hig, int x_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_constrained(_AxProduct, nullptr, m, b, low, hig, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_constrained(_AxProduct, _Progress, m, b, low, hig, x_size, &param_, this, solver_id);

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000.0*(omp_get_wtime() - start);
#else
    const lcg_float costime = 1000.0*static_cast<double>(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case LCG_PG:
                std::clog << "Solver: PG-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_SPG:
                std::clog << "Solver: SPG-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Construct a complex solver holding the parameters returned by
 * clcg_default_parameters(), a report interval of 1 and silent mode off.
 */
CLCG_Solver::CLCG_Solver()
    : param_(clcg_default_parameters()),
      inter_(1),
      silent_(false)
{
}
|
||||||
|
|
||||||
|
int CLCG_Solver::Progress(const lcg_complex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int k)
|
||||||
|
{
|
||||||
|
if (inter_ > 0 && k%inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (converge <= param->epsilon)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the complex minimizing process with clcg_solver() and report the
 * outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to clcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed, the solver name and elapsed time are written to
 * std::clog when er_throw is false, and the return code is passed to
 * clcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to clcg_error_str(); also suppresses the timing report
 */
void CLCG_Solver::Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = clcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = clcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000.0*(omp_get_wtime() - start);
#else
    const lcg_float costime = 1000.0*static_cast<double>(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case CLCG_BICG:
                std::clog << "Solver: Bi-CG. Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_BICG_SYM:
                std::clog << "Solver: Bi-CG (symmetrically accelerated). Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_CGS:
                std::clog << "Solver: CGS. Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_TFQMR:
                std::clog << "Solver: TFQMR. Times cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Times cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
}
|
||||||
285
src/lib/solver.h
Normal file
285
src/lib/solver.h
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _SOLVER_H
|
||||||
|
#define _SOLVER_H
|
||||||
|
|
||||||
|
#include "lcg.h"
|
||||||
|
#include "clcg.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class LCG_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
lcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LCG_Solver();
|
||||||
|
virtual ~LCG_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param a[in] Pointer of the multiplier
|
||||||
|
* @param b[out] Pointer of the product
|
||||||
|
* @param num Size of the array
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_Solver*>(instance)->AxProduct(a, b, num);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param a[in] Pointer of the multiplier
|
||||||
|
* @param b[out] Pointer of the product
|
||||||
|
* @param num Size of the array
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param a[in] Pointer of the multiplier
|
||||||
|
* @param b[out] Pointer of the product
|
||||||
|
* @param num Size of the array
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_Solver*>(instance)->MxProduct(a, b, num);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param a[in] Pointer of the multiplier
|
||||||
|
* @param b[out] Pointer of the product
|
||||||
|
* @param num Size of the array
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int n_size, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_Solver*>(instance)->Progress(m, converge, param, n_size, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int n_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_lcg_parameter(const lcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param x_size Size of the solution vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(lcg_float *m, const lcg_float *b, int x_size,
|
||||||
|
lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconitioned minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param x_size Size of the solution vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
|
||||||
|
lcg_solver_enum solver_id = LCG_PCG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the constrained minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param low Lower bound of the solution vector
|
||||||
|
* @param hig Higher bound of the solution vector
|
||||||
|
* @param x_size Size of the solution vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
|
||||||
|
const lcg_float *hig, int x_size, lcg_solver_enum solver_id = LCG_PG,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Complex linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class CLCG_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
clcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CLCG_Solver();
|
||||||
|
virtual ~CLCG_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param x_size Size of the array
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||||
|
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_Solver*>(instance)->AxProduct(x, prod_Ax, x_size, layout, conjugate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param x_size Size of the array
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax,
|
||||||
|
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const lcg_complex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_Solver*>(instance)->Progress(m, converge, param, n_size, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const lcg_complex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_clcg_parameter(const clcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param x_size Size of the solution vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
|
||||||
|
clcg_solver_enum solver_id = CLCG_CGS, bool verbose = true,
|
||||||
|
bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _SOLVER_H
|
||||||
414
src/lib/solver_cuda.cu
Normal file
414
src/lib/solver_cuda.cu
Normal file
@@ -0,0 +1,414 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "solver_cuda.h"
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
/**
 * @brief Construct a CUDA solver holding the parameters returned by
 * lcg_default_parameters(), a report interval of 1 and silent mode off.
 */
LCG_CUDA_Solver::LCG_CUDA_Solver()
    : param_(lcg_default_parameters()),
      inter_(1),
      silent_(false)
{
}
|
||||||
|
|
||||||
|
int LCG_CUDA_Solver::Progress(const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (inter_ > 0 && k%inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (converge <= param->epsilon)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_CUDA_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_CUDA_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_CUDA_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the minimizing process with lcg_solver_cuda() and report the outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to lcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed with clock(), the solver name and elapsed time
 * are written to std::clog when er_throw is false, and the return code is
 * passed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size argument forwarded to the solver
 * @param nz_size    Second size argument forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    const clock_t start = clock();
    const int ret = lcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
    const lcg_float costime = 1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case LCG_CG:
                std::clog << "Solver: CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_CGS:
                std::clog << "Solver: CGS. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the preconditioned minimizing process with
 * lcg_solver_preconditioned_cuda() and report the outcome.
 *
 * In silent mode the solver runs without a progress callback and without
 * timing; a negative return code is passed to lcg_error_str() with its second
 * argument fixed to true, whatever er_throw is.
 * Otherwise the call is timed with clock(), the solver name and elapsed time
 * are written to std::clog when er_throw is false, and the return code is
 * passed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size argument forwarded to the solver
 * @param nz_size    Second size argument forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    const clock_t start = clock();
    const int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Elapsed time in milliseconds. The tick difference is converted to double
    // before scaling so that the multiplication by 1000 cannot overflow clock_t.
    const lcg_float costime = 1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl; // terminate the '\r' progress line
        switch (solver_id)
        {
            case LCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the constrained solver via lcg_solver_constrained_cuda() and report the outcome.
 *
 * In silent mode the solver is called without a progress callback and without the
 * timing report; a negative return code is handed to lcg_error_str() with the throw
 * flag forced to true. Otherwise the call is timed with clock(), the solver name and
 * the elapsed time are written to std::clog (only when er_throw is false), and the
 * return code is handed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param low        Lower bound of the solution vector
 * @param hig        Higher bound of the solution vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void LCG_CUDA_Solver::MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
    const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id,
    bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int status = lcg_solver_constrained_cuda(_AxProduct, nullptr, x, b, low, hig, n_size, nz_size,
            &param_, this, cub_handle, cus_handle, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Default arguments cannot be omitted when the solver is reached through a
    // function pointer, so every argument is passed explicitly.
    const clock_t start = clock();
    const int ret = lcg_solver_constrained_cuda(_AxProduct, _Progress, x, b, low, hig, n_size, nz_size,
        &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Convert to double before scaling so the product cannot overflow a narrow clock_t.
    const lcg_float costime = 1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case LCG_PG:
                solver_name = "PG";
                break;
            default:
                break;
        }

        // The leading newline terminates the '\r'-style progress line.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Construct the solver with the library's default parameters,
 *        a report interval of 1 and progress reporting enabled.
 */
CLCG_CUDAF_Solver::CLCG_CUDAF_Solver()
    : param_(clcg_default_parameters()),
      inter_(1),
      silent_(false)
{
}
|
||||||
|
|
||||||
|
int CLCG_CUDAF_Solver::Progress(const cuComplex* m, const float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (inter_ > 0 && k%inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (converge <= param->epsilon)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDAF_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDAF_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDAF_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the complex solver via clcg_solver_cuda() and report the outcome.
 *
 * In silent mode the solver is called without a progress callback and without the
 * timing report; a negative return code is handed to lcg_error_str() with the throw
 * flag forced to true. Otherwise the call is timed with clock(), the solver name and
 * the elapsed time are written to std::clog (only when er_throw is false), and the
 * return code is handed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void CLCG_CUDAF_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): return codes of the complex solver are reported through
    // lcg_error_str(); confirm that this is the intended error table for clcg codes.
    if (silent_)
    {
        const int status = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size,
            &param_, this, cub_handle, cus_handle, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Default arguments cannot be omitted when the solver is reached through a
    // function pointer, so every argument is passed explicitly.
    const clock_t start = clock();
    const int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size,
        &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Convert to double before scaling so the product cannot overflow a narrow clock_t.
    const float costime = static_cast<float>(1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:
                solver_name = "BI-CG";
                break;
            case CLCG_BICG_SYM:
                solver_name = "BI-CG (symmetrically accelerated)";
                break;
            default:
                break;
        }

        // The leading newline terminates the '\r'-style progress line.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the preconditioned complex solver via clcg_solver_preconditioned_cuda() and report the outcome.
 *
 * In silent mode the solver is called without a progress callback and without the
 * timing report; a negative return code is handed to lcg_error_str() with the throw
 * flag forced to true. Otherwise the call is timed with clock(), the solver name and
 * the elapsed time are written to std::clog (only when er_throw is false), and the
 * return code is handed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void CLCG_CUDAF_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): return codes of the complex solver are reported through
    // lcg_error_str(); confirm that this is the intended error table for clcg codes.
    if (silent_)
    {
        const int status = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size,
            &param_, this, cub_handle, cus_handle, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Default arguments cannot be omitted when the solver is reached through a
    // function pointer, so every argument is passed explicitly.
    const clock_t start = clock();
    const int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size,
        &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Convert to double before scaling so the product cannot overflow a narrow clock_t.
    const float costime = static_cast<float>(1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_PCG:
                solver_name = "PCG";
                break;
            default:
                break;
        }

        // The leading newline terminates the '\r'-style progress line.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Construct the solver with the library's default parameters,
 *        a report interval of 1 and progress reporting enabled.
 */
CLCG_CUDA_Solver::CLCG_CUDA_Solver()
    : param_(clcg_default_parameters()),
      inter_(1),
      silent_(false)
{
}
|
||||||
|
|
||||||
|
int CLCG_CUDA_Solver::Progress(const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (inter_ > 0 && k%inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (converge <= param->epsilon)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDA_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDA_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_CUDA_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the complex solver via clcg_solver_cuda() and report the outcome.
 *
 * In silent mode the solver is called without a progress callback and without the
 * timing report; a negative return code is handed to lcg_error_str() with the throw
 * flag forced to true. Otherwise the call is timed with clock(), the solver name and
 * the elapsed time are written to std::clog (only when er_throw is false), and the
 * return code is handed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void CLCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): return codes of the complex solver are reported through
    // lcg_error_str(); confirm that this is the intended error table for clcg codes.
    if (silent_)
    {
        const int status = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size,
            &param_, this, cub_handle, cus_handle, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Default arguments cannot be omitted when the solver is reached through a
    // function pointer, so every argument is passed explicitly.
    const clock_t start = clock();
    const int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size,
        &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Convert to double before scaling so the product cannot overflow a narrow clock_t.
    const lcg_float costime = 1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:
                solver_name = "BI-CG";
                break;
            case CLCG_BICG_SYM:
                solver_name = "BI-CG (symmetrically accelerated)";
                break;
            default:
                break;
        }

        // The leading newline terminates the '\r'-style progress line.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the preconditioned complex solver via clcg_solver_preconditioned_cuda() and report the outcome.
 *
 * In silent mode the solver is called without a progress callback and without the
 * timing report; a negative return code is handed to lcg_error_str() with the throw
 * flag forced to true. Otherwise the call is timed with clock(), the solver name and
 * the elapsed time are written to std::clog (only when er_throw is false), and the
 * return code is handed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing report
 */
void CLCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): return codes of the complex solver are reported through
    // lcg_error_str(); confirm that this is the intended error table for clcg codes.
    if (silent_)
    {
        const int status = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size,
            &param_, this, cub_handle, cus_handle, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Default arguments cannot be omitted when the solver is reached through a
    // function pointer, so every argument is passed explicitly.
    const clock_t start = clock();
    const int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size,
        &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Convert to double before scaling so the product cannot overflow a narrow clock_t.
    const lcg_float costime = 1000.0*static_cast<double>(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_PCG:
                solver_name = "PCG";
                break;
            default:
                break;
        }

        // The leading newline terminates the '\r'-style progress line.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||||
545
src/lib/solver_cuda.h
Normal file
545
src/lib/solver_cuda.h
Normal file
@@ -0,0 +1,545 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _SOLVER_CUDA_H
|
||||||
|
#define _SOLVER_CUDA_H
|
||||||
|
|
||||||
|
#include "lcg_cuda.h"
|
||||||
|
#include "clcg_cuda.h"
|
||||||
|
#include "clcg_cudaf.h"
|
||||||
|
|
||||||
|
#ifdef LibLCG_CUDA
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class LCG_CUDA_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
lcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LCG_CUDA_Solver();
|
||||||
|
virtual ~LCG_CUDA_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_lcg_parameter(const lcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the constrained minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
|
||||||
|
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconditioned minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
|
||||||
|
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the constrained minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param low Lower bound of the solution vector
|
||||||
|
* @param hig Higher bound of the solution vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
|
||||||
|
const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_PG,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Complex linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class CLCG_CUDAF_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
clcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CLCG_CUDAF_Solver();
|
||||||
|
virtual ~CLCG_CUDAF_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const cuComplex* m, const float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const cuComplex* m, const float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_clcg_parameter(const clcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the constrained minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
|
||||||
|
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconditioned minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
|
||||||
|
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Complex linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class CLCG_CUDA_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
clcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CLCG_CUDA_Solver();
|
||||||
|
virtual ~CLCG_CUDA_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param oper_t Cusparse operator. This parameter is not needed by the algorithm. It is passed for CUDA usages
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param n_size Size of the solution
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_clcg_parameter(const clcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
|
||||||
|
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconditioned minimizing process
|
||||||
|
*
|
||||||
|
* @param cub_handle Handler of the CuBLAS library
|
||||||
|
* @param cus_handle Handler of the CuSparse library
|
||||||
|
* @param x Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param n_size Size of the solution vector
|
||||||
|
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usages
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
|
||||||
|
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // LibLCG_CUDA
|
||||||
|
|
||||||
|
#endif // _SOLVER_CUDA_H
|
||||||
365
src/lib/solver_eigen.cpp
Normal file
365
src/lib/solver_eigen.cpp
Normal file
@@ -0,0 +1,365 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "solver_eigen.h"
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "ctime"
|
||||||
|
#include "iostream"
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#ifdef LibLCG_OPENMP
|
||||||
|
#include "omp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Start from the library's default parameters, a report interval of 1 and
// reporting enabled (silent_ == false).
LCG_EIGEN_Solver::LCG_EIGEN_Solver()
    : param_(lcg_default_parameters()), inter_(1), silent_(false)
{
}
|
||||||
|
|
||||||
|
// Default progress monitor: rewrites one status line on std::clog and always
// returns 0. The line is printed when k is a multiple of the report interval
// (only if the interval is non-zero), or otherwise when the convergence value
// has dropped to param->epsilon or below. The solution m is not inspected.
int LCG_EIGEN_Solver::Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
    const int k)
{
    // Short-circuit order matters: param is only dereferenced when the
    // interval test did not already trigger the report.
    const bool on_interval = (inter_ > 0 && k%inter_ == 0);
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
|
||||||
|
|
||||||
|
void LCG_EIGEN_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_EIGEN_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LCG_EIGEN_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the non-preconditioned solver selected by solver_id on (m, b).
//
//   m         solution vector, updated by the solver
//   b         targeting vector
//   solver_id solver type
//   verbose   when true the final status is always passed to lcg_error_str();
//             otherwise only a negative status is
//   er_throw  forwarded to lcg_error_str(); when true the timing report on
//             std::clog is skipped as well
//
// In silent mode no progress callback is installed, nothing is timed, and a
// negative status is reported with the second argument of lcg_error_str()
// forced to true (presumably the throw flag -- confirm against lcg_error_str).
void LCG_EIGEN_Solver::Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is reached through function pointers, so every argument
    // is spelled out. Only the clock differs between the two build variants;
    // the solver call itself is shared so both variants always run the same solver.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const double end = omp_get_wtime();
    const lcg_float costime = 1000*(end-start);
#else
    const clock_t end = clock();
    const lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *header;
        switch (solver_id)
        {
            case LCG_CG:
                header = "Solver: CG. Time cost: ";
                break;
            case LCG_CGS:
                header = "Solver: CGS. Time cost: ";
                break;
            case LCG_BICGSTAB:
                header = "Solver: BICGSTAB. Times cost: ";
                break;
            case LCG_BICGSTAB2:
                header = "Solver: BICGSTAB2. Time cost: ";
                break;
            default:
                header = "Solver: Unknown. Time cost: ";
                break;
        }

        // The leading std::endl terminates the "\r"-rewritten progress line.
        std::clog << std::endl;
        std::clog << header << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
// Run the preconditioned solver selected by solver_id on (m, b), using
// _MxProduct as the preconditioning callback.
//
//   m         solution vector, updated by the solver
//   b         targeting vector
//   solver_id solver type
//   verbose   when true the final status is always passed to lcg_error_str();
//             otherwise only a negative status is
//   er_throw  forwarded to lcg_error_str(); when true the timing report on
//             std::clog is skipped as well
//
// In silent mode no progress callback is installed, nothing is timed, and a
// negative status is reported with the second argument of lcg_error_str()
// forced to true (presumably the throw flag -- confirm against lcg_error_str).
void LCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is reached through function pointers, so every argument
    // is spelled out. Only the clock differs between the two build variants.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    // Single call site for both build variants: the build without OpenMP used
    // to call lcg_solver_eigen() here, which silently ignored the preconditioner.
    const int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const double end = omp_get_wtime();
    const lcg_float costime = 1000*(end-start);
#else
    const clock_t end = clock();
    const lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *header;
        switch (solver_id)
        {
            case LCG_PCG:
                header = "Solver: PCG. Time cost: ";
                break;
            default:
                header = "Solver: Unknown. Time cost: ";
                break;
        }

        // The leading std::endl terminates the "\r"-rewritten progress line.
        std::clog << std::endl;
        std::clog << header << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
// Run the bound-constrained solver selected by solver_id on (m, B).
//
//   m         solution vector, updated by the solver
//   B         targeting vector
//   low       lower bound of the solution vector
//   hig       higher bound of the solution vector
//   solver_id solver type
//   verbose   when true the final status is always passed to lcg_error_str();
//             otherwise only a negative status is
//   er_throw  forwarded to lcg_error_str(); when true the timing report on
//             std::clog is skipped as well
//
// In silent mode no progress callback is installed, nothing is timed, and a
// negative status is reported with the second argument of lcg_error_str()
// forced to true (presumably the throw flag -- confirm against lcg_error_str).
void LCG_EIGEN_Solver::MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
    const Eigen::VectorXd &hig, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = lcg_solver_constrained_eigen(_AxProduct, nullptr, m, B, low, hig, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is reached through function pointers, so every argument
    // is spelled out. Only the clock differs between the two build variants;
    // the solver call itself is shared so both variants always run the same solver.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_constrained_eigen(_AxProduct, _Progress, m, B, low, hig, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const double end = omp_get_wtime();
    const lcg_float costime = 1000*(end-start);
#else
    const clock_t end = clock();
    const lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *header;
        switch (solver_id)
        {
            case LCG_PG:
                header = "Solver: PG-CG. Time cost: ";
                break;
            case LCG_SPG:
                header = "Solver: SPG-CG. Time cost: ";
                break;
            default:
                header = "Solver: Unknown. Time cost: ";
                break;
        }

        // The leading std::endl terminates the "\r"-rewritten progress line.
        std::clog << std::endl;
        std::clog << header << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Start from the library's default complex-solver parameters, a report
// interval of 1 and reporting enabled (silent_ == false).
CLCG_EIGEN_Solver::CLCG_EIGEN_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
|
||||||
|
|
||||||
|
// Default progress monitor: rewrites one status line on std::clog and always
// returns 0. The line is printed when k is a multiple of the report interval
// (only if the interval is non-zero), or otherwise when the convergence value
// has dropped to param->epsilon or below. The solution m is not inspected.
int CLCG_EIGEN_Solver::Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
    const int k)
{
    // Short-circuit order matters: param is only dereferenced when the
    // interval test did not already trigger the report.
    const bool on_interval = (inter_ > 0 && (k%inter_) == 0);
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
|
||||||
|
|
||||||
|
void CLCG_EIGEN_Solver::silent()
|
||||||
|
{
|
||||||
|
silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_EIGEN_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||||
|
{
|
||||||
|
param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CLCG_EIGEN_Solver::set_report_interval(unsigned int inter)
|
||||||
|
{
|
||||||
|
inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the non-preconditioned complex solver selected by solver_id on (m, b).
//
//   m         solution vector, updated by the solver
//   b         targeting vector
//   solver_id solver type
//   verbose   when true the final status is always passed to clcg_error_str();
//             otherwise only a negative status is
//   er_throw  forwarded to clcg_error_str(); when true the timing report on
//             std::clog is skipped as well
//
// In silent mode no progress callback is installed, nothing is timed, and a
// negative status is reported with the second argument of clcg_error_str()
// forced to true (presumably the throw flag -- confirm against clcg_error_str).
void CLCG_EIGEN_Solver::Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = clcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is reached through function pointers, so every argument
    // is spelled out. Only the clock differs between the two build variants;
    // the solver call itself is shared so both variants always run the same solver.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = clcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const double end = omp_get_wtime();
    const lcg_float costime = 1000*(end-start);
#else
    const clock_t end = clock();
    const lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *header;
        switch (solver_id)
        {
            case CLCG_BICG:
                header = "Solver: BI-CG. Time cost: ";
                break;
            case CLCG_BICG_SYM:
                header = "Solver: BI-CG (symmetrically accelerated). Time cost: ";
                break;
            case CLCG_CGS:
                header = "Solver: CGS. Time cost: ";
                break;
            case CLCG_TFQMR:
                header = "Solver: TFQMR. Times cost: ";
                break;
            case CLCG_PCG:
                header = "Solver: PCG. Times cost: ";
                break;
            case CLCG_PBICG:
                header = "Solver: PBICG. Times cost: ";
                break;
            default:
                header = "Solver: Unknown. Time cost: ";
                break;
        }

        // The leading std::endl terminates the "\r"-rewritten progress line.
        std::clog << std::endl;
        std::clog << header << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
}
|
||||||
|
|
||||||
|
// Run the preconditioned complex solver selected by solver_id on (m, b),
// using _MxProduct as the preconditioning callback.
//
//   m         solution vector, updated by the solver
//   b         targeting vector
//   solver_id solver type
//   verbose   when true the final status is always passed to clcg_error_str();
//             otherwise only a negative status is
//   er_throw  forwarded to clcg_error_str(); when true the timing report on
//             std::clog is skipped as well
//
// In silent mode no progress callback is installed, nothing is timed, and a
// negative status is reported with the second argument of clcg_error_str()
// forced to true (presumably the throw flag -- confirm against clcg_error_str).
void CLCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        const int ret = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is reached through function pointers, so every argument
    // is spelled out. Only the clock differs between the two build variants;
    // the solver call itself is shared so both variants always run the same solver.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const double end = omp_get_wtime();
    const lcg_float costime = 1000*(end-start);
#else
    const clock_t end = clock();
    const lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *header;
        switch (solver_id)
        {
            case CLCG_PCG:
                header = "Solver: PCG. Times cost: ";
                break;
            case CLCG_PBICG:
                header = "Solver: PBICG. Times cost: ";
                break;
            default:
                header = "Solver: Unknown. Time cost: ";
                break;
        }

        // The leading std::endl terminates the "\r"-rewritten progress line.
        std::clog << std::endl;
        std::clog << header << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
}
|
||||||
308
src/lib/solver_eigen.h
Normal file
308
src/lib/solver_eigen.h
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _SOLVER_EIGEN_H
|
||||||
|
#define _SOLVER_EIGEN_H
|
||||||
|
|
||||||
|
#include "lcg_eigen.h"
|
||||||
|
#include "clcg_eigen.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class LCG_EIGEN_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
lcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LCG_EIGEN_Solver();
|
||||||
|
virtual ~LCG_EIGEN_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->AxProduct(x, prod_Ax);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->MxProduct(x, prod_Mx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int k)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->Progress(m, converge, param, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
|
||||||
|
const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters of the algorithms
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_lcg_parameter(const lcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_CG,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconitioned minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_PCG,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the constrained minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param low Lower bound of the solution vector
|
||||||
|
* @param hig Higher bound of the solution vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
|
||||||
|
const Eigen::VectorXd &hig, lcg_solver_enum solver_id = LCG_PG, bool verbose = true,
|
||||||
|
bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Complex linear conjugate gradient solver class
|
||||||
|
*/
|
||||||
|
class CLCG_EIGEN_Solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
clcg_para param_;
|
||||||
|
unsigned int inter_;
|
||||||
|
bool silent_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CLCG_EIGEN_Solver();
|
||||||
|
virtual ~CLCG_EIGEN_Solver(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
static void _AxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_EIGEN_Solver*>(instance)->AxProduct(x, prod_Ax, layout, conjugate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of A*x
|
||||||
|
*
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Ax[out] Pointer of the product
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Whether to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
virtual void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
static void _MxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<CLCG_EIGEN_Solver*>(instance)->MxProduct(x, prod_Mx, layout, conjugate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the product of M^-1*x
|
||||||
|
*
|
||||||
|
* @param x[in] Pointer of the multiplier
|
||||||
|
* @param prod_Mx[out] Pointer of the product
|
||||||
|
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
* @param conjugate Whether to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||||
|
*/
|
||||||
|
virtual void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface of the virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param instance User data sent to identify the function address
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
static int _Progress(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
    const clcg_para *param, const int k)
{
    // The opaque handle is the solver object itself; hand the monitoring
    // request to its virtual Progress() and pass the status straight back.
    CLCG_EIGEN_Solver *solver = reinterpret_cast<CLCG_EIGEN_Solver*>(instance);
    return solver->Progress(m, converge, param, k);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function of the process monitoring
|
||||||
|
*
|
||||||
|
* @param m Pointer of the current solution
|
||||||
|
* @param converge Current value of the convergence
|
||||||
|
* @param param Pointer of the parameters used in the algorithms
|
||||||
|
* @param k Current iteration times
|
||||||
|
* @return int Status of the process
|
||||||
|
*/
|
||||||
|
virtual int Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
|
||||||
|
const int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Do not report any processes
|
||||||
|
*/
|
||||||
|
void silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the interval to run the process monitoring function
|
||||||
|
*
|
||||||
|
* @param inter the interval
|
||||||
|
*/
|
||||||
|
void set_report_interval(unsigned int inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the parameters used by the solver
|
||||||
|
*
|
||||||
|
* @param in_param the input parameters
|
||||||
|
*/
|
||||||
|
void set_clcg_parameter(const clcg_para &in_param);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_CGS,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the preconditioned minimizing process
|
||||||
|
*
|
||||||
|
* @param m Pointer of the solution vector
|
||||||
|
* @param b Pointer of the targeting vector
|
||||||
|
* @param solver_id Solver type
|
||||||
|
* @param verbose Report more information of the full process
|
||||||
|
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||||
|
*/
|
||||||
|
void MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_PBICG,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _SOLVER_EIGEN_H
|
||||||
253
src/lib/util.cpp
Normal file
253
src/lib/util.cpp
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
#include "exception"
|
||||||
|
#include "stdexcept"
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Hand back a copy of the library-wide default parameter set (defparam).
lcg_para lcg_default_parameters()
{
    return defparam;
}
|
||||||
|
|
||||||
|
// Translate a solver name into the matching lcg_solver_enum value.
// Throws std::invalid_argument when the name is not one of the known solvers.
lcg_solver_enum lcg_select_solver(std::string slr_char)
{
    if (slr_char == "LCG_CG") return LCG_CG;
    if (slr_char == "LCG_PCG") return LCG_PCG;
    if (slr_char == "LCG_CGS") return LCG_CGS;
    if (slr_char == "LCG_BICGSTAB") return LCG_BICGSTAB;
    if (slr_char == "LCG_BICGSTAB2") return LCG_BICGSTAB2;
    if (slr_char == "LCG_PG") return LCG_PG;
    if (slr_char == "LCG_SPG") return LCG_SPG;

    throw std::invalid_argument("Invalid solver type.");
}
|
||||||
|
|
||||||
|
void lcg_error_str(int er_index, bool er_throw)
|
||||||
|
{
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
|
||||||
|
std::cerr << "Success! ";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
|
||||||
|
std::cerr << "Fail! ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
std::cerr << "\033[1m\033[32mSuccess! ";
|
||||||
|
else
|
||||||
|
std::cerr << "\033[1m\033[31mFail! ";
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string err_str;
|
||||||
|
switch (er_index)
|
||||||
|
{
|
||||||
|
case LCG_SUCCESS:
|
||||||
|
err_str = "Iteration reached convergence."; break;
|
||||||
|
case LCG_STOP:
|
||||||
|
err_str = "Iteration is stopped by the progress evaluation function."; break;
|
||||||
|
case LCG_ALREADY_OPTIMIZIED:
|
||||||
|
err_str = "The variables are already optimized."; break;
|
||||||
|
case LCG_UNKNOWN_ERROR:
|
||||||
|
err_str = "Unknown error."; break;
|
||||||
|
case LCG_INVILAD_VARIABLE_SIZE:
|
||||||
|
err_str = "The size of the variables is negative."; break;
|
||||||
|
case LCG_INVILAD_MAX_ITERATIONS:
|
||||||
|
err_str = "The maximal iteration times can't be negative."; break;
|
||||||
|
case LCG_INVILAD_EPSILON:
|
||||||
|
err_str = "The epsilon is not in the range (0, 1)."; break;
|
||||||
|
case LCG_INVILAD_RESTART_EPSILON:
|
||||||
|
err_str = "The restart threshold can't be negative."; break;
|
||||||
|
case LCG_REACHED_MAX_ITERATIONS:
|
||||||
|
err_str = "The maximal iteration has been reached."; break;
|
||||||
|
case LCG_NULL_PRECONDITION_MATRIX:
|
||||||
|
err_str = "The precondition matrix can't be null."; break;
|
||||||
|
case LCG_NAN_VALUE:
|
||||||
|
err_str = "The model values are NaN."; break;
|
||||||
|
case LCG_INVALID_POINTER:
|
||||||
|
err_str = "Invalid pointer."; break;
|
||||||
|
case LCG_INVALID_LAMBDA:
|
||||||
|
err_str = "Invalid value for lambda."; break;
|
||||||
|
case LCG_INVALID_SIGMA:
|
||||||
|
err_str = "Invalid value for sigma."; break;
|
||||||
|
case LCG_INVALID_BETA:
|
||||||
|
err_str = "Invalid value for beta."; break;
|
||||||
|
case LCG_INVALID_MAXIM:
|
||||||
|
err_str = "Invalid value for maxi_m."; break;
|
||||||
|
case LCG_SIZE_NOT_MATCH:
|
||||||
|
err_str = "The sizes of solution and target do not match."; break;
|
||||||
|
default:
|
||||||
|
err_str = "Unknown error."; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (er_throw && er_index < 0) throw std::runtime_error(err_str.c_str());
|
||||||
|
else std::cerr << err_str;
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
std::cerr << "\033[0m" << std::endl;
|
||||||
|
else
|
||||||
|
std::cerr << "\033[0m" << std::endl;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Hand back a copy of the library-wide default complex-solver parameter set (defparam2).
clcg_para clcg_default_parameters()
{
    return defparam2;
}
|
||||||
|
|
||||||
|
// Translate a solver name into the matching clcg_solver_enum value.
//
// Every enumerator declared in clcg_solver_enum is accepted by its own
// spelling, including CLCG_BICGSTAB, CLCG_PCG and CLCG_PBICG which were
// previously rejected although they are valid solver identifiers.
// Throws std::invalid_argument when the name is not a known solver.
clcg_solver_enum clcg_select_solver(std::string slr_char)
{
    if (slr_char == "CLCG_BICG") return CLCG_BICG;
    if (slr_char == "CLCG_BICG_SYM") return CLCG_BICG_SYM;
    if (slr_char == "CLCG_CGS") return CLCG_CGS;
    if (slr_char == "CLCG_BICGSTAB") return CLCG_BICGSTAB;
    if (slr_char == "CLCG_TFQMR") return CLCG_TFQMR;
    if (slr_char == "CLCG_PCG") return CLCG_PCG;
    if (slr_char == "CLCG_PBICG") return CLCG_PBICG;

    throw std::invalid_argument("Invalid solver type.");
}
|
||||||
|
|
||||||
|
void clcg_error_str(int er_index, bool er_throw)
|
||||||
|
{
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
|
||||||
|
std::cerr << "Success! ";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
|
||||||
|
std::cerr << "Fail! ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
std::cerr << "\033[1m\033[32mSuccess! ";
|
||||||
|
else
|
||||||
|
std::cerr << "\033[1m\033[31mFail! ";
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string err_str;
|
||||||
|
switch (er_index)
|
||||||
|
{
|
||||||
|
case CLCG_SUCCESS:
|
||||||
|
err_str = "Iteration reached convergence."; break;
|
||||||
|
case CLCG_STOP:
|
||||||
|
err_str = "Iteration is stopped by the progress evaluation function."; break;
|
||||||
|
case CLCG_ALREADY_OPTIMIZIED:
|
||||||
|
err_str = "The variables are already optimized."; break;
|
||||||
|
case CLCG_UNKNOWN_ERROR:
|
||||||
|
err_str = "Unknown error."; break;
|
||||||
|
case CLCG_INVILAD_VARIABLE_SIZE:
|
||||||
|
err_str = "The size of the variables is negative."; break;
|
||||||
|
case CLCG_INVILAD_MAX_ITERATIONS:
|
||||||
|
err_str = "The maximal iteration times is negative."; break;
|
||||||
|
case CLCG_INVILAD_EPSILON:
|
||||||
|
err_str = "The epsilon is not in the range (0, 1)."; break;
|
||||||
|
case CLCG_REACHED_MAX_ITERATIONS:
|
||||||
|
err_str = "The maximal iteration has been reached."; break;
|
||||||
|
case CLCG_NAN_VALUE:
|
||||||
|
err_str = "The model values are NaN."; break;
|
||||||
|
case CLCG_INVALID_POINTER:
|
||||||
|
err_str = "Invalid pointer."; break;
|
||||||
|
case CLCG_SIZE_NOT_MATCH:
|
||||||
|
err_str = "The sizes of the solution and target do not match."; break;
|
||||||
|
case CLCG_UNKNOWN_SOLVER:
|
||||||
|
err_str = "Unknown solver."; break;
|
||||||
|
default:
|
||||||
|
err_str = "Unknown error."; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (er_throw && er_index < 0) throw std::runtime_error(err_str.c_str());
|
||||||
|
else std::cerr << err_str;
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (!er_throw)
|
||||||
|
{
|
||||||
|
if (er_index >= 0)
|
||||||
|
std::cerr << "\033[0m" << std::endl;
|
||||||
|
else
|
||||||
|
std::cerr << "\033[0m" << std::endl;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
308
src/lib/util.h
Normal file
308
src/lib/util.h
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _LCG_UTIL_H
|
||||||
|
#define _LCG_UTIL_H
|
||||||
|
|
||||||
|
#include "string"
|
||||||
|
#include "algebra.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Types of method that could be recognized by the lcg_solver() function.
|
||||||
|
*/
|
||||||
|
enum lcg_solver_enum
{
    LCG_CG,        ///< Conjugate gradient method.
    LCG_PCG,       ///< Preconditioned conjugate gradient method.
    LCG_CGS,       ///< Conjugate gradient squared method.
    LCG_BICGSTAB,  ///< Biconjugate gradient stabilized method.
    LCG_BICGSTAB2, ///< Biconjugate gradient stabilized method with restart.
    /// Conjugate gradient method with projected gradient for inequality constraints.
    /// This algorithm comes without non-monotonic line search for the step length.
    LCG_PG,
    /// Conjugate gradient method with spectral projected gradient for inequality constraints.
    /// This algorithm comes with non-monotonic line search for the step length.
    LCG_SPG,
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return value of the lcg_solver() function
|
||||||
|
*/
|
||||||
|
enum lcg_return_enum
{
    // Non-negative values: the run ended without an error.

    /// The solver function terminated successfully.
    LCG_SUCCESS = 0,
    /// The iteration reached convergence (same value as LCG_SUCCESS).
    LCG_CONVERGENCE = 0,
    /// The iteration is stopped by the monitoring function.
    LCG_STOP,
    /// The initial solution is already optimized.
    LCG_ALREADY_OPTIMIZIED,

    // Negative values: an error occurred.

    /// Unknown error.
    LCG_UNKNOWN_ERROR = -1024,
    /// The variable size is negative.
    LCG_INVILAD_VARIABLE_SIZE,
    /// The maximal iteration times is negative.
    LCG_INVILAD_MAX_ITERATIONS,
    /// The epsilon is negative.
    LCG_INVILAD_EPSILON,
    /// The restart epsilon is negative.
    LCG_INVILAD_RESTART_EPSILON,
    /// Iteration reached maximal limit.
    LCG_REACHED_MAX_ITERATIONS,
    /// Null precondition matrix.
    LCG_NULL_PRECONDITION_MATRIX,
    /// Nan value.
    LCG_NAN_VALUE,
    /// Invalid pointer.
    LCG_INVALID_POINTER,
    /// Invalid range for lambda.
    LCG_INVALID_LAMBDA,
    /// Invalid range for sigma.
    LCG_INVALID_SIGMA,
    /// Invalid range for beta.
    LCG_INVALID_BETA,
    /// Invalid range for maxi_m.
    LCG_INVALID_MAXIM,
    /// Sizes of m and B do not match.
    LCG_SIZE_NOT_MATCH,
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Parameters of the conjugate gradient methods.
|
||||||
|
*/
|
||||||
|
struct lcg_para
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Maximal iteration times. The process will continue till the convergence is met
|
||||||
|
* if this option is set to zero (default).
|
||||||
|
*/
|
||||||
|
int max_iterations;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Epsilon for convergence test.
|
||||||
|
* This parameter determines the accuracy with which the solution is to be
|
||||||
|
* found. A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or
|
||||||
|
* sqrt(||g||)/N <= epsilon for the lcg_solver() function, where ||.|| denotes
|
||||||
|
* the Euclidean (L2) norm. The default value of epsilon is 1e-8.
|
||||||
|
*/
|
||||||
|
lcg_float epsilon;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
|
||||||
|
* The default value is false which means the gradient based evaluating method is used.
|
||||||
|
* The AMD based method will be used if this variable is set to true. This parameter is only
|
||||||
|
* applied to the non-constrained methods.
|
||||||
|
*/
|
||||||
|
int abs_diff;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Restart epsilon for the LCG_BICGSTAB2 algorithm. The default value is 1e-6
|
||||||
|
*/
|
||||||
|
lcg_float restart_epsilon;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initial step length for the project gradient method. The default is 1.0
|
||||||
|
*/
|
||||||
|
lcg_float step;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* multiplier for updating solutions with the spectral projected gradient method. The range of
|
||||||
|
* this variable is (0, 1). The default is given as 0.95
|
||||||
|
*/
|
||||||
|
lcg_float sigma;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* descending ratio for conducting the non-monotonic linear search. The range of
|
||||||
|
* this variable is (0, 1). The default is given as 0.9
|
||||||
|
*/
|
||||||
|
lcg_float beta;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximal record times of the objective values for the SPG method. The method use the
|
||||||
|
* objective values from the most recent maxi_m times to preform the non-monotonic linear search.
|
||||||
|
* The default value is 10.
|
||||||
|
*/
|
||||||
|
int maxi_m;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parameter for conjugate gradient methods
|
||||||
|
*/
|
||||||
|
static const lcg_para defparam = {0, 1e-8, 0, 1e-6, 1.0, 0.95, 0.9, 10};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return a lcg_para type instance with default values.
|
||||||
|
*
|
||||||
|
* Users can use this function to get default parameters' value for the conjugate gradient methods.
|
||||||
|
*
|
||||||
|
* @return A lcg_para type instance.
|
||||||
|
*/
|
||||||
|
lcg_para lcg_default_parameters();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Select a type of solver according to the name
|
||||||
|
*
|
||||||
|
* @param[in] slr_char Name of the solver
|
||||||
|
*
|
||||||
|
* @return The lcg solver enum.
|
||||||
|
*/
|
||||||
|
lcg_solver_enum lcg_select_solver(std::string slr_char);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Display or throw out a string explanation for the lcg_solver() function's return values.
|
||||||
|
*
|
||||||
|
* @param[in] er_index The error index returned by the lcg_solver() function.
|
||||||
|
* @param[in] er_throw throw out a char string of the explanation.
|
||||||
|
*
|
||||||
|
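* @note Nothing is returned: the explanation is written to std::cerr, or thrown as a std::runtime_error when er_throw is true and the index is negative.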
|
||||||
|
*/
|
||||||
|
void lcg_error_str(int er_index, bool er_throw = false);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Types of method that could be recognized by the clcg_solver() function.
|
||||||
|
*/
|
||||||
|
enum clcg_solver_enum
{
    CLCG_BICG,     ///< Jacob's Bi-Conjugate Gradient Method
    CLCG_BICG_SYM, ///< Bi-Conjugate Gradient Method accelerated for complex symmetric A
    CLCG_CGS,      ///< Conjugate Gradient Squared Method with real coefficients.
    CLCG_BICGSTAB, ///< Biconjugate gradient stabilized method.
    // Quasi-Minimal Residual Method (currently disabled)
    //CLCG_QMR,
    CLCG_TFQMR,    ///< Transpose Free Quasi-Minimal Residual Method
    CLCG_PCG,      ///< Preconditioned conjugate gradient
    CLCG_PBICG,    ///< Preconditioned Bi-Conjugate Gradient Method
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return value of the clcg_solver() function
|
||||||
|
*/
|
||||||
|
enum clcg_return_enum
{
    // Non-negative values: the run ended without an error.

    /// The solver function terminated successfully.
    CLCG_SUCCESS = 0,
    /// The iteration reached convergence (same value as CLCG_SUCCESS).
    CLCG_CONVERGENCE = 0,
    /// The iteration is stopped by the monitoring function.
    CLCG_STOP,
    /// The initial solution is already optimized.
    CLCG_ALREADY_OPTIMIZIED,

    // Negative values: an error occurred.

    /// Unknown error.
    CLCG_UNKNOWN_ERROR = -1024,
    /// The variable size is negative.
    CLCG_INVILAD_VARIABLE_SIZE,
    /// The maximal iteration times is negative.
    CLCG_INVILAD_MAX_ITERATIONS,
    /// The epsilon is negative.
    CLCG_INVILAD_EPSILON,
    /// Iteration reached maximal limit.
    CLCG_REACHED_MAX_ITERATIONS,
    /// Nan value.
    CLCG_NAN_VALUE,
    /// Invalid pointer.
    CLCG_INVALID_POINTER,
    /// Sizes of m and B do not match.
    CLCG_SIZE_NOT_MATCH,
    /// Unknown solver.
    CLCG_UNKNOWN_SOLVER,
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Parameters of the conjugate gradient methods.
|
||||||
|
*/
|
||||||
|
struct clcg_para
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Maximal iteration times. The process will continue till the convergence is met
|
||||||
|
* if this option is set to zero (default).
|
||||||
|
*/
|
||||||
|
int max_iterations;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Epsilon for convergence test.
|
||||||
|
* This parameter determines the accuracy with which the solution is to be found.
|
||||||
|
* A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or sqrt(||g||)/N
|
||||||
|
* <= epsilon for the lcg_solver() function, where ||.|| denotes the Euclidean (L2) norm.
|
||||||
|
* The default value of epsilon is 1e-8. For box-constrained methods,the convergence test
|
||||||
|
* is implemented using ||P(m-g) - m|| <= epsilon, in which P is the projector that
|
||||||
|
* transfers m into the constrained domain.
|
||||||
|
*/
|
||||||
|
lcg_float epsilon;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
|
||||||
|
* The default value is false which means the gradient based evaluating method is used.
|
||||||
|
* The AMD based method will be used if this variable is set to true. This parameter is only
|
||||||
|
* applied to the non-constrained methods.
|
||||||
|
*/
|
||||||
|
int abs_diff;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parameter for conjugate gradient methods
|
||||||
|
*/
|
||||||
|
static const clcg_para defparam2 = {0, 1e-8, 0};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return a clcg_para type instance with default values.
|
||||||
|
*
|
||||||
|
* Users can use this function to get default parameters' value for the complex conjugate gradient methods.
|
||||||
|
*
|
||||||
|
* @return A clcg_para type instance.
|
||||||
|
*/
|
||||||
|
clcg_para clcg_default_parameters();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Select a type of solver according to the name
|
||||||
|
*
|
||||||
|
* @param[in] slr_char Name of the solver
|
||||||
|
*
|
||||||
|
* @return The clcg solver enum.
|
||||||
|
*/
|
||||||
|
clcg_solver_enum clcg_select_solver(std::string slr_char);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Display or throw out a string explanation for the clcg_solver() function's return values.
|
||||||
|
*
|
||||||
|
* @param[in] er_index The error index returned by the clcg_solver() function.
|
||||||
|
* @param[in] er_throw throw out a char string of the explanation.
|
||||||
|
*
|
||||||
|
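* @note Nothing is returned: the explanation is written to std::cerr, or thrown as a std::runtime_error when er_throw is true and the index is negative.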
|
||||||
|
*/
|
||||||
|
void clcg_error_str(int er_index, bool er_throw = false);
|
||||||
|
|
||||||
|
#endif // _LCG_UTIL_H
|
||||||
167
src/sample/sample1.cpp
Normal file
167
src/sample/sample1.cpp
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cmath"
|
||||||
|
#include "iostream"
|
||||||
|
#include "../lib/lcg.h"
|
||||||
|
|
||||||
|
#define M 100
|
||||||
|
#define N 80
|
||||||
|
|
||||||
|
/**
 * @brief Largest absolute element-wise difference between two arrays.
 *
 * The absolute value is taken with std::fabs rather than sqrt(d*d), so very
 * small or very large differences are not lost to underflow/overflow of the
 * intermediate square.
 *
 * @param a    First array
 * @param b    Second array
 * @param size Number of elements compared in both arrays
 *
 * @return The maximal |a[i] - b[i]|, or -1 when size is not positive.
 */
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float max = -1;
    for (int i = 0; i < size; i++)
    {
        max = lcg_max(std::fabs(a[i] - b[i]), max);
    }
    return max;
}
|
||||||
|
|
||||||
|
// Plain two-dimensional array used as the kernel matrix
|
||||||
|
lcg_float **kernel;
|
||||||
|
// Array holding intermediate results
|
||||||
|
lcg_float *tmp_arr;
|
||||||
|
// Preconditioning matrix
|
||||||
|
lcg_float *p;
|
||||||
|
|
||||||
|
// 计算核矩阵乘向量的乘积
|
||||||
|
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
|
||||||
|
{
|
||||||
|
lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal);
|
||||||
|
lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CalMx(void* instance, const lcg_float* x, lcg_float* prod_Mx, const int n_s)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n_s; i++)
|
||||||
|
{
|
||||||
|
prod_Mx[i] = p[i]*x[i];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//定义共轭梯度监控函数
|
||||||
|
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
kernel = lcg_malloc(M, N);
|
||||||
|
tmp_arr = lcg_malloc(M);
|
||||||
|
p = lcg_malloc(N);
|
||||||
|
|
||||||
|
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
|
||||||
|
|
||||||
|
// 生成一组正演解
|
||||||
|
lcg_float *fm = lcg_malloc(N);
|
||||||
|
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
lcg_float *B = lcg_malloc(N);
|
||||||
|
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
|
||||||
|
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-7;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
lcg_float *m = lcg_malloc(N);
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
|
||||||
|
// 声明一组预优因子
|
||||||
|
lcg_float diag;
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
diag = 0.0;
|
||||||
|
for (size_t j = 0; j < M; j++)
|
||||||
|
{
|
||||||
|
diag += kernel[j][i]*kernel[j][i];
|
||||||
|
}
|
||||||
|
p[i] = 1.0/diag;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 约束解的范围
|
||||||
|
lcg_float *low = lcg_malloc(N);
|
||||||
|
lcg_float *hig = lcg_malloc(N);
|
||||||
|
lcg_vecset(low, 1.0, N);
|
||||||
|
lcg_vecset(hig, 2.0, N);
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
std::clog << "solver: cg" << std::endl;
|
||||||
|
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: pcg" << std::endl;
|
||||||
|
ret = lcg_solver_preconditioned(CalAx, CalMx, Prog, m, B, N, &self_para, NULL, LCG_PCG);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: cgs" << std::endl;
|
||||||
|
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CGS);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: bicgstab" << std::endl;
|
||||||
|
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: bicgstab2" << std::endl;
|
||||||
|
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB2);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: pg" << std::endl;
|
||||||
|
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_PG);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
std::clog << "solver: spg" << std::endl;
|
||||||
|
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_SPG);
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_free(kernel, M);
|
||||||
|
lcg_free(tmp_arr);
|
||||||
|
lcg_free(fm);
|
||||||
|
lcg_free(B);
|
||||||
|
lcg_free(m);
|
||||||
|
lcg_free(p);
|
||||||
|
lcg_free(low);
|
||||||
|
lcg_free(hig);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
318
src/sample/sample10.cu
Normal file
318
src/sample/sample10.cu
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/solver_cuda.h"
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
cuDoubleComplex one = {1.0, 0.0};
|
||||||
|
cuDoubleComplex zero = {0.0, 0.0};
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Scalar misfit between two complex vectors.
 *
 * For each of the first n entries, clcg_Zdiff(a[i], b[i]) is evaluated and the
 * squared real and imaginary parts of the result are accumulated. The return
 * value is the square root of that sum divided by n.
 *
 * @param a  First vector (at least n entries).
 * @param b  Second vector (at least n entries).
 * @param n  Number of entries to compare.
 * @return sqrt(accumulated sum) / n, or 0 when n is not positive.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare; this also avoids the division by zero below.
    if (n <= 0) return 0.0;

    lcg_float avg = 0.0;
    // Signed index on purpose: a size_t counter compared against the int n
    // would turn a negative n into a huge unsigned bound.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex tmp = clcg_Zdiff(a[i], b[i]);
        avg += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(avg)/n;
}
|
||||||
|
|
||||||
|
class sample10 : public CLCG_CUDA_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
sample10(){}
|
||||||
|
virtual ~sample10(){}
|
||||||
|
|
||||||
|
void solve(std::string inputPath, std::string answerPath);
|
||||||
|
|
||||||
|
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
|
||||||
|
cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
|
||||||
|
cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
void *d_x, *d_Ax;
|
||||||
|
cusparseDnVecGetValues(x, &d_x);
|
||||||
|
cusparseDnVecGetValues(prod_Ax, &d_Ax);
|
||||||
|
|
||||||
|
if (use_incomplete_cholesky)
|
||||||
|
{
|
||||||
|
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (cuDoubleComplex*) d_x, (cuDoubleComplex*) d_pd,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (cuDoubleComplex*) d_pd, (cuDoubleComplex*) d_Ax,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clcg_vecDvecZ_element_wise((cuDoubleComplex*) d_x, d_pd, (cuDoubleComplex*) d_Ax, n_size);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool use_incomplete_cholesky;
|
||||||
|
|
||||||
|
int N, nz;
|
||||||
|
int *rowIdxA, *colIdxA;
|
||||||
|
cuDoubleComplex *A, *b;
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
|
||||||
|
void *d_buf;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuDoubleComplex *d_A;
|
||||||
|
cuDoubleComplex *d_pd;
|
||||||
|
cuDoubleComplex *d_ic;
|
||||||
|
|
||||||
|
cusparseMatDescr_t descr_A;
|
||||||
|
cusparseMatDescr_t descr_L;
|
||||||
|
csric02Info_t icinfo_A;
|
||||||
|
csrsv2Info_t info_L;
|
||||||
|
csrsv2Info_t info_LT;
|
||||||
|
|
||||||
|
cuDoubleComplex *host_m;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
void sample10::solve(std::string inputPath, std::string answerPath)
|
||||||
|
{
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
// Create handles
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
// Get the diagonal elemenets
|
||||||
|
clcg_smZcsr_get_diagonal(d_rowPtrA, d_colIdxA, d_A, N, d_pd);
|
||||||
|
|
||||||
|
// Copy A
|
||||||
|
cudaMalloc(&d_ic, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMemcpy(d_ic, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);
|
||||||
|
|
||||||
|
// create descriptor for matrix A
|
||||||
|
cusparseCreateMatDescr(&descr_A);
|
||||||
|
|
||||||
|
// initialize properties of matrix A
|
||||||
|
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_SYMMETRIC);
|
||||||
|
cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// create descriptor for matrix L
|
||||||
|
cusparseCreateMatDescr(&descr_L);
|
||||||
|
|
||||||
|
// initialize properties of matrix L
|
||||||
|
cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create empty info objects for incomplete-cholesky factorization
|
||||||
|
cusparseCreateCsric02Info(&icinfo_A);
|
||||||
|
cusparseCreateCsrsv2Info(&info_L);
|
||||||
|
cusparseCreateCsrsv2Info(&info_LT);
|
||||||
|
|
||||||
|
int bufferSize, bufferSize_A, bufferSize_L, bufferSize_LT;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
// Compute buffer size in computing ic factorization
|
||||||
|
cusparseZcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
|
||||||
|
d_colIdxA, icinfo_A, &bufferSize_A);
|
||||||
|
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
|
||||||
|
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT);
|
||||||
|
|
||||||
|
bufferSize = max(max(max(bufferSize, bufferSize_A), bufferSize_L), bufferSize_LT);
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
|
||||||
|
// Perform incomplete-choleskey factorization: analysis phase
|
||||||
|
cusparseZcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
|
||||||
|
d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
// Perform incomplete-choleskey factorization: solve phase
|
||||||
|
cusparseZcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
|
||||||
|
icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
host_m = new cuDoubleComplex[N];
|
||||||
|
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
|
||||||
|
// Preconditioning with Diagonal elements
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
use_incomplete_cholesky = false;
|
||||||
|
MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Preconditioning with incomplete-Cholesky factorization
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
use_incomplete_cholesky = true;
|
||||||
|
MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
delete[] host_m;
|
||||||
|
|
||||||
|
// Free Device memory
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_pd);
|
||||||
|
cudaFree(d_ic);
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cudaFree(d_buf);
|
||||||
|
|
||||||
|
cusparseDestroyMatDescr(descr_A);
|
||||||
|
cusparseDestroyMatDescr(descr_L);
|
||||||
|
cusparseDestroyCsric02Info(icinfo_A);
|
||||||
|
cusparseDestroyCsrsv2Info(info_L);
|
||||||
|
cusparseDestroyCsrsv2Info(info_LT);
|
||||||
|
|
||||||
|
// Free handles
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_10K_cA";
|
||||||
|
std::string answerPath = "data/case_10K_cB";
|
||||||
|
|
||||||
|
sample10 sp;
|
||||||
|
sp.set_report_interval(0);
|
||||||
|
sp.solve(inputPath, answerPath);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
299
src/sample/sample11.cu
Normal file
299
src/sample/sample11.cu
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/clcg_cuda.h"
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Scalar misfit between two complex vectors.
 *
 * For each of the first n entries, clcg_Zdiff(a[i], b[i]) is evaluated and the
 * squared real and imaginary parts of the result are accumulated. The return
 * value is the square root of that sum divided by n.
 *
 * @param a  First vector (at least n entries).
 * @param b  Second vector (at least n entries).
 * @param n  Number of entries to compare.
 * @return sqrt(accumulated sum) / n, or 0 when n is not positive.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare; this also avoids the division by zero below.
    if (n <= 0) return 0.0;

    lcg_float avg = 0.0;
    // Signed index on purpose: a size_t counter compared against the int n
    // would turn a negative n into a huge unsigned bound.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex tmp = clcg_Zdiff(a[i], b[i]);
        avg += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(avg)/n;
}
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
cuDoubleComplex one, zero;
|
||||||
|
|
||||||
|
void *d_buf;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuDoubleComplex *d_A;
|
||||||
|
cuDoubleComplex *d_pd;
|
||||||
|
cuDoubleComplex *d_iu;
|
||||||
|
|
||||||
|
cusparseMatDescr_t descr_A = 0;
|
||||||
|
cusparseMatDescr_t descr_L = 0;
|
||||||
|
cusparseMatDescr_t descr_U = 0;
|
||||||
|
csrilu02Info_t info_ILU = 0;
|
||||||
|
csrsv2Info_t info_L = 0;
|
||||||
|
csrsv2Info_t info_U = 0;
|
||||||
|
|
||||||
|
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
|
||||||
|
cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
one.x = 1.0; one.y = 0.0;
|
||||||
|
zero.x = 0.0; zero.y = 0.0;
|
||||||
|
// Calculate the product of A*x
|
||||||
|
//cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaMx_ILU(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
|
||||||
|
cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
void *d_x, *d_Ax;
|
||||||
|
cusparseDnVecGetValues(x, &d_x);
|
||||||
|
cusparseDnVecGetValues(prod_Ax, &d_Ax);
|
||||||
|
|
||||||
|
one.x = 1.0; one.y = 0.0;
|
||||||
|
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, (cuDoubleComplex*) d_x, (cuDoubleComplex*) d_pd,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, (cuDoubleComplex*) d_pd, (cuDoubleComplex*) d_Ax,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (converge <= param->epsilon) {
|
||||||
|
std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1M_cA";
|
||||||
|
std::string answerPath = "data/case_1M_cB";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
cuDoubleComplex *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
cuDoubleComplex *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
// Create handles
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
// Copy A
|
||||||
|
cudaMalloc(&d_iu, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMemcpy(d_iu, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);
|
||||||
|
|
||||||
|
int bufferSize, bufferSize_A, bufferSize_L, bufferSize_U;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
// create descriptor for matrix A
|
||||||
|
cusparseCreateMatDescr(&descr_A);
|
||||||
|
|
||||||
|
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// create descriptor for matrix L
|
||||||
|
cusparseCreateMatDescr(&descr_L);
|
||||||
|
|
||||||
|
// initialize properties of matrix L
|
||||||
|
cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// create descriptor for matrix U
|
||||||
|
cusparseCreateMatDescr(&descr_U);
|
||||||
|
|
||||||
|
cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
|
||||||
|
cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create empty info objects for incomplete-cholesky factorization
|
||||||
|
cusparseCreateCsrilu02Info(&info_ILU);
|
||||||
|
cusparseCreateCsrsv2Info(&info_L);
|
||||||
|
cusparseCreateCsrsv2Info(&info_U);
|
||||||
|
|
||||||
|
// Compute buffer size in computing ic factorization
|
||||||
|
cusparseZcsrilu02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
|
||||||
|
d_colIdxA, info_ILU, &bufferSize_A);
|
||||||
|
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
|
||||||
|
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, &bufferSize_U);
|
||||||
|
|
||||||
|
bufferSize = max(max(max(bufferSize, bufferSize_A), bufferSize_L), bufferSize_U);
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
|
||||||
|
// Perform incomplete-choleskey factorization: analysis phase
|
||||||
|
cusparseZcsrilu02_analysis(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA,
|
||||||
|
d_colIdxA, info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
// Perform incomplete-choleskey factorization: solve phase
|
||||||
|
cusparseZcsrilu02(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA, d_colIdxA,
|
||||||
|
info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
cuDoubleComplex *host_m = new cuDoubleComplex[N];
|
||||||
|
|
||||||
|
// Preconditioning with incomplete-LU factorization
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = clcg_solver_preconditioned_cuda(cudaAx, cudaMx_ILU, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_PCG);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
delete[] host_m;
|
||||||
|
|
||||||
|
// Free Device memory
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_pd);
|
||||||
|
cudaFree(d_iu);
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cudaFree(d_buf);
|
||||||
|
|
||||||
|
cusparseDestroyMatDescr(descr_A);
|
||||||
|
cusparseDestroyMatDescr(descr_L);
|
||||||
|
cusparseDestroyMatDescr(descr_U);
|
||||||
|
cusparseDestroyCsrilu02Info(info_ILU);
|
||||||
|
cusparseDestroyCsrsv2Info(info_L);
|
||||||
|
cusparseDestroyCsrsv2Info(info_U);
|
||||||
|
|
||||||
|
// Free handles
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
306
src/sample/sample12.cu
Normal file
306
src/sample/sample12.cu
Normal file
@@ -0,0 +1,306 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/solver_cuda.h"
|
||||||
|
#include "../lib/preconditioner_cuda.h"
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
cuDoubleComplex one = {1.0, 0.0};
|
||||||
|
cuDoubleComplex zero = {0.0, 0.0};
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Scalar misfit between two complex vectors.
 *
 * For each of the first n entries, clcg_Zdiff(a[i], b[i]) is evaluated and the
 * squared real and imaginary parts of the result are accumulated. The return
 * value is the square root of that sum divided by n.
 *
 * @param a  First vector (at least n entries).
 * @param b  Second vector (at least n entries).
 * @param n  Number of entries to compare.
 * @return sqrt(accumulated sum) / n, or 0 when n is not positive.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare; this also avoids the division by zero below.
    if (n <= 0) return 0.0;

    lcg_float avg = 0.0;
    // Signed index on purpose: a size_t counter compared against the int n
    // would turn a negative n into a huge unsigned bound.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex tmp = clcg_Zdiff(a[i], b[i]);
        avg += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(avg)/n;
}
|
||||||
|
|
||||||
|
class sample12 : public CLCG_CUDA_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
sample12(){}
|
||||||
|
virtual ~sample12(){}
|
||||||
|
|
||||||
|
void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, x, dvec_p,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
|
||||||
|
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, prod_Ax,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int N, nz;
|
||||||
|
int *rowIdxA, *colIdxA;
|
||||||
|
cuDoubleComplex *A, *b;
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
|
||||||
|
int *IC_row, *IC_col;
|
||||||
|
cuDoubleComplex *IC_val;
|
||||||
|
|
||||||
|
void *d_buf, *d_buf2;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
cusparseSpMatDescr_t smat_IC;
|
||||||
|
cusparseSpSVDescr_t descr_L, descr_LT;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuDoubleComplex *d_A;
|
||||||
|
cuDoubleComplex *d_p;
|
||||||
|
cusparseDnVecDescr_t dvec_p;
|
||||||
|
|
||||||
|
int *d_rowIdxIC; // COO
|
||||||
|
int *d_rowPtrIC; // CSR
|
||||||
|
int *d_colIdxIC;
|
||||||
|
cuDoubleComplex *d_IC;
|
||||||
|
|
||||||
|
cuDoubleComplex *host_m;
|
||||||
|
cuDoubleComplex *d_t;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
void sample12::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||||
|
{
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
IC_row = new int [nz];
|
||||||
|
IC_col = new int [nz];
|
||||||
|
IC_val = new cuDoubleComplex [nz];
|
||||||
|
|
||||||
|
clcg_incomplete_Cholesky_cuda_full(rowIdxA, colIdxA, A, N, nz, IC_row, IC_col, IC_val);
|
||||||
|
/*
|
||||||
|
for (size_t i = 0; i < nz; i++)
|
||||||
|
{
|
||||||
|
if (IC_row[i] >= IC_col[i])
|
||||||
|
{
|
||||||
|
std::cout << IC_row[i] << " " << IC_col[i] << " (" << IC_val[i].x << "," << IC_val[i].y << ")\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_p, N * sizeof(cuDoubleComplex));
|
||||||
|
cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cudaMalloc(&d_IC, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxIC, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrIC, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxIC, nz * sizeof(int));
|
||||||
|
|
||||||
|
cudaMemcpy(d_IC, IC_val, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxIC, IC_row, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxIC, IC_col, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// Convert matrix L from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxIC, nz, N, d_rowPtrIC, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_IC, N, N, nz, d_rowPtrIC, d_colIdxIC, d_IC, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// Specify Non-Unit diagonal type.
|
||||||
|
//cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
|
||||||
|
//cusparseSpMatSetAttribute(smat_IC, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cudaMalloc(&d_t, N * sizeof(cuDoubleComplex));
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
cusparseSpSV_createDescr(&descr_L);
|
||||||
|
cusparseSpSV_createDescr(&descr_LT);
|
||||||
|
|
||||||
|
size_t bufferSize, bufferSize_L, bufferSize_LT;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);
|
||||||
|
|
||||||
|
bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
cudaMalloc(&d_buf2, bufferSize);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_tmp, dvec_p,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, dvec_tmp,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_buf2);
|
||||||
|
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
host_m = new cuDoubleComplex[N];
|
||||||
|
|
||||||
|
// Preconditioning with incomplete-chelosky factorization
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
if (rowIdxA != nullptr) delete[] rowIdxA;
|
||||||
|
if (colIdxA != nullptr) delete[] colIdxA;
|
||||||
|
if (A != nullptr) delete[] A;
|
||||||
|
if (b != nullptr) delete[] b;
|
||||||
|
if (ans_x != nullptr) delete[] ans_x;
|
||||||
|
|
||||||
|
if (IC_row != nullptr) delete[] IC_row;
|
||||||
|
if (IC_col != nullptr) delete[] IC_col;
|
||||||
|
if (IC_val != nullptr) delete[] IC_val;
|
||||||
|
|
||||||
|
if (host_m != nullptr) delete[] host_m;
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroyDnVec(dvec_p);
|
||||||
|
|
||||||
|
cudaFree(d_buf);
|
||||||
|
cudaFree(d_buf2);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_p);
|
||||||
|
cudaFree(d_t);
|
||||||
|
|
||||||
|
cudaFree(d_rowIdxIC);
|
||||||
|
cudaFree(d_rowPtrIC);
|
||||||
|
cudaFree(d_colIdxIC);
|
||||||
|
cudaFree(d_IC);
|
||||||
|
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cusparseDestroySpMat(smat_IC);
|
||||||
|
cusparseSpSV_destroyDescr(descr_L);
|
||||||
|
cusparseSpSV_destroyDescr(descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1M_cA";
|
||||||
|
std::string answerPath = "data/case_1M_cB";
|
||||||
|
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
sample12 sp;
|
||||||
|
sp.set_report_interval(0);
|
||||||
|
sp.solve(inputPath, answerPath, cubHandle, cusHandle);
|
||||||
|
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
305
src/sample/sample13.cu
Normal file
305
src/sample/sample13.cu
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/solver_cuda.h"
|
||||||
|
#include "../lib/preconditioner_cuda.h"
|
||||||
|
|
||||||
|
// Complex scalars 1+0i and 0+0i, kept at file scope so the solver callbacks can pass their addresses to cuSPARSE
|
||||||
|
cuDoubleComplex one = {1.0, 0.0};
|
||||||
|
cuDoubleComplex zero = {0.0, 0.0};
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Error measure between two complex vectors: the square root of the
 *        summed squared moduli of (a[i] - b[i]), divided by n.
 *
 * @param a first vector, at least n elements
 * @param b second vector, at least n elements
 * @param n number of elements to compare
 * @return sqrt(sum |a[i]-b[i]|^2) / n, or 0 when n is not positive
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare. This also avoids a 0/0 result for n == 0 and an
    // out-of-range walk for negative n.
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sum_sq = 0.0;
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||||
|
|
||||||
|
class sample13 : public CLCG_CUDA_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
sample13(){}
|
||||||
|
virtual ~sample13(){}
|
||||||
|
|
||||||
|
void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_tuf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
|
||||||
|
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int N, nz, lnz;
|
||||||
|
int *rowIdxA, *colIdxA;
|
||||||
|
cuDoubleComplex *A, *b;
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
|
||||||
|
int *L_row, *L_col;
|
||||||
|
cuDoubleComplex *L_val;
|
||||||
|
|
||||||
|
void *d_tuf, *d_tuf2;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
cusparseSpMatDescr_t smat_L;
|
||||||
|
cusparseSpSVDescr_t descr_L, descr_LT;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuDoubleComplex *d_A;
|
||||||
|
cuDoubleComplex *d_t;
|
||||||
|
cuDoubleComplex *d_p;
|
||||||
|
cusparseDnVecDescr_t dvec_p;
|
||||||
|
|
||||||
|
int *d_rowIdxL; // COO
|
||||||
|
int *d_rowPtrL; // CSR
|
||||||
|
int *d_colIdxL;
|
||||||
|
cuDoubleComplex *d_L;
|
||||||
|
|
||||||
|
cuDoubleComplex *host_m;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
void sample13::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||||
|
{
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
std::clog << "lnz = " << lnz << std::endl;
|
||||||
|
|
||||||
|
L_row = new int [lnz];
|
||||||
|
L_col = new int [lnz];
|
||||||
|
L_val = new cuDoubleComplex [lnz];
|
||||||
|
|
||||||
|
clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, A, N, nz, lnz, L_row, L_col, L_val);
|
||||||
|
/*
|
||||||
|
for (size_t i = 0; i < lnz; i++)
|
||||||
|
{
|
||||||
|
std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_t, N * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_p, N * sizeof(cuDoubleComplex));
|
||||||
|
cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cudaMalloc(&d_L, lnz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxL, lnz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrL, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxL, lnz * sizeof(int));
|
||||||
|
|
||||||
|
cudaMemcpy(d_L, L_val, lnz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxL, L_row, lnz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxL, L_col, lnz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// Convert matrix L from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// Specify Lower fill mode.
|
||||||
|
cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
|
||||||
|
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
|
||||||
|
|
||||||
|
// Specify Non-Unit diagonal type.
|
||||||
|
cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
|
||||||
|
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
cusparseSpSV_createDescr(&descr_L);
|
||||||
|
cusparseSpSV_createDescr(&descr_LT);
|
||||||
|
|
||||||
|
size_t bufferSize, bufferSize_L, bufferSize_LT;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);
|
||||||
|
|
||||||
|
bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);
|
||||||
|
cudaMalloc(&d_tuf, bufferSize);
|
||||||
|
cudaMalloc(&d_tuf2, bufferSize);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_tuf);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
|
||||||
|
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_tuf2);
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
// Preconditioning with incomplete-chelosky factorization
|
||||||
|
host_m = clcg_malloc_cuda(N);
|
||||||
|
clcg_vecset_cuda(host_m, zero, N);
|
||||||
|
|
||||||
|
MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
if (rowIdxA != nullptr) delete[] rowIdxA;
|
||||||
|
if (colIdxA != nullptr) delete[] colIdxA;
|
||||||
|
if (A != nullptr) delete[] A;
|
||||||
|
if (b != nullptr) delete[] b;
|
||||||
|
if (ans_x != nullptr) delete[] ans_x;
|
||||||
|
|
||||||
|
if (L_row != nullptr) delete[] L_row;
|
||||||
|
if (L_col != nullptr) delete[] L_col;
|
||||||
|
if (L_val != nullptr) delete[] L_val;
|
||||||
|
|
||||||
|
clcg_free_cuda(host_m);
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroyDnVec(dvec_p);
|
||||||
|
|
||||||
|
cudaFree(d_tuf);
|
||||||
|
cudaFree(d_tuf2);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_t);
|
||||||
|
cudaFree(d_p);
|
||||||
|
|
||||||
|
cudaFree(d_rowIdxL);
|
||||||
|
cudaFree(d_rowPtrL);
|
||||||
|
cudaFree(d_colIdxL);
|
||||||
|
cudaFree(d_L);
|
||||||
|
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cusparseDestroySpMat(smat_L);
|
||||||
|
cusparseSpSV_destroyDescr(descr_L);
|
||||||
|
cusparseSpSV_destroyDescr(descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_10K_cA";
|
||||||
|
std::string answerPath = "data/case_10K_cB";
|
||||||
|
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
sample13 sp;
|
||||||
|
sp.set_report_interval(0);
|
||||||
|
sp.solve(inputPath, answerPath, cubHandle, cusHandle);
|
||||||
|
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
327
src/sample/sample14.cu
Normal file
327
src/sample/sample14.cu
Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/solver_cuda.h"
|
||||||
|
#include "../lib/preconditioner_cuda.h"
|
||||||
|
|
||||||
|
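// Single-precision complex scalars 1+0i and 0+0i, kept at file scope so the solver callbacks can pass their addresses to cuSPARSE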
|
||||||
|
cuComplex one = {1.0, 0.0};
|
||||||
|
cuComplex zero = {0.0, 0.0};
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Error measure between two single-precision complex vectors: the
 *        square root of the summed squared moduli of (a[i] - b[i]), divided by n.
 *
 * @param a first vector, at least n elements
 * @param b second vector, at least n elements
 * @param n number of elements to compare
 * @return sqrt(sum |a[i]-b[i]|^2) / n, or 0 when n is not positive
 */
float avg_error(cuComplex *a, cuComplex *b, int n)
{
    // Nothing to compare. This also avoids a 0/0 result for n == 0 and an
    // out-of-range walk for negative n.
    if (n <= 0)
    {
        return 0.0;
    }

    float sum_sq = 0.0;
    for (int i = 0; i < n; i++)
    {
        const cuComplex diff = clcg_Cdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||||
|
|
||||||
|
class sample14 : public CLCG_CUDAF_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
sample14(){}
|
||||||
|
virtual ~sample14(){}
|
||||||
|
|
||||||
|
void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||||
|
|
||||||
|
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||||
|
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
|
||||||
|
CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
|
||||||
|
|
||||||
|
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
|
||||||
|
CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int N, nz, lnz;
|
||||||
|
int *rowIdxA, *colIdxA;
|
||||||
|
cuDoubleComplex *A, *b;
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
cuComplex *Af, *bf;
|
||||||
|
cuComplex *ans_xf;
|
||||||
|
|
||||||
|
int *L_row, *L_col;
|
||||||
|
cuComplex *L_val;
|
||||||
|
|
||||||
|
void *d_buf, *d_buf2;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
cusparseSpMatDescr_t smat_L;
|
||||||
|
cusparseSpSVDescr_t descr_L, descr_LT;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuComplex *d_A;
|
||||||
|
cuComplex *d_t;
|
||||||
|
cuComplex *d_p;
|
||||||
|
cusparseDnVecDescr_t dvec_p;
|
||||||
|
|
||||||
|
int *d_rowIdxL; // COO
|
||||||
|
int *d_rowPtrL; // CSR
|
||||||
|
int *d_colIdxL;
|
||||||
|
cuComplex *d_L;
|
||||||
|
|
||||||
|
cuComplex *host_m;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
void sample14::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||||
|
{
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
std::clog << "lnz = " << lnz << std::endl;
|
||||||
|
|
||||||
|
Af = new cuComplex [nz];
|
||||||
|
bf = new cuComplex [nz];
|
||||||
|
ans_xf = new cuComplex [nz];
|
||||||
|
|
||||||
|
// Note that converting complex numbers from double to single precision may cause a stack overflow
|
||||||
|
for (size_t i = 0; i < nz; i++)
|
||||||
|
{
|
||||||
|
Af[i].x = A[i].x; Af[i].y = A[i].y;
|
||||||
|
bf[i].x = b[i].x; bf[i].y = b[i].y;
|
||||||
|
ans_xf[i].x = ans_x[i].x; ans_xf[i].y = ans_x[i].y;
|
||||||
|
}
|
||||||
|
|
||||||
|
L_row = new int [lnz];
|
||||||
|
L_col = new int [lnz];
|
||||||
|
L_val = new cuComplex [lnz];
|
||||||
|
|
||||||
|
clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, Af, N, nz, lnz, L_row, L_col, L_val);
|
||||||
|
/*
|
||||||
|
for (size_t i = 0; i < lnz; i++)
|
||||||
|
{
|
||||||
|
std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_t, N * sizeof(cuComplex));
|
||||||
|
cudaMalloc(&d_p, N * sizeof(cuComplex));
|
||||||
|
cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_32F);
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, Af, nz * sizeof(cuComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_t, bf, N * sizeof(cuComplex), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cudaMalloc(&d_L, lnz * sizeof(cuComplex));
|
||||||
|
cudaMalloc(&d_rowIdxL, lnz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrL, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxL, lnz * sizeof(int));
|
||||||
|
|
||||||
|
cudaMemcpy(d_L, L_val, lnz * sizeof(cuComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxL, L_row, lnz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxL, L_col, lnz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
|
||||||
|
|
||||||
|
// Convert matrix L from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
|
||||||
|
|
||||||
|
// Specify Lower fill mode.
|
||||||
|
cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
|
||||||
|
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
|
||||||
|
|
||||||
|
// Specify Non-Unit diagonal type.
|
||||||
|
cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
|
||||||
|
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_32F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
cusparseSpSV_createDescr(&descr_L);
|
||||||
|
cusparseSpSV_createDescr(&descr_LT);
|
||||||
|
|
||||||
|
size_t bufferSize, bufferSize_L, bufferSize_LT;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
|
||||||
|
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
|
||||||
|
dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);
|
||||||
|
|
||||||
|
bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
cudaMalloc(&d_buf2, bufferSize);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
|
||||||
|
CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf);
|
||||||
|
|
||||||
|
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
|
||||||
|
CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_buf2);
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
host_m = new cuComplex[N];
|
||||||
|
|
||||||
|
// Preconditioning with incomplete-Cholesky factorization
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MinimizePreconditioned(cub_handle, cus_handle, host_m, bf, N, nz, CLCG_PCG);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_xf, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
if (rowIdxA != nullptr) delete[] rowIdxA;
|
||||||
|
if (colIdxA != nullptr) delete[] colIdxA;
|
||||||
|
if (A != nullptr) delete[] A;
|
||||||
|
if (b != nullptr) delete[] b;
|
||||||
|
if (ans_x != nullptr) delete[] ans_x;
|
||||||
|
if (Af != nullptr) delete[] Af;
|
||||||
|
if (bf != nullptr) delete[] bf;
|
||||||
|
if (ans_xf != nullptr) delete[] ans_xf;
|
||||||
|
|
||||||
|
if (L_row != nullptr) delete[] L_row;
|
||||||
|
if (L_col != nullptr) delete[] L_col;
|
||||||
|
if (L_val != nullptr) delete[] L_val;
|
||||||
|
|
||||||
|
if (host_m != nullptr) delete[] host_m;
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroyDnVec(dvec_p);
|
||||||
|
|
||||||
|
cudaFree(d_buf);
|
||||||
|
cudaFree(d_buf2);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_t);
|
||||||
|
cudaFree(d_p);
|
||||||
|
|
||||||
|
cudaFree(d_rowIdxL);
|
||||||
|
cudaFree(d_rowPtrL);
|
||||||
|
cudaFree(d_colIdxL);
|
||||||
|
cudaFree(d_L);
|
||||||
|
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cusparseDestroySpMat(smat_L);
|
||||||
|
cusparseSpSV_destroyDescr(descr_L);
|
||||||
|
cusparseSpSV_destroyDescr(descr_LT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1K_cA";
|
||||||
|
std::string answerPath = "data/case_1K_cB";
|
||||||
|
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
sample14 sp;
|
||||||
|
sp.set_report_interval(100);
|
||||||
|
sp.solve(inputPath, answerPath, cubHandle, cusHandle);
|
||||||
|
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
223
src/sample/sample15.cu
Normal file
223
src/sample/sample15.cu
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/lcg_cuda.h"
|
||||||
|
|
||||||
|
/**
 * @brief Load a real sparse system stored in a binary file.
 *
 * File layout, as consumed below: int N, int nz, then nz records of
 * (int row, int col, double value), then N doubles for the right-hand side.
 *
 * All four output arrays are always allocated with new[] (possibly with zero
 * length), so the caller can release them with delete[] unconditionally.
 * If the file cannot be opened or its header is unreadable or negative,
 * *pN and *pnz are set to 0 and a message is written to std::cerr instead of
 * sizing the allocations from uninitialised values.
 *
 * @param filePath   path of the binary input file
 * @param pN         receives the number of unknowns
 * @param pnz        receives the number of stored entries
 * @param cooVal     receives the entry values (length *pnz)
 * @param cooRowIdx  receives the row indices (length *pnz)
 * @param cooColIdx  receives the column indices (length *pnz)
 * @param b          receives the right-hand side (length *pN)
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
    int **cooRowIdx, int **cooColIdx, double **b)
{
    std::ifstream in(filePath, std::ios::binary);

    *pN = 0;
    *pnz = 0;

    if (!in)
    {
        std::cerr << "read: cannot open " << filePath << std::endl;
    }
    else
    {
        in.read(reinterpret_cast<char*>(pN), sizeof(int));
        in.read(reinterpret_cast<char*>(pnz), sizeof(int));

        if (!in || *pN < 0 || *pnz < 0)
        {
            std::cerr << "read: invalid header in " << filePath << std::endl;
            *pN = 0;
            *pnz = 0;
        }
    }

    // Value-initialised so that a truncated file leaves zeros, not garbage.
    *cooVal = new double[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new double[*pN]{};

    for (int i = 0; i < *pnz; ++i)
    {
        in.read(reinterpret_cast<char*>(&(*cooRowIdx)[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&(*cooColIdx)[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&(*cooVal)[i]), sizeof(double));
    }

    in.read(reinterpret_cast<char*>(*b), sizeof(double)*(*pN));
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * File layout, as consumed below: int N followed by N doubles.
 *
 * *x is always allocated with new[] (possibly with zero length). If the file
 * cannot be opened or the header is unreadable or negative, *pN is set to 0
 * and a message is written to std::cerr instead of sizing the allocation
 * from an uninitialised value.
 *
 * @param filePath  path of the binary answer file
 * @param pN        receives the vector length
 * @param x         receives the vector (length *pN)
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    std::ifstream in(filePath, std::ios::binary);

    *pN = 0;

    if (!in)
    {
        std::cerr << "readAnswer: cannot open " << filePath << std::endl;
    }
    else
    {
        in.read(reinterpret_cast<char*>(pN), sizeof(int));

        if (!in || *pN < 0)
        {
            std::cerr << "readAnswer: invalid header in " << filePath << std::endl;
            *pN = 0;
        }
    }

    // Value-initialised so that a truncated file leaves zeros, not garbage.
    *x = new double[*pN]{};

    in.read(reinterpret_cast<char*>(*x), sizeof(double)*(*pN));
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Error measure between two vectors: sqrt(sum_i (a[i]-b[i])^2) / n.
 *
 * @param a  first vector (length n)
 * @param b  second vector (length n)
 * @param n  number of elements to compare
 * @return   the measure above, or 0 when n is not positive (nothing to compare).
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    // A non-positive n would otherwise divide by zero, or (when negative) turn
    // into a huge unsigned loop bound and read far outside both arrays.
    if (n <= 0) return 0.0;

    lcg_float sq_sum = 0.0;
    for (int i = 0; i < n; i++)
    {
        const lcg_float diff = a[i] - b[i];
        sq_sum += diff*diff;
    }
    return sqrt(sq_sum)/n;
}
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
lcg_float one = 1.0;
|
||||||
|
lcg_float zero = 0.0;
|
||||||
|
|
||||||
|
void *d_buf;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
double *d_A;
|
||||||
|
|
||||||
|
cusparseMatDescr_t descr_A = 0;
|
||||||
|
csric02Info_t icinfo_A = 0;
|
||||||
|
|
||||||
|
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
x, &zero, prod_Ax, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (converge <= param->epsilon) {
|
||||||
|
std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1M_A";
|
||||||
|
std::string answerPath = "data/case_1M_B";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
double *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
double *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
double *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
// Create handles
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(double));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
|
||||||
|
|
||||||
|
// create descriptor for matrix A
|
||||||
|
cusparseCreateMatDescr(&descr_A);
|
||||||
|
|
||||||
|
// initialize properties of matrix A
|
||||||
|
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
int bufferSize;
|
||||||
|
cusparseCreateCsric02Info(&icinfo_A);
|
||||||
|
cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
|
||||||
|
d_colIdxA, icinfo_A, &bufferSize);
|
||||||
|
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
double *host_m = new double[N];
|
||||||
|
|
||||||
|
// Solve with CG
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Solve with CGS
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
delete[] host_m;
|
||||||
|
|
||||||
|
// Free Device memory
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cudaFree(d_buf);
|
||||||
|
|
||||||
|
cusparseDestroyMatDescr(descr_A);
|
||||||
|
cusparseDestroyCsric02Info(icinfo_A);
|
||||||
|
|
||||||
|
// Free handles
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
170
src/sample/sample2.cpp
Normal file
170
src/sample/sample2.cpp
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
#include "random"
|
||||||
|
#include "../lib/solver.h"
|
||||||
|
|
||||||
|
#define M 1000
|
||||||
|
#define N 800
|
||||||
|
|
||||||
|
/**
 * @brief Largest element-wise magnitude of a[i] - b[i] over the first
 *        `size` entries; returns -1 when size is not positive.
 */
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float largest = -1;
    int idx = 0;
    while (idx < size)
    {
        const lcg_float delta = a[idx] - b[idx];
        largest = lcg_max(sqrt(delta*delta), largest);
        ++idx;
    }
    return largest;
}
|
||||||
|
|
||||||
|
class TESTFUNC : public LCG_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TESTFUNC();
|
||||||
|
~TESTFUNC();
|
||||||
|
|
||||||
|
// 计算共轭梯度的B项
|
||||||
|
void cal_partb(lcg_float *B, const lcg_float *x);
|
||||||
|
|
||||||
|
//定义共轭梯度中Ax的算法
|
||||||
|
void AxProduct(const lcg_float* a, lcg_float* b, const int num)
|
||||||
|
{
|
||||||
|
lcg_matvec(kernel, a, tmp_arr, M, num, MatNormal);
|
||||||
|
lcg_matvec(kernel, tmp_arr, b, M, num, MatTranspose);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(const lcg_float* a, lcg_float* b, const int num)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < num; i++)
|
||||||
|
{
|
||||||
|
b[i] = p[i]*a[i];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
lcg_float **kernel;
|
||||||
|
// 中间结果数组
|
||||||
|
lcg_float *tmp_arr;
|
||||||
|
// 预优矩阵
|
||||||
|
lcg_float *p;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allocate the buffers, fill the kernel with random values in [-1, 1] and set
// each preconditioner entry to the reciprocal of the squared norm of the
// matching kernel column.
TESTFUNC::TESTFUNC()
{
    kernel = lcg_malloc(M, N);
    tmp_arr = lcg_malloc(M);
    p = lcg_malloc(N);

    lcg_vecrnd(kernel, -1.0, 1.0, M, N);
    lcg_vecset(p, 1.0, N);

    for (size_t col = 0; col < N; col++)
    {
        lcg_float col_norm2 = 0.0;
        for (size_t row = 0; row < M; row++)
        {
            const lcg_float entry = kernel[row][col];
            col_norm2 += entry*entry;
        }
        p[col] = 1.0/col_norm2;
    }
}

// Release every buffer acquired by the constructor.
TESTFUNC::~TESTFUNC()
{
    lcg_free(kernel, M);
    lcg_free(tmp_arr);
    lcg_free(p);
}
|
||||||
|
|
||||||
|
// Compute B = K^T (K x). This is the same pair of products AxProduct performs
// when called with num = N, so delegate to it.
void TESTFUNC::cal_partb(lcg_float *B, const lcg_float *x)
{
    AxProduct(x, B, N);
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 生成一组正演解
|
||||||
|
double *fm = lcg_malloc(N);
|
||||||
|
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||||
|
|
||||||
|
TESTFUNC test;
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
double *B = lcg_malloc(N);
|
||||||
|
test.cal_partb(B, fm);
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
test.set_lcg_parameter(self_para);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
lcg_float *m = lcg_malloc(N);
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
|
||||||
|
// 约束解的范围
|
||||||
|
lcg_float *low = lcg_malloc(N);
|
||||||
|
lcg_float *hig = lcg_malloc(N);
|
||||||
|
lcg_vecset(low, 1.0, N);
|
||||||
|
lcg_vecset(hig, 2.0, N);
|
||||||
|
|
||||||
|
test.Minimize(m, B, N, LCG_CG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.MinimizePreconditioned(m, B, N);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.Minimize(m, B, N, LCG_CGS);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.Minimize(m, B, N, LCG_BICGSTAB);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.Minimize(m, B, N, LCG_BICGSTAB2);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.MinimizeConstrained(m, B, low, hig, N, LCG_PG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_vecset(m, 0.0, N);
|
||||||
|
test.MinimizeConstrained(m, B, low, hig, N, LCG_SPG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
lcg_free(fm);
|
||||||
|
lcg_free(B);
|
||||||
|
lcg_free(m);
|
||||||
|
lcg_free(low);
|
||||||
|
lcg_free(hig);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
129
src/sample/sample3.cpp
Normal file
129
src/sample/sample3.cpp
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
#include "../lib/clcg.h"
|
||||||
|
|
||||||
|
#define N 100
|
||||||
|
|
||||||
|
/**
 * @brief Largest modulus of a[i] - b[i] over the first `size` entries;
 *        returns -1 when size is not positive.
 */
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    lcg_complex delta;
    int idx = 0;
    while (idx < size)
    {
        delta = a[idx] - b[idx];
        largest = lcg_max(clcg_module(&delta), largest);
        ++idx;
    }
    return largest;
}
|
||||||
|
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
lcg_complex **kernel;
|
||||||
|
|
||||||
|
// 计算核矩阵乘向量的乘积
|
||||||
|
void CalAx(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||||
|
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
clcg_matvec(kernel, x, prod_Ax, N, x_size, layout, conjugate);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//定义共轭梯度监控函数
|
||||||
|
int Prog(void* instance, const lcg_complex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int k)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
srand(time(0));
|
||||||
|
|
||||||
|
kernel = clcg_malloc(N, N);
|
||||||
|
clcg_vecrnd(kernel, lcg_complex(-1.0, -1.0), lcg_complex(1.0, 1.0), N, N);
|
||||||
|
|
||||||
|
// 设置核矩阵为一个对称阵
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
for (int j = i; j < N; j++)
|
||||||
|
{
|
||||||
|
kernel[j][i] = kernel[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 生成一组正演解
|
||||||
|
lcg_complex *fm = clcg_malloc(N);
|
||||||
|
clcg_vecrnd(fm, lcg_complex(1.0, 1.0), lcg_complex(2.0, 2.0), N);
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
lcg_complex *B = clcg_malloc(N);
|
||||||
|
clcg_matvec(kernel, fm, B, N, N, MatNormal, NonConjugate);
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
self_para.epsilon = 1e-8;
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
lcg_complex *m = clcg_malloc(N);
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
std::clog << "solver: bicg" << std::endl;
|
||||||
|
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG);
|
||||||
|
std::clog << std::endl; clcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
std::clog << "solver: bicg-symmetric" << std::endl;
|
||||||
|
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG_SYM);
|
||||||
|
std::clog << std::endl; clcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
std::clog << "solver: cgs" << std::endl;
|
||||||
|
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_CGS);
|
||||||
|
std::clog << std::endl; clcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
std::clog << "solver: bicgstab" << std::endl;
|
||||||
|
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICGSTAB);
|
||||||
|
std::clog << std::endl; clcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
std::clog << "solver: tfqmr" << std::endl;
|
||||||
|
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_TFQMR);
|
||||||
|
std::clog << std::endl; clcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_free(kernel, N);
|
||||||
|
clcg_free(fm);
|
||||||
|
clcg_free(B);
|
||||||
|
clcg_free(m);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
199
src/sample/sample4.cpp
Normal file
199
src/sample/sample4.cpp
Normal file
@@ -0,0 +1,199 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "../lib/solver.h"
|
||||||
|
#include "ctime"
|
||||||
|
#include "random"
|
||||||
|
#include "iostream"
|
||||||
|
#include "fstream"
|
||||||
|
#include "iomanip"
|
||||||
|
#include "complex"
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, lcg_complex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new lcg_complex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new lcg_complex[*pN]{};
|
||||||
|
|
||||||
|
std::complex<double> std_c;
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*cooVal)[i].real(std_c.real());
|
||||||
|
(*cooVal)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < *pN; i++)
|
||||||
|
{
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*b)[i].real(std_c.real());
|
||||||
|
(*b)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new lcg_complex[*pN]{};
|
||||||
|
|
||||||
|
std::complex<double> std_c;
|
||||||
|
for (size_t i = 0; i < *pN; i++)
|
||||||
|
{
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*x)[i].real(std_c.real());
|
||||||
|
(*x)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Largest modulus of a[i] - b[i] over the first `size` entries;
 *        returns -1 when size is not positive.
 */
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    lcg_complex delta;
    int idx = 0;
    while (idx < size)
    {
        delta = a[idx] - b[idx];
        largest = lcg_max(clcg_module(&delta), largest);
        ++idx;
    }
    return largest;
}
|
||||||
|
|
||||||
|
class TESTFUNC : public CLCG_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TESTFUNC(int n);
|
||||||
|
~TESTFUNC();
|
||||||
|
|
||||||
|
void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);
|
||||||
|
|
||||||
|
//定义共轭梯度中Ax的算法
|
||||||
|
void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax, const int x_size,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
clcg_matvec(kernel, x, prod_Ax, x_size, x_size, layout, conjugate);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
lcg_complex **kernel;
|
||||||
|
int n_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allocate an n x n kernel; its entries are assigned later by set_kernel().
TESTFUNC::TESTFUNC(int n)
    : kernel(clcg_malloc(n, n)), n_size(n)
{
}

// Release the kernel acquired by the constructor.
TESTFUNC::~TESTFUNC()
{
    clcg_free(kernel, n_size);
}
|
||||||
|
|
||||||
|
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n_size; i++)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < n_size; j++)
|
||||||
|
{
|
||||||
|
kernel[i][j] = lcg_complex(0.0, 0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
kernel[row_id[i]][col_id[i]] = val[i];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1K_cA";
|
||||||
|
std::string answerPath = "data/case_1K_cB";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
lcg_complex *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
lcg_complex *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
lcg_complex *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
TESTFUNC test(N);
|
||||||
|
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-8;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
test.set_clcg_parameter(self_para);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
lcg_complex *m = clcg_malloc(N);
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
|
||||||
|
test.Minimize(m, b, N, CLCG_BICG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
test.Minimize(m, b, N, CLCG_BICG_SYM);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
test.Minimize(m, b, N, CLCG_CGS);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||||
|
test.Minimize(m, b, N, CLCG_TFQMR);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||||
|
|
||||||
|
clcg_free(m);
|
||||||
|
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
155
src/sample/sample5.cpp
Normal file
155
src/sample/sample5.cpp
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "../lib/lcg_eigen.h"
|
||||||
|
#include "iostream"
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
|
||||||
|
#define M 1000
|
||||||
|
#define N 800
|
||||||
|
|
||||||
|
/**
 * @brief Largest element-wise absolute difference between two real vectors.
 *
 * The loop runs over a.size() entries and indexes b with the same index, so
 * b must hold at least as many entries as a.
 *
 * @param a first vector (its size drives the loop)
 * @param b second vector
 * @return the largest value of sqrt((a[i]-b[i])^2), or -1 when a is empty
 */
lcg_float max_diff(const Eigen::VectorXd &a, const Eigen::VectorXd &b)
{
    lcg_float largest = -1;
    const int count = a.size();
    for (int idx = 0; idx < count; ++idx)
    {
        const double delta = a[idx] - b[idx];
        largest = lcg_max(sqrt(delta*delta), largest);
    }
    return largest;
}
|
||||||
|
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
Eigen::MatrixXd kernel = Eigen::MatrixXd::Random(M, N);
|
||||||
|
// 中间结果数组
|
||||||
|
Eigen::VectorXd tmp_arr(M);
|
||||||
|
Eigen::VectorXd p = Eigen::VectorXd::Constant(N, 1.0);
|
||||||
|
|
||||||
|
// 计算核矩阵乘向量的乘积
|
||||||
|
void CalAx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
|
||||||
|
{
|
||||||
|
tmp_arr = kernel * x;
|
||||||
|
prod_Ax = kernel.transpose() * tmp_arr;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Preconditioner callback: scales x element-wise by the global vector p.
 *
 * @param instance unused user pointer
 * @param x        input vector
 * @param prod_Mx  output vector receiving p[i] * x[i]
 */
void CalMx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
{
    prod_Mx = p.cwiseProduct(x);
}
|
||||||
|
|
||||||
|
//定义共轭梯度监控函数
|
||||||
|
int Prog(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||||
|
const lcg_para *param, const int k)
|
||||||
|
{
|
||||||
|
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 生成一组正演解
|
||||||
|
lcg_float LO = 1.0, HI = 2.0, Range = HI - LO;
|
||||||
|
Eigen::VectorXd fm = Eigen::VectorXd::Random(N);
|
||||||
|
fm = (fm + Eigen::VectorXd::Constant(N, 1.0))*0.5*Range;
|
||||||
|
fm = (fm + Eigen::VectorXd::Constant(N, LO));
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
Eigen::VectorXd B(N);
|
||||||
|
tmp_arr = kernel * fm;
|
||||||
|
B = kernel.transpose() * tmp_arr;
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-5;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
Eigen::VectorXd m = Eigen::VectorXd::Zero(N);
|
||||||
|
//Eigen::VectorXd p = Eigen::VectorXd::Constant(N, 1.0);
|
||||||
|
Eigen::VectorXd low = Eigen::VectorXd::Constant(N, LO);
|
||||||
|
Eigen::VectorXd hig = Eigen::VectorXd::Constant(N, HI);
|
||||||
|
|
||||||
|
std::clog << "solver: cg" << std::endl;
|
||||||
|
clock_t start = clock();
|
||||||
|
int ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CG);
|
||||||
|
clock_t end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: pcg" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_preconditioned_eigen(CalAx, CalMx, Prog, m, B, &self_para, NULL, LCG_PCG);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: cgs" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CGS);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: bicgstab" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: bicgstab2" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB2);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: pg" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_PG);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
std::clog << "solver: spg" << std::endl;
|
||||||
|
start = clock();
|
||||||
|
ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_SPG);
|
||||||
|
end = clock();
|
||||||
|
std::clog << std::endl; lcg_error_str(ret);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
235
src/sample/sample6.cpp
Normal file
235
src/sample/sample6.cpp
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
#include "fstream"
|
||||||
|
#include "complex"
|
||||||
|
#include "../lib/lcg_complex.h"
|
||||||
|
#include "../lib/solver_eigen.h"
|
||||||
|
#include "Eigen/Sparse"
|
||||||
|
|
||||||
|
// Row-major sparse matrix of lcg_complex; note that Eigen's default sparse
// storage order is column-major, so the order is requested explicitly.
using spmat_cd = Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor>;
// (row, column, value) entry used to assemble an spmat_cd.
using triplt_cd = Eigen::Triplet<lcg_complex>;
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, lcg_complex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new lcg_complex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new lcg_complex[*pN]{};
|
||||||
|
|
||||||
|
std::complex<double> std_c;
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*cooVal)[i].real(std_c.real());
|
||||||
|
(*cooVal)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < *pN; i++)
|
||||||
|
{
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*b)[i].real(std_c.real());
|
||||||
|
(*b)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new lcg_complex[*pN]{};
|
||||||
|
|
||||||
|
std::complex<double> std_c;
|
||||||
|
for (size_t i = 0; i < *pN; i++)
|
||||||
|
{
|
||||||
|
in.read((char*)&std_c, sizeof(std_c));
|
||||||
|
(*x)[i].real(std_c.real());
|
||||||
|
(*x)[i].imag(std_c.imag());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Largest element-wise modulus of the difference of two complex vectors.
 *
 * The value is the modulus |a[i] - b[i]| rather than its square, so the
 * number printed as "maximal difference" is on the same scale as the vector
 * entries.
 *
 * The loop runs over a.size() entries and indexes b with the same index, so
 * b must hold at least as many entries as a.
 *
 * @param a first vector (its size drives the loop)
 * @param b second vector
 * @return the largest |a[i] - b[i]|, or -1 when a is empty
 */
lcg_float max_diff(const Eigen::VectorXcd &a, const Eigen::VectorXcd &b)
{
    lcg_float max = -1;
    std::complex<lcg_float> t;
    for (int i = 0; i < a.size(); i++)
    {
        t = a[i] - b[i];
        max = lcg_max(std::abs(t), max);
    }
    return max;
}
|
||||||
|
|
||||||
|
class TESTFUNC : public CLCG_EIGEN_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TESTFUNC(int n);
|
||||||
|
~TESTFUNC();
|
||||||
|
|
||||||
|
void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);
|
||||||
|
void set_p();
|
||||||
|
|
||||||
|
//定义共轭梯度中Ax的算法
|
||||||
|
void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
if (conjugate == Conjugate) prod_Ax = kernel.conjugate() * x;
|
||||||
|
else prod_Ax = kernel * x;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||||
|
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
prod_Mx = P.cwiseProduct(x);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
spmat_cd kernel;
|
||||||
|
Eigen::VectorXcd P;
|
||||||
|
int n_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * @brief Creates an empty n x n kernel and sizes the weight vector P.
 *
 * P is only resized here; its entries are assigned later by set_p().
 *
 * @param n dimension of the linear system
 */
TESTFUNC::TESTFUNC(int n)
    : n_size(n)
{
    kernel.resize(n_size, n_size);
    kernel.setZero();

    P.resize(n_size);
}
|
||||||
|
|
||||||
|
/**
 * @brief Shrinks the kernel to an empty 0 x 0 matrix on destruction.
 */
TESTFUNC::~TESTFUNC()
{
    kernel.resize(0, 0);
}
|
||||||
|
|
||||||
|
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
|
||||||
|
{
|
||||||
|
std::vector<triplt_cd> val_triplt;
|
||||||
|
for (size_t i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TESTFUNC::set_p()
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n_size; i++)
|
||||||
|
{
|
||||||
|
P[i] = 1.0/kernel.coeff(i, i);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_10K_cA";
|
||||||
|
std::string answerPath = "data/case_10K_cB";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
lcg_complex *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
lcg_complex *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
lcg_complex *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
TESTFUNC test(N);
|
||||||
|
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||||
|
test.set_p();
|
||||||
|
|
||||||
|
Eigen::VectorXcd B, ANS;
|
||||||
|
B.resize(N);
|
||||||
|
ANS.resize(N);
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
B[i] = b[i];
|
||||||
|
ANS[i] = ans_x[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-16;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
test.set_clcg_parameter(self_para);
|
||||||
|
test.set_report_interval(10);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));
|
||||||
|
|
||||||
|
test.Minimize(m, B, CLCG_BICG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.Minimize(m, B, CLCG_BICG_SYM);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.Minimize(m, B, CLCG_CGS);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.Minimize(m, B, CLCG_TFQMR);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.MinimizePreconditioned(m, B, CLCG_PCG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.MinimizePreconditioned(m, B, CLCG_PBICG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
B.resize(0);
|
||||||
|
ANS.resize(0);
|
||||||
|
m.resize(0);
|
||||||
|
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
233
src/sample/sample7.cpp
Normal file
233
src/sample/sample7.cpp
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "iostream"
|
||||||
|
#include "fstream"
|
||||||
|
#include "../lib/solver_eigen.h"
|
||||||
|
#include "../lib/preconditioner_eigen.h"
|
||||||
|
|
||||||
|
typedef std::complex<double> complex_d;
|
||||||
|
typedef Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> spmat_cd;
|
||||||
|
typedef Eigen::Triplet<complex_d> triplt_cd;
|
||||||
|
typedef Eigen::VectorXcd vector_cd;
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, complex_d **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, complex_d **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new complex_d[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new complex_d[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(complex_d));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(complex_d)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, complex_d **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new complex_d[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(complex_d)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
double max_diff(const vector_cd &a, const vector_cd &b)
|
||||||
|
{
|
||||||
|
double max = -1;
|
||||||
|
complex_d t;
|
||||||
|
for (int i = 0; i < a.size(); i++)
|
||||||
|
{
|
||||||
|
t = a[i] - b[i];
|
||||||
|
max = lcg_max(std::sqrt(std::norm(t)), max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
class TESTFUNC : public CLCG_EIGEN_Solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TESTFUNC(int n);
|
||||||
|
~TESTFUNC();
|
||||||
|
|
||||||
|
void set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size);
|
||||||
|
void set_preconditioner();
|
||||||
|
|
||||||
|
//定义共轭梯度中Ax的算法
|
||||||
|
void AxProduct(const vector_cd &x, vector_cd &prod_Ax, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
if (conjugate == Conjugate) prod_Ax = kernel.conjugate() * x;
|
||||||
|
else prod_Ax = kernel * x;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MxProduct(const vector_cd &x, vector_cd &prod_Mx, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||||
|
{
|
||||||
|
// No preconditioning
|
||||||
|
//prod_Mx = x;
|
||||||
|
|
||||||
|
// Preconditioning using the diagonal kernel
|
||||||
|
//prod_Mx = p.cwiseProduct(x);
|
||||||
|
|
||||||
|
// Preconditioning using the ILUT/IC
|
||||||
|
clcg_solve_lower_triangle(l_tri, x, p);
|
||||||
|
clcg_solve_upper_triangle(u_tri, p, prod_Mx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// 普通二维数组做核矩阵
|
||||||
|
spmat_cd kernel, l_tri, u_tri;
|
||||||
|
vector_cd p;
|
||||||
|
int n_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * @brief Creates an empty n x n kernel and sizes the scratch vector p.
 *
 * @param n dimension of the linear system
 */
TESTFUNC::TESTFUNC(int n)
    : n_size(n)
{
    kernel.resize(n_size, n_size);
    kernel.setZero();

    p.resize(n_size);
}
|
||||||
|
|
||||||
|
/**
 * @brief Shrinks the kernel, both triangular factors and the scratch vector
 *        to empty on destruction.
 */
TESTFUNC::~TESTFUNC()
{
    // Matrices first, then the scratch vector.
    kernel.resize(0, 0);
    l_tri.resize(0, 0);
    u_tri.resize(0, 0);

    p.resize(0);
}
|
||||||
|
|
||||||
|
void TESTFUNC::set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size)
|
||||||
|
{
|
||||||
|
std::vector<triplt_cd> val_triplt;
|
||||||
|
for (size_t i = 0; i < nz_size; i++)
|
||||||
|
{
|
||||||
|
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TESTFUNC::set_preconditioner()
|
||||||
|
{
|
||||||
|
// 1 Preconditioning using the incomplete LU decomposition
|
||||||
|
/*
|
||||||
|
for (size_t i = 0; i < n_size; i++)
|
||||||
|
{
|
||||||
|
p[i] = 1.0/kernel.coeff(i, i);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// 2. Preconditioning using the incomplete LU decomposition
|
||||||
|
//incomplete_LU(kernel, l_tri, u_tri);
|
||||||
|
|
||||||
|
// 3. Preconditioning using the incomplete Cholesky decomposition
|
||||||
|
clcg_incomplete_Cholesky(kernel, l_tri);
|
||||||
|
u_tri = l_tri.transpose();
|
||||||
|
|
||||||
|
// 4. Preconditioning using compressed incomplete decompositions
|
||||||
|
/*
|
||||||
|
vector_cd one = Eigen::VectorXcd::Ones(n_size);
|
||||||
|
vector_cd x = Eigen::VectorXcd::Zero(n_size);
|
||||||
|
|
||||||
|
solve_lower_triangle(l_tri, one, x);
|
||||||
|
solve_upper_triangle(u_tri, x, p);
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[]) try
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1K_cA";
|
||||||
|
std::string answerPath = "data/case_1K_cB";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
complex_d *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
complex_d *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
complex_d *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
TESTFUNC test(N);
|
||||||
|
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||||
|
test.set_preconditioner();
|
||||||
|
|
||||||
|
vector_cd B, ANS;
|
||||||
|
B.resize(N);
|
||||||
|
ANS.resize(N);
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
B[i] = b[i];
|
||||||
|
ANS[i] = ans_x[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************准备工作完成************************/
|
||||||
|
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-12;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
test.set_clcg_parameter(self_para);
|
||||||
|
test.set_report_interval(10);
|
||||||
|
|
||||||
|
Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));
|
||||||
|
|
||||||
|
test.MinimizePreconditioned(m, B, CLCG_PCG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
m.setZero();
|
||||||
|
test.MinimizePreconditioned(m, B, CLCG_PBICG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||||
|
|
||||||
|
ANS.resize(0);
|
||||||
|
B.resize(0);
|
||||||
|
m.resize(0);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
catch (std::exception &e)
|
||||||
|
{
|
||||||
|
std::cerr << e.what() << std::endl;
|
||||||
|
}
|
||||||
312
src/sample/sample8.cu
Normal file
312
src/sample/sample8.cu
Normal file
@@ -0,0 +1,312 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "../lib/lcg_cuda.h"
|
||||||
|
|
||||||
|
/**
 * @brief Loads a real COO matrix and right-hand side from a binary file.
 *
 * File layout as read here: int N, int nz, then nz records of
 * (int row, int col, double value), then N double right-hand-side entries in
 * one contiguous run.
 *
 * All four arrays are allocated with new[]; the caller releases them with
 * delete[]. On any failure the arrays allocated so far are released, the
 * output pointers are reset to nullptr and the exception is rethrown.
 *
 * @param filePath  path of the binary input file
 * @param pN        receives the matrix dimension
 * @param pnz       receives the number of stored entries
 * @param cooVal    receives the entry values (length nz)
 * @param cooRowIdx receives the row indices (length nz)
 * @param cooColIdx receives the column indices (length nz)
 * @param b         receives the right-hand side (length N)
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative size
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
    int **cooRowIdx, int **cooColIdx, double **b)
{
    *cooVal = nullptr;
    *cooRowIdx = nullptr;
    *cooColIdx = nullptr;
    *b = nullptr;

    // Throw on open failure or short read instead of continuing with garbage.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);

    try
    {
        in.open(filePath, std::ios::binary);

        in.read(reinterpret_cast<char*>(pN), sizeof(int));
        in.read(reinterpret_cast<char*>(pnz), sizeof(int));
        // A negative size means a corrupt header; setstate throws via the mask.
        if (*pN < 0 || *pnz < 0) in.setstate(std::ios::failbit);

        *cooVal = new double[*pnz]{};
        *cooRowIdx = new int[*pnz]{};
        *cooColIdx = new int[*pnz]{};
        *b = new double[*pN]{};

        for (int i = 0; i < *pnz; ++i)
        {
            in.read(reinterpret_cast<char*>(&(*cooRowIdx)[i]), sizeof(int));
            in.read(reinterpret_cast<char*>(&(*cooColIdx)[i]), sizeof(int));
            in.read(reinterpret_cast<char*>(&(*cooVal)[i]), sizeof(double));
        }

        in.read(reinterpret_cast<char*>(*b), sizeof(double)*(*pN));
    }
    catch (...)
    {
        delete[] *cooVal;    *cooVal = nullptr;
        delete[] *cooRowIdx; *cooRowIdx = nullptr;
        delete[] *cooColIdx; *cooColIdx = nullptr;
        delete[] *b;         *b = nullptr;
        throw;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Loads a reference solution vector from a binary file.
 *
 * File layout as read here: int N followed by N double values. The array is
 * allocated with new[]; the caller releases it with delete[]. On failure the
 * array is released, *x is reset to nullptr and the exception is rethrown.
 *
 * @param filePath path of the binary answer file
 * @param pN       receives the vector length
 * @param x        receives the solution entries (length N)
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative size
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    *x = nullptr;

    // Throw on open failure or short read instead of continuing with garbage.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);

    try
    {
        in.open(filePath, std::ios::binary);

        in.read(reinterpret_cast<char*>(pN), sizeof(int));
        if (*pN < 0) in.setstate(std::ios::failbit);

        *x = new double[*pN]{};

        in.read(reinterpret_cast<char*>(*x), sizeof(double)*(*pN));
    }
    catch (...)
    {
        delete[] *x;
        *x = nullptr;
        throw;
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Euclidean norm of (a - b) divided by the number of entries.
 *
 * Computes sqrt(sum_i (a[i]-b[i])^2) / n.
 *
 * @param a first array (length n)
 * @param b second array (length n)
 * @param n number of entries to compare
 * @return the scaled error, or 0 when n is not positive
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    // Nothing to compare: avoid 0/0, and avoid the out-of-bounds walk that an
    // unsigned loop index would perform for a negative n.
    if (n <= 0) return 0.0;

    lcg_float avg = 0.0;
    for (int i = 0; i < n; i++)
    {
        avg += (a[i] - b[i])*(a[i] - b[i]);
    }
    return sqrt(avg)/n;
}
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
lcg_float one = 1.0;
|
||||||
|
lcg_float zero = 0.0;
|
||||||
|
|
||||||
|
void *d_buf;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
double *d_A;
|
||||||
|
double *d_pd;
|
||||||
|
double *d_ic;
|
||||||
|
|
||||||
|
cusparseMatDescr_t descr_A = 0;
|
||||||
|
cusparseMatDescr_t descr_L = 0;
|
||||||
|
csric02Info_t icinfo_A = 0;
|
||||||
|
csrsv2Info_t info_L = 0;
|
||||||
|
csrsv2Info_t info_LT = 0;
|
||||||
|
|
||||||
|
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
|
||||||
|
{
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
x, &zero, prod_Ax, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaMx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
|
||||||
|
{
|
||||||
|
void *d_x, *d_Ax;
|
||||||
|
cusparseDnVecGetValues(x, &d_x);
|
||||||
|
cusparseDnVecGetValues(prod_Ax, &d_Ax);
|
||||||
|
|
||||||
|
cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (double*) d_x, (double*) d_pd,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
||||||
|
n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (double*) d_pd, (double*) d_Ax,
|
||||||
|
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||||
|
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (converge <= param->epsilon) {
|
||||||
|
std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_10K_A";
|
||||||
|
std::string answerPath = "data/case_10K_B";
|
||||||
|
|
||||||
|
int N;
|
||||||
|
int nz;
|
||||||
|
double *A;
|
||||||
|
int *rowIdxA;
|
||||||
|
int *colIdxA;
|
||||||
|
double *b;
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
double *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
// Create handles
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(double));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_pd, N * sizeof(double));
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_R_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B;
|
||||||
|
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
// --- Start of the preconditioning part ---
|
||||||
|
|
||||||
|
// Copy A
|
||||||
|
cudaMalloc(&d_ic, nz * sizeof(lcg_float));
|
||||||
|
cudaMemcpy(d_ic, d_A, nz * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||||
|
|
||||||
|
int bufferSize, bufferSize_A, bufferSize_L, bufferSize_LT;
|
||||||
|
bufferSize = bufferSize_B;
|
||||||
|
|
||||||
|
// create descriptor for matrix A
|
||||||
|
cusparseCreateMatDescr(&descr_A);
|
||||||
|
|
||||||
|
// initialize properties of matrix A
|
||||||
|
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// create descriptor for matrix L
|
||||||
|
cusparseCreateMatDescr(&descr_L);
|
||||||
|
|
||||||
|
// initialize properties of matrix L
|
||||||
|
cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||||
|
cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
|
||||||
|
cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||||
|
cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create empty info objects for incomplete-cholesky factorization
|
||||||
|
cusparseCreateCsric02Info(&icinfo_A);
|
||||||
|
cusparseCreateCsrsv2Info(&info_L);
|
||||||
|
cusparseCreateCsrsv2Info(&info_LT);
|
||||||
|
|
||||||
|
// Compute buffer size in computing ic factorization
|
||||||
|
cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
|
||||||
|
d_colIdxA, icinfo_A, &bufferSize_A);
|
||||||
|
cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
|
||||||
|
cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT);
|
||||||
|
|
||||||
|
bufferSize = max(max(max(bufferSize, bufferSize_A), bufferSize_L), bufferSize_LT);
|
||||||
|
cudaMalloc(&d_buf, bufferSize);
|
||||||
|
|
||||||
|
// Perform incomplete-Cholesky factorization: analysis phase
|
||||||
|
cusparseDcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
|
||||||
|
d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
|
||||||
|
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
// Perform incomplete-Cholesky factorization: solve phase
|
||||||
|
cusparseDcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
|
||||||
|
icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
|
||||||
|
|
||||||
|
// --- End of the preconditioning part ---
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
lcg_para self_para = lcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
double *host_m = new double[N];
|
||||||
|
|
||||||
|
// Solve with CG
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Solve with CGS
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Solve with PCG
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lcg_solver_preconditioned_cuda(cudaAx, cudaMx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_PCG);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
delete[] host_m;
|
||||||
|
|
||||||
|
// Free Device memory
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_pd);
|
||||||
|
cudaFree(d_ic);
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cudaFree(d_buf);
|
||||||
|
|
||||||
|
cusparseDestroyMatDescr(descr_A);
|
||||||
|
cusparseDestroyMatDescr(descr_L);
|
||||||
|
cusparseDestroyCsric02Info(icinfo_A);
|
||||||
|
cusparseDestroyCsrsv2Info(info_L);
|
||||||
|
cusparseDestroyCsrsv2Info(info_LT);
|
||||||
|
|
||||||
|
// Free handles
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
221
src/sample/sample9.cu
Normal file
221
src/sample/sample9.cu
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
/******************************************************
|
||||||
|
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* LibLCG is distributed under a dual licensing scheme. You can
|
||||||
|
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||||
|
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||||
|
* either version 2 of the License, or (at your option) any later version.
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||||
|
* using the LibLCG, please consider the option to obtain a commercial
|
||||||
|
* license for a fee. These licenses are offered by the LibLCG developing
|
||||||
|
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||||
|
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||||
|
* Please do not forget to include some description of your company and the
|
||||||
|
* realm of its activities. Also add information on how to contact you by
|
||||||
|
* electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#include "../lib/clcg_cuda.h"
|
||||||
|
|
||||||
|
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
|
||||||
|
int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
in.read((char*)pnz, sizeof(int));
|
||||||
|
|
||||||
|
*cooVal = new cuDoubleComplex[*pnz]{};
|
||||||
|
*cooRowIdx = new int[*pnz]{};
|
||||||
|
*cooColIdx = new int[*pnz]{};
|
||||||
|
*b = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
for (int i = 0; i < *pnz; ++i)
|
||||||
|
{
|
||||||
|
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||||
|
in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
|
||||||
|
}
|
||||||
|
|
||||||
|
in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
|
||||||
|
{
|
||||||
|
std::ifstream in(filePath, std::ios::binary);
|
||||||
|
|
||||||
|
in.read((char*)pN, sizeof(int));
|
||||||
|
|
||||||
|
*x = new cuDoubleComplex[*pN]{};
|
||||||
|
|
||||||
|
in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Error measure between two complex vectors.
 *
 * Computes sqrt(sum_i |a[i] - b[i]|^2) / n, i.e. the Euclidean norm of the
 * element-wise difference divided by the number of elements.
 *
 * @param a  First vector (n elements).
 * @param b  Second vector (n elements).
 * @param n  Number of elements to compare.
 * @return   The error measure, or 0.0 when n is not positive (nothing to
 *           compare; this also avoids a division by zero).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sum_sq = 0.0;
    // The index has the same signedness as n, so the bound is never
    // reinterpreted as a huge unsigned value.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||||
|
|
||||||
|
// Declare as global variables
|
||||||
|
cuDoubleComplex one, zero;
|
||||||
|
|
||||||
|
void *d_buf;
|
||||||
|
cusparseSpMatDescr_t smat_A;
|
||||||
|
|
||||||
|
int *d_rowIdxA; // COO
|
||||||
|
int *d_rowPtrA; // CSR
|
||||||
|
int *d_colIdxA;
|
||||||
|
cuDoubleComplex *d_A;
|
||||||
|
cuDoubleComplex *d_B;
|
||||||
|
|
||||||
|
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||||
|
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
|
||||||
|
cusparseOperation_t oper_t)
|
||||||
|
{
|
||||||
|
one.x = 1.0; one.y = 0.0;
|
||||||
|
zero.x = 0.0; zero.y = 0.0;
|
||||||
|
// Calculate the product of A*x
|
||||||
|
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
|
||||||
|
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||||
|
{
|
||||||
|
if (converge <= param->epsilon) {
|
||||||
|
std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::string inputPath = "data/case_1K_cA";
|
||||||
|
std::string answerPath = "data/case_1K_cB";
|
||||||
|
|
||||||
|
int N, nz;
|
||||||
|
int *rowIdxA, *colIdxA;
|
||||||
|
cuDoubleComplex *A, *b;
|
||||||
|
|
||||||
|
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||||
|
|
||||||
|
cuDoubleComplex *ans_x;
|
||||||
|
readAnswer(answerPath, &N, &ans_x);
|
||||||
|
|
||||||
|
std::clog << "N = " << N << std::endl;
|
||||||
|
std::clog << "nz = " << nz << std::endl;
|
||||||
|
|
||||||
|
// Create handles
|
||||||
|
cublasHandle_t cubHandle;
|
||||||
|
cusparseHandle_t cusHandle;
|
||||||
|
|
||||||
|
cublasCreate(&cubHandle);
|
||||||
|
cusparseCreate(&cusHandle);
|
||||||
|
|
||||||
|
// Allocate GPU memory & copy matrix/vector to device
|
||||||
|
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
|
||||||
|
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
|
||||||
|
cudaMalloc(&d_colIdxA, nz * sizeof(int));
|
||||||
|
cudaMalloc(&d_B, N * sizeof(cuDoubleComplex));
|
||||||
|
|
||||||
|
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Convert matrix A from COO format to CSR format
|
||||||
|
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
|
||||||
|
|
||||||
|
// Create sparse matrix
|
||||||
|
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
|
||||||
|
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
|
||||||
|
|
||||||
|
// This is just used to get bufferSize;
|
||||||
|
cusparseDnVecDescr_t dvec_tmp;
|
||||||
|
cusparseCreateDnVec(&dvec_tmp, N, d_B, CUDA_C_64F);
|
||||||
|
|
||||||
|
size_t bufferSize_B, bufferSize_B2;
|
||||||
|
|
||||||
|
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
|
||||||
|
|
||||||
|
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE, &one, smat_A,
|
||||||
|
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B2);
|
||||||
|
|
||||||
|
if (bufferSize_B2 > bufferSize_B) bufferSize_B = bufferSize_B2;
|
||||||
|
cudaMalloc(&d_buf, bufferSize_B);
|
||||||
|
|
||||||
|
// Declare an initial solution
|
||||||
|
clcg_para self_para = clcg_default_parameters();
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
self_para.abs_diff = 0;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
cuDoubleComplex *host_m = new cuDoubleComplex[N];
|
||||||
|
|
||||||
|
// Solve with BICG
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Solve with BICG_SYM
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
host_m[i].x = 0.0; host_m[i].y = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG_SYM);
|
||||||
|
lcg_error_str(ret);
|
||||||
|
|
||||||
|
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
|
||||||
|
|
||||||
|
// Free Host memory
|
||||||
|
delete[] A;
|
||||||
|
delete[] rowIdxA;
|
||||||
|
delete[] colIdxA;
|
||||||
|
delete[] b;
|
||||||
|
delete[] ans_x;
|
||||||
|
delete[] host_m;
|
||||||
|
|
||||||
|
// Free Device memory
|
||||||
|
cudaFree(d_A);
|
||||||
|
cudaFree(d_rowIdxA);
|
||||||
|
cudaFree(d_rowPtrA);
|
||||||
|
cudaFree(d_colIdxA);
|
||||||
|
cudaFree(d_B);
|
||||||
|
|
||||||
|
cusparseDestroyDnVec(dvec_tmp);
|
||||||
|
cusparseDestroySpMat(smat_A);
|
||||||
|
cudaFree(d_buf);
|
||||||
|
|
||||||
|
// Free handles
|
||||||
|
cublasDestroy(cubHandle);
|
||||||
|
cusparseDestroy(cusHandle);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user