initial upload

This commit is contained in:
张壹 2024-09-11 13:39:28 +08:00
parent c7e8487a02
commit 834df92696
68 changed files with 21889 additions and 2 deletions

10
.gitignore vendored
View File

@ -1,4 +1,3 @@
# ---> C++
# Prerequisites
*.d
@ -32,3 +31,12 @@
*.out
*.app
# folder preferences and build folder
.DS_Store
build/
pack/
.vscode/
out/
*.sh
case_*
config.h

30
CMakeLists.txt Normal file
View File

@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.15.2)
# Project name, version and the language enabled at the top level
project(LibLCG VERSION 3.1 LANGUAGES CXX)
# Load the package-config helper commands (presumably used by the install rules under src/ -- confirm there)
include(CMakePackageConfigHelpers)
message(STATUS "Platform: " ${CMAKE_HOST_SYSTEM_NAME})
# CMake's default install prefix is C:/Program\ Files/${Project_Name} on Windows and /usr/local on Linux/Unix
message(STATUS "Install prefix: " ${CMAKE_INSTALL_PREFIX})
# CMake build type (prints nothing after the colon when CMAKE_BUILD_TYPE has not been set)
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
# Build options; override on the command line with -D<option>=ON/OFF
option(LibLCG_OPENMP "Use OpenMP" ON) # Set OFF to disable the functionality
option(LibLCG_EIGEN "Use Eigen" ON)
option(LibLCG_STD_COMPLEX "Use STD complex" ON)
option(LibLCG_CUDA "Use CUDA" ON)
message(STATUS "Use OpenMP: " ${LibLCG_OPENMP})
message(STATUS "Use Eigen: " ${LibLCG_EIGEN})
message(STATUS "Use STD complex: " ${LibLCG_STD_COMPLEX})
message(STATUS "Use CUDA: " ${LibLCG_CUDA})
# Generate src/lib/config.h from the config.h.in template at configure time
configure_file(
"${PROJECT_SOURCE_DIR}/config.h.in"
"${PROJECT_SOURCE_DIR}/src/lib/config.h"
)
# Process the build rules found in src/
add_subdirectory(src/)

2537
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

524
LICENSE Normal file
View File

@ -0,0 +1,524 @@
LibLCG License
--------------
LibLCG is distributed under a dual licensing scheme. You can
redistribute it and/or modify it under the terms of the GNU Lesser
General Public License (LGPL) as published by the Free Software
Foundation, either version 2 of the License, or (at your option) any
later version. A copy of the GNU Lesser General Public License is
reproduced below.
If the terms and conditions of the LGPL v.2. would prevent you from
using the LibLCG, please consider the option to obtain a commercial
license for a fee. These licenses are offered by the LibLCG developing
team. As a rule, licenses are provided "as-is", unlimited in time for
a one time fee. Please send corresponding requests to:
yizhang-geo@zju.edu.cn. Please do not forget to include some
description of your company and the realm of its activities. Also add
information on how to contact you by electronic and paper mail.
=====================================================================
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random
Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!

20
LibLCGConfig.cmake.in Normal file
View File

@ -0,0 +1,20 @@
# Package configuration template; the @...@ placeholders are substituted at configure time.
@PACKAGE_INIT@
# Version of the installed package
set(@PROJECT_NAME@_Version "@PROJECT_VERSION@")
# Install locations; set_and_check() raises an error if the path does not exist.
# INC_DIR/INCLUDE_DIR and LIB_DIR/LIBRARY_DIR are set to the same values.
set_and_check(@PROJECT_NAME@_INSTALL_PREFIX "${PACKAGE_PREFIX_DIR}")
set_and_check(@PROJECT_NAME@_INC_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_LIB_DIR "${PACKAGE_PREFIX_DIR}/lib")
set_and_check(@PROJECT_NAME@_LIBRARY_DIR "${PACKAGE_PREFIX_DIR}/lib")
# Library name, exposed under two variable names
set(@PROJECT_NAME@_LIB lcg)
set(@PROJECT_NAME@_LIBRARY lcg)
set(@PROJECT_NAME@_FOUND 1)
# Values of the build options at the time the package was configured
set(@PROJECT_NAME@_OPENMP @LibLCG_OPENMP@)
set(@PROJECT_NAME@_EIGEN @LibLCG_EIGEN@)
set(@PROJECT_NAME@_STD_COMPLEX @LibLCG_STD_COMPLEX@)
set(@PROJECT_NAME@_CUDA @LibLCG_CUDA@)
# include target information
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")

225
README.md
View File

@ -1,2 +1,225 @@
# liblcg
# C++ Library of the Linear Conjugate Gradient Methods (LibLCG) 说明文档
张壹（yizhang-geo@zju.edu.cn）
_浙江大学地球科学学院·地球物理研究所_
**此说明仅覆盖算法库的简单介绍及使用,更详细的内容请查看代码注释。如果还有问题,请发邮件联系我。同时也欢迎有兴趣的同学加入开发团队!**
## 简介
liblcg 是一个高效的、可扩展的 C++ 线性共轭梯度算法库，在原生数据结构接口的基础上，同时提供基于 Eigen3 和 CUDA 的算法接口，可以方便地实现基于 CPU 或 GPU 并行的加速计算。其中，基于 Eigen3 的算法包含了稠密与稀疏矩阵的实现，而基于 CUDA 的算法主要为稀疏矩阵的实现。liblcg 包含多种实数与复数域的共轭梯度算法与其他一些迭代求解方法。目前已有的方法包括：共轭梯度法、预优的共轭梯度算法、共轭梯度平方算法、双稳共轭梯度算法、BB 步共轭梯度投影法与 SPG 共轭梯度投影法；复数域的双共轭梯度法、共轭梯度平方法、预优的共轭梯度法与 TFQMR 法。共轭梯度法广泛应用于无约束与不等式约束的线性最优化问题，拥有优良的收敛与计算效率。
共轭梯度算法可用于求解如下形式的线性方程组:
```
Ax = B
```
其中，A 是一个 N 阶的方阵，x 为 N\*1 大小的待求解的模型向量，B 为 N\*1 大小的需拟合的目标向量。需要注意的是，不同种类的共轭梯度算法对 A 可能有不同的要求，比如必须是正定的或者对称的。不同算法的具体要求可以查阅其他参考文献或者查看代码中的注释。
## 安装
算法库使用 CMake 工具进行构建，可在不同操作平台生成相应的 Makefile 或工程文件。
### 编译选项
算法库目前可用的编译选项有:
* LibLCG_OPENMP：是否使用 OpenMP 进行加速，需要安装 OpenMP。默认为 ON。
* LibLCG_EIGEN：是否编译基于 Eigen 的算法与接口，需要安装 Eigen。默认为 ON。
* LibLCG_STD_COMPLEX：是否使用 std::complex\<double\> 作为复数的默认类型。默认为 ON。
* LibLCG_CUDA：是否编译基于 CUDA 的算法与接口，需要安装 CUDA。默认为 ON。
用户可以使用 cmake 命令中的 -D 选项对编译选项进行设置，比如关闭 LibLCG_EIGEN：
```shell
cmake -DLibLCG_EIGEN=OFF
```
### Linux 与 MacOS
liblcg的默认安装路径为 /usr/local。头文件与动态库分别安装于 include 与 lib 文件夹。具体的编译与安装步骤如下:
1. 下载安装 CMake 软件；
2. 下载安装 GCC 编译器（常见系统已内置）；
3. 在源文件路径内使用如下命令进行编译与安装：
```shell
mkdir build && cd build && cmake .. && make install
```
### Windows
#### MinGW 和 GCC
Windows 系统不包含 GNU 编译环境，用户需自行下载并配置。方法如下：
1. 下载 MinGW 安装文件，并选择 gcc、pthreads 与 make 相关软件包安装；
2. 下载安装 CMake 软件；
3. 添加 CMake 与 MinGW 可执行文件路径至 Windows 环境变量；
4. 在源文件路径内使用如下命令进行编译与安装：
```shell
mkdir build && cd build && cmake .. -G "MinGW Makefiles" && make install
```
默认的安装路径为C:/Program\\ Files。头文件与动态库分别安装于 include 与 lib 文件夹。
**注意:用户需要手动添加头文件与动态库地址到计算机的环境变量中。**
#### Visual Studio
用户可使用 CMake 工具构建 VS 工程文件并编译使用动态库。方法如下：
1. 下载安装 Visual Studio 软件；
2. 下载安装 CMake 软件；
3. 在源文件路径内使用如下命令生成 VS 工程文件：
```shell
mkdir build && cd build && cmake .. -G "Visual Studio 16 2019"
```
_注如需生成其他版本的VS工程文件请使用-G命令查看相应的识别码。_
4. 使用 Visual Studio 打开.sln工程文件并编译动态库。
## 使用与编译
用户使用库函数时需在源文件中引入相应的头文件,如:
```cpp
#include "lcg/lcg.h"
```
编译可执行文件时需链接lcg动态库。以g++为例:
```shell
g++ example.cpp -llcg -o example_out
```
## 快速开始
要使用 liblcg 求解线性方程组 Ax=B，用户需要定义 Ax 乘积的计算函数（回调函数），该函数的功能为计算不同的 x 所对应的乘积 Ax。以实数类型的共轭梯度算法为例，其回调函数的接口定义为：
```cpp
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_size);
```
其中，`x` 为输入的向量，`prod_Ax` 为返回的乘积向量，`n_size` 为这两个向量的长度。注意此处参数列表中并不包含矩阵 A，这意味着 A 必须为全局或者类变量。这样设计的主要原因是：在某些复杂最优化问题的编程中，计算并存储 A 并不实际或者划算，此时一般采用的策略是存储相关变量且仅计算 Ax 的乘积，所以矩阵 A 并不总是存在。
用户在定义Ax计算函数后即可调用求解函数 lcg_solver() 对线性方程组进行求解。以无约束的求解函数为例,其声明如下:
```cpp
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
```
其中:
1. `lcg_axfunc_ptr Afp` 为正演计算的回调函数；
2. `lcg_progress_ptr Pfp` 监控迭代过程的回调函数（非必须，无需监控时使用 nullptr 参数即可）；
3. `lcg_float* m` 初始解向量，迭代取得的解也保存于此数组；
4. `const lcg_float* B` Ax = B 中的 B 项；
5. `const int n_size` 解向量的大小；
6. `const lcg_para* param` 迭代使用的参数，此参数为 nullptr 即使用默认参数；
7. `void* instance` 传入的实例对象，此函数在类中使用即为类的 this 指针，在普通函数中使用时即为 nullptr；
8. `lcg_solver_enum solver_id` 求解函数使用的求解方法，具体的方法代号可查看对应的头文件。
### 一个简单的例子
```cpp
#include "cmath"
#include "iostream"
#include "lcg/lcg.h"
#define M 100
#define N 80
// 返回两个数组元素之间的最大差值
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
lcg_float max = -1;
for (int i = 0; i < size; i++)
{
max = lcg_max(sqrt((a[i] - b[i])*(a[i] - b[i])), max);
}
return max;
}
// 普通二维数组做核矩阵
lcg_float **kernel;
// 中间结果数组
lcg_float *tmp_arr;
// 计算核矩阵乘向量的乘积 lcg_solver的回调函数
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
{
// 注意核矩阵实际为 kernel^T * kernel大小为N*N
lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal); // tmp_tar = kernel * x
lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose); // prod_Ax = kernel^T * tmp_tar
return;
}
// 定义监控函数 lcg_solver的回调函数
// 这个函数显示当前的迭代次数与收敛值
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
{
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
return 0;
}
int main(int argc, char const *argv[])
{
// 开辟数组空间
kernel = lcg_malloc(M, N);
tmp_arr = lcg_malloc(M);
// 为核矩阵赋初值
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
// 生成一组理论解
lcg_float *fm = lcg_malloc(N);
lcg_vecrnd(fm, 1.0, 2.0, N);
// 计算共轭梯度B项
lcg_float *B = lcg_malloc(N);
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
// 设置共轭梯度参数
lcg_para self_para = lcg_default_parameters();
self_para.epsilon = 1e-5;
self_para.abs_diff = 0;
// 声明一组解
lcg_float *m = lcg_malloc(N);
lcg_vecset(m, 0.0, N);
// 使用标准共轭梯度方法LCG_CG求解线性方程组
// 将回调函数传递给solver
// 由于回调函数为全局函数因此instance变量的值为NULL
int ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
// 销毁数组
lcg_free(kernel, M);
lcg_free(tmp_arr);
lcg_free(fm);
lcg_free(B);
lcg_free(m);
return 0;
}
```
**完整的例子储存在[sample](src/sample)文件夹内。**
## 类模版
liblcg为不同类型的共轭梯度算法定义了通用的求解类模版，包含了类中函数的指针代理及通用的监控函数实现，用户可直接继承并使用。需要注意的是，这些类模版中定义了纯虚的函数接口，用户需要全部实现，其中没有用到的接口定义成空函数即可。以实数的求解类模版为例，需要实现的接口函数包括：
```cpp
void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
```
其中`AxProduct`是Ax的计算函数`MxProduct`是预优过程的计算函数即M^-1x。

4
config.h.in Normal file
View File

@ -0,0 +1,4 @@
#cmakedefine LibLCG_OPENMP
#cmakedefine LibLCG_EIGEN
#cmakedefine LibLCG_STD_COMPLEX
#cmakedefine LibLCG_CUDA

11
data/README Normal file
View File

@ -0,0 +1,11 @@
case_*_A: Full symmetric matrix
[ N (int) | nz (int) ]
[ RowIdx (int) | ColIdx (int) | Val (double) ] * nz
[ b (double) * N ]
[ d (double) * N ] (complex matrix only)
case_*_B: Vector
[ N (int) ]
[ x (double) * N]

BIN
data/cases.7z Normal file

Binary file not shown.

105
data/get_cdat.cpp Normal file
View File

@ -0,0 +1,105 @@
#include "../src/lib/lcg_complex.h"
#include "iostream"
#include "fstream"
#include "vector"
#include "Eigen/Sparse"
#define random(x) (rand()%x)
typedef Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor> spmat_cd; // 注意Eigen默认的稀疏矩阵排序为列优先
typedef Eigen::Triplet<lcg_complex> triplt_cd;
int main(int argc, char const *argv[])
{
int N = 1000000;
int nz = 1013000;
lcg_complex *v = new lcg_complex[nz];
lcg_complex *x = new lcg_complex[N];
lcg_complex *b = new lcg_complex[N];
lcg_complex one(1.0, 1.0), none(-1.0, -1.0), zero(0.0, 0.0);
clcg_vecrnd(v, 1.0*one, 10.0*one, nz);
clcg_vecrnd(x, 1.0*one, 2.0*one, N);
clcg_vecset(b, zero, N);
std::vector<triplt_cd> val_triplt;
val_triplt.reserve(2*(nz-N) + N);
for (size_t i = 0; i < N; i++)
{
val_triplt.push_back(triplt_cd(i, i, v[i]));
b[i] += v[i]*x[i];
}
srand((int)time(0));
int r, c;
size_t j = N;
while (j < nz)
{
r = random(N);
c = random(N);
if (r != c)
{
val_triplt.push_back(triplt_cd(r, c, v[j]));
val_triplt.push_back(triplt_cd(c, r, v[j]));
b[r] += v[j]*x[c];
b[c] += v[j]*x[r];
j++;
}
}
spmat_cd A;
A.resize(N, N);
A.setZero();
A.setFromTriplets(val_triplt.begin(), val_triplt.end());
std::ofstream Aout, Bout;
Aout.open("case_1M_cA", std::ios::binary);
Bout.open("case_1M_cB", std::ios::binary);
lcg_complex tmp;
nz = A.nonZeros();
Aout.write((char*)&N, sizeof(int));
Aout.write((char*)&nz, sizeof(int));
for (size_t i = 0; i < N; i++)
{
for (Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor>::InnerIterator it(A, i); it; ++it) // 列循环
{
r = it.row();
c = it.col();
tmp = it.value();
Aout.write((char*)&r, sizeof(int));
Aout.write((char*)&c, sizeof(int));
Aout.write((char*)&tmp, sizeof(lcg_complex));
}
}
for (size_t i = 0; i < N; i++)
{
tmp = b[i];
Aout.write((char*)&tmp, sizeof(lcg_complex));
}
Aout.close();
Bout.write((char*)&N, sizeof(int));
for (size_t i = 0; i < N; i++)
{
tmp = x[i];
Bout.write((char*)&tmp, sizeof(lcg_complex));
}
Bout.close();
delete[] v;
delete[] x;
delete[] b;
return 0;
}

577
doxy/doxygen.sty Normal file
View File

@ -0,0 +1,577 @@
% stylesheet for doxygen 1.8.17
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{doxygen}
% Packages used by this style file
\RequirePackage{alltt}
%%\RequirePackage{array} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
\RequirePackage{calc}
\RequirePackage{float}
%%\RequirePackage{ifthen} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
\RequirePackage{verbatim}
\RequirePackage[table]{xcolor}
\RequirePackage{longtable_doxygen}
\RequirePackage{tabu_doxygen}
\RequirePackage{fancyvrb}
\RequirePackage{tabularx}
\RequirePackage{multirow}
\RequirePackage{hanging}
\RequirePackage{ifpdf}
\RequirePackage{adjustbox}
\RequirePackage{amssymb}
\RequirePackage{stackengine}
\RequirePackage[normalem]{ulem} % for strikeout, but don't modify emphasis
%---------- Internal commands used in this style file ----------------
\newcommand{\ensurespace}[1]{%
\begingroup%
\setlength{\dimen@}{#1}%
\vskip\z@\@plus\dimen@%
\penalty -100\vskip\z@\@plus -\dimen@%
\vskip\dimen@%
\penalty 9999%
\vskip -\dimen@%
\vskip\z@skip% hide the previous |\vskip| from |\addvspace|
\endgroup%
}
\newcommand{\DoxyHorRuler}[1]{%
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
\ifthenelse{#1=0}%
{%
\hrule%
}%
{%
\hrulefilll%
}%
}
\newcommand{\DoxyLabelFont}{}
\newcommand{\entrylabel}[1]{%
{%
\parbox[b]{\labelwidth-4pt}{%
\makebox[0pt][l]{\DoxyLabelFont#1}%
\vspace{1.5\baselineskip}%
}%
}%
}
\newenvironment{DoxyDesc}[1]{%
\ensurespace{4\baselineskip}%
\begin{list}{}{%
\settowidth{\labelwidth}{20pt}%
%\setlength{\parsep}{0pt}%
\setlength{\itemsep}{0pt}%
\setlength{\leftmargin}{\labelwidth+\labelsep}%
\renewcommand{\makelabel}{\entrylabel}%
}%
\item[#1]%
}{%
\end{list}%
}
\newsavebox{\xrefbox}
\newlength{\xreflength}
\newcommand{\xreflabel}[1]{%
\sbox{\xrefbox}{#1}%
\setlength{\xreflength}{\wd\xrefbox}%
\ifthenelse{\xreflength>\labelwidth}{%
\begin{minipage}{\textwidth}%
\setlength{\parindent}{0pt}%
\hangindent=15pt\bfseries #1\vspace{1.2\itemsep}%
\end{minipage}%
}{%
\parbox[b]{\labelwidth}{\makebox[0pt][l]{\textbf{#1}}}%
}%
}
%---------- Commands used by doxygen LaTeX output generator ----------
% Used by <pre> ... </pre>
\newenvironment{DoxyPre}{%
\small%
\begin{alltt}%
}{%
\end{alltt}%
\normalsize%
}
% Necessary for redefining not defined characters, i.e. "Replacement Character" in tex output.
\newlength{\CodeWidthChar}
\newlength{\CodeHeightChar}
\settowidth{\CodeWidthChar}{?}
\settoheight{\CodeHeightChar}{?}
% Necessary for hanging indent
\newlength{\DoxyCodeWidth}
\newcommand\DoxyCodeLine[1]{\hangpara{\DoxyCodeWidth}{1}{#1}\par}
\newcommand\NiceSpace{%
\discretionary{}{\kern\fontdimen2\font}{\kern\fontdimen2\font}%
}
% Used by @code ... @endcode
\newenvironment{DoxyCode}[1]{%
\par%
\scriptsize%
\normalfont\ttfamily%
\rightskip0pt plus 1fil%
\settowidth{\DoxyCodeWidth}{000000}%
\settowidth{\CodeWidthChar}{?}%
\settoheight{\CodeHeightChar}{?}%
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
\ifthenelse{\equal{#1}{0}}
{
{\lccode`~32 \lowercase{\global\let~}\NiceSpace}\obeyspaces%
}
{
{\lccode`~32 \lowercase{\global\let~}}\obeyspaces%
}
}{%
\normalfont%
\normalsize%
\settowidth{\CodeWidthChar}{?}%
\settoheight{\CodeHeightChar}{?}%
}
% Redefining not defined characters, i.e. "Replacement Character" in tex output.
\def\ucr{\adjustbox{width=\CodeWidthChar,height=\CodeHeightChar}{\stackinset{c}{}{c}{-.2pt}{%
\textcolor{white}{\sffamily\bfseries\small ?}}{%
\rotatebox{45}{$\blacksquare$}}}}
% Used by @example, @include, @includelineno and @dontinclude
\newenvironment{DoxyCodeInclude}[1]{%
\DoxyCode{#1}%
}{%
\endDoxyCode%
}
% Used by @verbatim ... @endverbatim
\newenvironment{DoxyVerb}{%
\footnotesize%
\verbatim%
}{%
\endverbatim%
\normalsize%
}
% Used by @verbinclude
\newenvironment{DoxyVerbInclude}{%
\DoxyVerb%
}{%
\endDoxyVerb%
}
% Used by numbered lists (using '-#' or <ol> ... </ol>)
\newenvironment{DoxyEnumerate}{%
\enumerate%
}{%
\endenumerate%
}
% Used by bullet lists (using '-', @li, @arg, or <ul> ... </ul>)
\newenvironment{DoxyItemize}{%
\itemize%
}{%
\enditemize%
}
% Used by description lists (using <dl> ... </dl>)
\newenvironment{DoxyDescription}{%
\description%
}{%
\enddescription%
}
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
% (only if caption is specified)
\newenvironment{DoxyImage}{%
\begin{figure}[H]%
\begin{center}%
}{%
\end{center}%
\end{figure}%
}
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
% (only if no caption is specified)
\newenvironment{DoxyImageNoCaption}{%
\begin{center}%
}{%
\end{center}%
}
% Used by @image
% (only if inline is specified)
\newenvironment{DoxyInlineImage}{%
}{%
}
% Used by @attention
\newenvironment{DoxyAttention}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @author and @authors
\newenvironment{DoxyAuthor}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @date
\newenvironment{DoxyDate}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @invariant
\newenvironment{DoxyInvariant}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @note
\newenvironment{DoxyNote}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @post
\newenvironment{DoxyPostcond}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @pre
\newenvironment{DoxyPrecond}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @copyright
\newenvironment{DoxyCopyright}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @remark
\newenvironment{DoxyRemark}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @return and @returns
\newenvironment{DoxyReturn}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @since
\newenvironment{DoxySince}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @see
\newenvironment{DoxySeeAlso}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @version
\newenvironment{DoxyVersion}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @warning
\newenvironment{DoxyWarning}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by @internal
\newenvironment{DoxyInternal}[1]{%
\paragraph*{#1}%
}{%
}
% Used by @par and @paragraph
\newenvironment{DoxyParagraph}[1]{%
\begin{DoxyDesc}{#1}%
}{%
\end{DoxyDesc}%
}
% Used by parameter lists
\newenvironment{DoxyParams}[2][]{%
\tabulinesep=1mm%
\par%
\ifthenelse{\equal{#1}{}}%
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|}}% name + description
{\ifthenelse{\equal{#1}{1}}%
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + name + desc
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + type + name + desc
}
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used for fields of simple structs
\newenvironment{DoxyFields}[1]{%
\tabulinesep=1mm%
\par%
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|X[-1,l]|}%
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used for fields simple class style enums
\newenvironment{DoxyEnumFields}[1]{%
\tabulinesep=1mm%
\par%
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used for parameters within a detailed function description
\newenvironment{DoxyParamCaption}{%
\renewcommand{\item}[2][]{\\ \hspace*{2.0cm} ##1 {\em ##2}}%
}{%
}
% Used by return value lists
\newenvironment{DoxyRetVals}[1]{%
\tabulinesep=1mm%
\par%
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used by exception lists
\newenvironment{DoxyExceptions}[1]{%
\tabulinesep=1mm%
\par%
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used by template parameter lists
\newenvironment{DoxyTemplParams}[1]{%
\tabulinesep=1mm%
\par%
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endfirsthead%
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
\hline%
\endhead%
}{%
\end{longtabu*}%
\vspace{6pt}%
}
% Used for member lists
\newenvironment{DoxyCompactItemize}{%
\begin{itemize}%
\setlength{\itemsep}{-3pt}%
\setlength{\parsep}{0pt}%
\setlength{\topsep}{0pt}%
\setlength{\partopsep}{0pt}%
}{%
\end{itemize}%
}
% Used for member descriptions
\newenvironment{DoxyCompactList}{%
\begin{list}{}{%
\setlength{\leftmargin}{0.5cm}%
\setlength{\itemsep}{0pt}%
\setlength{\parsep}{0pt}%
\setlength{\topsep}{0pt}%
\renewcommand{\makelabel}{\hfill}%
}%
}{%
\end{list}%
}
% Used for reference lists (@bug, @deprecated, @todo, etc.)
\newenvironment{DoxyRefList}{%
\begin{list}{}{%
\setlength{\labelwidth}{10pt}%
\setlength{\leftmargin}{\labelwidth}%
\addtolength{\leftmargin}{\labelsep}%
\renewcommand{\makelabel}{\xreflabel}%
}%
}{%
\end{list}%
}
% Used by @bug, @deprecated, @todo, etc.
\newenvironment{DoxyRefDesc}[1]{%
\begin{list}{}{%
\renewcommand\makelabel[1]{\textbf{##1}}%
\settowidth\labelwidth{\makelabel{#1}}%
\setlength\leftmargin{\labelwidth+\labelsep}%
}%
}{%
\end{list}%
}
% Used by parameter lists and simple sections
\newenvironment{Desc}
{\begin{list}{}{%
\settowidth{\labelwidth}{20pt}%
\setlength{\parsep}{0pt}%
\setlength{\itemsep}{0pt}%
\setlength{\leftmargin}{\labelwidth+\labelsep}%
\renewcommand{\makelabel}{\entrylabel}%
}
}{%
\end{list}%
}
% Used by tables
\newcommand{\PBS}[1]{\let\temp=\\#1\let\\=\temp}%
\newenvironment{TabularC}[1]%
{\tabulinesep=1mm
\begin{longtabu*}spread 0pt [c]{*#1{|X[-1]}|}}%
{\end{longtabu*}\par}%
\newenvironment{TabularNC}[1]%
{\begin{tabu}spread 0pt [l]{*#1{|X[-1]}|}}%
{\end{tabu}\par}%
% Used for member group headers
\newenvironment{Indent}{%
\begin{list}{}{%
\setlength{\leftmargin}{0.5cm}%
}%
\item[]\ignorespaces%
}{%
\unskip%
\end{list}%
}
% Used when hyperlinks are turned off
\newcommand{\doxyref}[3]{%
\textbf{#1} (\textnormal{#2}\,\pageref{#3})%
}
% Used to link to a table when hyperlinks are turned on
\newcommand{\doxytablelink}[2]{%
\ref{#1}%
}
% Used to link to a table when hyperlinks are turned off
\newcommand{\doxytableref}[3]{%
\ref{#3}%
}
% Used by @addindex
\newcommand{\lcurly}{\{}
\newcommand{\rcurly}{\}}
% Colors used for syntax highlighting
\definecolor{comment}{rgb}{0.5,0.0,0.0}
\definecolor{keyword}{rgb}{0.0,0.5,0.0}
\definecolor{keywordtype}{rgb}{0.38,0.25,0.125}
\definecolor{keywordflow}{rgb}{0.88,0.5,0.0}
\definecolor{preprocessor}{rgb}{0.5,0.38,0.125}
\definecolor{stringliteral}{rgb}{0.0,0.125,0.25}
\definecolor{charliteral}{rgb}{0.0,0.5,0.5}
\definecolor{vhdldigit}{rgb}{1.0,0.0,1.0}
\definecolor{vhdlkeyword}{rgb}{0.43,0.0,0.43}
\definecolor{vhdllogic}{rgb}{1.0,0.0,0.0}
\definecolor{vhdlchar}{rgb}{0.0,0.0,0.0}
% Color used for table heading
\newcommand{\tableheadbgcolor}{lightgray}%
% Version of hypertarget with correct landing location
\newcommand{\Hypertarget}[1]{\Hy@raisedlink{\hypertarget{#1}{}}}
% possibility to have sections etc. be within the margins
% unfortunately had to copy part of book.cls and add \raggedright
\makeatletter
\newcommand\doxysection{\@startsection {section}{1}{\z@}%
{-3.5ex \@plus -1ex \@minus -.2ex}%
{2.3ex \@plus.2ex}%
{\raggedright\normalfont\Large\bfseries}}
\newcommand\doxysubsection{\@startsection{subsection}{2}{\z@}%
{-3.25ex\@plus -1ex \@minus -.2ex}%
{1.5ex \@plus .2ex}%
{\raggedright\normalfont\large\bfseries}}
\newcommand\doxysubsubsection{\@startsection{subsubsection}{3}{\z@}%
{-3.25ex\@plus -1ex \@minus -.2ex}%
{1.5ex \@plus .2ex}%
{\raggedright\normalfont\normalsize\bfseries}}
\newcommand\doxyparagraph{\@startsection{paragraph}{4}{\z@}%
{3.25ex \@plus1ex \@minus.2ex}%
{-1em}%
{\raggedright\normalfont\normalsize\bfseries}}
\newcommand\doxysubparagraph{\@startsection{subparagraph}{5}{\parindent}%
{3.25ex \@plus1ex \@minus .2ex}%
{-1em}%
{\raggedright\normalfont\normalsize\bfseries}}
\makeatother
% Define caption that is also suitable in a table
\makeatletter
\def\doxyfigcaption{%
\refstepcounter{figure}%
\@dblarg{\@caption{figure}}}
\makeatother

12
doxy/footer.tex Normal file
View File

@ -0,0 +1,12 @@
% Latex footer for doxygen 1.8.17
%--- End generated contents ---
% Index
\backmatter
\newpage
\phantomsection
\clearemptydoublepage
\addcontentsline{toc}{chapter}{\indexname}
\printindex
\end{document}

174
doxy/header.tex Normal file
View File

@ -0,0 +1,174 @@
% Latex header for doxygen 1.8.17
\let\mypdfximage\pdfximage\def\pdfximage{\immediate\mypdfximage}\documentclass[twoside]{book}
%% moved from doxygen.sty due to workaround for LaTex 2019 version and unmaintained tabu package
\usepackage{ifthen}
\ifx\requestedLaTeXdate\undefined
\usepackage{array}
\else
\usepackage{array}[=2016-10-06]
\fi
%%
% Packages required by doxygen
\usepackage{fixltx2e}
\usepackage{calc}
\usepackage{doxygen}
\usepackage{graphicx}
\usepackage[utf8]{inputenc}
\usepackage{makeidx}
\usepackage{multicol}
\usepackage{multirow}
\PassOptionsToPackage{warn}{textcomp}
\usepackage{textcomp}
\usepackage[nointegrals]{wasysym}
\usepackage[table]{xcolor}
\usepackage{ifpdf,ifxetex}
% Font selection
\usepackage[T1]{fontenc}
\usepackage[scaled=.90]{helvet}
\usepackage{courier}
\usepackage{amssymb}
\usepackage{sectsty}
\renewcommand{\familydefault}{\sfdefault}
\allsectionsfont{%
\fontseries{bc}\selectfont%
\color{darkgray}%
}
\renewcommand{\DoxyLabelFont}{%
\fontseries{bc}\selectfont%
\color{darkgray}%
}
\newcommand{\+}{\discretionary{\mbox{\scriptsize$\hookleftarrow$}}{}{}}
% Arguments of doxygenemoji:
% 1) ':<text>:' form of the emoji, already "LaTeX"-escaped
% 2) file with the name of the emoji without the .png extension
% in case image exist use this otherwise use the ':<text>:' form
\newcommand{\doxygenemoji}[2]{%
\IfFileExists{./#2.png}{\raisebox{-0.1em}{\includegraphics[height=0.9em]{./#2.png}}}{#1}%
}
% Page & text layout
\usepackage{geometry}
\geometry{%
a4paper,%
top=2.5cm,%
bottom=2.5cm,%
left=2.5cm,%
right=2.5cm%
}
\tolerance=750
\hfuzz=15pt
\hbadness=750
\setlength{\emergencystretch}{15pt}
\setlength{\parindent}{0cm}
\newcommand{\doxynormalparskip}{\setlength{\parskip}{3ex plus 2ex minus 2ex}}
\newcommand{\doxytocparskip}{\setlength{\parskip}{1ex plus 0ex minus 0ex}}
\doxynormalparskip
\makeatletter
\renewcommand{\paragraph}{%
\@startsection{paragraph}{4}{0ex}{-1.0ex}{1.0ex}{%
\normalfont\normalsize\bfseries\SS@parafont%
}%
}
\renewcommand{\subparagraph}{%
\@startsection{subparagraph}{5}{0ex}{-1.0ex}{1.0ex}{%
\normalfont\normalsize\bfseries\SS@subparafont%
}%
}
\makeatother
\makeatletter
\newcommand\hrulefilll{\leavevmode\leaders\hrule\hskip 0pt plus 1filll\kern\z@}
\makeatother
% Headers & footers
\usepackage{fancyhdr}
\pagestyle{fancyplain}
\fancyhead[LE]{\fancyplain{}{\bfseries\thepage}}
\fancyhead[CE]{\fancyplain{}{}}
\fancyhead[RE]{\fancyplain{}{\bfseries\leftmark}}
\fancyhead[LO]{\fancyplain{}{\bfseries\rightmark}}
\fancyhead[CO]{\fancyplain{}{}}
\fancyhead[RO]{\fancyplain{}{\bfseries\thepage}}
\fancyfoot[LE]{\fancyplain{}{}}
\fancyfoot[CE]{\fancyplain{}{}}
\fancyfoot[RE]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
\fancyfoot[LO]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
\fancyfoot[CO]{\fancyplain{}{}}
\fancyfoot[RO]{\fancyplain{}{}}
\renewcommand{\footrulewidth}{0.4pt}
\renewcommand{\chaptermark}[1]{%
\markboth{#1}{}%
}
\renewcommand{\sectionmark}[1]{%
\markright{\thesection\ #1}%
}
% Indices & bibliography
\usepackage{natbib}
\usepackage[titles]{tocloft}
\setcounter{tocdepth}{3}
\setcounter{secnumdepth}{5}
\makeindex
% Map Unicode superscript characters onto math-mode equivalents
\usepackage{newunicodechar}
\newunicodechar{⁻}{${}^{-}$}% Superscript minus
\newunicodechar{²}{${}^{2}$}% Superscript two
\newunicodechar{³}{${}^{3}$}% Superscript three
% Hyperlinks (required, but should be loaded last)
\ifpdf
\usepackage[pdftex,pagebackref=true]{hyperref}
\else
\ifxetex
\usepackage[pagebackref=true]{hyperref}
\else
\usepackage[ps2pdf,pagebackref=true]{hyperref}
\fi
\fi
\hypersetup{%
colorlinks=true,%
linkcolor=blue,%
citecolor=blue,%
unicode%
}
% Custom commands
\newcommand{\clearemptydoublepage}{%
\newpage{\pagestyle{empty}\cleardoublepage}%
}
\usepackage{caption}
\captionsetup{labelsep=space,justification=centering,font={bf},singlelinecheck=off,skip=4pt,position=top}
\usepackage{etoc}
\etocsettocstyle{\doxytocparskip}{\doxynormalparskip}
\renewcommand{\numberline}[1]{#1~}
%===== C O N T E N T S =====
\begin{document}
% Titlepage & ToC
\hypersetup{pageanchor=false,
bookmarksnumbered=true,
pdfencoding=unicode
}
\pagenumbering{alph}
\begin{titlepage}
\vspace*{7cm}
\begin{center}%
{\Large C++ Library of the Linear Conjugate Gradient Methods (LibLCG)}\\
\vspace*{1cm}
{\large Yi Zhang}\\
\end{center}
\end{titlepage}
\clearemptydoublepage
\pagenumbering{roman}
\tableofcontents
\clearemptydoublepage
\pagenumbering{arabic}
\hypersetup{pageanchor=true}
%--- Begin generated contents ---

BIN
refman.pdf Normal file

Binary file not shown.

181
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,181 @@
# Collect the source files found in lib/ into LCGLIB_SRC
# NOTE(review): this runs before enable_language(CUDA) further down -- confirm
# that the .cu sources are actually picked up by aux_source_directory here.
aux_source_directory(lib LCGLIB_SRC)
# Drop the Eigen-dependent sources when Eigen support is switched off
if(NOT LibLCG_EIGEN)
list(REMOVE_ITEM LCGLIB_SRC "lib/algebra_eigen.cpp")
list(REMOVE_ITEM LCGLIB_SRC "lib/lcg_eigen.cpp")
list(REMOVE_ITEM LCGLIB_SRC "lib/clcg_eigen.cpp")
list(REMOVE_ITEM LCGLIB_SRC "lib/solver_eigen.cpp")
list(REMOVE_ITEM LCGLIB_SRC "lib/preconditioner_eigen.cpp")
endif()
# Drop the CUDA sources when CUDA support is switched off
if(NOT LibLCG_CUDA)
list(REMOVE_ITEM LCGLIB_SRC "lib/algebra_cuda.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/lcg_complex_cuda.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/lcg_cuda.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/clcg_cuda.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/clcg_cuda_f.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/solver_cuda.cu")
list(REMOVE_ITEM LCGLIB_SRC "lib/preconditioner_cuda.cu")
endif()
#
#
# Build the shared library target
add_library(lcg SHARED ${LCGLIB_SRC})
# Build the static library target from the same sources
add_library(lcg_static STATIC ${LCGLIB_SRC})
# Give the static library the same base file name as the shared one
set_target_properties(lcg_static PROPERTIES OUTPUT_NAME "lcg")
# Set CLEAN_DIRECT_OUTPUT on both targets, which share one output name
set_target_properties(lcg PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set_target_properties(lcg_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
# Both library targets are compiled for CUDA architecture 70 only
if(LibLCG_CUDA)
set_target_properties(lcg PROPERTIES CUDA_ARCHITECTURES 70)
set_target_properties(lcg_static PROPERTIES CUDA_ARCHITECTURES 70)
endif()
# Version and SOVERSION of the shared library follow the project version
set_target_properties(lcg PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
# Place the built libraries in <build>/lib
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
# Compiler settings
# NOTE(review): CMAKE_CXX_STANDARD only initialises the CXX_STANDARD property of
# targets created after it is set; lcg and lcg_static are created above, so
# confirm whether they are meant to be built with this standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
# Optional dependency: Eigen3 (headers only, added to the include path)
if(LibLCG_EIGEN)
find_package(Eigen3 REQUIRED)
if(EIGEN3_FOUND)
message(STATUS "Eigen3 Found.")
include_directories(${EIGEN3_INCLUDE_DIR})
endif()
endif()
# Optional dependency: CUDA toolkit plus the cuBLAS, cuSPARSE and cuSOLVER libraries
# NOTE(review): the FindCUDA module behind find_package(CUDA) is deprecated in
# recent CMake releases -- consider whether enable_language(CUDA) alone is enough.
if(LibLCG_CUDA)
enable_language(CUDA)
find_package(CUDA REQUIRED)
if(CUDA_FOUND)
message(STATUS "CUDA Found.")
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
# Locate the three CUDA math libraries in the implicit CUDA link directories
find_library(CUBLAS_LIBRARY cublas ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUSPARSE_LIBRARY cusparse ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUSOLVER_LIBRARY cusolver ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
# Link them into both the shared and the static library
target_link_libraries(lcg PUBLIC ${CUBLAS_LIBRARY})
target_link_libraries(lcg_static ${CUBLAS_LIBRARY})
target_link_libraries(lcg PUBLIC ${CUSPARSE_LIBRARY})
target_link_libraries(lcg_static ${CUSPARSE_LIBRARY})
target_link_libraries(lcg PUBLIC ${CUSOLVER_LIBRARY})
target_link_libraries(lcg_static ${CUSOLVER_LIBRARY})
endif()
endif()
# Optional dependency: OpenMP
if(LibLCG_OPENMP)
# Add the OpenMP compile and link flags globally and link the imported target
find_package(OpenMP REQUIRED)
if (OpenMP_CXX_FOUND)
message(STATUS "OpenMP Found.")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
target_link_libraries(lcg PUBLIC OpenMP::OpenMP_CXX)
target_link_libraries(lcg_static OpenMP::OpenMP_CXX)
endif()
endif()
# Generate the package configuration and version files used by find_package()
set(CONFIG_FILE_PATH lib/cmake/${PROJECT_NAME})
configure_package_config_file(${PROJECT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
INSTALL_DESTINATION ${CONFIG_FILE_PATH}
NO_CHECK_REQUIRED_COMPONENTS_MACRO)
write_basic_package_version_file(${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion)
# Install the libraries; on Windows only the library files are installed,
# elsewhere the exported targets and the package config files are installed too
if(WIN32)
install(TARGETS lcg DESTINATION lib)
install(TARGETS lcg_static DESTINATION lib)
else()
install(TARGETS lcg lcg_static
EXPORT ${PROJECT_NAME}Targets
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
install(EXPORT ${PROJECT_NAME}Targets
DESTINATION ${CONFIG_FILE_PATH})
install(FILES
${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
DESTINATION ${CONFIG_FILE_PATH})
endif()
# Install the public headers into include/lcg
install(FILES lib/config.h DESTINATION include/lcg)
install(FILES lib/algebra.h DESTINATION include/lcg)
install(FILES lib/lcg_complex.h DESTINATION include/lcg)
install(FILES lib/util.h DESTINATION include/lcg)
install(FILES lib/lcg.h DESTINATION include/lcg)
install(FILES lib/clcg.h DESTINATION include/lcg)
install(FILES lib/solver.h DESTINATION include/lcg)
install(FILES lib/preconditioner.h DESTINATION include/lcg)
# CUDA headers are installed only when CUDA support is enabled
# NOTE(review): the header is named clcg_cudaf.h while the matching source listed
# for removal above is clcg_cuda_f.cu -- confirm the header file name.
if(LibLCG_CUDA)
install(FILES lib/algebra_cuda.h DESTINATION include/lcg)
install(FILES lib/lcg_complex_cuda.h DESTINATION include/lcg)
install(FILES lib/lcg_cuda.h DESTINATION include/lcg)
install(FILES lib/clcg_cuda.h DESTINATION include/lcg)
install(FILES lib/clcg_cudaf.h DESTINATION include/lcg)
install(FILES lib/solver_cuda.h DESTINATION include/lcg)
install(FILES lib/preconditioner_cuda.h DESTINATION include/lcg)
endif()
# Eigen headers are installed only when Eigen support is enabled
if(LibLCG_EIGEN)
install(FILES lib/algebra_eigen.h DESTINATION include/lcg)
install(FILES lib/lcg_eigen.h DESTINATION include/lcg)
install(FILES lib/clcg_eigen.h DESTINATION include/lcg)
install(FILES lib/solver_eigen.h DESTINATION include/lcg)
install(FILES lib/preconditioner_eigen.h DESTINATION include/lcg)
endif()
# Sample programs
# Place the built executables in <build>/bin
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
# add_sample(<name> <file>): build sample/<file> as executable <name> linked against lcg
macro(add_sample name file)
# Create the executable from the given file in the sample/ directory
add_executable(${name} sample/${file})
# Record the install-tree lib directory as the install RPATH of the executable
set_target_properties(${name} PROPERTIES INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib)
# Link against the shared library target
target_link_libraries(${name} PUBLIC lcg)
# CUDA samples are compiled for the same architecture as the library
if(LibLCG_CUDA)
set_target_properties(${name} PROPERTIES CUDA_ARCHITECTURES 70)
endif()
endmacro()
# Samples that are always built
add_sample(lcg_sample1 sample1.cpp)
add_sample(lcg_sample2 sample2.cpp)
add_sample(lcg_sample3 sample3.cpp)
add_sample(lcg_sample4 sample4.cpp)
# Samples built only with Eigen support; sample6 additionally needs LibLCG_STD_COMPLEX
if(LibLCG_EIGEN)
add_sample(lcg_sample5 sample5.cpp)
add_sample(lcg_sample7 sample7.cpp)
if(LibLCG_STD_COMPLEX)
add_sample(lcg_sample6 sample6.cpp)
endif()
endif()
# Samples built only with CUDA support
if(LibLCG_CUDA)
# The followings are not working for now due to CUDA 12+ compatibility issues. Check more later
#add_sample(lcg_sample8 sample8.cu)
#add_sample(lcg_sample9 sample9.cu)
#add_sample(lcg_sample10 sample10.cu)
#add_sample(lcg_sample11 sample11.cu)
#add_sample(lcg_sample12 sample12.cu)
#add_sample(lcg_sample13 sample13.cu)
#add_sample(lcg_sample14 sample14.cu)
add_sample(lcg_sample15 sample15.cu)
endif()

222
src/lib/algebra.cpp Normal file
View File

@ -0,0 +1,222 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "ctime"
#include "random"
#include "algebra.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
/**
 * @brief Absolute value of a floating-point number.
 *
 * @param a Input value.
 * @return a itself when a >= 0, otherwise -1.0*a.
 */
lcg_float lcg_abs(lcg_float a)
{
    return (a >= 0.0) ? a : -1.0*a;
}
/**
 * @brief Larger of two floating-point numbers.
 *
 * @param a First value (returned when the two compare equal).
 * @param b Second value.
 * @return a when a >= b, otherwise b.
 */
lcg_float lcg_max(lcg_float a, lcg_float b)
{
    return (a >= b) ? a : b;
}
/**
 * @brief Smaller of two floating-point numbers.
 *
 * @param a First value (returned when the two compare equal).
 * @param b Second value.
 * @return a when a <= b, otherwise b.
 */
lcg_float lcg_min(lcg_float a, lcg_float b)
{
    return (a <= b) ? a : b;
}
/**
 * @brief Clamp a value into the range bounded by low and hig.
 *
 * The upper limit is tested before the lower one. When a limit is flagged as
 * included the limit itself is returned; otherwise the limit is shifted
 * inwards by 1e-16 before being returned.
 *
 * @param low       Lower limit.
 * @param hig       Upper limit.
 * @param a         Value to clamp.
 * @param low_bound Whether the lower limit itself is an admissible result.
 * @param hig_bound Whether the upper limit itself is an admissible result.
 * @return The clamped value, or a unchanged when it lies strictly between the limits.
 */
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a,
    bool low_bound, bool hig_bound)
{
    if (a >= hig)
    {
        return hig_bound ? hig : (hig - 1e-16);
    }

    if (a <= low)
    {
        return low_bound ? low : (low + 1e-16);
    }

    return a;
}
/**
 * @brief Allocate an uninitialised array of n values with new[].
 *
 * @param n Number of elements.
 * @return Pointer to the first element of the new array.
 */
lcg_float* lcg_malloc(int n)
{
    return new lcg_float [n];
}
/**
 * @brief Allocate an uninitialised m-by-n array as m separately allocated rows.
 *
 * @param m Number of rows.
 * @param n Number of elements in each row.
 * @return Pointer to the array of row pointers.
 */
lcg_float** lcg_malloc(int m, int n)
{
    lcg_float **rows = new lcg_float* [m];

    int r = 0;
    while (r < m)
    {
        rows[r] = new lcg_float [n];
        ++r;
    }

    return rows;
}
/**
 * @brief Release an array allocated with new[].
 *
 * The pointer is passed by value, so the caller's copy is NOT reset by this
 * function; callers that keep the variable around should clear it themselves.
 *
 * @param x Pointer to the array, may be nullptr.
 */
void lcg_free(lcg_float* x)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] x;
    return;
}
/**
 * @brief Release a two-dimensional array made of m separately allocated rows.
 *
 * The pointer is passed by value, so the caller's copy is NOT reset by this
 * function; callers that keep the variable around should clear it themselves.
 *
 * @param x Pointer to the array of row pointers, may be nullptr.
 * @param m Number of rows to release.
 */
void lcg_free(lcg_float **x, int m)
{
    // The rows are dereferenced below, so a null table must be rejected first.
    if (x == nullptr)
    {
        return;
    }

    for (int i = 0; i < m; i++)
    {
        delete[] x[i];
    }
    delete[] x;
    return;
}
/**
 * @brief Assign the same value to every element of an array.
 *
 * @param a    Array to fill.
 * @param b    Value written to each element.
 * @param size Number of elements to fill.
 */
void lcg_vecset(lcg_float *a, lcg_float b, int size)
{
    int idx = 0;
    while (idx < size)
    {
        a[idx] = b;
        ++idx;
    }
}
/**
 * @brief Assign the same value to every element of a two-dimensional array.
 *
 * @param a Array of row pointers to fill.
 * @param b Value written to each element.
 * @param m Number of rows.
 * @param n Number of elements in each row.
 */
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n)
{
    for (int r = 0; r < m; r++)
    {
        lcg_float *row = a[r];
        for (int c = 0; c < n; c++)
        {
            row[c] = b;
        }
    }
}
/**
 * @brief Seed the C random number generator from the clock, once per process.
 *
 * Seeding on every call made two fills issued within the same second return
 * identical sequences; seeding once lets consecutive fills continue the
 * same random stream instead.
 */
static void lcg_seed_rand_once()
{
    // Initialisation of a function-local static runs exactly once.
    static const bool seeded = (srand(time(nullptr)), true);
    (void) seeded;
    return;
}

/**
 * @brief Fill an array with random values scaled linearly between l and h.
 *
 * @param a    Array to fill.
 * @param l    Value produced when rand() returns 0.
 * @param h    Value produced when rand() returns RAND_MAX.
 * @param size Number of elements to fill.
 */
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size)
{
    lcg_seed_rand_once();
    for (int i = 0; i < size; i++)
    {
        a[i] = (h-l)*rand()*1.0/RAND_MAX + l;
    }
    return;
}

/**
 * @brief Fill a two-dimensional array with random values scaled linearly between l and h.
 *
 * @param a Array of row pointers to fill.
 * @param l Value produced when rand() returns 0.
 * @param h Value produced when rand() returns RAND_MAX.
 * @param m Number of rows.
 * @param n Number of elements in each row.
 */
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n)
{
    lcg_seed_rand_once();
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            a[i][j] = (h-l)*rand()*1.0/RAND_MAX + l;
        }
    }
    return;
}
/**
 * @brief Sum of the squared elements of an array (squared L2 norm).
 *
 * @param a Input array.
 * @param n Number of elements; a value <= 0 yields 0.
 * @return The accumulated sum of a[i]*a[i].
 */
double lcg_squaredl2norm(lcg_float *a, int n)
{
    lcg_float sum = 0;
    // A signed counter keeps a negative n from being converted into a huge
    // unsigned bound, which would read far outside the array.
    for (int i = 0; i < n; i++)
    {
        sum += a[i]*a[i];
    }
    return sum;
}
/**
 * @brief Dot product of two arrays.
 *
 * The result is accumulated directly in ret, which is reset to zero first.
 *
 * @param ret  Receives the sum of a[i]*b[i].
 * @param a    First array.
 * @param b    Second array.
 * @param size Number of elements in each array.
 */
void lcg_dot(lcg_float &ret, const lcg_float *a,
    const lcg_float *b, int size)
{
    ret = 0.0;

    int k = 0;
    while (k < size)
    {
        ret += a[k]*b[k];
        ++k;
    }
}
/**
 * @brief Product of a dense m_size-by-n_size matrix (or its transpose) with a vector.
 *
 * With layout == MatNormal the result is Ax = A*x: x holds n_size elements
 * and Ax receives m_size elements. With any other layout value the result is
 * Ax = A^T*x: x holds m_size elements and Ax receives n_size elements.
 * The outer loop of either branch is annotated for OpenMP.
 *
 * @param A      Matrix stored as an array of m_size row pointers.
 * @param x      Input vector.
 * @param Ax     Output vector, overwritten.
 * @param m_size Number of rows of A.
 * @param n_size Number of columns of A.
 * @param layout Selects the plain or the transposed product.
 */
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax,
    int m_size, int n_size, lcg_matrix_e layout)
{
    int i, j;

    if (layout == MatNormal)
    {
        // One output element per matrix row.
#pragma omp parallel for private (i, j) schedule(guided)
        for (i = 0; i < m_size; i++)
        {
            Ax[i] = 0.0;
            for (j = 0; j < n_size; j++)
            {
                Ax[i] += A[i][j]*x[j];
            }
        }
    }
    else
    {
        // One output element per matrix column.
#pragma omp parallel for private (i, j) schedule(guided)
        for (j = 0; j < n_size; j++)
        {
            Ax[j] = 0.0;
            for (i = 0; i < m_size; i++)
            {
                Ax[j] += A[i][j]*x[i];
            }
        }
    }

    return;
}
/**
 * @brief Product of a sparse matrix in COO (row, col, value) storage and a vector.
 *
 * With pre_position == false the first M entries of p are zeroed and every
 * stored entry adds Mat[i]*V[col[i]] to p[row[i]] (matrix times column
 * vector). With pre_position == true the first N entries of p are zeroed and
 * every stored entry adds Mat[i]*V[row[i]] to p[col[i]] (row vector times
 * matrix).
 *
 * Loop counters are signed ints like the size arguments, so a non-positive
 * size results in an empty loop rather than a wrapped-around unsigned bound.
 *
 * @param[in] row            row index of every stored entry
 * @param[in] col            column index of every stored entry
 * @param[in] Mat            value of every stored entry
 * @param[in] V              multiplier vector
 * @param p                  output product
 * @param[in] M              row number of the sparse matrix
 * @param[in] N              column number of the sparse matrix
 * @param[in] nz_size        number of stored entries
 * @param[in] pre_position   true: V is a row vector placed before the matrix
 */
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position)
{
    if (!pre_position)
    {
        for (int i = 0; i < M; i++)
        {
            p[i] = 0.0;
        }

        for (int i = 0; i < nz_size; i++)
        {
            p[row[i]] += Mat[i]*V[col[i]];
        }
    }
    else
    {
        for (int i = 0; i < N; i++)
        {
            p[i] = 0.0;
        }

        for (int i = 0; i < nz_size; i++)
        {
            p[col[i]] += Mat[i]*V[row[i]];
        }
    }
    return;
}

219
src/lib/algebra.h Normal file
View File

@ -0,0 +1,219 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _ALGEBRA_H
#define _ALGEBRA_H
#include "config.h"
/**
* @brief Matrix layouts. Selects whether a matrix-vector product
* such as lcg_matvec() uses the matrix itself or its transpose.
*/
enum lcg_matrix_e
{
MatNormal, ///< use the matrix as stored (A)
MatTranspose, ///< use the transpose of the matrix (A^T)
};
/**
* @brief Conjugate types for a complex number.
* The complex solvers pass one of these flags to the user's matrix-product callback.
*/
enum clcg_complex_e
{
NonConjugate, ///< presumably: use the complex values as they are -- confirm against the callback contract
Conjugate, ///< presumably: use the complex conjugates of the values -- confirm against the callback contract
};
/**
* @brief A simple definition of the float type we use here.
* Easy to change in the future. Right now it is just an alias of double
*/
typedef double lcg_float;
/**
* @brief Return absolute value
*
* @param[in] a input value
*
* @return The absolute value
*/
lcg_float lcg_abs(lcg_float a);
/**
* @brief Return the bigger value
*
* @param[in] a input value
* @param[in] b input value
*
* @return The bigger value
*/
lcg_float lcg_max(lcg_float a, lcg_float b);
/**
* @brief Return the smaller value
*
* @param[in] a input value
* @param[in] b input value
*
* @return The smaller value
*/
lcg_float lcg_min(lcg_float a, lcg_float b);
/**
* @brief Set the input value within a box constraint
*
* @param low low boundary
* @param hig high boundary
* @param a input value
* @param low_bound Whether to include the low boundary value
* @param hig_bound Whether to include the high boundary value
*
* @return box constrained value
*/
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a,
bool low_bound = true, bool hig_bound = true);
/**
* @brief Locate memory for a lcg_float pointer type.
*
* @param[in] n Size of the lcg_float array.
*
* @return Pointer of the array's location.
*/
lcg_float* lcg_malloc(int n);
/**
* @brief Allocate memory for a 2D lcg_float array (pointer-to-pointer type).
*
* @param[in] m Row size of the lcg_float array.
* @param[in] n Column size of the lcg_float array.
*
* @return Pointer of the array's location.
*/
lcg_float** lcg_malloc(int m, int n);
/**
* @brief Destroy memory used by the lcg_float type array.
*
* @param x Pointer of the array.
*/
void lcg_free(lcg_float* x);
/**
* @brief Destroy memory used by the 2D lcg_float type array.
*
* @param x Pointer of the array.
* @param[in] m Row size of the array.
*/
void lcg_free(lcg_float **x, int m);
/**
* @brief set a vector's value
*
* @param a pointer of the vector
* @param[in] b initial value
* @param[in] size vector size
*/
void lcg_vecset(lcg_float *a, lcg_float b, int size);
/**
* @brief set a 2d vector's value
*
* @param a pointer of the matrix
* @param[in] b initial value
* @param[in] m row size of the matrix
* @param[in] n column size of the matrix
*/
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n);
/**
* @brief set a vector using random values
*
* @param a pointer of the vector
* @param[in] l the lower bound of random values
* @param[in] h the higher bound of random values
* @param[in] size size of the vector
*/
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size);
/**
* @brief set a 2D vector using random values
*
* @param a pointer of the vector
* @param[in] l the lower bound of random values
* @param[in] h the higher bound of random values
* @param[in] m row size of the vector
* @param[in] n column size of the vector
*/
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n);
/**
* @brief calculate the squared L2 norm of the input vector
*
* @param a pointer of the vector
* @param n size of the vector
* @return the squared L2 norm (sum of the squared elements)
*/
double lcg_squaredl2norm(lcg_float *a, int n);
/**
* @brief calculate dot product of two real vectors
*
* @param[out] ret the dot product
* @param[in] a pointer of the vector a
* @param[in] b pointer of the vector b
* @param[in] size size of the vector
*/
void lcg_dot(lcg_float &ret, const lcg_float *a, const lcg_float *b, int size);
/**
* @brief calculate product of a real matrix and a vector
*
* Different configurations:
* layout=Normal -> A
* layout=Transpose -> A^T
*
* @param A matrix A
* @param[in] x vector x
* @param Ax product of Ax
* @param[in] m_size row size of A
* @param[in] n_size column size of A
* @param[in] layout layout of A used for multiplication. Must be Normal or Transpose
*/
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax, int m_size, int n_size,
lcg_matrix_e layout = MatNormal);
/**
* @brief Calculate the product of a sparse matrix multiplied by a vector. The matrix is stored in the COO format.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param Mat Non-zero values of the input sparse matrix.
* @param V Multiplier vector
* @param p Output product
* @param M Row number of the sparse matrix
* @param N Column number of the sparse matrix
* @param nz_size Non-zero size of the matrix
* @param pre_position If true, the multiplier is seen as a row vector placed before the matrix (the output has N elements). Otherwise, it is treated as a column vector placed after the matrix (the output has M elements).
*/
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position = false);
#endif //_ALGEBRA_H

110
src/lib/algebra_cuda.cu Normal file
View File

@ -0,0 +1,110 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "algebra_cuda.h"
/**
 * @brief Kernel: clamp a[i] element-wise into [low[i], hig[i]].
 *
 * Each thread handles the single element whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= n do nothing.
 * The high boundary is applied before the low boundary. The two flags only
 * choose between an inclusive (>=, <=) and a strict (>, <) comparison; in
 * both cases an out-of-range element is set to the boundary value itself.
 *
 * @param[in] low        low boundaries
 * @param[in] hig        high boundaries
 * @param a              values to constrain, modified in place
 * @param[in] n          number of elements
 * @param[in] low_bound  use <= (true) or < (false) against the low boundary
 * @param[in] hig_bound  use >= (true) or > (false) against the high boundary
 */
__global__ void lcg_set2box_cuda_device(const lcg_float *low, const lcg_float *hig, lcg_float *a,
    int n, bool low_bound, bool hig_bound)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    const bool above = hig_bound ? (a[idx] >= hig[idx]) : (a[idx] > hig[idx]);
    if (above) a[idx] = hig[idx];

    const bool below = low_bound ? (a[idx] <= low[idx]) : (a[idx] < low[idx]);
    if (below) a[idx] = low[idx];
}
/**
 * @brief Kernel: copy the diagonal entry of every row of a CSR matrix into A_diag.
 *
 * Each thread handles the row whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= A_len do
 * nothing. The row's stored entries are scanned in order and the first one
 * whose column index equals the row index is copied. When a row stores no
 * diagonal entry, A_diag[row] is left untouched.
 *
 * @param[in] A_ptr   row pointer array (entries of row i are A_ptr[i] .. A_ptr[i+1]-1)
 * @param[in] A_col   column index of every stored entry
 * @param[in] A_val   value of every stored entry
 * @param[in] A_len   number of rows
 * @param A_diag      output diagonal elements
 */
__global__ void lcg_smDcsr_get_diagonal_device(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    const int row_end = A_ptr[row + 1];
    for (int k = A_ptr[row]; k < row_end; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            return;
        }
    }
}
/**
 * @brief Kernel: element-wise product c[i] = a[i] * b[i].
 *
 * Each thread handles the single element whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= n do nothing.
 *
 * @param[in] a  first input array
 * @param[in] b  second input array
 * @param c      output array
 * @param[in] n  number of elements
 */
__global__ void lcg_vecMvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = a[idx] * b[idx];
}
/**
 * @brief Kernel: element-wise quotient c[i] = a[i] / b[i].
 *
 * Each thread handles the single element whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= n do nothing.
 *
 * @param[in] a  numerator array
 * @param[in] b  denominator array
 * @param c      output array
 * @param[in] n  number of elements
 */
__global__ void lcg_vecDvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = a[idx] / b[idx];
}
/**
 * @brief Launch lcg_set2box_cuda_device over n elements with 1024 threads per block.
 *
 * The pointers are handed to the kernel unchanged, so they must be valid
 * for device code. The launch is not followed by any synchronization here.
 * Nothing is launched when n <= 0: a grid of zero blocks is an invalid
 * launch configuration and would leave a CUDA error behind.
 *
 * @param[in] low        low boundaries
 * @param[in] hig        high boundaries
 * @param a              values to constrain, modified in place
 * @param[in] n          number of elements
 * @param[in] low_bound  forwarded to the kernel
 * @param[in] hig_bound  forwarded to the kernel
 */
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a,
    int n, bool low_bound, bool hig_bound)
{
    if (n <= 0) return;

    const int blockSize = 1024;
    // Ceiling division written so that it cannot overflow for n close to INT_MAX.
    const int numBlocks = n / blockSize + ((n % blockSize != 0) ? 1 : 0);
    lcg_set2box_cuda_device<<<numBlocks, blockSize>>>(low, hig, a, n, low_bound, hig_bound);
    return;
}
/**
 * @brief Launch lcg_smDcsr_get_diagonal_device with one thread per matrix row.
 *
 * The pointers are handed to the kernel unchanged, so they must be valid
 * for device code. The launch is not followed by any synchronization here.
 * Nothing is launched when A_len <= 0 or bk_size <= 0: the former would ask
 * for a grid of zero blocks (an invalid launch configuration), the latter
 * would divide by zero or request an invalid block size.
 *
 * @param[in] A_ptr     row pointer array of the CSR matrix
 * @param[in] A_col     column index of every stored entry
 * @param[in] A_val     value of every stored entry
 * @param[in] A_len     number of rows
 * @param A_diag        output diagonal elements
 * @param[in] bk_size   number of threads per block
 */
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size)
{
    if (A_len <= 0 || bk_size <= 0) return;

    const int blockSize = bk_size;
    // Ceiling division written so that it cannot overflow for A_len close to INT_MAX.
    const int numBlocks = A_len / blockSize + ((A_len % blockSize != 0) ? 1 : 0);
    lcg_smDcsr_get_diagonal_device<<<numBlocks, blockSize>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
/**
 * @brief Launch lcg_vecMvecD_element_wise_device (c[i] = a[i] * b[i]) over n elements.
 *
 * The pointers are handed to the kernel unchanged, so they must be valid
 * for device code. The launch is not followed by any synchronization here.
 * Nothing is launched when n <= 0 or bk_size <= 0: the former would ask for
 * a grid of zero blocks (an invalid launch configuration), the latter would
 * divide by zero or request an invalid block size.
 *
 * @param[in] a         first input array
 * @param[in] b         second input array
 * @param c             output array
 * @param[in] n         number of elements
 * @param[in] bk_size   number of threads per block
 */
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    if (n <= 0 || bk_size <= 0) return;

    const int blockSize = bk_size;
    // Ceiling division written so that it cannot overflow for n close to INT_MAX.
    const int numBlocks = n / blockSize + ((n % blockSize != 0) ? 1 : 0);
    lcg_vecMvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
/**
 * @brief Launch lcg_vecDvecD_element_wise_device (c[i] = a[i] / b[i]) over n elements.
 *
 * The pointers are handed to the kernel unchanged, so they must be valid
 * for device code. The launch is not followed by any synchronization here.
 * Nothing is launched when n <= 0 or bk_size <= 0: the former would ask for
 * a grid of zero blocks (an invalid launch configuration), the latter would
 * divide by zero or request an invalid block size.
 *
 * @param[in] a         numerator array
 * @param[in] b         denominator array
 * @param c             output array
 * @param[in] n         number of elements
 * @param[in] bk_size   number of threads per block
 */
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    if (n <= 0 || bk_size <= 0) return;

    const int blockSize = bk_size;
    // Ceiling division written so that it cannot overflow for n close to INT_MAX.
    const int numBlocks = n / blockSize + ((n % blockSize != 0) ? 1 : 0);
    lcg_vecDvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}

88
src/lib/algebra_cuda.h Normal file
View File

@ -0,0 +1,88 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _ALGEBRA_CUDA_H
#define _ALGEBRA_CUDA_H
#include "algebra.h"
#ifdef LibLCG_CUDA
#include <cuda_runtime.h>
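/**
* @brief Set the input values within box constraints, element by element
*
* @note This function launches a CUDA kernel. All arrays must be allocated on the GPU device.
*
* @param[in] low Low boundaries
* @param[in] hig High boundaries
* @param a Input values, constrained in place
* @param[in] n Length of the arrays
* @param[in] low_bound Whether to include the low boundary value
* @param[in] hig_bound Whether to include the high boundary value
*/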
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a,
int n, bool low_bound = true, bool hig_bound = true);
/**
* @brief Extract diagonal elements from a square CUDA sparse matrix that is stored in the CSR format
*
* @note This function launches a CUDA kernel. All arrays must be allocated on the GPU device.
*
* @param[in] A_ptr Row index pointer
* @param[in] A_col Column index
* @param[in] A_val Non-zero values of the matrix
* @param[in] A_len Dimension of the matrix
* @param A_diag Output diagonal elements
* @param[in] bk_size CUDA block size (1024 by default).
*/
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size = 1024);
/**
* @brief Element-wise multiplication between two CUDA arrays.
*
* @note This function launches a CUDA kernel. All arrays must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array
* @param[in] b Pointer of the input array
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size CUDA block size (1024 by default).
*/
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
/**
* @brief Element-wise division between two CUDA arrays.
*
* @note This function launches a CUDA kernel. All arrays must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array (numerator)
* @param[in] b Pointer of the input array (denominator)
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size CUDA block size (1024 by default).
*/
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
#endif // LibLCG_CUDA
#endif //_ALGEBRA_CUDA_H

32
src/lib/algebra_eigen.cpp Normal file
View File

@ -0,0 +1,32 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "algebra_eigen.h"
/**
 * @brief Apply lcg_set2box() element-wise to m using the boundaries low and hig.
 *
 * Both boundaries are treated as inclusive (the defaults of lcg_set2box()).
 *
 * NOTE(review): m is received by value, so the constrained values are written
 * to a local copy and never reach the caller. It looks like a reference was
 * intended; changing that requires editing this definition together with the
 * declaration in algebra_eigen.h.
 *
 * @param[in] low  low boundaries
 * @param[in] hig  high boundaries
 * @param m        values to constrain (local copy, see the note above)
 */
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m)
{
    int k = 0;
    while (k < m.size())
    {
        m[k] = lcg_set2box(low[k], hig[k], m[k]);
        ++k;
    }
}

43
src/lib/algebra_eigen.h Normal file
View File

@ -0,0 +1,43 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _ALGEBRA_EIGEN_H
#define _ALGEBRA_EIGEN_H
#include "algebra.h"
#ifdef LibLCG_EIGEN
#include "Eigen/Dense"
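/**
* @brief Set the input values within box constraints, element by element.
* Both boundary values are included.
*
* @param low Low boundary values
* @param hig High boundary values
* @param m Values to constrain. NOTE: the vector is currently passed by value, so the constrained values are not returned to the caller.
*/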
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m);
#endif // LibLCG_EIGEN
#endif // _ALGEBRA_EIGEN_H

837
src/lib/clcg.cpp Normal file
View File

@ -0,0 +1,837 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "clcg.h"
#include "cmath"
#include "config.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
// Common signature of all complex solvers implemented in this file.
// clcg_solver() selects one of them through a pointer of this type.
typedef int (*clcg_solver_ptr)(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance);
// Forward declarations of the solvers; the definitions follow clcg_solver().
// Selected by CLCG_BICG.
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
const int n_size, const clcg_para* param, void* instance);
// Selected by CLCG_BICG_SYM.
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
const int n_size, const clcg_para* param, void* instance);
// Selected by CLCG_CGS (also used for unknown solver ids).
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
const int n_size, const clcg_para* param, void* instance);
// Selected by CLCG_BICGSTAB.
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
const int n_size, const clcg_para* param, void* instance);
// Selected by CLCG_TFQMR.
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
const int n_size, const clcg_para* param, void* instance);
/**
 * @brief Run the complex solver chosen by solver_id.
 *
 * All arguments except solver_id are forwarded unchanged to the selected
 * solver, whose return value is returned. An id that matches none of the
 * listed cases falls back to clcgs().
 *
 * @param[in] Afp        callback that evaluates the matrix-vector product
 * @param[in] Pfp        progress callback (forwarded as given)
 * @param m              solution vector, forwarded to the solver
 * @param[in] B          right-hand side vector
 * @param[in] n_size     size of m and B
 * @param[in] param      solver parameters (forwarded as given)
 * @param instance       user data handed to the callbacks
 * @param[in] solver_id  which solver to run
 *
 * @return status code of the selected solver
 */
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
    const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
    clcg_solver_enum solver_id)
{
    switch (solver_id)
    {
        case CLCG_BICG:
            return clbicg(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_BICG_SYM:
            return clbicg_symmetric(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_BICGSTAB:
            return clbicgstab(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_TFQMR:
            return cltfqmr(Afp, Pfp, m, B, n_size, param, instance);
        case CLCG_CGS:
        default:
            return clcgs(Afp, Pfp, m, B, n_size, param, instance);
    }
}
/**
 * @brief Bi-conjugate gradient solver for a complex linear system (selected by CLCG_BICG).
 *
 * Starting from the initial guess stored in m, the solution is refined in
 * place. Every iteration calls Afp twice: once with (MatNormal, NonConjugate)
 * and once with (MatTranspose, Conjugate).
 *
 * The stopping value is sqrt(rk_square)/n_size when para.abs_diff is set and
 * rk_square/r0_square otherwise, where r0_square is the initial rk_square
 * raised to at least 1.0. The same criterion is used for the check made
 * before the first iteration and for the checks made inside the loop.
 *
 * @param[in] Afp       callback that evaluates the matrix-vector product
 * @param[in] Pfp       optional progress callback (may be nullptr); a non-zero
 *                      return value stops the iterations
 * @param m             initial guess on entry, updated in place
 * @param[in] B         right-hand side vector
 * @param[in] n_size    size of m and B
 * @param[in] param     solver parameters; defparam2 is used when nullptr
 * @param instance      user data handed to the callbacks
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS,
 *         CLCG_INVILAD_EPSILON or CLCG_INVALID_POINTER for rejected arguments;
 *         LCG_ALREADY_OPTIMIZIED when the initial guess already satisfies the
 *         criterion; CLCG_CONVERGENCE, CLCG_STOP (requested by Pfp),
 *         CLCG_NAN_VALUE or LCG_REACHED_MAX_ITERATIONS otherwise.
 */
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set BiCG parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *r1k = nullptr, *r2k = nullptr, *d1k = nullptr, *d2k = nullptr;
    lcg_complex *Ax = nullptr;
    r1k = clcg_malloc(n_size); r2k = clcg_malloc(n_size);
    d1k = clcg_malloc(n_size); d2k = clcg_malloc(n_size);
    Ax = clcg_malloc(n_size);

    lcg_complex ak, Ad1d2, r1r2_next, betak;

    // Initial residual r1 = B - A*m; the second sequence starts from its conjugate.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        d1k[i] = r1k[i] = B[i] - Ax[i];
        d2k[i] = r2k[i] = clcg_conjugate(&r1k[i]);
    }

    lcg_complex r1r2;
    clcg_inner(r1r2, r2k, r1k, n_size);

    lcg_float r0_square, rk_square;
    lcg_complex rk_mod;
    clcg_inner(rk_mod, r1k, r1k, n_size);
    r0_square = rk_square = clcg_square(&rk_mod);

    if (r0_square < 1.0) r0_square = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_square)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, sqrt(rk_square)/n_size, &para, n_size, 0);
        }
        goto func_ends;
    }
    // The relative test applies only when abs_diff is off; otherwise a run in
    // absolute mode could be reported as optimized by the wrong criterion.
    else if (!para.abs_diff && rk_square/r0_square <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, rk_square/r0_square, &para, n_size, 0);
        }
        goto func_ends;
    }

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, d1k, Ax, n_size, MatNormal, NonConjugate);
        clcg_inner(Ad1d2, d2k, Ax, n_size);
        ak = r1r2/Ad1d2;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            m[i] = m[i] + ak*d1k[i];
            r1k[i] = r1k[i] - ak*Ax[i];
        }

        clcg_inner(rk_mod, r1k, r1k, n_size);
        rk_square = clcg_square(&rk_mod);

        Afp(instance, d2k, Ax, n_size, MatTranspose, Conjugate);

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            r2k[i] = r2k[i] - clcg_conjugate(&ak)*Ax[i];
        }

        // a value that differs from itself is NaN
        for (i = 0; i < n_size; i++)
        {
            if (m[i] != m[i])
            {
                ret = CLCG_NAN_VALUE; goto func_ends;
            }
        }

        clcg_inner(r1r2_next, r2k, r1k, n_size);
        betak = r1r2_next/r1r2;
        r1r2 = r1r2_next;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            d1k[i] = r1k[i] + betak*d1k[i];
            d2k[i] = r2k[i] + clcg_conjugate(&betak)*d2k[i];
        }
    }

    func_ends:
    {
        clcg_free(r1k);
        clcg_free(r2k);
        clcg_free(d1k);
        clcg_free(d2k);
        clcg_free(Ax);
    }

    return ret;
}
/**
 * @brief Bi-conjugate gradient solver variant selected by CLCG_BICG_SYM.
 *
 * Starting from the initial guess stored in m, the solution is refined in
 * place. Every iteration calls Afp once with (MatNormal, NonConjugate) and
 * keeps a single residual/direction pair. The step sizes are built with
 * clcg_dot(), the stopping value with clcg_inner().
 *
 * The stopping value is sqrt(rk_square)/n_size when para.abs_diff is set and
 * rk_square/r0_square otherwise, where r0_square is the initial rk_square
 * raised to at least 1.0. The same criterion is used for the check made
 * before the first iteration and for the checks made inside the loop.
 *
 * @param[in] Afp       callback that evaluates the matrix-vector product
 * @param[in] Pfp       optional progress callback (may be nullptr); a non-zero
 *                      return value stops the iterations
 * @param m             initial guess on entry, updated in place
 * @param[in] B         right-hand side vector
 * @param[in] n_size    size of m and B
 * @param[in] param     solver parameters; defparam2 is used when nullptr
 * @param instance      user data handed to the callbacks
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS,
 *         CLCG_INVILAD_EPSILON or CLCG_INVALID_POINTER for rejected arguments;
 *         LCG_ALREADY_OPTIMIZIED when the initial guess already satisfies the
 *         criterion; CLCG_CONVERGENCE, CLCG_STOP (requested by Pfp),
 *         CLCG_NAN_VALUE or LCG_REACHED_MAX_ITERATIONS otherwise.
 */
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set BiCG (symmetric variant) parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk = nullptr, *dk = nullptr;
    lcg_complex *Ax = nullptr;
    rk = clcg_malloc(n_size); dk = clcg_malloc(n_size);
    Ax = clcg_malloc(n_size);

    lcg_complex ak, rkrk2, betak, dkAx;

    // Initial residual r = B - A*m, also used as the first search direction.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        dk[i] = rk[i] = B[i] - Ax[i];
    }

    lcg_complex rkrk;
    clcg_dot(rkrk, rk, rk, n_size);

    lcg_float r0_square, rk_square;
    lcg_complex rk_mod;
    clcg_inner(rk_mod, rk, rk, n_size);
    r0_square = rk_square = clcg_square(&rk_mod);

    if (r0_square < 1.0) r0_square = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_square)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, sqrt(rk_square)/n_size, &para, n_size, 0);
        }
        goto func_ends;
    }
    // The relative test applies only when abs_diff is off; otherwise a run in
    // absolute mode could be reported as optimized by the wrong criterion.
    else if (!para.abs_diff && rk_square/r0_square <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, rk_square/r0_square, &para, n_size, 0);
        }
        goto func_ends;
    }

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, dk, Ax, n_size, MatNormal, NonConjugate);
        clcg_dot(dkAx, dk, Ax, n_size);
        ak = rkrk/dkAx;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            m[i] = m[i] + ak*dk[i];
            rk[i] = rk[i] - ak*Ax[i];
        }

        clcg_inner(rk_mod, rk, rk, n_size);
        rk_square = clcg_square(&rk_mod);

        // a value that differs from itself is NaN
        for (i = 0; i < n_size; i++)
        {
            if (m[i] != m[i])
            {
                ret = CLCG_NAN_VALUE; goto func_ends;
            }
        }

        clcg_dot(rkrk2, rk, rk, n_size);
        betak = rkrk2/rkrk;
        rkrk = rkrk2;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            dk[i] = rk[i] + betak*dk[i];
        }
    }

    func_ends:
    {
        clcg_free(rk);
        clcg_free(dk);
        clcg_free(Ax);
    }

    return ret;
}
/**
 * @brief Conjugate gradient squared solver for a complex linear system
 *        (selected by CLCG_CGS and used as the default solver).
 *
 * Starting from the initial guess stored in m, the solution is refined in
 * place. Every iteration calls Afp twice with (MatNormal, NonConjugate).
 * The auxiliary vector rbar0 is drawn with clcg_vecrnd() until its
 * clcg_inner() product with the initial residual has a module of at least 1e-8.
 *
 * The stopping value is sqrt(rk_square)/n_size when para.abs_diff is set and
 * rk_square/r0_square otherwise, where r0_square is the initial rk_square
 * raised to at least 1.0. The same criterion is used for the check made
 * before the first iteration and for the checks made inside the loop. That
 * first check runs before rbar0 is drawn, so a vanishing initial residual
 * returns immediately instead of redrawing rbar0 forever.
 *
 * @param[in] Afp       callback that evaluates the matrix-vector product
 * @param[in] Pfp       optional progress callback (may be nullptr); a non-zero
 *                      return value stops the iterations
 * @param m             initial guess on entry, updated in place
 * @param[in] B         right-hand side vector
 * @param[in] n_size    size of m and B
 * @param[in] param     solver parameters; defparam2 is used when nullptr
 * @param instance      user data handed to the callbacks
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS,
 *         CLCG_INVILAD_EPSILON or CLCG_INVALID_POINTER for rejected arguments;
 *         LCG_ALREADY_OPTIMIZIED when the initial guess already satisfies the
 *         criterion; CLCG_CONVERGENCE, CLCG_STOP (requested by Pfp),
 *         CLCG_NAN_VALUE or LCG_REACHED_MAX_ITERATIONS otherwise.
 */
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set CGS parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk = nullptr, *rbar0 = nullptr, *pk = nullptr;
    lcg_complex *Ax = nullptr, *uk = nullptr, *qk = nullptr, *wk = nullptr; // w_k = u_{k-1} + q_k
    rk = clcg_malloc(n_size); rbar0 = clcg_malloc(n_size);
    pk = clcg_malloc(n_size); Ax = clcg_malloc(n_size);
    uk = clcg_malloc(n_size); qk = clcg_malloc(n_size);
    wk = clcg_malloc(n_size);

    // All complex locals are declared before the first goto so that no jump
    // to func_ends bypasses a declaration.
    lcg_complex ak, rhok, rhok2, sigma, betak;

    // Initial residual r = B - A*m.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
    }

    lcg_float r0_square, rk_square;
    lcg_complex rk_mod;
    clcg_inner(rk_mod, rk, rk, n_size);
    r0_square = rk_square = clcg_square(&rk_mod);

    if (r0_square < 1.0) r0_square = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_square)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, sqrt(rk_square)/n_size, &para, n_size, 0);
        }
        goto func_ends;
    }
    // The relative test applies only when abs_diff is off; otherwise a run in
    // absolute mode could be reported as optimized by the wrong criterion.
    else if (!para.abs_diff && rk_square/r0_square <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, rk_square/r0_square, &para, n_size, 0);
        }
        goto func_ends;
    }

    // Draw rbar0 only now: with a zero residual the product below is always
    // zero and this loop would never terminate.
    do
    {
        clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
        clcg_inner(rhok, rbar0, rk, n_size);
    } while (clcg_module(&rhok) < 1e-8);

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, pk, Ax, n_size, MatNormal, NonConjugate); // vk = Apk
        clcg_inner(sigma, rbar0, Ax, n_size);
        ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            qk[i] = uk[i] - ak*Ax[i];
            wk[i] = uk[i] + qk[i];
        }

        Afp(instance, wk, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            m[i] = m[i] + ak*wk[i];
            rk[i] = rk[i] - ak*Ax[i];
        }

        clcg_inner(rk_mod, rk, rk, n_size);
        rk_square = clcg_square(&rk_mod);

        // a value that differs from itself is NaN
        for (i = 0; i < n_size; i++)
        {
            if (m[i] != m[i])
            {
                ret = CLCG_NAN_VALUE; goto func_ends;
            }
        }

        clcg_inner(rhok2, rbar0, rk, n_size);
        betak = rhok2/rhok;
        rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            uk[i] = rk[i] + betak*qk[i];
            pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
        }
    }

    func_ends:
    {
        clcg_free(rk);
        clcg_free(rbar0);
        clcg_free(pk);
        clcg_free(Ax);
        clcg_free(uk);
        clcg_free(qk);
        clcg_free(wk);
    }

    return ret;
}
/**
 * @brief Bi-conjugate gradient stabilized solver for a complex linear system
 *        (selected by CLCG_BICGSTAB).
 *
 * Starting from the initial guess stored in m, the solution is refined in
 * place. Every iteration calls Afp twice with (MatNormal, NonConjugate).
 * The auxiliary vector rbar0 is drawn with clcg_vecrnd() until its
 * clcg_inner() product with the initial residual has a module of at least 1e-8.
 *
 * The stopping value is sqrt(rk_square)/n_size when para.abs_diff is set and
 * rk_square/r0_square otherwise, where r0_square is the initial rk_square
 * raised to at least 1.0. The same criterion is used for the check made
 * before the first iteration and for the checks made inside the loop. That
 * first check runs before rbar0 is drawn, so a vanishing initial residual
 * returns immediately instead of redrawing rbar0 forever.
 *
 * @param[in] Afp       callback that evaluates the matrix-vector product
 * @param[in] Pfp       optional progress callback (may be nullptr); a non-zero
 *                      return value stops the iterations
 * @param m             initial guess on entry, updated in place
 * @param[in] B         right-hand side vector
 * @param[in] n_size    size of m and B
 * @param[in] param     solver parameters; defparam2 is used when nullptr
 * @param instance      user data handed to the callbacks
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS,
 *         CLCG_INVILAD_EPSILON or CLCG_INVALID_POINTER for rejected arguments;
 *         LCG_ALREADY_OPTIMIZIED when the initial guess already satisfies the
 *         criterion; CLCG_CONVERGENCE, CLCG_STOP (requested by Pfp),
 *         CLCG_NAN_VALUE or LCG_REACHED_MAX_ITERATIONS otherwise.
 */
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // set BICGSTAB parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;

    int i;
    lcg_complex *rk = nullptr, *rbar0 = nullptr, *pk = nullptr, *sk = nullptr;
    lcg_complex *Ap = nullptr, *As = nullptr;
    rk = clcg_malloc(n_size); rbar0 = clcg_malloc(n_size);
    pk = clcg_malloc(n_size); sk = clcg_malloc(n_size);
    Ap = clcg_malloc(n_size); As = clcg_malloc(n_size);

    // All complex locals are declared before the first goto so that no jump
    // to func_ends bypasses a declaration.
    lcg_complex ak, rhok, rhok2, sigma, omega, betak, Ass, AsAs;

    // Initial residual r = B - A*m, also used as the first search direction.
    Afp(instance, m, Ap, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        pk[i] = rk[i] = B[i] - Ap[i];
    }

    lcg_float r0_square, rk_square;
    lcg_complex rk_mod;
    clcg_inner(rk_mod, rk, rk, n_size);
    r0_square = rk_square = clcg_square(&rk_mod);

    if (r0_square < 1.0) r0_square = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_square)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, sqrt(rk_square)/n_size, &para, n_size, 0);
        }
        goto func_ends;
    }
    // The relative test applies only when abs_diff is off; otherwise a run in
    // absolute mode could be reported as optimized by the wrong criterion.
    else if (!para.abs_diff && rk_square/r0_square <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, rk_square/r0_square, &para, n_size, 0);
        }
        goto func_ends;
    }

    // Draw rbar0 only now: with a zero residual the product below is always
    // zero and this loop would never terminate.
    do
    {
        clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
        clcg_inner(rhok, rbar0, rk, n_size);
    } while (clcg_module(&rhok) < 1e-8);

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, m, residual, &para, n_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, pk, Ap, n_size, MatNormal, NonConjugate);
        clcg_inner(sigma, rbar0, Ap, n_size);
        ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            sk[i] = rk[i] - ak*Ap[i];
        }

        Afp(instance, sk, As, n_size, MatNormal, NonConjugate);
        clcg_inner(Ass, As, sk, n_size);
        clcg_inner(AsAs, As, As, n_size);
        omega = Ass/AsAs;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            m[i] = m[i] + ak*pk[i] + omega*sk[i];
            rk[i] = sk[i] - omega*As[i];
        }

        clcg_inner(rk_mod, rk, rk, n_size);
        rk_square = clcg_square(&rk_mod);

        // a value that differs from itself is NaN
        for (i = 0; i < n_size; i++)
        {
            if (m[i] != m[i])
            {
                ret = CLCG_NAN_VALUE; goto func_ends;
            }
        }

        clcg_inner(rhok2, rbar0, rk, n_size);
        betak = rhok2*ak/(rhok*omega);
        rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            pk[i] = rk[i] + betak*(pk[i] - omega*Ap[i]);
        }
    }

    func_ends:
    {
        clcg_free(rk);
        clcg_free(rbar0);
        clcg_free(pk);
        clcg_free(sk);
        clcg_free(Ap);
        clcg_free(As);
    }

    return ret;
}
/**
 * @brief Iterative solver for the complex linear system A*m = B (presumably the
 * transpose-free QMR method, judging by the function name -- confirm).
 *
 * @param[in] Afp       Callback computing the product A*x. Must not be null.
 * @param[in] Pfp       Optional progress callback (may be null). A non-zero return value stops the iteration.
 * @param     m         Initial solution on entry; updated in place during the iteration.
 * @param[in] B         Right-hand side vector.
 * @param[in] n_size    Length of m and B. Must be positive.
 * @param[in] param     Solver parameters; defparam2 is used when this is null.
 * @param     instance  User data handed to Afp and Pfp untouched.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS or
 * CLCG_NAN_VALUE from the iteration; CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS,
 * CLCG_INVILAD_EPSILON or CLCG_INVALID_POINTER when an argument is rejected.
 */
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
    const int n_size, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before anything is allocated.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // Afp is called unconditionally below, so a null callback must be rejected here.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;

    int i, j;
    lcg_complex *pk = nullptr, *uk = nullptr;
    lcg_complex *vk = nullptr, *dk = nullptr;
    lcg_complex *rbar0 = nullptr, *rk = nullptr;
    lcg_complex *Ax = nullptr, *qk = nullptr;
    lcg_complex *uqk = nullptr;

    pk = clcg_malloc(n_size); uk = clcg_malloc(n_size);
    vk = clcg_malloc(n_size); dk = clcg_malloc(n_size);
    rbar0 = clcg_malloc(n_size); rk = clcg_malloc(n_size);
    Ax = clcg_malloc(n_size); qk = clcg_malloc(n_size);
    uqk = clcg_malloc(n_size);

    // Initial residual r0 = B - A*m; pk and uk start as copies of it, dk starts at zero.
    Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

    #pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < n_size; i++)
    {
        pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
        clcg_set(&dk[i], 0.0, 0.0);
    }

    lcg_complex rho, rk_mod, rk_mod2;
    lcg_float r0_square, rk_square;

    clcg_inner(rk_mod, rk, rk, n_size);
    r0_square = rk_square = clcg_square(&rk_mod);
    // The relative residual is never normalised by a value below 1.0.
    if (r0_square < 1.0) r0_square = 1.0;

    // Draw random shadow vectors until the inner product with r0 is not (numerically) zero,
    // because rho is used as a divisor/numerator in the recurrences below.
    do
    {
        clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
        clcg_inner(rho, rbar0, rk, n_size);
    } while (clcg_module(&rho) < 1e-8);

    lcg_float theta = 0.0, omega = clcg_module(&rk_mod);
    lcg_float residual, tao = omega;
    lcg_complex sigma, alpha, betak, rho2, sign, eta(0.0, 0.0);

    int ret, t = 0;
    // Nothing to do when the starting guess already satisfies the requested tolerance.
    if (para.abs_diff && sqrt(rk_square)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, sqrt(rk_square)/n_size, &para, n_size, 0);
        }
        goto func_ends;
    }
    else if (rk_square/r0_square <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, m, rk_square/r0_square, &para, n_size, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        Afp(instance, pk, vk, n_size, MatNormal, NonConjugate);
        clcg_inner(sigma, rbar0, vk, n_size);
        alpha = rho/sigma;

        #pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            qk[i] = uk[i] - alpha*vk[i];
            uqk[i] = uk[i] + qk[i];
        }

        Afp(instance, uqk, Ax, n_size, MatNormal, NonConjugate);

        #pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            rk[i] = rk[i] - alpha*Ax[i];
        }
        clcg_inner(rk_mod2, rk, rk, n_size);

        // Two solution updates per outer pass: the first uses uk, the second uses qk.
        for (j = 1; j <= 2; j++)
        {
            if (para.abs_diff) residual = sqrt(rk_square)/n_size;
            else residual = rk_square/r0_square;

            if (Pfp != nullptr)
            {
                if (Pfp(instance, m, residual, &para, n_size, t))
                {
                    ret = CLCG_STOP; goto func_ends;
                }
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE; goto func_ends;
            }

            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                // Must leave BOTH loops. A plain 'break' here only exits the inner for-loop,
                // after which the outer while(1) keeps running without ever updating m again.
                ret = LCG_REACHED_MAX_ITERATIONS;
                goto func_ends;
            }

            t++;

            sign = theta*theta*(eta/alpha);

            if (j == 1)
            {
                omega = sqrt(clcg_module(&rk_mod)*clcg_module(&rk_mod2));

                #pragma omp parallel for private (i) schedule(guided)
                for (i = 0; i < n_size; i++)
                {
                    dk[i] = uk[i] + sign*dk[i];
                }
            }
            else
            {
                omega = clcg_module(&rk_mod2);

                #pragma omp parallel for private (i) schedule(guided)
                for (i = 0; i < n_size; i++)
                {
                    dk[i] = qk[i] + sign*dk[i];
                }
            }

            theta = omega/tao;
            tao = omega/sqrt(1.0+theta*theta);
            eta = (1.0/(1.0+theta*theta))*alpha;

            #pragma omp parallel for private (i) schedule(guided)
            for (i = 0; i < n_size; i++)
            {
                m[i] = m[i] + eta*dk[i];
            }

            // A value that differs from itself is NaN; abort instead of iterating on garbage.
            for (i = 0; i < n_size; i++)
            {
                if (m[i] != m[i])
                {
                    ret = CLCG_NAN_VALUE; goto func_ends;
                }
            }
        }

        rk_mod = rk_mod2;
        rk_square = clcg_square(&rk_mod);

        clcg_inner(rho2, rbar0, rk, n_size);
        betak = rho2/rho;
        rho = rho2;

        #pragma omp parallel for private (i) schedule(guided)
        for (i = 0; i < n_size; i++)
        {
            uk[i] = rk[i] + betak*qk[i];
            pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
        }
    }

    // Single exit point: every work vector is released here regardless of the status.
    func_ends:
    {
        clcg_free(pk);
        clcg_free(uk);
        clcg_free(vk);
        clcg_free(dk);
        clcg_free(rbar0);
        clcg_free(rk);
        clcg_free(Ax);
        clcg_free(qk);
        clcg_free(uqk);
    }

    return ret;
}

78
src/lib/clcg.h Normal file
View File

@ -0,0 +1,78 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _CLCG_H
#define _CLCG_H
#include "lcg_complex.h"
#include "util.h"
/**
 * @brief Callback interface for calculating the complex product of an N*N matrix 'A' multiplied
 * by a complex column vector 'x'.
 *
 * @param instance    The user data sent for the clcg_solver() functions by the client.
 * @param x           Multiplier of the Ax product (input, read-only).
 * @param prod_Ax     Output: product of A multiplied by x.
 * @param x_size      Size of x and column/row numbers of A.
 * @param layout      Whether to use the transpose of A for calculation.
 * @param conjugate   Whether to use the conjugate of A for calculation.
 */
typedef void (*clcg_axfunc_ptr)(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate);
/**
 * @brief Callback interface for monitoring the progress and terminating the iteration
 * if necessary.
 *
 * @param instance    The user data sent for the clcg_solver() functions by the client.
 * @param m           The current solutions.
 * @param converge    The current value evaluating the iteration progress.
 * @param param       The parameter set the solver is running with.
 * @param n_size      The size of the variables.
 * @param k           The iteration count.
 *
 * @retval int        Zero to continue the optimization process. Returning a
 *                    non-zero value will terminate the optimization process.
 */
typedef int (*clcg_progress_ptr)(void* instance, const lcg_complex* m,
const lcg_float converge, const clcg_para* param, const int n_size, const int k);
/**
 * @brief A combined complex conjugate gradient solver function.
 *
 * @param[in] Afp         Callback function for calculating the product of 'Ax'.
 * @param[in] Pfp         Callback function for monitoring the iteration progress.
 * @param     m           Initial solution vector (presumably overwritten with the solution -- confirm).
 * @param     B           Objective vector of the linear system.
 * @param[in] n_size      Size of the solution vector and objective vector.
 * @param     param       Parameter setup for the conjugate gradient methods.
 * @param     instance    The user data sent for the clcg_solver() function by the client.
 * This variable is either 'this' for class member functions or 'NULL' for global functions.
 * @param     solver_id   Solver type used to solve the linear system. The default value is CLCG_BICG.
 *
 * @return     Status of the function.
 */
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
clcg_solver_enum solver_id = CLCG_BICG);
#endif // _CLCG_H

529
src/lib/clcg_cuda.cu Normal file
View File

@ -0,0 +1,529 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "ctime"
#include "iostream"
#include "clcg_cuda.h"
// Common signature of the un-preconditioned double-precision CUDA solvers, so that
// clcg_solver_cuda() can select one of them through a function pointer.
typedef int (*cuda_solver_ptr)(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration; the solver is defined further down in this file.
int clbicg(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration; the solver is defined further down in this file.
int clbicg_symmetric(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Entry point of the un-preconditioned double-precision CUDA solvers.
 *
 * Maps solver_id onto one of the solvers declared above and forwards every other
 * argument to it unchanged.
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER when
 * solver_id is neither CLCG_BICG nor CLCG_BICG_SYM.
 */
int clcg_solver_cuda(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m, const cuDoubleComplex* B,
    const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
    cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    // Stays null for any id that has no solver attached to it.
    cuda_solver_ptr selected = nullptr;

    if (solver_id == CLCG_BICG)
    {
        selected = clbicg;
    }
    else if (solver_id == CLCG_BICG_SYM)
    {
        selected = clbicg_symmetric;
    }

    if (selected == nullptr)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return selected(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
// Signature of the preconditioned double-precision CUDA solvers: identical to the plain solver
// signature except for the additional preconditioning callback Mfp.
typedef int (*cuda_precondtioned_solver_ptr)(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param,
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration; the solver is defined further down in this file.
int clpcg(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Entry point of the preconditioned double-precision CUDA solvers.
 *
 * CLCG_PCG is the only id with a solver attached; every other argument is forwarded
 * to that solver unchanged.
 *
 * @return The status returned by clpcg, or CLCG_UNKNOWN_SOLVER for any other solver_id.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    // Reject unsupported ids first so the forwarding call below is unconditional.
    if (solver_id != CLCG_PCG)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    cuda_precondtioned_solver_ptr selected = clpcg;
    return selected(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
/**
 * @brief Double-precision CUDA bi-conjugate gradient solver for the complex system A*m = B.
 *
 * m and B are read with cudaMemcpyHostToDevice, i.e. they are treated as host arrays. All work
 * vectors are device buffers allocated here, and the device solution is copied back into m on
 * every exit path that is reached after the allocations.
 *
 * @param[in] Afp         Callback computing A*x (or its conjugate transpose, selected by the
 *                        cusparseOperation_t argument). Must not be null.
 * @param[in] Pfp         Optional progress callback (may be null). It receives the DEVICE copy
 *                        of the solution; a non-zero return value stops the iteration.
 * @param     m           Initial solution on entry, overwritten with the result.
 * @param[in] B           Right-hand side vector.
 * @param[in] n_size      Length of m and B. Must be positive.
 * @param[in] nz_size     Forwarded untouched to Afp and Pfp (presumably the number of
 *                        non-zero entries of A -- confirm).
 * @param[in] param       Solver parameters; defparam2 is used when this is null.
 * @param     instance    User data handed to Afp and Pfp untouched.
 * @param[in] cub_handle  cuBLAS handle. Must not be null.
 * @param[in] cus_handle  cuSPARSE handle. Must not be null.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED or LCG_REACHED_MAX_ITERATIONS from
 * the iteration; CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS, CLCG_INVILAD_EPSILON,
 * CLCG_INVALID_POINTER or LCG_INVALID_POINTER when an argument is rejected.
 *
 * NOTE(review): status codes with the LCG_ prefix are returned from this CLCG solver; confirm
 * that their values match what callers compare against.
 * NOTE(review): the return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are not checked.
 */
int clbicg(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before touching the device.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // Afp is invoked unconditionally below, so a null callback is rejected up front.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *r1k = nullptr, *r2k = nullptr;
    cuDoubleComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&r1k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&r2k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d1k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d2k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to the Afp callback.
    cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    // Complex constants +1 and -1 used as axpy scaling factors.
    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, conj_ak, Ad1d2, r1r2_next, betak, conj_betak;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(r1k, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1); // r0 -= Ax
    cudaMemcpy(d1k, r1k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

    // The second residual/direction pair is derived from the first via clcg_vecZ_conjugate.
    clcg_vecZ_conjugate(r1k, r2k, n_size);
    cudaMemcpy(d2k, r2k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);

    cuDoubleComplex r1r2;
    cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);

    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

    // The relative residual is never normalised by a value below 1.0.
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    // Nothing to do when the starting guess already satisfies the requested tolerance.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // 'break' leaves the while loop and falls through to the cleanup label.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Step length ak = r1r2 / (d2, A*d1).
        Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasZdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
        ak = cuCdiv(r1r2, Ad1d2);
        nak = cuCmul(none, ak);
        conj_ak = cuConj(nak);

        cublasZaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1);  // m  += ak * d1
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1);  // r1 -= ak * A*d1

        cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

        // Second residual is updated with the conjugate-transposed operator applied to d2.
        Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);
        cublasZaxpy_v2(cub_handle, n_size, &conj_ak, Ax, 1, r2k, 1);

        cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
        betak = cuCdiv(r1r2_next, r1r2);
        conj_betak = cuConj(betak);
        r1r2 = r1r2_next;

        // d1 = r1 + betak * d1
        cublasZscal_v2(cub_handle, n_size, &betak, d1k, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);

        // d2 = r2 + conj(betak) * d2
        cublasZscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
    }

    // Single exit point: publish the solution, then release every device resource.
    func_ends:
    {
        // Copy to host memories
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(r1k);
        cudaFree(r2k);
        cudaFree(d1k);
        cudaFree(d2k);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_d1k);
        cusparseDestroyDnVec(dvec_d2k);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
/**
 * @brief Double-precision CUDA solver for the complex system A*m = B that keeps a single
 * residual/direction pair and uses unconjugated dot products (cublasZdotu); selected by
 * CLCG_BICG_SYM, so presumably intended for complex symmetric matrices -- confirm.
 *
 * m and B are read with cudaMemcpyHostToDevice, i.e. they are treated as host arrays. All work
 * vectors are device buffers allocated here, and the device solution is copied back into m on
 * every exit path that is reached after the allocations.
 *
 * @param[in] Afp         Callback computing A*x. Must not be null.
 * @param[in] Pfp         Optional progress callback (may be null). It receives the DEVICE copy
 *                        of the solution; a non-zero return value stops the iteration.
 * @param     m           Initial solution on entry, overwritten with the result.
 * @param[in] B           Right-hand side vector.
 * @param[in] n_size      Length of m and B. Must be positive.
 * @param[in] nz_size     Forwarded untouched to Afp and Pfp (presumably the number of
 *                        non-zero entries of A -- confirm).
 * @param[in] param       Solver parameters; defparam2 is used when this is null.
 * @param     instance    User data handed to Afp and Pfp untouched.
 * @param[in] cub_handle  cuBLAS handle. Must not be null.
 * @param[in] cus_handle  cuSPARSE handle. Must not be null.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED or LCG_REACHED_MAX_ITERATIONS from
 * the iteration; CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS, CLCG_INVILAD_EPSILON,
 * CLCG_INVALID_POINTER or LCG_INVALID_POINTER when an argument is rejected.
 *
 * NOTE(review): status codes with the LCG_ prefix are returned from this CLCG solver; confirm
 * that their values match what callers compare against.
 * NOTE(review): the return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are not checked.
 */
int clbicg_symmetric(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before touching the device.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // Afp is invoked unconditionally below, so a null callback is rejected up front.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to the Afp callback.
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    // Complex constants +1 and -1 used as axpy scaling factors.
    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, rkrk2, betak, dkAx;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
    cudaMemcpy(dk, rk, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

    // Unconjugated product of the residual with itself, used by the recurrences.
    cuDoubleComplex rkrk;
    cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

    // Euclidean norm of the residual, used only for the stopping tests.
    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative residual is never normalised by a value below 1.0.
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    // Nothing to do when the starting guess already satisfies the requested tolerance.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // 'break' leaves the while loop and falls through to the cleanup label.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Step length ak = rkrk / (dk, A*dk), with unconjugated products.
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdiv(rkrk, dkAx);
        nak = cuCmul(none, ak);

        cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);  // m  += ak * dk
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);  // rk -= ak * A*dk

        cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
        betak = cuCdiv(rkrk2, rkrk);
        rkrk = rkrk2;

        // dk = rk + betak * dk
        cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
    }

    // Single exit point: publish the solution, then release every device resource.
    func_ends:
    {
        // Copy to host memories
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
/**
 * @brief Double-precision CUDA preconditioned conjugate gradient solver for the complex
 * system A*m = B, using unconjugated dot products (cublasZdotu).
 *
 * m and B are read with cudaMemcpyHostToDevice, i.e. they are treated as host arrays. All work
 * vectors are device buffers allocated here, and the device solution is copied back into m on
 * every exit path that is reached after the allocations.
 *
 * @param[in] Afp         Callback computing A*x. Must not be null.
 * @param[in] Mfp         Callback applying the preconditioner to a vector. Must not be null.
 * @param[in] Pfp         Optional progress callback (may be null). It receives the DEVICE copy
 *                        of the solution; a non-zero return value stops the iteration.
 * @param     m           Initial solution on entry, overwritten with the result.
 * @param[in] B           Right-hand side vector.
 * @param[in] n_size      Length of m and B. Must be positive.
 * @param[in] nz_size     Forwarded untouched to Afp, Mfp and Pfp (presumably the number of
 *                        non-zero entries of A -- confirm).
 * @param[in] param       Solver parameters; defparam2 is used when this is null.
 * @param     instance    User data handed to the callbacks untouched.
 * @param[in] cub_handle  cuBLAS handle. Must not be null.
 * @param[in] cus_handle  cuSPARSE handle. Must not be null.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED or LCG_REACHED_MAX_ITERATIONS from
 * the iteration; CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS, CLCG_INVILAD_EPSILON,
 * CLCG_INVALID_POINTER or LCG_INVALID_POINTER when an argument is rejected.
 *
 * NOTE(review): status codes with the LCG_ prefix are returned from this CLCG solver; confirm
 * that their values match what callers compare against.
 * NOTE(review): the return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are not checked.
 */
int clpcg(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, otherwise the library defaults.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before touching the device.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // Both callbacks are invoked unconditionally below, so null ones are rejected up front.
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (Mfp == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *rk = nullptr, *dk = nullptr, *sk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&sk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device.
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to the Afp/Mfp callbacks.
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_dk, dvec_sk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_sk, n_size, sk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    // Complex constants +1 and -1 used as axpy scaling factors.
    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, d_old, betak, dkAx;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax

    // The first search direction is the preconditioned residual: d0 = M(r0).
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_dk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    cuDoubleComplex d_new;
    cublasZdotu_v2(cub_handle, n_size, rk, 1, dk, 1, &d_new);

    // Euclidean norm of the residual, used only for the stopping tests.
    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative residual is never normalised by a value below 1.0.
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    // Nothing to do when the starting guess already satisfies the requested tolerance.
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // 'break' leaves the while loop and falls through to the cleanup label.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Step length ak = d_new / (dk, A*dk), with unconjugated products.
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdiv(d_new, dkAx);
        nak = cuCmul(none, ak);

        cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);  // m  += ak * dk
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);  // rk -= ak * A*dk

        cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        // sk = M(rk): preconditioned residual for the next direction.
        Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_sk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        d_old = d_new;
        cublasZdotu_v2(cub_handle, n_size, rk, 1, sk, 1, &d_new);

        betak = cuCdiv(d_new, d_old);

        // dk = sk + betak * dk
        cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, sk, 1, dk, 1);
    }

    // Single exit point: publish the solution, then release every device resource.
    func_ends:
    {
        // Copy to host memories
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(sk);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_rk);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_sk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}

109
src/lib/clcg_cuda.h Normal file
View File

@ -0,0 +1,109 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _CLCG_CUDA_H
#define _CLCG_CUDA_H
#include "util.h"
#include "lcg_complex_cuda.h"
#ifdef LibLCG_CUDA
#include <cublas_v2.h>
#include <cusparse_v2.h>
/**
 * @brief Callback interface for calculating the product of an N*N matrix 'A' multiplied
 * by a column vector 'x'. Note that both A and x are hosted on the GPU device.
 *
 * @param instance    The user data sent for the clcg_solver_cuda() functions by the client.
 * @param cub_handle  Handle of the cublas object.
 * @param cus_handle  Handle of the cusparse object.
 * @param x           Dense-vector descriptor of the multiplier of the Ax product.
 * @param prod_Ax     Dense-vector descriptor receiving the product of A multiplied by x.
 * @param n_size      Size of x and column/row numbers of A.
 * @param nz_size     Value of the nz_size argument given to the solver (presumably the number
 *                    of non-zero entries of A -- confirm).
 * @param oper_t      cuSPARSE operation to apply to A (e.g. CUSPARSE_OPERATION_NON_TRANSPOSE
 *                    or CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE).
 */
typedef void (*clcg_axfunc_cuda_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size, cusparseOperation_t oper_t);
/**
 * @brief Callback interface for monitoring the progress and terminating the iteration
 * if necessary. Note that m is hosted on the GPU device.
 *
 * @param instance    The user data sent for the clcg_solver_cuda() functions by the client.
 * @param m           The current solutions (device memory).
 * @param converge    The current value evaluating the iteration progress.
 * @param param       The parameter set the solver is running with.
 * @param n_size      The size of the variables.
 * @param nz_size     Value of the nz_size argument given to the solver (presumably the number
 *                    of non-zero entries of A -- confirm).
 * @param k           The iteration count.
 *
 * @retval int        Zero to continue the optimization process. Returning a
 *                    non-zero value will terminate the optimization process.
 */
typedef int (*clcg_progress_cuda_ptr)(void* instance, const cuDoubleComplex* m, const lcg_float converge,
const clcg_para* param, const int n_size, const int nz_size, const int k);
/**
 * @brief A combined complex conjugate gradient solver function (double precision, CUDA).
 *
 * NOTE(review): this comment originally stated that both m and B are hosted on the GPU device.
 * The solvers in clcg_cuda.cu (clbicg, clbicg_symmetric) copy m and B with cudaMemcpyHostToDevice
 * and copy the result back into m with cudaMemcpyDeviceToHost, which implies HOST pointers --
 * confirm and settle on one contract.
 *
 * @param[in] Afp         Callback function for calculating the product of 'Ax'.
 * @param[in] Pfp         Callback function for monitoring the iteration progress.
 * @param     m           Initial solution vector; receives the solution.
 * @param     B           Objective vector of the linear system.
 * @param[in] n_size      Size of the solution vector and objective vector.
 * @param[in] nz_size     Passed on to the Afp and Pfp callbacks (presumably the number of
 *                        non-zero entries of A -- confirm).
 * @param     param       Parameter setup for the conjugate gradient methods.
 * @param     instance    The user data sent for the clcg_solver_cuda() function by the client.
 * This variable is either 'this' for class member functions or 'NULL' for global functions.
 * @param     cub_handle  Handle of the cublas object.
 * @param     cus_handle  Handle of the cusparse object.
 * @param     solver_id   Solver type used to solve the linear system. The default value is CLCG_BICG.
 *
 * @return     Status of the function.
 */
int clcg_solver_cuda(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m, const cuDoubleComplex* B,
const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_BICG);
/**
 * @brief A combined preconditioned complex conjugate gradient solver function (double precision, CUDA).
 *
 * NOTE(review): this comment originally stated that both m and B are hosted on the GPU device.
 * The solver in clcg_cuda.cu (clpcg) copies m and B with cudaMemcpyHostToDevice and copies the
 * result back into m with cudaMemcpyDeviceToHost, which implies HOST pointers -- confirm and
 * settle on one contract.
 *
 * @param[in] Afp         Callback function for calculating the product of 'Ax'.
 * @param[in] Mfp         Callback function for calculating the product of 'Mx' for preconditioning.
 * @param[in] Pfp         Callback function for monitoring the iteration progress.
 * @param     m           Initial solution vector; receives the solution.
 * @param     B           Objective vector of the linear system.
 * @param[in] n_size      Size of the solution vector and objective vector.
 * @param[in] nz_size     Passed on to the Afp, Mfp and Pfp callbacks (presumably the number of
 *                        non-zero entries of A -- confirm).
 * @param     param       Parameter setup for the conjugate gradient methods.
 * @param     instance    The user data sent for the clcg_solver_preconditioned_cuda() function by the client.
 * This variable is either 'this' for class member functions or 'NULL' for global functions.
 * @param     cub_handle  Handle of the cublas object.
 * @param     cus_handle  Handle of the cusparse object.
 * @param     solver_id   Solver type used to solve the linear system. The default value is CLCG_PCG.
 *
 * @return     Status of the function.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_PCG);
#endif // LibLCG_CUDA
#endif // _CLCG_CUDA_H

529
src/lib/clcg_cudaf.cu Normal file
View File

@ -0,0 +1,529 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "ctime"
#include "iostream"
#include "clcg_cudaf.h"
// Common signature of the un-preconditioned single-precision (cuComplex) CUDA solvers, so that
// clcg_solver_cuda() can select one of them through a function pointer.
typedef int (*cuda_solver_ptr)(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration of the solver selected by CLCG_BICG.
int clbicg(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration of the solver selected by CLCG_BICG_SYM.
int clbicg_symmetric(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Dispatch a single-precision complex CUDA solve to the solver selected by solver_id.
 *
 * Only CLCG_BICG and CLCG_BICG_SYM are handled here; every other identifier is
 * rejected with CLCG_UNKNOWN_SOLVER before any work is done. All remaining arguments
 * are forwarded unchanged to the selected solver.
 *
 * @return The status code of the selected solver, or CLCG_UNKNOWN_SOLVER.
 */
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
    const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
    cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    if (solver_id == CLCG_BICG)
    {
        return clbicg(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    if (solver_id == CLCG_BICG_SYM)
    {
        return clbicg_symmetric(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    // No other solver is implemented for this backend
    return CLCG_UNKNOWN_SOLVER;
}
// Common signature of the preconditioned single-precision CUDA solvers. Compared with
// cuda_solver_ptr it takes one extra callback, Mfp, which applies the preconditioner.
typedef int (*cuda_precondtioned_solver_ptr)(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param,
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
// Forward declaration of the preconditioned conjugate gradient solver, so that the
// dispatcher below can reference it before its definition.
int clpcg(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Dispatch a preconditioned single-precision complex CUDA solve.
 *
 * CLCG_PCG is the only identifier handled here; any other value yields
 * CLCG_UNKNOWN_SOLVER. All remaining arguments are forwarded unchanged.
 *
 * @return The status code of clpcg(), or CLCG_UNKNOWN_SOLVER.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
    cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    // Reject everything except the one preconditioned solver implemented for this backend
    if (solver_id != CLCG_PCG)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return clpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
/**
 * @brief Bi-conjugate gradient (BiCG) solver for a complex linear system A*m = B,
 * evaluated in single precision with cuBLAS and cuSPARSE.
 *
 * m and B are read as host arrays: both are copied into device buffers on entry and
 * the device solution is copied back into m before the function returns.
 *
 * @param[in]     Afp        Callback computing the product A*x. It is called with
 *                           CUSPARSE_OPERATION_NON_TRANSPOSE and CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the device copy
 *                           of m; a non-zero return value stops the iteration.
 * @param[in,out] m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param[in]     B          Host array of n_size elements holding the right-hand side.
 * @param[in]     n_size     Number of unknowns.
 * @param[in]     nz_size    Passed through unchanged to Afp and Pfp.
 * @param[in]     param      Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance   User data passed through unchanged to the callbacks.
 * @param[in]     cub_handle cuBLAS handle.
 * @param[in]     cus_handle cuSPARSE handle.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected arguments.
 */
int clbicg(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before any device resource is acquired
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // The handle checks now report the same CLCG_ code as the pointer checks above
    if (cub_handle == nullptr) return CLCG_INVALID_POINTER;
    if (cus_handle == nullptr) return CLCG_INVALID_POINTER;

    const size_t vec_bytes = static_cast<size_t>(n_size) * sizeof(cuComplex);

    // NOTE(review): the return codes of the CUDA, cuBLAS and cuSPARSE calls below are not checked.
    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *r1k = nullptr, *r2k = nullptr;
    cuComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&r1k, vec_bytes);
    cudaMalloc(&r2k, vec_bytes);
    cudaMalloc(&d1k, vec_bytes);
    cudaMalloc(&d2k, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Upload the initial solution and the right-hand side
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0f; one.y = 0.0f;
    none.x = -1.0f; none.y = 0.0f;
    cuComplex ak, nak, nconj_ak, Ad1d2, r1r2, r1r2_next, betak, conj_betak;

    // r0 = B - A*m0, d0 = r0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
    cudaMemcpy(r1k, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1);
    cudaMemcpy(d1k, r1k, vec_bytes, cudaMemcpyDeviceToDevice);

    // The shadow residual starts from r1k via clcg_vecC_conjugate; the shadow direction is a copy of it
    clcg_vecC_conjugate(r1k, r2k, n_size);
    cudaMemcpy(d2k, r2k, vec_bytes, cudaMemcpyDeviceToDevice);

    cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

    // The relative residual is normalised by max(|r0|, 1)
    float r0_mod = rk_mod;
    if (r0_mod < 1.0f) r0_mod = 1.0f;

    int ret, t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while (1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // ak = (r2, r1) / (d2, A*d1)
        Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasCdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
        ak = cuCdivf(r1r2, Ad1d2);
        nak = cuCmulf(none, ak);
        nconj_ak = cuConjf(nak); // equals -conj(ak)

        // m += ak*d1, r1 -= ak*A*d1
        cublasCaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1);
        cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1);
        cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

        // r2 -= conj(ak)*A^H*d2
        Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);
        cublasCaxpy_v2(cub_handle, n_size, &nconj_ak, Ax, 1, r2k, 1);

        cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
        betak = cuCdivf(r1r2_next, r1r2);
        conj_betak = cuConjf(betak);
        r1r2 = r1r2_next;

        // d1 = r1 + betak*d1, d2 = r2 + conj(betak)*d2
        cublasCscal_v2(cub_handle, n_size, &betak, d1k, 1);
        cublasCaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);
        cublasCscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
        cublasCaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
    }

    func_ends:
    {
        // Download the solution, then release every device resource
        cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(r1k);
        cudaFree(r2k);
        cudaFree(d1k);
        cudaFree(d2k);
        cudaFree(Ax);

        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_d1k);
        cusparseDestroyDnVec(dvec_d2k);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
/**
 * @brief Bi-conjugate gradient solver variant that only needs the product A*x
 * (single precision, cuBLAS and cuSPARSE).
 *
 * Afp is only called with CUSPARSE_OPERATION_NON_TRANSPOSE and every inner product is
 * the unconjugated cublasCdotu. m and B are read as host arrays: both are copied into
 * device buffers on entry and the device solution is copied back into m before returning.
 *
 * @param[in]     Afp        Callback computing the product A*x.
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the device copy
 *                           of m; a non-zero return value stops the iteration.
 * @param[in,out] m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param[in]     B          Host array of n_size elements holding the right-hand side.
 * @param[in]     n_size     Number of unknowns.
 * @param[in]     nz_size    Passed through unchanged to Afp and Pfp.
 * @param[in]     param      Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance   User data passed through unchanged to the callbacks.
 * @param[in]     cub_handle cuBLAS handle.
 * @param[in]     cus_handle cuSPARSE handle.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected arguments.
 */
int clbicg_symmetric(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before any device resource is acquired
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // The handle checks now report the same CLCG_ code as the pointer checks above
    if (cub_handle == nullptr) return CLCG_INVALID_POINTER;
    if (cus_handle == nullptr) return CLCG_INVALID_POINTER;

    // Every buffer holds n_size single-precision complex values. d_m and d_B were previously
    // sized with sizeof(cuDoubleComplex), i.e. twice what the copies below ever touch.
    const size_t vec_bytes = static_cast<size_t>(n_size) * sizeof(cuComplex);

    // NOTE(review): the return codes of the CUDA, cuBLAS and cuSPARSE calls below are not checked.
    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&rk, vec_bytes);
    cudaMalloc(&dk, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Upload the initial solution and the right-hand side
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0f; one.y = 0.0f;
    none.x = -1.0f; none.y = 0.0f;
    cuComplex ak, nak, rkrk, rkrk2, betak, dkAx;

    // r0 = B - A*m0, d0 = r0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
    cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1);
    cudaMemcpy(dk, rk, vec_bytes, cudaMemcpyDeviceToDevice);

    cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative residual is normalised by max(|r0|, 1)
    float r0_mod = rk_mod;
    if (r0_mod < 1.0f) r0_mod = 1.0f;

    int ret, t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while (1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // ak = (r^T r) / (d^T A d), all products unconjugated
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasCdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdivf(rkrk, dkAx);
        nak = cuCmulf(none, ak);

        // m += ak*d, r -= ak*A*d
        cublasCaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);
        cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);
        cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
        betak = cuCdivf(rkrk2, rkrk);
        rkrk = rkrk2;

        // d = r + betak*d
        cublasCscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasCaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
    }

    func_ends:
    {
        // Download the solution, then release every device resource
        cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(Ax);

        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
/**
 * @brief Preconditioned conjugate gradient solver for a complex linear system A*m = B
 * (single precision, cuBLAS and cuSPARSE).
 *
 * Both callbacks are only called with CUSPARSE_OPERATION_NON_TRANSPOSE and every inner
 * product is the unconjugated cublasCdotu. m and B are read as host arrays: both are
 * copied into device buffers on entry and the device solution is copied back into m
 * before returning.
 *
 * @param[in]     Afp        Callback computing the product A*x.
 * @param[in]     Mfp        Callback applying the preconditioner to a vector.
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the device copy
 *                           of m; a non-zero return value stops the iteration.
 * @param[in,out] m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param[in]     B          Host array of n_size elements holding the right-hand side.
 * @param[in]     n_size     Number of unknowns.
 * @param[in]     nz_size    Passed through unchanged to Afp, Mfp and Pfp.
 * @param[in]     param      Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance   User data passed through unchanged to the callbacks.
 * @param[in]     cub_handle cuBLAS handle.
 * @param[in]     cus_handle cuSPARSE handle.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER codes for rejected arguments.
 */
int clpcg(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before any device resource is acquired
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // The handle checks now report the same CLCG_ code as the pointer checks above
    if (cub_handle == nullptr) return CLCG_INVALID_POINTER;
    if (cus_handle == nullptr) return CLCG_INVALID_POINTER;

    const size_t vec_bytes = static_cast<size_t>(n_size) * sizeof(cuComplex);

    // NOTE(review): the return codes of the CUDA, cuBLAS and cuSPARSE calls below are not checked.
    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *rk = nullptr, *dk = nullptr, *sk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&rk, vec_bytes);
    cudaMalloc(&dk, vec_bytes);
    cudaMalloc(&sk, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Upload the initial solution and the right-hand side
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Dense-vector descriptors for the buffers that are handed to Afp and Mfp
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_dk, dvec_sk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_sk, n_size, sk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0f; one.y = 0.0f;
    none.x = -1.0f; none.y = 0.0f;
    cuComplex ak, nak, d_old, d_new, betak, dkAx;

    // r0 = B - A*m0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
    cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1);

    // d0 = M(r0), the preconditioned residual
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_dk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    cublasCdotu_v2(cub_handle, n_size, rk, 1, dk, 1, &d_new);

    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The relative residual is normalised by max(|r0|, 1)
    float r0_mod = rk_mod;
    if (r0_mod < 1.0f) r0_mod = 1.0f;

    int ret, t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while (1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // ak = (r^T M r) / (d^T A d), all products unconjugated
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
        cublasCdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdivf(d_new, dkAx);
        nak = cuCmulf(none, ak);

        // m += ak*d, r -= ak*A*d
        cublasCaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);
        cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);
        cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        // s = M(r)
        Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_sk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        d_old = d_new;
        cublasCdotu_v2(cub_handle, n_size, rk, 1, sk, 1, &d_new);
        betak = cuCdivf(d_new, d_old);

        // d = s + betak*d
        cublasCscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasCaxpy_v2(cub_handle, n_size, &one, sk, 1, dk, 1);
    }

    func_ends:
    {
        // Download the solution, then release every device resource
        cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(sk);
        cudaFree(Ax);

        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_rk);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_sk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}

109
src/lib/clcg_cudaf.h Normal file
View File

@ -0,0 +1,109 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _CLCG_CUDA_FLOAT_H
#define _CLCG_CUDA_FLOAT_H
#include "util.h"
#include "lcg_complex_cuda.h"
#ifdef LibLCG_CUDA
#include <cublas_v2.h>
#include <cusparse_v2.h>
/**
 * @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
 * by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
 *
 * The same callback type is also used for the preconditioner (Mfp) of
 * clcg_solver_preconditioned_cuda().
 *
 * @param instance    The user data sent for the clcg_solver_cuda() functions by the client.
 * @param cub_handle  Handle of the cublas object.
 * @param cus_handle  Handle of the cusparse object.
 * @param x           Multiplier of the Ax product (cuSPARSE dense-vector descriptor).
 * @param prod_Ax     Product of A multiplied by x (cuSPARSE dense-vector descriptor).
 * @param n_size      Size of x and column/row numbers of A.
 * @param nz_size     Value of nz_size given to the solver call, passed through unchanged.
 *                    Presumably the number of non-zero entries of A - TODO confirm.
 * @param oper_t      Operation requested for A, e.g. CUSPARSE_OPERATION_NON_TRANSPOSE or
 *                    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
 */
typedef void (*clcg_axfunc_cudaf_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size, cusparseOperation_t oper_t);
/**
 * @brief Callback interface for monitoring the progress and terminating the iteration
 * if necessary. Note that m is hosted on the GPU device.
 *
 * @param instance  The user data sent for the clcg_solver_cuda() functions by the client.
 * @param m         The current solutions (device memory).
 * @param converge  The current value evaluating the iteration progress.
 * @param param     The parameter set used by the running solver.
 * @param n_size    The size of the variables.
 * @param nz_size   Value of nz_size given to the solver call, passed through unchanged.
 * @param k         The iteration count.
 *
 * @retval int Zero to continue the optimization process. Returning a
 * non-zero value will terminate the optimization process.
 */
typedef int (*clcg_progress_cudaf_ptr)(void* instance, const cuComplex* m, const float converge,
const clcg_para* param, const int n_size, const int nz_size, const int k);
/**
 * @brief A combined conjugate gradient solver function (single-precision complex, CUDA).
 *
 * Note that m and B are host arrays: the solvers copy them to the GPU device on entry
 * and copy the solution back into m before returning. Only the callbacks work on
 * device-side data.
 *
 * @param[in] Afp     Callback function for calculating the product of 'Ax'.
 * @param[in] Pfp     Callback function for monitoring the iteration progress. May be NULL.
 * @param m           Initial solution vector on input, solution on output (host memory).
 * @param B           Objective vector of the linear system (host memory).
 * @param[in] n_size  Size of the solution vector and objective vector.
 * @param[in] nz_size Passed through unchanged to the callbacks.
 * @param param       Parameter setup for the conjugate gradient methods. Default parameters are used if NULL.
 * @param instance    The user data passed to the callbacks.
 * This variable is either 'this' for class member functions or 'NULL' for global functions.
 * @param cub_handle  Handle of the cublas object.
 * @param cus_handle  Handle of the cusparse object.
 * @param solver_id   Solver type used to solve the linear system. The default value is CLCG_BICG.
 * CLCG_BICG and CLCG_BICG_SYM are supported; other values return CLCG_UNKNOWN_SOLVER.
 *
 * @return Status of the function.
 */
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_BICG);
/**
 * @brief A combined preconditioned conjugate gradient solver function (single-precision complex, CUDA).
 *
 * Note that m and B are host arrays: the solver copies them to the GPU device on entry
 * and copies the solution back into m before returning. Only the callbacks work on
 * device-side data.
 *
 * @param[in] Afp     Callback function for calculating the product of 'Ax'.
 * @param[in] Mfp     Callback function for calculating the product of 'Mx' for preconditioning.
 * @param[in] Pfp     Callback function for monitoring the iteration progress. May be NULL.
 * @param m           Initial solution vector on input, solution on output (host memory).
 * @param B           Objective vector of the linear system (host memory).
 * @param[in] n_size  Size of the solution vector and objective vector.
 * @param[in] nz_size Passed through unchanged to the callbacks.
 * @param param       Parameter setup for the conjugate gradient methods. Default parameters are used if NULL.
 * @param instance    The user data passed to the callbacks.
 * This variable is either 'this' for class member functions or 'NULL' for global functions.
 * @param cub_handle  Handle of the cublas object.
 * @param cus_handle  Handle of the cusparse object.
 * @param solver_id   Solver type used to solve the linear system. The default value is CLCG_PCG,
 * which is the only supported value; other values return CLCG_UNKNOWN_SOLVER.
 *
 * @return Status of the function.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_PCG);
#endif // LibLCG_CUDA
#endif // _CLCG_CUDA_FLOAT_H

777
src/lib/clcg_eigen.cpp Normal file
View File

@ -0,0 +1,777 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "ctime"
#include "iostream"
#include "clcg_eigen.h"
#include "config.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
// Common signature of the un-preconditioned Eigen solvers declared below.
// clcg_solver_eigen() stores the selected solver in a pointer of this type before calling it.
typedef int (*eigen_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the bi-conjugate gradient solver (selected by CLCG_BICG).
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the bi-conjugate gradient variant selected by CLCG_BICG_SYM.
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the solver selected by CLCG_CGS.
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the solver selected by CLCG_TFQMR.
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
/**
 * @brief Dispatch an Eigen-based complex solve to the solver selected by solver_id.
 *
 * CLCG_BICG, CLCG_BICG_SYM, CLCG_CGS and CLCG_TFQMR are handled here; every other
 * identifier is rejected with CLCG_UNKNOWN_SOLVER. All remaining arguments are
 * forwarded unchanged to the selected solver.
 *
 * @return The status code of the selected solver, or CLCG_UNKNOWN_SOLVER.
 */
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
{
    switch (solver_id)
    {
        case CLCG_BICG:
            return clbicg(Afp, Pfp, m, B, param, instance);
        case CLCG_BICG_SYM:
            return clbicg_symmetric(Afp, Pfp, m, B, param, instance);
        case CLCG_CGS:
            return clcgs(Afp, Pfp, m, B, param, instance);
        case CLCG_TFQMR:
            return cltfqmr(Afp, Pfp, m, B, param, instance);
        default:
            break;
    }

    // No matching un-preconditioned solver
    return CLCG_UNKNOWN_SOLVER;
}
// Common signature of the preconditioned Eigen solvers. Compared with eigen_solver_ptr
// it takes one extra callback, Mfp, which applies the preconditioner.
typedef int (*eigen_preconditioned_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the solver selected by CLCG_PCG.
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
// Forward declaration of the solver selected by CLCG_PBICG.
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
/**
 * @brief Dispatch a preconditioned Eigen-based complex solve to the solver selected by solver_id.
 *
 * CLCG_PCG and CLCG_PBICG are handled here; every other identifier is rejected with
 * CLCG_UNKNOWN_SOLVER. All remaining arguments are forwarded unchanged.
 *
 * @return The status code of the selected solver, or CLCG_UNKNOWN_SOLVER.
 */
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
{
    if (solver_id == CLCG_PCG)
    {
        return clpcg(Afp, Mfp, Pfp, m, B, param, instance);
    }

    if (solver_id == CLCG_PBICG)
    {
        return clpbicg(Afp, Mfp, Pfp, m, B, param, instance);
    }

    // No matching preconditioned solver
    return CLCG_UNKNOWN_SOLVER;
}
/**
 * @brief Bi-conjugate gradient (BiCG) solver for a complex linear system A*m = B using Eigen vectors.
 *
 * @param[in]     Afp      Callback computing a matrix-vector product. It is called with
 *                         (MatNormal, NonConjugate) and with (MatTranspose, Conjugate).
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return
 *                         value stops the iteration.
 * @param[in,out] m        Initial guess on entry, solution on exit. Must have the size of B.
 * @param[in]     B        Right-hand side of the linear system.
 * @param[in]     param    Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance User data passed through unchanged to the callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes for rejected arguments.
 */
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Validate the arguments
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, Ad1d2, r1r2_next, betak;
    Eigen::VectorXcd r1k(n_size), r2k(n_size), d1k(n_size), d2k(n_size);
    Eigen::VectorXcd Ax(n_size);

    // r0 = B - A*m0, d0 = r0; the shadow residual and direction start from conj(r0)
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    d1k = r1k = B - Ax;
    d2k = r2k = r1k.conjugate();

    // Eigen's dot() is the inner product (it conjugates its first operand)
    std::complex<lcg_float> r1r2 = r2k.dot(r1k);

    // NOTE(review): std::norm() yields the squared magnitude, so rk_mod holds |r^H r|^2
    // rather than |r|^2 - confirm that this scaling of the residual is intended.
    lcg_float rk_mod = std::norm(r1k.dot(r1k));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    lcg_float residual = para.abs_diff ? std::sqrt(rk_mod)/n_size : rk_mod/r0_mod;
    if (residual <= para.epsilon)
    {
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        return LCG_ALREADY_OPTIMIZIED;
    }

    // The work vectors release their storage on scope exit, so every exit is a plain return
    while (1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                return CLCG_STOP;
            }
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, d1k, Ax, MatNormal, NonConjugate);
        Ad1d2 = d2k.dot(Ax);
        ak = r1r2/Ad1d2;

        m = m + ak*d1k;
        r1k = r1k - ak*Ax;
        rk_mod = std::norm(r1k.dot(r1k));

        Afp(instance, d2k, Ax, MatTranspose, Conjugate);
        r2k = r2k - std::conj(ak)*Ax;

        r1r2_next = r2k.dot(r1k);
        betak = r1r2_next/r1r2;
        r1r2 = r1r2_next;

        d1k = r1k + betak*d1k;
        d2k = r2k + std::conj(betak)*d2k;
    }
}
/**
 * @brief Bi-conjugate gradient solver variant that only needs the product A*x (Eigen vectors).
 *
 * Afp is only called with (MatNormal, NonConjugate); the step sizes are built from
 * unconjugated products, written as conjugate().dot() because Eigen's dot() conjugates
 * its first operand.
 *
 * @param[in]     Afp      Callback computing the product A*x.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return
 *                         value stops the iteration.
 * @param[in,out] m        Initial guess on entry, solution on exit. Must have the size of B.
 * @param[in]     B        Right-hand side of the linear system.
 * @param[in]     param    Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance User data passed through unchanged to the callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes for rejected arguments.
 */
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Validate the arguments
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, rkrk2, betak, dkAx;
    Eigen::VectorXcd rk(n_size), dk(n_size), Ax(n_size);

    // r0 = B - A*m0, d0 = r0
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    dk = rk = (B - Ax);

    std::complex<lcg_float> rkrk = rk.conjugate().dot(rk);

    // NOTE(review): std::norm() yields the squared magnitude, so rk_mod holds |r^H r|^2
    // rather than |r|^2 - confirm that this scaling of the residual is intended.
    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    lcg_float residual = para.abs_diff ? std::sqrt(rk_mod)/n_size : rk_mod/r0_mod;
    if (residual <= para.epsilon)
    {
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        return LCG_ALREADY_OPTIMIZIED;
    }

    // The work vectors release their storage on scope exit, so every exit is a plain return
    while (1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                return CLCG_STOP;
            }
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, dk, Ax, MatNormal, NonConjugate);
        dkAx = dk.conjugate().dot(Ax);
        ak = rkrk/dkAx;

        m += ak*dk;
        rk -= ak*Ax;
        rk_mod = std::norm(rk.dot(rk));

        rkrk2 = rk.conjugate().dot(rk);
        betak = rkrk2/rkrk;
        rkrk = rkrk2;

        dk = rk + betak*dk;
    }
}
/**
 * @brief Solver selected by CLCG_CGS for a complex linear system A*m = B (Eigen vectors).
 *
 * Afp is only called with (MatNormal, NonConjugate). The shadow vector s0 is drawn at
 * random and redrawn until its product with the initial residual has a magnitude of at
 * least 1e-8, so two runs may follow different iteration paths.
 *
 * @param[in]     Afp      Callback computing the product A*x.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return
 *                         value stops the iteration.
 * @param[in,out] m        Initial guess on entry, solution on exit. Must have the size of B.
 * @param[in]     B        Right-hand side of the linear system.
 * @param[in]     param    Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance User data passed through unchanged to the callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes for rejected arguments.
 */
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Validate the arguments
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, rhok2, sigma, betak;
    Eigen::VectorXcd rk(n_size), s0, pk(n_size);
    Eigen::VectorXcd Ax(n_size), uk(n_size), qk(n_size), wk(n_size);

    // r0 = B - A*m0, p0 = u0 = r0
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    pk = uk = rk = (B - Ax);

    // Redraw the shadow vector until its product with r0 is not (numerically) zero
    std::complex<lcg_float> rhok;
    do
    {
        s0 = Eigen::VectorXcd::Random(n_size);
        rhok = s0.conjugate().dot(rk);
    } while (std::sqrt(std::norm(rhok)) < 1e-8);

    // NOTE(review): std::norm() yields the squared magnitude, so rk_mod holds |r^H r|^2
    // rather than |r|^2 - confirm that this scaling of the residual is intended.
    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    lcg_float residual = para.abs_diff ? std::sqrt(rk_mod)/n_size : rk_mod/r0_mod;
    if (residual <= para.epsilon)
    {
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        return LCG_ALREADY_OPTIMIZIED;
    }

    // The work vectors release their storage on scope exit, so every exit is a plain return
    while (1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                return CLCG_STOP;
            }
        }

        if (residual <= para.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
            return LCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        Afp(instance, pk, Ax, MatNormal, NonConjugate);
        sigma = s0.conjugate().dot(Ax);
        ak = rhok/sigma;

        qk = uk - ak*Ax;
        wk = uk + qk;

        Afp(instance, wk, Ax, MatNormal, NonConjugate);
        m += ak*wk;
        rk -= ak*Ax;
        rk_mod = std::norm(rk.dot(rk));

        rhok2 = s0.conjugate().dot(rk);
        betak = rhok2/rhok;
        rhok = rhok2;

        uk = rk + betak*qk;
        pk = uk + betak*(qk + betak*pk);
    }
}
/**
 * @brief Solver selected by CLCG_TFQMR for a complex linear system A*m = B (Eigen vectors).
 *
 * Each pass of the outer loop calls Afp twice, always with (MatNormal, NonConjugate), and
 * then performs two solution updates. The progress callback, the convergence test and the
 * iteration limit are evaluated before each of those updates, and the iteration counter
 * advances once per update.
 *
 * @param[in]     Afp      Callback computing the product A*x.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return
 *                         value stops the iteration.
 * @param[in,out] m        Initial guess on entry, solution on exit. Must have the size of B.
 * @param[in]     B        Right-hand side of the linear system.
 * @param[in]     param    Solver parameters; defparam2 is used when this is nullptr.
 * @param[in]     instance User data passed through unchanged to the callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED, LCG_REACHED_MAX_ITERATIONS,
 *         or one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes for rejected arguments.
 */
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the library defaults when the caller supplies no parameter set
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Validate the arguments
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd pk(n_size), uk(n_size), vk(n_size), dk(n_size);
    Eigen::VectorXcd r0(n_size), rk(n_size), Ax(n_size), qk(n_size);
    Eigen::VectorXcd uqk(n_size);

    // r0 = B - A*m0, p0 = u0 = r0, d0 = 0
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    pk = uk = r0 = rk = (B - Ax);
    dk.setZero();

    std::complex<lcg_float> rk_mod = rk.dot(rk);
    // NOTE(review): std::norm() yields the squared magnitude, so the residual measures are built
    // from |r^H r|^2 rather than |r|^2 - confirm that this scaling is intended.
    lcg_float r0_mod = std::norm(rk_mod);
    if (r0_mod < 1.0) r0_mod = 1.0;

    lcg_float theta = 0.0, omega = sqrt(rk_mod.real());
    lcg_float residual, tao = omega;
    std::complex<lcg_float> rk_mod2, sigma, alpha, betak, rho, rho2, sign, eta(0.0, 0.0);
    rho = r0.dot(r0);

    int t = 0;

    // Test the initial guess against the criterion selected by abs_diff only, exactly as the
    // iteration below does. The relative test is no longer applied as a fallback in abs_diff mode.
    residual = para.abs_diff ? std::sqrt(std::norm(rk_mod))/n_size : std::norm(rk_mod)/r0_mod;
    if (residual <= para.epsilon)
    {
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        // NOTE(review): this is an LCG_ code returned from a CLCG_ solver - confirm whether a CLCG_ equivalent exists.
        return LCG_ALREADY_OPTIMIZIED;
    }

    // The work vectors release their storage on scope exit, so every exit is a plain return
    while (1)
    {
        Afp(instance, pk, vk, MatNormal, NonConjugate);
        sigma = r0.dot(vk);
        alpha = rho/sigma;

        qk = uk - alpha*vk;
        uqk = uk + qk;

        Afp(instance, uqk, Ax, MatNormal, NonConjugate);
        rk -= alpha*Ax;
        rk_mod2 = rk.dot(rk);

        for (int j = 1; j <= 2; j++)
        {
            if (para.abs_diff) residual = std::sqrt(std::norm(rk_mod))/n_size;
            else residual = std::norm(rk_mod)/r0_mod;

            if (Pfp != nullptr)
            {
                if (Pfp(instance, &m, residual, &para, t))
                {
                    return CLCG_STOP;
                }
            }

            if (residual <= para.epsilon)
            {
                return CLCG_CONVERGENCE;
            }

            // max_iterations == 0 means no iteration limit. Leave the whole solver here:
            // a plain break would only exit this inner loop and the outer loop would spin on.
            if (para.max_iterations > 0 && t+1 > para.max_iterations)
            {
                // NOTE(review): LCG_ code returned from a CLCG_ solver - confirm.
                return LCG_REACHED_MAX_ITERATIONS;
            }

            t++;

            sign = theta*theta*(eta/alpha);

            if (j == 1)
            {
                omega = sqrt(sqrt(rk_mod.real())*sqrt(rk_mod2.real()));
                dk = uk + sign*dk;
            }
            else
            {
                omega = sqrt(rk_mod2.real());
                dk = qk + sign*dk;
            }

            theta = omega/tao;
            tao = omega/sqrt(1.0+theta*theta);
            eta = (1.0/(1.0+theta*theta))*alpha;

            m += eta*dk;
        }

        rk_mod = rk_mod2;

        rho2 = r0.dot(rk);
        betak = rho2/rho;
        rho = rho2;

        uk = rk + betak*qk;
        pk = uk + betak*(qk + betak*pk);
    }
}
/**
 * @brief Preconditioned iterative solver for the complex linear system A*m = B on Eigen vectors
 * (presumably the preconditioned conjugate gradient method, judging by the function name).
 *
 * The convergence measure is sqrt(rk_mod)/n_size when para.abs_diff is set and rk_mod/r0_mod
 * otherwise, with rk_mod = norm(rk.dot(rk)) and r0_mod its initial value clamped to >= 1.
 *
 * @param[in] Afp      Callback computing the product A*x.
 * @param[in] Mfp      Callback applying the preconditioner to a vector.
 * @param[in] Pfp      Optional progress callback (may be nullptr). A non-zero return stops the iteration.
 * @param     m        Initial solution on entry; updated in place.
 * @param[in] B        Right-hand side of the linear system.
 * @param[in] param    Solver parameters; defparam2 is used when this is nullptr.
 * @param     instance User data forwarded untouched to the callbacks.
 *
 * @return Status code: an error code for invalid input, LCG_ALREADY_OPTIMIZIED,
 * CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // set solver parameters (fall back to the library defaults)
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd rk(n_size), dk(n_size), sk(n_size), Ax(n_size);

    // initial residual and preconditioned search direction
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    rk = (B - Ax);
    Mfp(instance, rk, dk, MatNormal, NonConjugate);

    std::complex<lcg_float> ak, d_old, betak, dkAx;
    std::complex<lcg_float> d_new = rk.conjugate().dot(dk);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0; // never normalise by a value below one

    int ret, t = 0;
    lcg_float residual;
    // NOTE(review): LCG_* and CLCG_* status codes are mixed below; confirm both enums agree in value.

    // Test the starting point with exactly the same criterion as the main loop.
    // Previously the relative test was also tried when abs_diff was set.
    if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
    else residual = rk_mod/r0_mod;

    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // step length along the current direction
        Afp(instance, dk, Ax, MatNormal, NonConjugate);
        dkAx = dk.conjugate().dot(Ax);
        ak = d_new/dkAx;

        m += ak*dk;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        // new preconditioned residual and search direction
        Mfp(instance, rk, sk, MatNormal, NonConjugate);
        d_old = d_new;
        d_new = rk.conjugate().dot(sk);

        betak = d_new/d_old;
        dk = sk + betak*dk;
    }

    func_ends:
    {
        // release the work vectors
        rk.resize(0);
        dk.resize(0);
        sk.resize(0);
        Ax.resize(0);
    }

    return ret;
}
/**
 * @brief Preconditioned iterative solver for the complex linear system A*m = B on Eigen vectors
 * (presumably the preconditioned bi-conjugate gradient method, judging by the function name).
 *
 * Besides the residual rk and direction pk, the routine carries a second pair (rsk, psk)
 * that starts as their complex conjugates and is advanced with a Conjugate product of A.
 *
 * The convergence measure is sqrt(rk_mod)/n_size when para.abs_diff is set and rk_mod/r0_mod
 * otherwise, with rk_mod = norm(rk.dot(rk)) and r0_mod its initial value clamped to >= 1.
 *
 * @param[in] Afp      Callback computing the product A*x (also called with the Conjugate flag).
 * @param[in] Mfp      Callback applying the preconditioner to a vector.
 * @param[in] Pfp      Optional progress callback (may be nullptr). A non-zero return stops the iteration.
 * @param     m        Initial solution on entry; updated in place.
 * @param[in] B        Right-hand side of the linear system.
 * @param[in] param    Solver parameters; defparam2 is used when this is nullptr.
 * @param     instance User data forwarded untouched to the callbacks.
 *
 * @return Status code: an error code for invalid input, LCG_ALREADY_OPTIMIZIED,
 * CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
    Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // set solver parameters (fall back to the library defaults)
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    std::complex<lcg_float> ak, betak, pkAx, rhok2;
    Eigen::VectorXcd rk(n_size), rsk(n_size), zk(n_size), pk(n_size), psk(n_size), Ax(n_size), Asx(n_size);

    // initial residual, preconditioned residual and the conjugated companions
    Afp(instance, m, Ax, MatNormal, NonConjugate);
    rk = (B - Ax);
    Mfp(instance, rk, zk, MatNormal, NonConjugate);

    pk = zk;
    rsk = rk.conjugate();
    psk = pk.conjugate();

    std::complex<lcg_float> rhok = rsk.dot(zk);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0; // never normalise by a value below one

    int ret, t = 0;
    lcg_float residual;
    // NOTE(review): LCG_* and CLCG_* status codes are mixed below; confirm both enums agree in value.

    // Test the starting point with exactly the same criterion as the main loop.
    // Previously the relative test was also tried when abs_diff was set.
    if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
    else residual = rk_mod/r0_mod;

    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, residual, &para, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, pk, Ax, MatNormal, NonConjugate);
        Afp(instance, psk, Asx, MatNormal, Conjugate);
        pkAx = psk.dot(Ax);
        ak = rhok/pkAx;

        m += ak*pk;
        // rsk must be formed from the old rk, so it is updated before rk itself
        rsk = rk.conjugate() - std::conj(ak)*Asx;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        Mfp(instance, rk, zk, MatNormal, NonConjugate);

        rhok2 = rsk.dot(zk);
        betak = rhok2/rhok;
        rhok = rhok2;

        pk = zk + betak*pk;
        psk = zk.conjugate() + std::conj(betak)*psk;
    }

    func_ends:
    {
        // release the work vectors
        rk.resize(0);
        rsk.resize(0);
        zk.resize(0);
        pk.resize(0);
        psk.resize(0);
        Ax.resize(0);
        Asx.resize(0);
    }

    return ret;
}

94
src/lib/clcg_eigen.h Normal file
View File

@ -0,0 +1,94 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _CLCG_EIGEN_H
#define _CLCG_EIGEN_H
#include "util.h"
#include "complex"
#include "Eigen/Dense"
/**
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
* by a vertical vector 'x'.
*
* @param instance The user data sent for the solver functions by the client.
* @param x Multiplier of the Ax product.
* @param prod_Ax Product of A multiplied by x (output of the callback).
* @param layout Layout information of the matrix A passed by the solver functions (e.g. MatNormal).
* @param conjugate Conjugation information of the matrix A passed by the solver functions
* (e.g. Conjugate or NonConjugate).
*/
typedef void (*clcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
lcg_matrix_e layout, clcg_complex_e conjugate);
/**
* @brief Callback interface for monitoring the progress and terminating the iteration
* if necessary.
*
* @param instance The user data sent for the solver functions by the client.
* @param m The current solution.
* @param converge The current value evaluating the iteration progress.
* @param param The parameter object passed by the solver functions.
* @param k The iteration count.
*
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will terminate the optimization process.
*/
typedef int (*clcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
const clcg_para *param, const int k);
/**
* @brief A combined conjugate gradient solver function for complex systems stored in Eigen vectors.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector (passed by non-const reference, so it is presumably
* overwritten with the solution).
* @param B Objective vector of the linear system.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the solver function by the client.
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is CLCG_CGS.
*
* @return Status of the function.
*/
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_CGS);
/**
* @brief A combined preconditioned conjugate gradient solver function for complex systems
* stored in Eigen vectors.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector (passed by non-const reference, so it is presumably
* overwritten with the solution).
* @param B Objective vector of the linear system.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the solver function by the client.
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
* @param solver_id Solver type used to solve the linear system. The value must be CLCG_PBICG (default) or CLCG_PCG.
*
* @return Status of the function.
*/
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_PBICG);
#endif // _CLCG_EIGEN_H

1419
src/lib/lcg.cpp Normal file

File diff suppressed because it is too large Load Diff

171
src/lib/lcg.h Normal file
View File

@ -0,0 +1,171 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_H
#define _LCG_H
#include "util.h"
/**
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
* by a vertical vector 'x'.
*
* @param instance The user data sent for the lcg_solver() functions by the client.
* @param x Multiplier of the Ax product.
* @param prod_Ax Product of A multiplied by x (output of the callback).
* @param n_size Size of x and column/row numbers of A.
*/
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax,
const int n_size);
/**
* @brief Callback interface for monitoring the progress and terminating the iteration
* if necessary.
*
* @param instance The user data sent for the lcg_solver() functions by the client.
* @param m The current solution.
* @param converge The current value evaluating the iteration progress.
* @param param The parameter object passed by the solver functions.
* @param n_size The size of the variables.
* @param k The iteration count.
*
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will terminate the optimization process.
*/
typedef int (*lcg_progress_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
const lcg_para* param, const int n_size, const int k);
/**
* @brief A combined conjugate gradient solver function.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector (non-const, so presumably overwritten with the solution).
* @param B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
*
* @return Status of the function.
*/
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
/**
* @brief A combined preconditioned conjugate gradient solver function.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector (non-const, so presumably overwritten with the solution).
* @param B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_preconditioned() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
*
* @return Status of the function.
*/
int lcg_solver_preconditioned(lcg_axfunc_ptr Afp, lcg_axfunc_ptr Mfp, lcg_progress_ptr Pfp, lcg_float* m,
const lcg_float* B, const int n_size, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
/**
* @brief A combined conjugate gradient solver function with inequality constraints.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector (non-const, so presumably overwritten with the solution).
* @param B Objective vector of the linear system.
* @param[in] low The lower boundary of the acceptable solution.
* @param[in] hig The higher boundary of the acceptable solution.
* @param[in] n_size Size of the solution vector and objective vector.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_constrained() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
*
* @note This declaration takes no precondition vector; an earlier version of this comment
* documented a parameter 'P' that is not part of the signature.
*
* @return Status of the function.
*/
int lcg_solver_constrained(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B,
const lcg_float* low, const lcg_float *hig, const int n_size, const lcg_para* param,
void* instance, lcg_solver_enum solver_id = LCG_PG);
/**
* @brief Standalone function of the Linear Conjugate Gradient algorithm.
*
* @note To use the lcg() function for massive inversions, it is better to provide
* external vectors Gk, Dk and ADk to avoid allocating and destroying temporary vectors.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector of the size n_size.
* @param[in] B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param Gk Conjugate gradient vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param Dk Directional gradient vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param ADk Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
*
* @return Status of the function.
*/
int lcg(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
const lcg_para* param, void* instance, lcg_float* Gk = nullptr, lcg_float* Dk = nullptr,
lcg_float* ADk = nullptr);
/**
* @brief Standalone function of the Conjugate Gradient Squared algorithm.
*
* @note Algorithm 2 in "Generalized conjugate gradient method" by Fokkema et al. (1996).
*
* @note To use the lcgs() function for massive inversions, it is better to provide
* external vectors RK, R0T, PK, AX, UK, QK, and WK to avoid allocating and destroying temporary vectors.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcgs() function by the client.
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
* @param RK Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param R0T Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param PK Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param AX Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param UK Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param QK Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
* @param WK Intermediate vector of the size n_size. If this pointer is null (the default), the function will create an internal vector instead.
*
* @return Status of the function.
*/
int lcgs(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
const lcg_para* param, void* instance, lcg_float* RK = nullptr, lcg_float* R0T = nullptr,
lcg_float* PK = nullptr, lcg_float* AX = nullptr, lcg_float* UK = nullptr, lcg_float* QK = nullptr,
lcg_float* WK = nullptr);
#endif // _LCG_H

496
src/lib/lcg_complex.cpp Normal file
View File

@ -0,0 +1,496 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "ctime"
#include "random"
#include "lcg_complex.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
/**
 * Allocate an array of n complex numbers with new[].
 * The result must be released with delete[] (see clcg_free).
 */
lcg_complex* clcg_malloc(int n)
{
    return new lcg_complex [n];
}
/**
 * Allocate an m-by-n table of complex numbers as an array of m separately
 * allocated rows, each holding n elements.
 */
lcg_complex** clcg_malloc(int m, int n)
{
    lcg_complex **rows = new lcg_complex* [m];

    int r = 0;
    while (r < m)
    {
        rows[r] = new lcg_complex [n];
        ++r;
    }

    return rows;
}
/**
 * Release an array obtained from the one-dimensional clcg_malloc().
 * A null pointer is ignored. The pointer is passed by value, so the
 * caller's own copy is not reset by this function.
 */
void clcg_free(lcg_complex* x)
{
    if (x == nullptr) return;

    delete[] x;
}
/**
 * Release an m-row table obtained from the two-dimensional clcg_malloc():
 * every row is deleted first, then the row-pointer array itself.
 * A null pointer is ignored. The pointer is passed by value, so the
 * caller's own copy is not reset by this function.
 */
void clcg_free(lcg_complex **x, int m)
{
    if (x == nullptr) return;

    int r = 0;
    while (r < m)
    {
        delete[] x[r];
        ++r;
    }
    delete[] x;
}
/**
 * Assign the value b to the first 'size' elements of the array a.
 */
void clcg_vecset(lcg_complex *a, lcg_complex b, int size)
{
    int k = 0;
    while (k < size)
    {
        a[k] = b;
        ++k;
    }
}
/**
 * Assign the value b to every element of the m-by-n table a (row by row).
 */
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n)
{
    for (int r = 0; r < m; ++r)
    {
        lcg_complex *row = a[r];
        for (int c = 0; c < n; ++c) row[c] = b;
    }
}
#ifdef LibLCG_STD_COMPLEX
/**
 * Overwrite the complex number pointed to by a with r + i*j
 * (r is the real part, i the imaginary part).
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
    *a = lcg_complex(r, i);
}
/**
 * Return the squared magnitude of *a, as computed by std::norm.
 */
lcg_float clcg_square(const lcg_complex *a)
{
    const lcg_complex &z = *a;
    return std::norm(z);
}
/**
 * Return the magnitude of *a, computed as the square root of std::norm(*a).
 */
lcg_float clcg_module(const lcg_complex *a)
{
    const lcg_float squared = std::norm(*a);
    return sqrt(squared);
}
/**
 * Return the complex conjugate of *a.
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
    return std::conj(*a);
}
/**
 * Fill the first 'size' elements of a with pseudo-random complex numbers.
 * The real part is drawn between l.real() and h.real(), the imaginary part
 * between l.imag() and h.imag(), using rand().
 *
 * NOTE(review): the generator is re-seeded from the wall clock on every call,
 * so two calls within the same second produce the same sequence.
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
    srand(time(0));

    int k = 0;
    while (k < size)
    {
        // real part first, then imaginary part: the order of rand() calls matters
        a[k].real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
        a[k].imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
        ++k;
    }
}
/**
 * Fill the m-by-n table a with pseudo-random complex numbers, row by row.
 * The real part is drawn between l.real() and h.real(), the imaginary part
 * between l.imag() and h.imag(), using rand().
 *
 * NOTE(review): the generator is re-seeded from the wall clock on every call,
 * so two calls within the same second produce the same sequence.
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
    srand(time(0));

    for (int r = 0; r < m; ++r)
    {
        lcg_complex *row = a[r];
        for (int c = 0; c < n; ++c)
        {
            // real part first, then imaginary part: the order of rand() calls matters
            row[c].real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
            row[c].imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
        }
    }
}
/**
 * Unconjugated product of two complex arrays: ret = sum_i a[i]*b[i].
 * The result is written to ret only after the whole sum has been accumulated.
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float sum_re = 0.0, sum_im = 0.0;

    for (int k = 0; k < size; ++k)
    {
        const lcg_complex &p = a[k];
        const lcg_complex &q = b[k];
        sum_re += (p.real()*q.real() - p.imag()*q.imag());
        sum_im += (p.real()*q.imag() + p.imag()*q.real());
    }

    ret = lcg_complex(sum_re, sum_im);
}
/**
 * Conjugated inner product of two complex arrays: ret = sum_i conj(a[i])*b[i].
 * The result is written to ret only after the whole sum has been accumulated.
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float sum_re = 0.0, sum_im = 0.0;

    for (int k = 0; k < size; ++k)
    {
        const lcg_complex &p = a[k];
        const lcg_complex &q = b[k];
        sum_re += (p.real()*q.real() + p.imag()*q.imag());
        sum_im += (p.real()*q.imag() - p.imag()*q.real());
    }

    ret = lcg_complex(sum_re, sum_im);
}
/**
 * Dense complex matrix-vector product.
 *
 * A is addressed as A[i][j] with i < m_size and j < n_size.
 *  - layout == MatNormal: Ax[i] = sum_j A[i][j]*x[j], so Ax has m_size entries.
 *  - any other layout:    Ax[j] = sum_i A[i][j]*x[i], so Ax has n_size entries.
 * When conjugate == Conjugate each A[i][j] is replaced by its complex conjugate.
 *
 * @param A         Matrix stored as an array of row pointers.
 * @param x         Input vector.
 * @param Ax        Output vector.
 * @param m_size    Number of rows of A.
 * @param n_size    Number of columns of A.
 * @param layout    Selects between the normal and the transposed product.
 * @param conjugate Selects whether the entries of A are conjugated.
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
    int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
    // Signed indices: they are compared against the signed sizes, so a
    // non-positive size simply skips the loops instead of being converted to a
    // huge unsigned bound. Signed loop variables are also accepted by every
    // OpenMP version.
    int i, j;
    lcg_float re, im;

    if (conjugate == Conjugate)
    {
        if (layout == MatNormal)
        {
#pragma omp parallel for private (i, j, re, im) schedule(guided)
            for (i = 0; i < m_size; i++)
            {
                re = 0.0; im = 0.0;
                for (j = 0; j < n_size; j++)
                {
                    re += (A[i][j].real()*x[j].real() + A[i][j].imag()*x[j].imag());
                    im += (A[i][j].real()*x[j].imag() - A[i][j].imag()*x[j].real());
                }
                Ax[i].real(re); Ax[i].imag(im);
            }
            return;
        }

#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (j = 0; j < n_size; j++)
        {
            re = 0.0; im = 0.0;
            for (i = 0; i < m_size; i++)
            {
                re += (A[i][j].real()*x[i].real() + A[i][j].imag()*x[i].imag());
                im += (A[i][j].real()*x[i].imag() - A[i][j].imag()*x[i].real());
            }
            Ax[j].real(re); Ax[j].imag(im);
        }
        return;
    }

    if (layout == MatNormal)
    {
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (i = 0; i < m_size; i++)
        {
            re = 0.0; im = 0.0;
            for (j = 0; j < n_size; j++)
            {
                re += (A[i][j].real()*x[j].real() - A[i][j].imag()*x[j].imag());
                im += (A[i][j].real()*x[j].imag() + A[i][j].imag()*x[j].real());
            }
            Ax[i].real(re); Ax[i].imag(im);
        }
        return;
    }

#pragma omp parallel for private (i, j, re, im) schedule(guided)
    for (j = 0; j < n_size; j++)
    {
        re = 0.0; im = 0.0;
        for (i = 0; i < m_size; i++)
        {
            re += (A[i][j].real()*x[i].real() - A[i][j].imag()*x[i].imag());
            im += (A[i][j].real()*x[i].imag() + A[i][j].imag()*x[i].real());
        }
        Ax[j].real(re); Ax[j].imag(im);
    }
    return;
}
#else
/// Default constructor: both parts start at zero.
lcg_complex::lcg_complex() : rel(0.0), img(0.0) {}

/// Construct from a real part r and an imaginary part i.
lcg_complex::lcg_complex(lcg_float r, lcg_float i) : rel(r), img(i) {}

/// Destructor: nothing to release.
lcg_complex::~lcg_complex() {}

/// Set the real part.
void lcg_complex::real(lcg_float a)
{
    rel = a;
}

/// Set the imaginary part.
void lcg_complex::imag(lcg_float a)
{
    img = a;
}

/// Get the real part.
lcg_float lcg_complex::real()
{
    return rel;
}

/// Get the imaginary part.
lcg_float lcg_complex::imag()
{
    return img;
}
/// Two complex numbers are equal when both their real and imaginary parts are equal.
bool operator==(const lcg_complex &a, const lcg_complex &b)
{
    return a.rel == b.rel && a.img == b.img;
}
/// Two complex numbers differ when their real parts or their imaginary parts differ.
bool operator!=(const lcg_complex &a, const lcg_complex &b)
{
    return a.rel != b.rel || a.img != b.img;
}
/// Component-wise sum of two complex numbers.
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b)
{
    return lcg_complex(a.rel + b.rel, a.img + b.img);
}
/// Component-wise difference of two complex numbers (a minus b).
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b)
{
    return lcg_complex(a.rel - b.rel, a.img - b.img);
}
/// Product of two complex numbers.
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b)
{
    const lcg_float prod_re = a.rel*b.rel - a.img*b.img;
    const lcg_float prod_im = a.rel*b.img + a.img*b.rel;
    return lcg_complex(prod_re, prod_im);
}
/// Product of a real scalar a and a complex number b.
lcg_complex operator*(const lcg_float &a, const lcg_complex &b)
{
    return lcg_complex(a*b.rel, a*b.img);
}
/// Quotient a/b of two complex numbers. Both parts of the result are NAN when b is exactly zero.
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b)
{
    const bool divisor_is_zero = (b.rel == 0 && b.img == 0);
    if (divisor_is_zero) return lcg_complex(NAN, NAN);

    lcg_complex quot;
    quot.rel = (a.rel*b.rel + a.img*b.img)/(b.rel*b.rel + b.img*b.img);
    quot.img = (a.img*b.rel - a.rel*b.img)/(b.rel*b.rel + b.img*b.img);
    return quot;
}
/// Quotient a/b of a real scalar and a complex number. Both parts of the result are NAN when b is exactly zero.
lcg_complex operator/(const lcg_float &a, const lcg_complex &b)
{
    const bool divisor_is_zero = (b.rel == 0 && b.img == 0);
    if (divisor_is_zero) return lcg_complex(NAN, NAN);

    lcg_complex quot;
    quot.rel = a*b.rel/(b.rel*b.rel + b.img*b.img);
    quot.img = -1.0*a*b.img/(b.rel*b.rel + b.img*b.img);
    return quot;
}
/// Print a complex number as "<real>+<imag>i"; a negative imaginary part supplies its own minus sign.
std::ostream &operator<<(std::ostream &os, const lcg_complex &a)
{
    os << a.rel;
    if (a.img >= 0) os << "+";
    os << a.img << "i";
    return os;
}
/**
 * Overwrite the complex number pointed to by a: r becomes the real part
 * and i the imaginary part.
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
    lcg_complex &z = *a;
    z.rel = r;
    z.img = i;
}
/**
 * Return the squared magnitude of *a, i.e. rel*rel + img*img.
 */
lcg_float clcg_square(const lcg_complex *a)
{
    const lcg_complex &z = *a;
    return z.rel * z.rel + z.img * z.img;
}
/**
 * Return the magnitude of *a, computed as the square root of clcg_square(a).
 */
lcg_float clcg_module(const lcg_complex *a)
{
    const lcg_float squared = clcg_square(a);
    return sqrt(squared);
}
/**
 * Return the complex conjugate of *a: same real part, negated imaginary part.
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
    return lcg_complex(a->rel, -1.0 * a->img);
}
/**
 * Fill the first 'size' elements of a with pseudo-random complex numbers.
 * The real part is drawn between l.rel and h.rel, the imaginary part
 * between l.img and h.img, using rand().
 *
 * NOTE(review): the generator is re-seeded from the wall clock on every call,
 * so two calls within the same second produce the same sequence.
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
    srand(time(nullptr));

    int k = 0;
    while (k < size)
    {
        // real part first, then imaginary part: the order of rand() calls matters
        a[k].rel = (h.rel-l.rel)*rand()*1.0/RAND_MAX + l.rel;
        a[k].img = (h.img-l.img)*rand()*1.0/RAND_MAX + l.img;
        ++k;
    }
}
/**
 * Fill the m-by-n table a with pseudo-random complex numbers, row by row.
 * The real part is drawn between l.rel and h.rel, the imaginary part
 * between l.img and h.img, using rand().
 *
 * NOTE(review): the generator is re-seeded from the wall clock on every call,
 * so two calls within the same second produce the same sequence.
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
    srand(time(nullptr));

    for (int r = 0; r < m; ++r)
    {
        lcg_complex *row = a[r];
        for (int c = 0; c < n; ++c)
        {
            // real part first, then imaginary part: the order of rand() calls matters
            row[c].rel = (h.rel-l.rel)*rand()*1.0/RAND_MAX + l.rel;
            row[c].img = (h.img-l.img)*rand()*1.0/RAND_MAX + l.img;
        }
    }
}
/**
 * Unconjugated product of two complex arrays: ret = sum_i a[i]*b[i].
 * ret is zeroed first and the sum is accumulated directly into it.
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    ret.rel = 0.0;
    ret.img = 0.0;

    int k = 0;
    while (k < size)
    {
        ret.rel += (a[k].rel*b[k].rel - a[k].img*b[k].img);
        ret.img += (a[k].rel*b[k].img + a[k].img*b[k].rel);
        ++k;
    }
}
/**
 * Conjugated inner product of two complex arrays: ret = sum_i conj(a[i])*b[i].
 * ret is zeroed first and the sum is accumulated directly into it.
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    ret.rel = 0.0;
    ret.img = 0.0;

    int k = 0;
    while (k < size)
    {
        ret.rel += (a[k].rel*b[k].rel + a[k].img*b[k].img);
        ret.img += (a[k].rel*b[k].img - a[k].img*b[k].rel);
        ++k;
    }
}
/**
 * Dense complex matrix-vector product.
 *
 * A is addressed as A[i][j] with i < m_size and j < n_size.
 *  - layout == MatNormal: Ax[i] = sum_j A[i][j]*x[j], so Ax has m_size entries.
 *  - any other layout:    Ax[j] = sum_i A[i][j]*x[i], so Ax has n_size entries.
 * When conjugate == Conjugate each A[i][j] is replaced by its complex conjugate.
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
    int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
    int i, j;
    lcg_float re, im;

    const bool use_conj = (conjugate == Conjugate);
    const bool row_wise = (layout == MatNormal);

    if (use_conj && row_wise)
    {
        // conj(A) * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (i = 0; i < m_size; i++)
        {
            re = 0.0; im = 0.0;
            for (j = 0; j < n_size; j++)
            {
                re += (A[i][j].rel*x[j].rel + A[i][j].img*x[j].img);
                im += (A[i][j].rel*x[j].img - A[i][j].img*x[j].rel);
            }
            Ax[i].rel = re;
            Ax[i].img = im;
        }
    }
    else if (use_conj)
    {
        // conjugated, transposed product
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (j = 0; j < n_size; j++)
        {
            re = 0.0; im = 0.0;
            for (i = 0; i < m_size; i++)
            {
                re += (A[i][j].rel*x[i].rel + A[i][j].img*x[i].img);
                im += (A[i][j].rel*x[i].img - A[i][j].img*x[i].rel);
            }
            Ax[j].rel = re;
            Ax[j].img = im;
        }
    }
    else if (row_wise)
    {
        // A * x
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (i = 0; i < m_size; i++)
        {
            re = 0.0; im = 0.0;
            for (j = 0; j < n_size; j++)
            {
                re += (A[i][j].rel*x[j].rel - A[i][j].img*x[j].img);
                im += (A[i][j].rel*x[j].img + A[i][j].img*x[j].rel);
            }
            Ax[i].rel = re;
            Ax[i].img = im;
        }
    }
    else
    {
        // transposed product
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (j = 0; j < n_size; j++)
        {
            re = 0.0; im = 0.0;
            for (i = 0; i < m_size; i++)
            {
                re += (A[i][j].rel*x[i].rel - A[i][j].img*x[i].img);
                im += (A[i][j].rel*x[i].img + A[i][j].img*x[i].rel);
            }
            Ax[j].rel = re;
            Ax[j].img = im;
        }
    }
}
#endif // LibLCG_STD_COMPLEX

329
src/lib/lcg_complex.h Normal file
View File

@ -0,0 +1,329 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_COMPLEX_H
#define _LCG_COMPLEX_H
#include "iostream"
#include "algebra.h"
#ifdef LibLCG_STD_COMPLEX
#include "complex"
typedef std::complex<lcg_float> lcg_complex;
#else
/**
* @brief A simple definition of the complex number type, used when
* LibLCG_STD_COMPLEX is not defined.
* Easy to change in the future. Right now it is just two lcg_float variables.
*/
struct lcg_complex
{
lcg_float rel; ///< The real part
lcg_float img; ///< The imaginary part
/**
* @brief Constructs a new instance with default values.
*/
lcg_complex();
/**
* @brief Constructs a new instance from its two parts.
*
* @param[in] r The real part of the complex number
* @param[in] i The imaginary part of the complex number
*/
lcg_complex(lcg_float r, lcg_float i);
/**
* @brief Destructor (declared virtual).
*/
virtual ~lcg_complex();
/**
* @brief Set the real part of the complex number
*
* @param a Input value
*/
void real(lcg_float a);
/**
* @brief Set the imaginary part of the complex number
*
* @param a Input value
*/
void imag(lcg_float a);
/**
* @brief Get the real part of the complex number
*
* @note Not declared const, so it cannot be called on a const object.
*
* @return lcg_float Real component
*/
lcg_float real();
/**
* @brief Get the imaginary part of the complex number
*
* @note Not declared const, so it cannot be called on a const object.
*
* @return lcg_float Imaginary component
*/
lcg_float imag();
};
/**
* @brief Overloaded equality operator.
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return equal or not
*/
bool operator==(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded inequality operator.
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return unequal or not
*/
bool operator!=(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded addition operator.
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return sum
*/
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded subtraction operator.
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return subtraction
*/
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded multiplication operator (complex times complex).
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return product
*/
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded multiplication operator (real times complex).
*
* @param[in] a real number a
* @param[in] b complex number b
*
* @return product
*/
lcg_complex operator*(const lcg_float &a, const lcg_complex &b);
/**
* @brief Overloaded division operator (complex divided by complex).
*
* @param[in] a complex number a
* @param[in] b complex number b
*
* @return quotient
*/
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b);
/**
* @brief Overloaded division operator (real divided by complex).
*
* @param[in] a real number a
* @param[in] b complex number b
*
* @return quotient
*/
lcg_complex operator/(const lcg_float &a, const lcg_complex &b);
/**
* @brief Overloaded ostream insertion operator.
*
* @param os The ostream
* @param[in] a complex number a
*
* @return The ostream
*/
std::ostream &operator<<(std::ostream &os, const lcg_complex &a);
#endif // LibLCG_STD_COMPLEX
/**
* @brief Allocate memory for a lcg_complex array.
*
* @param[in] n Size of the lcg_complex array.
*
* @return Pointer of the array's location.
*/
lcg_complex* clcg_malloc(int n);
/**
* @brief Allocate memory for a 2D lcg_complex array.
*
* @param[in] m Row size of the 2D array.
* @param[in] n Column size of the 2D array.
*
* @return Pointer of the array's location.
*/
lcg_complex** clcg_malloc(int m, int n);
/**
* @brief Destroy memory used by the lcg_complex type array.
*
* @param x Pointer of the array.
*/
void clcg_free(lcg_complex* x);
/**
* @brief Destroy memory used by the 2D lcg_complex type array.
*
* @param x Pointer of the array.
* @param[in] m Row size of the array.
*/
void clcg_free(lcg_complex **x, int m);
/**
* @brief set a complex vector's value
*
* @param a pointer of the vector
* @param[in] b initial value
* @param[in] size vector size
*/
void clcg_vecset(lcg_complex *a, lcg_complex b, int size);
/**
* @brief set a 2d complex vector's value
*
* @param a pointer of the matrix
* @param[in] b initial value
* @param[in] m row size of the matrix
* @param[in] n column size of the matrix
*/
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n);
/**
* @brief Set up a complex number
*
* @param a Pointer of the complex number to set
* @param[in] r The real part of the complex number
* @param[in] i The imaginary part of the complex number
*/
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i);
/**
* @brief Calculate the squared module of a complex number
*
* @return The squared module
*/
lcg_float clcg_square(const lcg_complex *a);
/**
* @brief Calculate the module of a complex number
*
* @return The module
*/
lcg_float clcg_module(const lcg_complex *a);
/**
* @brief Calculate the conjugate of a complex number
*
* @return The complex conjugate.
*/
lcg_complex clcg_conjugate(const lcg_complex *a);
/**
* @brief set a complex vector using random values
*
* @param a pointer of the vector
* @param[in] l the lower bound of random values
* @param[in] h the higher bound of random values
* @param[in] size size of the vector
*/
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size);
/**
* @brief set a 2D complex vector using random values
*
* @param a pointer of the vector
* @param[in] l the lower bound of random values
* @param[in] h the higher bound of random values
* @param[in] m row size of the vector
* @param[in] n column size of the vector
*/
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n);
/**
* @brief calculate dot product of two complex vectors
*
* the product of two complex vectors are defined as <a, b> = \sum{a_i \cdot b_i}
*
* @param[out] ret the resulting product
* @param[in] a complex vector a
* @param[in] b complex vector b
* @param[in] size size of the vectors
*/
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
/**
* @brief calculate inner product of two complex vectors
*
* the product of two complex vectors are defined as <a, b> = \sum{\bar{a_i} \cdot b_i}
*
* @param[out] ret the resulting product
* @param[in] a complex vector a
* @param[in] b complex vector b
* @param[in] size size of the vectors
*/
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
/**
* @brief calculate product of a complex matrix and a complex vector
*
* The form of A used in the product is selected by the layout and conjugate arguments.
* Different configurations:
* layout=Normal,conjugate=false -> A
* layout=Transpose,conjugate=false -> A^T
* layout=Normal,conjugate=true -> \bar{A}
* layout=Transpose,conjugate=true -> A^H
*
* @param A complex matrix A
* @param[in] x complex vector x
* @param Ax product of Ax
* @param[in] m_size row size of A
* @param[in] n_size column size of A
* @param[in] layout layout of A used for multiplication. Must be Normal or Transpose
* @param[in] conjugate whether to use the complex conjugate of A for calculation
*/
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax, int m_size, int n_size,
lcg_matrix_e layout = MatNormal, clcg_complex_e conjugate = NonConjugate);
#endif // _LCG_COMPLEX_H

356
src/lib/lcg_complex_cuda.cu Normal file
View File

@ -0,0 +1,356 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "lcg_complex_cuda.h"
#include "complex"
#include "map"
/**
 * @brief CUDA kernel that copies the diagonal entries of a single-precision complex
 * CSR matrix into a dense array.
 *
 * Each thread handles one row. It scans the row's column indices and stores the first
 * value whose column index equals the row index. A row without a stored diagonal entry
 * leaves its slot in A_diag untouched.
 *
 * @param[in] A_row  CSR row pointer array (entries row and row+1 are read)
 * @param[in] A_col  CSR column indices
 * @param[in] A_val  CSR non-zero values
 * @param[in] A_len  Number of rows to process
 * @param     A_diag Output array of the diagonal values
 */
__global__ void smCcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return; // surplus threads of the last block

    const int row_begin = A_row[row];
    const int row_end = A_row[row + 1];
    for (int k = row_begin; k < row_end; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            return;
        }
    }
}
/**
 * @brief CUDA kernel that copies the diagonal entries of a double-precision complex
 * CSR matrix into a dense array.
 *
 * Each thread handles one row. It scans the row's column indices and stores the first
 * value whose column index equals the row index. A row without a stored diagonal entry
 * leaves its slot in A_diag untouched.
 *
 * @param[in] A_row  CSR row pointer array (entries row and row+1 are read)
 * @param[in] A_col  CSR column indices
 * @param[in] A_val  CSR non-zero values
 * @param[in] A_len  Number of rows to process
 * @param     A_diag Output array of the diagonal values
 */
__global__ void smZcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return; // surplus threads of the last block

    const int row_begin = A_row[row];
    const int row_end = A_row[row + 1];
    for (int k = row_begin; k < row_end; k++)
    {
        if (A_col[k] == row)
        {
            A_diag[row] = A_val[k];
            return;
        }
    }
}
/**
 * @brief CUDA kernel computing c[i] = a[i] * b[i] for single-precision complex arrays.
 *
 * One thread per element; threads whose index is not below n do nothing.
 *
 * @param[in] a First input array
 * @param[in] b Second input array
 * @param     c Output array
 * @param[in] n Number of elements
 */
__global__ void vecMvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;
    c[idx] = cuCmulf(a[idx], b[idx]);
}
/**
 * @brief CUDA kernel computing c[i] = a[i] * b[i] for double-precision complex arrays.
 *
 * One thread per element; threads whose index is not below n do nothing.
 *
 * @param[in] a First input array
 * @param[in] b Second input array
 * @param     c Output array
 * @param[in] n Number of elements
 */
__global__ void vecMvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;
    c[idx] = cuCmul(a[idx], b[idx]);
}
/**
 * @brief CUDA kernel computing c[i] = a[i] / b[i] for single-precision complex arrays.
 *
 * One thread per element; threads whose index is not below n do nothing.
 *
 * @param[in] a Array of numerators
 * @param[in] b Array of denominators
 * @param     c Output array
 * @param[in] n Number of elements
 */
__global__ void vecDvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;
    c[idx] = cuCdivf(a[idx], b[idx]);
}
/**
 * @brief CUDA kernel computing c[i] = a[i] / b[i] for double-precision complex arrays.
 *
 * One thread per element; threads whose index is not below n do nothing.
 *
 * @param[in] a Array of numerators
 * @param[in] b Array of denominators
 * @param     c Output array
 * @param[in] n Number of elements
 */
__global__ void vecDvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;
    c[idx] = cuCdiv(a[idx], b[idx]);
}
/**
 * @brief CUDA kernel writing the complex conjugate of each element of a
 * single-precision complex array.
 *
 * One thread per element; threads whose index is not below n do nothing.
 * The element is read into a local copy before the store.
 *
 * @param[in] a  Input array
 * @param     ca Output array receiving the conjugates
 * @param[in] n  Number of elements
 */
__global__ void vecC_conjugate_device(const cuComplex *a, cuComplex *ca, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    cuComplex v = a[idx];
    // Negate in single precision; the former "*= -1.0" promoted the product to double.
    v.y = -v.y;
    ca[idx] = v;
}
/**
 * @brief CUDA kernel writing the complex conjugate of each element of a
 * double-precision complex array.
 *
 * One thread per element; threads whose index is not below n do nothing.
 *
 * @param[in] a  Input array
 * @param     ca Output array receiving the conjugates
 * @param[in] n  Number of elements
 */
__global__ void vecZ_conjugate_device(const cuDoubleComplex *a, cuDoubleComplex *ca, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    cuDoubleComplex v = a[idx];
    v.y *= -1.0; // flip the sign of the imaginary part
    ca[idx] = v;
}
/**
 * @brief Build a lcg_complex from a cuDoubleComplex.
 *
 * @param a CUDA complex number (x is the real part, y the imaginary part)
 * @return lcg_complex constructed from (a.x, a.y)
 */
lcg_complex cuda2lcg_complex(cuDoubleComplex a)
{
    lcg_complex converted(a.x, a.y);
    return converted;
}
#ifdef LibLCG_STD_COMPLEX
/**
 * @brief Build a cuDoubleComplex from a lcg_complex (std::complex variant).
 *
 * @param a lcg complex number
 * @return cuDoubleComplex with x = real part and y = imaginary part
 */
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex o;
    o.x = a.real(); o.y = a.imag();
    return o;
}
#else
/**
 * @brief Build a cuDoubleComplex from a lcg_complex (LibLCG struct variant).
 *
 * @param a lcg complex number
 * @return cuDoubleComplex with x = real part and y = imaginary part
 */
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex o;
    // rel and img are data members of the LibLCG struct, not member functions,
    // so they must be read directly rather than called.
    o.x = a.rel; o.y = a.img;
    return o;
}
#endif // LibLCG_STD_COMPLEX
/**
 * @brief Allocate an array of n cuDoubleComplex values with new[].
 *
 * The elements are not initialized. Release the array with clcg_free_cuda().
 *
 * @param[in] n Number of elements
 * @return Pointer to the new array
 */
cuDoubleComplex* clcg_malloc_cuda(size_t n)
{
    return new cuDoubleComplex [n];
}
/**
 * @brief Release an array with delete[].
 *
 * A null pointer is ignored. The pointer is passed by value, so the caller's
 * copy is not reset.
 *
 * @param x Pointer of the array
 */
void clcg_free_cuda(cuDoubleComplex *x)
{
    if (x == nullptr) return;
    delete[] x;
}
/**
 * @brief Fill a cuDoubleComplex array with one value.
 *
 * The loop runs on the calling (host) thread and dereferences a directly.
 *
 * @param     a    Pointer of the array
 * @param[in] b    Value copied into every element
 * @param[in] size Number of elements
 */
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size)
{
    size_t idx = 0;
    while (idx < size)
    {
        a[idx] = b; // copies both the real and the imaginary part
        ++idx;
    }
}
/**
 * @brief Host side function that scales a cuComplex number by a real factor.
 *
 * The parameter type is lcg_float so that this definition matches the prototype
 * in lcg_complex_cuda.h. A differing scalar type would define a separate overload
 * and leave the declared function without a definition.
 *
 * @param s Scale factor (converted to float before multiplying)
 * @param a Complex number
 * @return cuComplex The scaled complex number
 */
cuComplex clcg_Cscale(lcg_float s, cuComplex a)
{
    // Keep the arithmetic in single precision, as the result type is cuComplex.
    const float factor = static_cast<float>(s);
    cuComplex o;
    o.x = factor*a.x;
    o.y = factor*a.y;
    return o;
}
/**
 * @brief Host side sum of two cuComplex numbers.
 *
 * @param a Complex number
 * @param b Complex number
 * @return cuComplex a + b, added component by component
 */
cuComplex clcg_Csum(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x + b.x, a.y + b.y);
}
/**
 * @brief Host side difference of two cuComplex numbers.
 *
 * @param a Complex number
 * @param b Complex number
 * @return cuComplex a - b, subtracted component by component
 */
cuComplex clcg_Cdiff(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x - b.x, a.y - b.y);
}
/**
 * @brief Square root of a cuComplex number, computed with std::sqrt on
 * std::complex<float>.
 *
 * @param a Complex number
 * @return cuComplex The root returned by std::sqrt
 */
cuComplex clcg_Csqrt(cuComplex a)
{
    const std::complex<float> root = std::sqrt(std::complex<float>(a.x, a.y));
    cuComplex result;
    result.x = root.real();
    result.y = root.imag();
    return result;
}
/**
 * @brief Host side function that scales a cuDoubleComplex number by a real factor.
 *
 * @param s Scale factor
 * @param a Complex number
 * @return cuDoubleComplex with both components multiplied by s
 */
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a)
{
    cuDoubleComplex scaled;
    scaled.x = s*a.x;
    scaled.y = s*a.y;
    return scaled;
}
/**
 * @brief Host side sum of two cuDoubleComplex numbers.
 *
 * @param a Complex number
 * @param b Complex number
 * @return cuDoubleComplex a + b, added component by component
 */
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x + b.x, a.y + b.y);
}
/**
 * @brief Host side difference of two cuDoubleComplex numbers.
 *
 * @param a Complex number
 * @param b Complex number
 * @return cuDoubleComplex a - b, subtracted component by component
 */
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x - b.x, a.y - b.y);
}
/**
 * @brief Square root of a cuDoubleComplex number, computed with std::sqrt on
 * std::complex<lcg_float>.
 *
 * @param a Complex number
 * @return cuDoubleComplex The root returned by std::sqrt
 */
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a)
{
    const std::complex<lcg_float> root = std::sqrt(std::complex<lcg_float>(a.x, a.y));
    cuDoubleComplex result;
    result.x = root.real();
    result.y = root.imag();
    return result;
}
/**
 * @brief Reorder the entries of a COO sparse matrix (single-precision complex) so that
 * they are sorted by column first and by row second.
 *
 * Every input entry gets the key N*col + row and the entries are emitted in ascending
 * key order. In the output the two index arrays are exchanged: Ac_row receives the
 * original column index and Ac_col the original row index.
 *
 * @note Entries that share the same (row, column) pair map to the same key, so only the
 * last one is kept and fewer than nz output entries are written in that case.
 *
 * @param[in]  A_row  Row indices of the input entries
 * @param[in]  A_col  Column indices of the input entries
 * @param[in]  A      Values of the input entries
 * @param[in]  N      Multiplier used to build the sort key (row/column length of A)
 * @param[in]  nz     Number of input entries
 * @param[out] Ac_row Output row indices
 * @param[out] Ac_col Output column indices
 * @param[out] Ac_val Output values
 */
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val)
{
    // Nothing to convert; also keeps a negative nz from being read as a huge unsigned count.
    if (N <= 0 || nz <= 0) return;

    const size_t stride = static_cast<size_t>(N);
    std::map<size_t, cuComplex> sort_map;
    for (int k = 0; k < nz; k++)
    {
        // Widen before multiplying: N*A_col[k] evaluated in int overflows for large matrices.
        const size_t order = stride*static_cast<size_t>(A_col[k]) + static_cast<size_t>(A_row[k]);
        sort_map[order] = A[k];
    }

    size_t out = 0;
    std::map<size_t, cuComplex>::const_iterator st_iter;
    for (st_iter = sort_map.begin(); st_iter != sort_map.end(); ++st_iter)
    {
        // exchange the row and column indices of the sorted entry
        Ac_row[out] = static_cast<int>(st_iter->first/stride);
        Ac_col[out] = static_cast<int>(st_iter->first%stride);
        Ac_val[out] = st_iter->second;
        out++;
    }
}
/**
 * @brief Reorder the entries of a COO sparse matrix (double-precision complex) so that
 * they are sorted by column first and by row second.
 *
 * Every input entry gets the key N*col + row and the entries are emitted in ascending
 * key order. In the output the two index arrays are exchanged: Ac_row receives the
 * original column index and Ac_col the original row index.
 *
 * @note Entries that share the same (row, column) pair map to the same key, so only the
 * last one is kept and fewer than nz output entries are written in that case.
 *
 * @param[in]  A_row  Row indices of the input entries
 * @param[in]  A_col  Column indices of the input entries
 * @param[in]  A      Values of the input entries
 * @param[in]  N      Multiplier used to build the sort key (row/column length of A)
 * @param[in]  nz     Number of input entries
 * @param[out] Ac_row Output row indices
 * @param[out] Ac_col Output column indices
 * @param[out] Ac_val Output values
 */
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val)
{
    // Nothing to convert; also keeps a negative nz from being read as a huge unsigned count.
    if (N <= 0 || nz <= 0) return;

    const size_t stride = static_cast<size_t>(N);
    std::map<size_t, cuDoubleComplex> sort_map;
    for (int k = 0; k < nz; k++)
    {
        // Widen before multiplying: N*A_col[k] evaluated in int overflows for large matrices.
        const size_t order = stride*static_cast<size_t>(A_col[k]) + static_cast<size_t>(A_row[k]);
        sort_map[order] = A[k];
    }

    size_t out = 0;
    std::map<size_t, cuDoubleComplex>::const_iterator st_iter;
    for (st_iter = sort_map.begin(); st_iter != sort_map.end(); ++st_iter)
    {
        // exchange the row and column indices of the sorted entry
        Ac_row[out] = static_cast<int>(st_iter->first/stride);
        Ac_col[out] = static_cast<int>(st_iter->first%stride);
        Ac_val[out] = st_iter->second;
        out++;
    }
}
/**
 * @brief Launch smCcsr_get_diagonal_device with one thread per matrix row.
 *
 * The grid size is ceil(A_len / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] A_ptr   CSR row pointer array
 * @param[in] A_col   CSR column indices
 * @param[in] A_val   CSR non-zero values
 * @param[in] A_len   Number of rows; nothing is launched when it is not positive
 * @param     A_diag  Output array of the diagonal values
 * @param[in] bk_size Number of threads per block
 */
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size)
{
    // An empty matrix would request a zero-block grid, which is an invalid launch configuration.
    if (A_len <= 0) return;

    const int numBlocks = (A_len + bk_size - 1) / bk_size;
    smCcsr_get_diagonal_device<<<numBlocks, bk_size>>>(A_ptr, A_col, A_val, A_len, A_diag);
}
/**
 * @brief Launch smZcsr_get_diagonal_device with one thread per matrix row.
 *
 * The grid size is ceil(A_len / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] A_ptr   CSR row pointer array
 * @param[in] A_col   CSR column indices
 * @param[in] A_val   CSR non-zero values
 * @param[in] A_len   Number of rows; nothing is launched when it is not positive
 * @param     A_diag  Output array of the diagonal values
 * @param[in] bk_size Number of threads per block
 */
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size)
{
    // An empty matrix would request a zero-block grid, which is an invalid launch configuration.
    if (A_len <= 0) return;

    const int numBlocks = (A_len + bk_size - 1) / bk_size;
    smZcsr_get_diagonal_device<<<numBlocks, bk_size>>>(A_ptr, A_col, A_val, A_len, A_diag);
}
/**
 * @brief Launch vecMvecC_element_wise_device to compute c[i] = a[i] * b[i].
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       First input array
 * @param[in] b       Second input array
 * @param     c       Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecMvecC_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
}
/**
 * @brief Launch vecMvecZ_element_wise_device to compute c[i] = a[i] * b[i].
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       First input array
 * @param[in] b       Second input array
 * @param     c       Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecMvecZ_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
}
/**
 * @brief Launch vecDvecC_element_wise_device to compute c[i] = a[i] / b[i].
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       Array of numerators
 * @param[in] b       Array of denominators
 * @param     c       Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecDvecC_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
}
/**
 * @brief Launch vecDvecZ_element_wise_device to compute c[i] = a[i] / b[i].
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       Array of numerators
 * @param[in] b       Array of denominators
 * @param     c       Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecDvecZ_element_wise_device<<<numBlocks, bk_size>>>(a, b, c, n);
}
/**
 * @brief Launch vecC_conjugate_device to write the conjugate of every element of a into ca.
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       Input array
 * @param     ca      Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecC_conjugate_device<<<numBlocks, bk_size>>>(a, ca, n);
}
/**
 * @brief Launch vecZ_conjugate_device to write the conjugate of every element of a into ca.
 *
 * The grid size is ceil(n / bk_size). The launch is not followed by any
 * synchronization or error query in this function.
 *
 * @param[in] a       Input array
 * @param     ca      Output array
 * @param[in] n       Number of elements; nothing is launched when it is not positive
 * @param[in] bk_size Number of threads per block
 */
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size)
{
    // An empty array would request a zero-block grid, which is an invalid launch configuration.
    if (n <= 0) return;

    const int numBlocks = (n + bk_size - 1) / bk_size;
    vecZ_conjugate_device<<<numBlocks, bk_size>>>(a, ca, n);
}

278
src/lib/lcg_complex_cuda.h Normal file
View File

@ -0,0 +1,278 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_COMPLEX_CUDA_H
#define _LCG_COMPLEX_CUDA_H
#include "lcg_complex.h"
#ifdef LibLCG_CUDA
#include <cuda_runtime.h>
#include <cuComplex.h>
/**
* @brief Convert cuda complex number to lcg complex number
*
* @param a CUDA complex number
* @return lcg_complex lcg complex number
*/
lcg_complex cuda2lcg_complex(cuDoubleComplex a);
/**
* @brief Convert lcg complex number to CUDA complex number
*
* @param a lcg complex number
* @return cuDoubleComplex CUDA complex number
*/
cuDoubleComplex lcg2cuda_complex(lcg_complex a);
/**
* @brief Allocate memory for a cuDoubleComplex array.
*
* @param[in] n Size of the cuDoubleComplex array.
*
* @return Pointer of the array's location.
*/
cuDoubleComplex* clcg_malloc_cuda(size_t n);
/**
* @brief Destroy memory used by the cuDoubleComplex type array.
*
* @param x Pointer of the array.
*/
void clcg_free_cuda(cuDoubleComplex *x);
/**
* @brief set a complex vector's value
*
* @param a pointer of the vector
* @param[in] b initial value
* @param[in] size vector size
*/
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size);
/**
* @brief Host side function for scaling a cuComplex object
*
* @param s scale factor
* @param a Complex number
* @return cuComplex scaled complex number
*/
cuComplex clcg_Cscale(lcg_float s, cuComplex a);
/**
* @brief Calculate the sum of two cuda complex number. This is a host side function.
*
* @param a Complex number
* @param b Complex number
* @return cuComplex Sum of the input complex number
*/
cuComplex clcg_Csum(cuComplex a, cuComplex b);
/**
* @brief Calculate the difference of two cuda complex number. This is a host side function.
*
* @param a Complex number
* @param b Complex number
* @return cuComplex Difference of the input complex number
*/
cuComplex clcg_Cdiff(cuComplex a, cuComplex b);
/**
* @brief Calculate the sqrt() of a cuda complex number
*
* @param a Complex number
* @return cuComplex root value
*/
cuComplex clcg_Csqrt(cuComplex a);
/**
* @brief Host side function for scaling a cuDoubleComplex object
*
* @param s scale factor
* @param a Complex number
* @return cuDoubleComplex scaled complex number
*/
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a);
/**
* @brief Calculate the sum of two cuda complex number. This is a host side function.
*
* @param a Complex number
* @param b Complex number
* @return cuDoubleComplex Sum of the input complex number
*/
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b);
/**
* @brief Calculate the difference of two cuda complex number. This is a host side function.
*
* @param a Complex number
* @param b Complex number
* @return cuDoubleComplex Difference of the input complex number
*/
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b);
/**
* @brief Calculate the sqrt() of a cuda complex number
*
* @param a Complex number
* @return cuDoubleComplex root value
*/
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a);
/**
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
*
* @note The sparse matrix is stored in the COO format. This is a host side function.
*
* @param A_row Row index
* @param A_col Column index
* @param A Non-zero values of the matrix
* @param N Row/column length of A
* @param nz Number of the non-zero values in A
* @param Ac_row Output row index
* @param Ac_col Output column index
* @param Ac_val Non-zero values of the output matrix
*/
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val);
/**
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
*
* @note The sparse matrix is stored in the COO format. This is a host side function.
*
* @param A_row Row index
* @param A_col Column index
* @param A Non-zero values of the matrix
* @param N Row/column length of A
* @param nz Number of the non-zero values in A
* @param Ac_row Output row index
* @param Ac_col Output column index
* @param Ac_val Non-zero values of the output matrix
*/
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val);
/**
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] A_ptr Row index pointer
* @param[in] A_col Column index
* @param[in] A_val Non-zero values of the matrix
* @param[in] A_len Dimension of the matrix
* @param A_diag Output diagonal elements
* @param[in] bk_size Default CUDA block size.
*/
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size = 1024);
/**
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] A_ptr Row index pointer
* @param[in] A_col Column index
* @param[in] A_val Non-zero values of the matrix
* @param[in] A_len Dimension of the matrix
* @param A_diag Output diagonal elements
* @param[in] bk_size Default CUDA block size.
*/
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size = 1024);
/**
* @brief Element-wise multiplication between two CUDA arrays.
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array
* @param[in] b Pointer of the input array
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
/**
* @brief Element-wise multiplication between two CUDA arrays.
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array
* @param[in] b Pointer of the input array
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
/**
* @brief Element-wise division between two CUDA arrays.
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array
* @param[in] b Pointer of the input array
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
/**
* @brief Element-wise division between two CUDA arrays.
*
* @note This is a device side function. All memories must be allocated on the GPU device.
*
* @param[in] a Pointer of the input array
* @param[in] b Pointer of the input array
* @param c Pointer of the output array
* @param[in] n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
/**
* @brief Return complex conjugates of an input CUDA complex array
*
* @param a Pointer of the input array
* @param ca Pointer of the output array
* @param n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size = 1024);
/**
* @brief Return complex conjugates of an input CUDA complex array
*
* @param a Pointer of the input array
* @param ca Pointer of the output array
* @param n Length of the arrays
* @param[in] bk_size Default CUDA block size.
*/
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size = 1024);
#endif // LibLCG_CUDA
#endif // _LCG_COMPLEX_CUDA_H

685
src/lib/lcg_cuda.cu Normal file
View File

@ -0,0 +1,685 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "ctime"
#include "iostream"
#include "lcg_cuda.h"
typedef int (*lcg_solver_cuda_ptr)(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Dispatch an unconstrained solve to the CUDA solver selected by solver_id.
 *
 * LCG_CGS selects lcgs(); LCG_CG and every other identifier select lcg().
 * All remaining arguments are forwarded unchanged.
 *
 * @return The status code returned by the selected solver.
 */
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
    const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
{
    // Plain CG is both the LCG_CG choice and the fallback for unknown identifiers.
    lcg_solver_cuda_ptr chosen_solver = lcg;
    if (solver_id == LCG_CGS)
    {
        chosen_solver = lcgs;
    }
    return chosen_solver(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Run the preconditioned CUDA solver.
 *
 * Every argument except solver_id is forwarded to lpcg(); solver_id is not read.
 *
 * @return The status code returned by lpcg().
 */
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
    lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
{
    const int status = lpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    return status;
}
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
/**
 * @brief Run the bound-constrained CUDA solver.
 *
 * Every argument except solver_id is forwarded to lpg(); solver_id is not read.
 *
 * @return The status code returned by lpg().
 */
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
    const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
{
    const int status = lpg(Afp, Pfp, m, B, low, hig, n_size, nz_size, param, instance, cub_handle, cus_handle);
    return status;
}
/**
 * @brief Conjugate gradient solver using cuBLAS and cuSPARSE.
 *
 * m and B are copied with cudaMemcpyHostToDevice into freshly allocated buffers, the
 * iteration runs on those buffers, and the solution is copied back into m before every
 * return that follows the allocation.
 *
 * @param[in]     Afp        Callback that computes the product of the system matrix and a vector
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the solver's
 *                           internal copy of the solution; a non-zero return stops the iteration.
 * @param[in,out] m          Initial solution on entry, final solution on return
 * @param[in]     B          Right-hand side vector
 * @param[in]     n_size     Length of m and B
 * @param[in]     nz_size    Value forwarded to the callbacks
 * @param[in]     param      Solver parameters; defparam is used when nullptr
 * @param[in]     instance   User pointer forwarded to the callbacks
 * @param[in]     cub_handle cuBLAS handle
 * @param[in]     cus_handle cuSPARSE handle
 *
 * @return LCG_INVILAD_VARIABLE_SIZE, LCG_INVILAD_MAX_ITERATIONS, LCG_INVILAD_EPSILON or
 *         LCG_INVALID_POINTER for rejected arguments; otherwise LCG_ALREADY_OPTIMIZIED,
 *         LCG_CONVERGENCE, LCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 *
 * @note NOTE(review): the return codes of the CUDA, cuBLAS and cuSPARSE calls are not checked.
 */
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
    const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (Afp == nullptr) return LCG_INVALID_POINTER; // Afp is called unconditionally below
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate memory
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *gk = nullptr, *dk = nullptr, *Adk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&gk, n_size * sizeof(lcg_float));
    cudaMalloc(&dk, n_size * sizeof(lcg_float));
    cudaMalloc(&Adk, n_size * sizeof(lcg_float));

    // Copy initial solutions
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Adk;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

    // g0 = Ax - B
    cudaMemcpy(gk, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
    cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
    cudaMemset(dk, 0, n_size * sizeof(lcg_float)); // d0 = 0
    cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // d0 = -g0

    lcg_float gk_mod;
    cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = gk^T * gk (squared norm)

    lcg_float g0_mod = gk_mod;
    if (g0_mod < 1.0) g0_mod = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(gk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    // The relative test applies only when abs_diff is off. Without this guard an
    // absolute-mode run that failed the test above could still exit here early.
    else if (!para.abs_diff && gk_mod/g0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, gk_mod/g0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    lcg_float dTAd, ak, betak, gk1_mod, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
        else residual = gk_mod/g0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 means no iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd); // dTAd = dk^T * Adk
        ak = gk_mod/dTAd;

        cublasDaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*dk
        cublasDaxpy_v2(cub_handle, n_size, &ak, Adk, 1, gk, 1); // gk += ak*Adk

        cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk1_mod); // gk1_mod = gk^T * gk (squared norm)
        betak = gk1_mod/gk_mod;
        gk_mod = gk1_mod;

        cublasDscal_v2(cub_handle, n_size, &betak, dk, 1); // dk *= betak
        cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // dk -= gk
    }

    func_ends:
    {
        // Copy to host memories
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(dk);
        cudaFree(gk);
        cudaFree(Adk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Adk);
    }

    return ret;
}
/**
 * @brief Conjugate gradient squared (CGS) iteration for A*m = B, driven by cuBLAS calls.
 *
 * m and B are uploaded with cudaMemcpyHostToDevice, so they are treated as host buffers
 * holding n_size values each. The current solution is copied back into m on every exit
 * path that follows the device allocations. The progress callback receives the device
 * copy of the solution.
 *
 * @param Afp        Callback computing the product A*x on dense-vector descriptors.
 * @param Pfp        Optional progress callback (may be nullptr). A non-zero return stops the loop.
 * @param m          Initial solution on entry, solution found so far on return.
 * @param B          Right-hand side of the linear system.
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam is used when this is nullptr.
 * @param instance   User data forwarded unchanged to the callbacks.
 * @param cub_handle cuBLAS handle used for every vector operation.
 * @param cus_handle cuSPARSE handle; only forwarded to Afp in this routine.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_STOP, LCG_CONVERGENCE or LCG_REACHED_MAX_ITERATIONS
 *         once the iteration was set up, otherwise one of the argument error codes.
 */
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
	const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
	// Fall back to the library defaults when the caller supplies no parameter set.
	lcg_para para = (param != nullptr) ? (*param) : defparam;

	// Argument validation. Nothing is allocated before these checks pass.
	if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;
	if (m == nullptr || B == nullptr) return LCG_INVALID_POINTER;
	if (cub_handle == nullptr || cus_handle == nullptr) return LCG_INVALID_POINTER;

	// Every device vector has the same byte size.
	const size_t vec_bytes = n_size * sizeof(lcg_float);

	lcg_float *d_m = nullptr, *d_B = nullptr;
	lcg_float *rk = nullptr, *r0T = nullptr, *pk = nullptr, *qpk = nullptr;
	lcg_float *Ax = nullptr, *uk = nullptr, *qk = nullptr, *wk = nullptr;

	// The buffers are allocated and released in this order.
	lcg_float** const device_bufs[] = {&d_m, &d_B, &rk, &r0T, &pk, &qpk, &Ax, &uk, &qk, &wk};
	for (lcg_float** slot : device_bufs)
	{
		cudaMalloc(slot, vec_bytes);
	}

	// Upload the initial solution and the right-hand side.
	cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
	cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

	// Dense-vector descriptors for the vectors exchanged with Afp.
	cusparseDnVecDescr_t dvec_m, dvec_wk, dvec_pk, dvec_Ax;
	cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_wk, n_size, wk, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_pk, n_size, pk, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_R_64F);

	lcg_float pos_one = 1.0;
	lcg_float neg_one = -1.0;

	// r0 = B - A*m
	Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size);
	cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
	cublasDaxpy_v2(cub_handle, n_size, &neg_one, Ax, 1, rk, 1);

	// p0 = u0 = r0T = r0
	cudaMemcpy(pk, rk, vec_bytes, cudaMemcpyDeviceToDevice);
	cudaMemcpy(uk, rk, vec_bytes, cudaMemcpyDeviceToDevice);
	cudaMemcpy(r0T, rk, vec_bytes, cudaMemcpyDeviceToDevice);

	// rho = rk . r0T
	lcg_float rho;
	cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rho);

	// rk_mod holds the squared norm rk . rk
	lcg_float rk_mod;
	cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod);

	// The relative criterion is normalised by the initial squared norm, but never by less than 1.
	lcg_float r0_mod = (rk_mod < 1.0) ? 1.0 : rk_mod;

	int ret, t = 0;

	// Leave immediately when the starting point already satisfies a stopping criterion.
	{
		const bool abs_hit = para.abs_diff && (sqrt(rk_mod)/n_size <= para.epsilon);
		if (abs_hit || rk_mod/r0_mod <= para.epsilon)
		{
			ret = LCG_ALREADY_OPTIMIZIED;
			if (Pfp != nullptr)
			{
				Pfp(instance, d_m, abs_hit ? sqrt(rk_mod)/n_size : rk_mod/r0_mod, &para, n_size, nz_size, 0);
			}
			goto func_ends;
		}
	}

	// Declared without initialisers because the goto above jumps across them.
	lcg_float alpha, neg_alpha, rho_next, r0T_Ap, beta, residual;
	for (;;)
	{
		residual = para.abs_diff ? sqrt(rk_mod)/n_size : rk_mod/r0_mod;

		// The callback is invoked before the convergence test and may abort the run.
		if (Pfp != nullptr && Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
		{
			ret = LCG_STOP;
			break;
		}

		if (residual <= para.epsilon)
		{
			ret = LCG_CONVERGENCE;
			break;
		}

		// max_iterations == 0 disables the iteration limit.
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		// alpha = rho / (r0T . A*pk)
		Afp(instance, cub_handle, cus_handle, dvec_pk, dvec_Ax, n_size, nz_size);
		r0T_Ap = 0.0;
		cublasDdot_v2(cub_handle, n_size, r0T, 1, Ax, 1, &r0T_Ap);
		alpha = rho/r0T_Ap;
		neg_alpha = -alpha;

		// qk = uk - alpha*A*pk, wk = uk + qk
		cudaMemcpy(qk, uk, vec_bytes, cudaMemcpyDeviceToDevice);
		cudaMemcpy(wk, uk, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &neg_alpha, Ax, 1, qk, 1);
		cublasDaxpy_v2(cub_handle, n_size, &pos_one, qk, 1, wk, 1);

		// m += alpha*wk, rk -= alpha*A*wk
		Afp(instance, cub_handle, cus_handle, dvec_wk, dvec_Ax, n_size, nz_size);
		cublasDaxpy_v2(cub_handle, n_size, &alpha, wk, 1, d_m, 1);
		cublasDaxpy_v2(cub_handle, n_size, &neg_alpha, Ax, 1, rk, 1);

		cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod);
		cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rho_next);
		beta = rho_next/rho;
		rho = rho_next;

		// uk = rk + beta*qk
		cudaMemcpy(uk, rk, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &beta, qk, 1, uk, 1);

		// pk = uk + beta*(qk + beta*pk)
		cudaMemcpy(qpk, qk, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &beta, pk, 1, qpk, 1);
		cudaMemcpy(pk, uk, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &beta, qpk, 1, pk, 1);
	}

	func_ends:
	{
		// Hand the current solution back to the caller, then release all device resources.
		cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);
		for (lcg_float** slot : device_bufs)
		{
			cudaFree(*slot);
		}
		cusparseDestroyDnVec(dvec_m);
		cusparseDestroyDnVec(dvec_wk);
		cusparseDestroyDnVec(dvec_pk);
		cusparseDestroyDnVec(dvec_Ax);
	}

	return ret;
}
/**
 * @brief Preconditioned conjugate gradient iteration for A*m = B, driven by cuBLAS calls.
 *
 * m and B are uploaded with cudaMemcpyHostToDevice, so they are treated as host buffers
 * holding n_size values each. The current solution is copied back into m on every exit
 * path that follows the device allocations. The progress callback receives the device
 * copy of the solution.
 *
 * @param Afp        Callback computing the product A*x on dense-vector descriptors.
 * @param Mfp        Callback applying the preconditioner; it maps the residual rk to zk.
 * @param Pfp        Optional progress callback (may be nullptr). A non-zero return stops the loop.
 * @param m          Initial solution on entry, solution found so far on return.
 * @param B          Right-hand side of the linear system.
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp, Mfp and Pfp.
 * @param param      Solver parameters; defparam is used when this is nullptr.
 * @param instance   User data forwarded unchanged to the callbacks.
 * @param cub_handle cuBLAS handle used for every vector operation.
 * @param cus_handle cuSPARSE handle; only forwarded to the callbacks in this routine.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_STOP, LCG_CONVERGENCE or LCG_REACHED_MAX_ITERATIONS
 *         once the iteration was set up, otherwise one of the argument error codes.
 */
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
	const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
	// Fall back to the library defaults when the caller supplies no parameter set.
	lcg_para para = (param != nullptr) ? (*param) : defparam;

	// Argument validation. Nothing is allocated before these checks pass.
	if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;
	if (m == nullptr || B == nullptr) return LCG_INVALID_POINTER;
	if (cub_handle == nullptr || cus_handle == nullptr) return LCG_INVALID_POINTER;

	// Every device vector has the same byte size.
	const size_t vec_bytes = n_size * sizeof(lcg_float);

	lcg_float *d_m = nullptr, *d_B = nullptr;
	lcg_float *rk = nullptr, *zk = nullptr, *dk = nullptr, *Adk = nullptr;

	// The buffers are allocated and released in this order.
	lcg_float** const device_bufs[] = {&d_m, &d_B, &rk, &zk, &dk, &Adk};
	for (lcg_float** slot : device_bufs)
	{
		cudaMalloc(slot, vec_bytes);
	}

	// Upload the initial solution and the right-hand side.
	cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
	cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

	// Dense-vector descriptors for the vectors exchanged with Afp and Mfp.
	cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_zk, dvec_dk, dvec_Adk;
	cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_zk, n_size, zk, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

	lcg_float pos_one = 1.0;
	lcg_float neg_one = -1.0;

	// r0 = B - A*m (Adk is used as scratch for A*m)
	Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);
	cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice);
	cublasDaxpy_v2(cub_handle, n_size, &neg_one, Adk, 1, rk, 1);

	// z0 = preconditioned r0, d0 = z0
	Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);
	cudaMemcpy(dk, zk, vec_bytes, cudaMemcpyDeviceToDevice);

	// rk_mod holds the squared norm rk . rk
	lcg_float rk_mod;
	cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod);

	// The relative criterion is normalised by the initial squared norm, but never by less than 1.
	lcg_float r0_mod = (rk_mod < 1.0) ? 1.0 : rk_mod;

	// rho = zk . rk
	lcg_float rho;
	cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &rho);

	int ret, t = 0;

	// Leave immediately when the starting point already satisfies a stopping criterion.
	{
		const bool abs_hit = para.abs_diff && (sqrt(rk_mod)/n_size <= para.epsilon);
		if (abs_hit || rk_mod/r0_mod <= para.epsilon)
		{
			ret = LCG_ALREADY_OPTIMIZIED;
			if (Pfp != nullptr)
			{
				Pfp(instance, d_m, abs_hit ? sqrt(rk_mod)/n_size : rk_mod/r0_mod, &para, n_size, nz_size, 0);
			}
			goto func_ends;
		}
	}

	// Declared without initialisers because the goto above jumps across them.
	lcg_float dTAd, alpha, neg_alpha, beta, rho_next, residual;
	for (;;)
	{
		residual = para.abs_diff ? sqrt(rk_mod)/n_size : rk_mod/r0_mod;

		// The callback is invoked before the convergence test and may abort the run.
		if (Pfp != nullptr && Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
		{
			ret = LCG_STOP;
			break;
		}

		if (residual <= para.epsilon)
		{
			ret = LCG_CONVERGENCE;
			break;
		}

		// max_iterations == 0 disables the iteration limit.
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		// alpha = rho / (dk . A*dk)
		Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size);
		cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd);
		alpha = rho/dTAd;
		neg_alpha = -alpha;

		// m += alpha*dk, rk -= alpha*A*dk
		cublasDaxpy_v2(cub_handle, n_size, &alpha, dk, 1, d_m, 1);
		cublasDaxpy_v2(cub_handle, n_size, &neg_alpha, Adk, 1, rk, 1);

		// Refresh the preconditioned residual and both inner products.
		Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);
		cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod);
		cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &rho_next);
		beta = rho_next/rho;
		rho = rho_next;

		// dk = zk + beta*dk
		cublasDscal_v2(cub_handle, n_size, &beta, dk, 1);
		cublasDaxpy_v2(cub_handle, n_size, &pos_one, zk, 1, dk, 1);
	}

	func_ends:
	{
		// Hand the current solution back to the caller, then release all device resources.
		cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);
		for (lcg_float** slot : device_bufs)
		{
			cudaFree(*slot);
		}
		cusparseDestroyDnVec(dvec_m);
		cusparseDestroyDnVec(dvec_rk);
		cusparseDestroyDnVec(dvec_zk);
		cusparseDestroyDnVec(dvec_dk);
		cusparseDestroyDnVec(dvec_Adk);
	}

	return ret;
}
/**
 * @brief Projected gradient iteration with a Barzilai-Borwein style step for A*m = B
 *        subject to the box constraint low <= m <= hig.
 *
 * m and B are uploaded with cudaMemcpyHostToDevice, so they are treated as host buffers
 * holding n_size values each. The current solution is copied back into m on every exit
 * path that follows the device allocations. The progress callback receives the device
 * copy of the solution.
 *
 * @param Afp        Callback computing the product A*x on dense-vector descriptors.
 * @param Pfp        Optional progress callback (may be nullptr). A non-zero return stops the loop.
 * @param m          Initial solution on entry, solution found so far on return.
 * @param B          Right-hand side of the linear system.
 * @param low        Lower bounds, forwarded unchanged to lcg_set2box_cuda().
 * @param hig        Upper bounds, forwarded unchanged to lcg_set2box_cuda().
 *                   NOTE(review): the bounds are applied to device vectors, so they presumably
 *                   have to be device buffers as well - confirm against lcg_set2box_cuda().
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam is used when this is nullptr. para.step is the
 *                   initial step length and must be positive.
 * @param instance   User data forwarded unchanged to the callbacks.
 * @param cub_handle cuBLAS handle used for every vector operation.
 * @param cus_handle cuSPARSE handle; only forwarded to Afp in this routine.
 *
 * @return LCG_ALREADY_OPTIMIZIED, LCG_STOP, LCG_CONVERGENCE or LCG_REACHED_MAX_ITERATIONS
 *         once the iteration was set up, otherwise one of the argument error codes.
 */
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
	const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
	void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
	// set CG parameters
	lcg_para para = (param != nullptr) ? (*param) : defparam;

	// check parameters
	if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;
	if (para.step <= 0.0) return LCG_INVALID_LAMBDA;
	if (m == nullptr) return LCG_INVALID_POINTER;
	if (B == nullptr) return LCG_INVALID_POINTER;
	if (low == nullptr) return LCG_INVALID_POINTER;
	if (hig == nullptr) return LCG_INVALID_POINTER;
	if (cub_handle == nullptr) return LCG_INVALID_POINTER;
	if (cus_handle == nullptr) return LCG_INVALID_POINTER;

	// Every device vector has the same byte size.
	const size_t vec_bytes = n_size * sizeof(lcg_float);

	// allocate device memory
	lcg_float *d_m = nullptr, *d_B = nullptr;
	lcg_float *gk = nullptr, *Adk = nullptr;
	lcg_float *m_new = nullptr, *gk_new = nullptr;
	lcg_float *sk = nullptr, *yk = nullptr;
	cudaMalloc(&d_m, vec_bytes);
	cudaMalloc(&d_B, vec_bytes);
	cudaMalloc(&gk, vec_bytes);
	cudaMalloc(&Adk, vec_bytes);
	cudaMalloc(&m_new, vec_bytes);
	cudaMalloc(&gk_new, vec_bytes);
	cudaMalloc(&sk, vec_bytes);
	cudaMalloc(&yk, vec_bytes);

	// Copy initial solutions
	cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
	cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

	cusparseDnVecDescr_t dvec_m, dvec_mnew, dvec_Adk;
	cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_mnew, n_size, m_new, CUDA_R_64F);
	cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

	lcg_float none = -1.0;
	lcg_float nalpha_k, alpha_k = para.step;

	// Project the starting point onto the box. The projection has to act on the device
	// copy: d_m is the vector the iteration works on (it was uploaded above), and the
	// same helper is applied to the device buffer m_new inside the loop.
	lcg_set2box_cuda(low, hig, d_m, n_size);

	Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

	// g0 = Ax - B
	cudaMemcpy(gk, Adk, vec_bytes, cudaMemcpyDeviceToDevice); // g0 = A*x
	cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B

	// gk_mod holds the squared norm gk . gk
	lcg_float gk_mod;
	cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod);

	// The relative criterion is normalised by the initial squared norm, but never by less than 1.
	lcg_float g0_mod = gk_mod;
	if (g0_mod < 1.0) g0_mod = 1.0;

	int ret, t = 0;
	if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, d_m, sqrt(gk_mod)/n_size, &para, n_size, nz_size, 0);
		}
		goto func_ends;
	}
	else if (gk_mod/g0_mod <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, d_m, gk_mod/g0_mod, &para, n_size, nz_size, 0);
		}
		goto func_ends;
	}

	// Declared without initialisers because the gotos above jump across them.
	lcg_float sk_mod, syk_mod, residual;
	while(1)
	{
		if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
		else residual = gk_mod/g0_mod;

		if (Pfp != nullptr)
		{
			if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
			{
				ret = LCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = LCG_CONVERGENCE; goto func_ends;
		}

		// max_iterations == 0 disables the iteration limit.
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		// m_new = project(m - alpha_k*gk)
		nalpha_k = -1.0*alpha_k;
		cudaMemcpy(m_new, d_m, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &nalpha_k, gk, 1, m_new, 1);
		lcg_set2box_cuda(low, hig, m_new, n_size);

		// gk_new = A*m_new - B. The subtraction must target gk_new; the previous gradient
		// gk has to stay untouched until yk has been formed.
		Afp(instance, cub_handle, cus_handle, dvec_mnew, dvec_Adk, n_size, nz_size);
		cudaMemcpy(gk_new, Adk, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk_new, 1);

		// sk = m_new - m
		cudaMemcpy(sk, m_new, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &none, d_m, 1, sk, 1);

		// yk = gk_new - gk
		cudaMemcpy(yk, gk_new, vec_bytes, cudaMemcpyDeviceToDevice);
		cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, yk, 1);

		// Next step length: alpha_k = (sk . sk)/(sk . yk)
		cublasDdot_v2(cub_handle, n_size, sk, 1, sk, 1, &sk_mod);
		cublasDdot_v2(cub_handle, n_size, sk, 1, yk, 1, &syk_mod);
		alpha_k = sk_mod/syk_mod;

		// Accept the new iterate and its gradient.
		cudaMemcpy(d_m, m_new, vec_bytes, cudaMemcpyDeviceToDevice);
		cudaMemcpy(gk, gk_new, vec_bytes, cudaMemcpyDeviceToDevice);

		// Update the function-scope gk_mod. A loop-local re-declaration here would shadow
		// it and freeze the residual tested at the top of the loop.
		cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod);
	}

	func_ends:
	{
		// Copy to host memories
		cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

		cudaFree(d_m);
		cudaFree(d_B);
		cudaFree(gk);
		cudaFree(gk_new);
		cudaFree(m_new);
		cudaFree(sk);
		cudaFree(yk);
		cudaFree(Adk);
		cusparseDestroyDnVec(dvec_m);
		cusparseDestroyDnVec(dvec_mnew);
		cusparseDestroyDnVec(dvec_Adk);
	}

	return ret;
}

135
src/lib/lcg_cuda.h Normal file
View File

@ -0,0 +1,135 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_CUDA_H
#define _LCG_CUDA_H
#include "util.h"
#include "algebra_cuda.h"
#ifdef LibLCG_CUDA
#include <cublas_v2.h>
#include <cusparse_v2.h>
/**
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
*
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
* @param cub_handle Handle of the cublas object.
* @param cus_handle Handle of the cusparse object.
* @param x Dense-vector descriptor of the multiplier of the Ax product.
* @param prod_Ax Dense-vector descriptor receiving the product of A multiplied by x.
* @param n_size Size of x and column/row numbers of A.
* @param nz_size Value of the nz_size argument given to the solver, passed through unchanged
* (presumably the number of non-zero elements of A - confirm).
*/
typedef void (*lcg_axfunc_cuda_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size);
/**
* @brief Callback interface for monitoring the progress and terminating the iteration
* if necessary. Note that m is hosted on the GPU device.
*
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
* @param m The current solutions.
* @param converge The current value evaluating the iteration progress.
* @param param The parameter set used by the running solver.
* @param n_size The size of the variables.
* @param nz_size Value of the nz_size argument given to the solver, passed through unchanged.
* @param k The iteration count.
*
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will terminate the optimization process.
*/
typedef int (*lcg_progress_cuda_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
const lcg_para* param, const int n_size, const int nz_size, const int k);
/**
* @brief A combined conjugate gradient solver function.
*
* NOTE(review): this comment used to state that both m and B are hosted on the GPU device.
* The solver routines lcgs(), lpcg() and lpg() in lcg_cuda.cu copy m and B with
* cudaMemcpyHostToDevice, which indicates host buffers - confirm which one is intended.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] nz_size Size of the non-zero element of a cusparse object.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_cuda() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param cub_handle Handle of the cublas object.
* @param cus_handle Handle of the cusparse object.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
*
* @return Status of the function.
*/
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle,
cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_CG);
/**
* @brief A combined preconditioned conjugate gradient solver function.
*
* NOTE(review): this comment used to state that both m and B are hosted on the GPU device.
* The lpcg() routine in lcg_cuda.cu copies m and B with cudaMemcpyHostToDevice, which
* indicates host buffers - confirm which one is intended.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Mfp Callback function for applying the preconditioner to a vector.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] nz_size Size of the non-zero element of a cusparse object.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_preconditioned_cuda() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param cub_handle Handle of the cublas object.
* @param cus_handle Handle of the cusparse object.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
*
* @return Status of the function.
*/
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PCG);
/**
* @brief A combined solver function with box (inequality) constraints on the solution.
*
* NOTE(review): this comment used to state that both m and B are hosted on the GPU device.
* The lpg() routine in lcg_cuda.cu copies m and B with cudaMemcpyHostToDevice, which
* indicates host buffers - confirm which one is intended, and where low and hig must live.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param low Lower bound of the acceptable solution.
* @param hig Higher bound of the acceptable solution.
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] nz_size Size of the non-zero element of a cusparse object.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_constrained_cuda() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param cub_handle Handle of the cublas object.
* @param cus_handle Handle of the cusparse object.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
*
* @return Status of the function.
*/
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PG);
#endif // LibLCG_CUDA
#endif // _LCG_CUDA_H

1128
src/lib/lcg_eigen.cpp Normal file

File diff suppressed because it is too large Load Diff

110
src/lib/lcg_eigen.h Normal file
View File

@ -0,0 +1,110 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_EIGEN_H
#define _LCG_EIGEN_H
#include "util.h"
#include "algebra_eigen.h"
/**
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
* by a vertical vector 'x'.
*
* @param instance The user data sent for the lcg_solver_eigen() functions by the client.
* @param x Multiplier of the Ax product.
* @param prod_Ax Output vector receiving the product of A multiplied by x.
*/
typedef void (*lcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax);
/**
* @brief Callback interface for monitoring the progress and terminating the iteration
* if necessary.
*
* @param instance The user data sent for the lcg_solver_eigen() functions by the client.
* @param m The current solutions.
* @param converge The current value evaluating the iteration progress.
* @param param The parameter set used by the running solver.
* @param k The iteration count.
*
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will terminate the optimization process.
*/
typedef int (*lcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
const lcg_para *param, const int k);
/**
* @brief A combined conjugate gradient solver function.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_eigen() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
*
* @return Status of the function.
*/
int lcg_solver_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CG);
/**
* @brief A combined preconditioned conjugate gradient solver function.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_preconditioned_eigen() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
*
* @return Status of the function.
*/
int lcg_solver_preconditioned_eigen(lcg_axfunc_eigen_ptr Afp, lcg_axfunc_eigen_ptr Mfp, lcg_progress_eigen_ptr Pfp,
Eigen::VectorXd &m, const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
/**
* @brief A combined solver function with inequality (box) constraints on the solution.
*
* @param[in] Afp Callback function for calculating the product of 'Ax'.
* @param[in] Pfp Callback function for monitoring the iteration progress.
* @param m Initial solution vector.
* @param B Objective vector of the linear system.
* @param[in] low The lower boundary of the acceptable solution.
* @param[in] hig The higher boundary of the acceptable solution.
* @param param Parameter setup for the conjugate gradient methods.
* @param instance The user data sent for the lcg_solver_constrained_eigen() function by the client.
* This variable is either 'this' for class member functions or 'NULL' for global functions.
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
*
* @return Status of the function.
*/
int lcg_solver_constrained_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
const Eigen::VectorXd &B, const Eigen::VectorXd &low, const Eigen::VectorXd &hig,
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PG);
#endif //_LCG_EIGEN_H

381
src/lib/preconditioner.cpp Normal file
View File

@ -0,0 +1,381 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "preconditioner.h"
#include "cmath"
#include "map"
/**
 * @brief Count the COO entries lying in the lower triangle (diagonal included), i.e. the
 *        entries with row[i] >= col[i].
 *
 * @param row      Row indices of the entries.
 * @param col      Column indices of the entries.
 * @param nz_size  Number of entries in row/col. Zero or negative values yield a count of 0.
 * @param lnz_size Output: number of lower-triangle entries.
 */
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size)
{
	// A signed index keeps the loop bound meaningful: comparing an unsigned index with a
	// negative nz_size would turn the bound into a huge value and read out of range.
	int count = 0;
	for (int i = 0; i < nz_size; i++)
	{
		if (row[i] >= col[i])
		{
			count++;
		}
	}

	*lnz_size = count;
	return;
}
// Incomplete Cholesky factorization of a COO matrix, keeping only the lower triangle.
//
// The entries of the input with row >= col are copied to IC_row/IC_col/IC_val and are then
// overwritten in place with the factor values; no fill-in is created.
//
// row, col, val  COO triplets of the input matrix (nz_size entries).
// N              Size used for the per-row work arrays.
// nz_size        Number of entries of the input matrix.
// lnz_size       Number of lower-triangle entries, used as the length of the IC_* arrays.
// IC_row/col/val Output triplets of the lower-triangular factor.
//
// NOTE(review): the loops below read entry 0 as the diagonal of row 0, walk each row until
// its diagonal entry is met, and number row starts consecutively. This presumably requires
// row-major sorted input in which every row owns a diagonal entry - confirm with callers.
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size,
int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
{
// Diagonal elements of the factorized lower triangular matrix
lcg_float *diagonal = new lcg_float [N];
// A temporary (scattered) row
lcg_float *tmp_row = new lcg_float [N];
// Column indices of the non-zero elements currently stored in tmp_row
int *filled_idx = new int [N];
// Beginning index of each row in the output arrays
int *row_st_idx = new int [N];
size_t i, j, f;
// Set initial values
for (i = 0; i < N; i++)
{
diagonal[i] = 0.0;
tmp_row[i] = 0.0;
filled_idx[i] = -1;
row_st_idx[i] = -1;
}
// Copy elements in the lower triangle to the output matrix
j = 0;
for (i = 0; i < nz_size; i++)
{
if (row[i] >= col[i])
{
IC_row[j] = row[i];
IC_col[j] = col[i];
IC_val[j] = val[i];
j++;
}
}
// Get the beginning index of each row in the matrix.
// NOTE(review): slot 0 receives the row number of the first entry rather than the element
// index 0; both coincide only when the first entry belongs to row 0.
j = 1;
row_st_idx[0] = IC_row[0];
size_t old_row = IC_row[0];
for (i = 1; i < lnz_size; i++)
{
if (IC_row[i] > old_row)
{
// A new row starts at element i; j counts the rows met so far
row_st_idx[j] = i;
old_row = IC_row[i];
j++;
}
}
// Calculate the first element
IC_val[0] = sqrt(IC_val[0]);
diagonal[0] = IC_val[0];
// Running sum of the squared off-diagonal values of the current row
lcg_float dia_sum;
dia_sum = 0.0;
// The first one is already calculated
for (i = 1; i < lnz_size; i++)
{
// Calculate the first column if there is one
if (IC_col[i] == 0)
{
IC_val[i] = IC_val[i]/IC_val[0];
dia_sum = dia_sum + IC_val[i]*IC_val[i];
continue; // Case 1 break
}
// Calculate elements in the middle of a row
if (IC_row[i] > IC_col[i])
{
// Scatter the already factorized part of row IC_col[i] (columns left of its diagonal) into tmp_row
f = 0;
j = row_st_idx[IC_col[i]];
while (IC_col[j] < IC_col[i])
{
tmp_row[IC_col[j]] = IC_val[j];
filled_idx[f] = IC_col[j];
f++;
j++;
}
// Subtract the products of matching columns of the current row and the scattered row
j = row_st_idx[IC_row[i]];
while (IC_col[j] < IC_col[i])
{
IC_val[i] = IC_val[i] - IC_val[j]*tmp_row[IC_col[j]];
j++;
}
IC_val[i] = IC_val[i]/diagonal[IC_col[i]];
dia_sum = dia_sum + IC_val[i]*IC_val[i];
// Reset only the touched slots of tmp_row
for (j = 0; j < f; j++)
{
tmp_row[filled_idx[j]] = 0.0;
}
continue; // Case 2 break
}
// We have reached the diagonal position
if (IC_row[i] == IC_col[i])
{
IC_val[i] = sqrt(IC_val[i] - dia_sum);
diagonal[IC_col[i]] = IC_val[i];
// The next entry belongs to a new row
dia_sum = 0.0;
}
}
delete[] diagonal;
delete[] tmp_row;
delete[] row_st_idx;
delete[] filled_idx;
return;
}
/**
 * @brief Incomplete Cholesky factorization of a COO matrix that stores both triangles.
 *
 * All entries are copied to IC_row/IC_col/IC_val. The lower-triangle values are then
 * overwritten in place with the factor values (no fill-in), and every computed
 * off-diagonal value is mirrored to the symmetric upper-triangle entry when it exists.
 *
 * NOTE(review): the loops read entry 0 as the diagonal of row 0, walk each row until its
 * diagonal entry is met, and number row starts consecutively. This presumably requires
 * row-major sorted input in which every row owns a diagonal entry - confirm with callers.
 *
 * @param row     Row indices of the input entries.
 * @param col     Column indices of the input entries.
 * @param val     Values of the input entries.
 * @param N       Size used for the per-row work arrays and for the coordinate keys.
 * @param nz_size Number of entries of the input and of the IC_* arrays.
 * @param IC_row  Output row indices.
 * @param IC_col  Output column indices.
 * @param IC_val  Output values.
 */
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
{
	// Diagonal elements of the factorized lower triangular matrix
	lcg_float *diagonal = new lcg_float [N];
	// A temporary (scattered) row
	lcg_float *tmp_row = new lcg_float [N];
	// Column indices of the non-zero elements currently stored in tmp_row
	int *filled_idx = new int [N];
	// Beginning index of each row in the output arrays
	int *row_st_idx = new int [N];

	size_t i, j, f, l;
	// Set initial values
	for (i = 0; i < N; i++)
	{
		diagonal[i] = 0.0;
		tmp_row[i] = 0.0;
		filled_idx[i] = -1;
		row_st_idx[i] = -1;
	}

	// Copy elements to the output matrix
	for (i = 0; i < nz_size; i++)
	{
		IC_row[i] = row[i];
		IC_col[i] = col[i];
		IC_val[i] = val[i];
	}

	// Count the elements of the lower triangular part (diagonal included) and build a map
	// from the coordinate key N*row + col of every upper-triangle element to its array index.
	size_t order, L_nz = 0;
	std::map<size_t, size_t> index_map;
	std::map<size_t, size_t>::iterator mirror;
	for (i = 0; i < nz_size; i++)
	{
		if (row[i] >= col[i]) // Count number for the lower triangular part
		{
			L_nz++;
		}
		else // Only need to build the map for the upper triangular part
		{
			// Widen before multiplying: N*row[i] evaluated in int overflows for large N.
			order = static_cast<size_t>(N)*row[i] + col[i];
			index_map[order] = i;
		}
	}

	// Array indices of the lower-triangle elements, in input order
	j = 0;
	size_t *low_idx = new size_t [L_nz];
	for (i = 0; i < nz_size; i++)
	{
		if (row[i] >= col[i])
		{
			low_idx[j] = i;
			j++;
		}
	}

	// Get the beginning index of each row in the matrix.
	// NOTE(review): slot 0 receives the row number of the first entry rather than the element
	// index 0; both coincide only when the first entry belongs to row 0.
	j = 1;
	row_st_idx[0] = IC_row[0];
	size_t old_row = IC_row[0];
	for (i = 1; i < nz_size; i++)
	{
		if (IC_row[i] > old_row)
		{
			row_st_idx[j] = i;
			old_row = IC_row[i];
			j++;
		}
	}

	// Calculate the first element
	IC_val[0] = sqrt(IC_val[0]);
	diagonal[0] = IC_val[0];

	// Running sum of the squared off-diagonal values of the current row
	lcg_float dia_sum;
	dia_sum = 0.0;
	// The first one is already calculated
	for (i = 1; i < L_nz; i++)
	{
		l = low_idx[i];

		// Calculate the first column if there is one
		if (IC_col[l] == 0)
		{
			IC_val[l] = IC_val[l]/IC_val[0];
			dia_sum = dia_sum + IC_val[l]*IC_val[l];
			// Set value at the upper triangle: the mirror element sits at (0, IC_row[l]), so its key is N*0 + IC_row[l].
			// A lookup with find() avoids inserting a default index 0 and clobbering IC_val[0]
			// when the symmetric entry is absent.
			order = IC_row[l];
			mirror = index_map.find(order);
			if (mirror != index_map.end())
			{
				IC_val[mirror->second] = IC_val[l];
			}
			continue; // Case 1 break
		}

		// Calculate elements in the middle of a row
		if (IC_row[l] > IC_col[l])
		{
			// Scatter the already factorized part of row IC_col[l] (columns left of its diagonal) into tmp_row
			f = 0;
			j = row_st_idx[IC_col[l]];
			while (IC_col[j] < IC_col[l])
			{
				tmp_row[IC_col[j]] = IC_val[j];
				filled_idx[f] = IC_col[j];
				f++;
				j++;
			}

			// Subtract the products of matching columns of the current row and the scattered row
			j = row_st_idx[IC_row[l]];
			while (IC_col[j] < IC_col[l])
			{
				IC_val[l] = IC_val[l] - IC_val[j]*tmp_row[IC_col[j]];
				j++;
			}

			IC_val[l] = IC_val[l]/diagonal[IC_col[l]];
			dia_sum = dia_sum + IC_val[l]*IC_val[l];

			// Set value at the upper triangle (same widening and lookup rules as above)
			order = static_cast<size_t>(N)*IC_col[l] + IC_row[l];
			mirror = index_map.find(order);
			if (mirror != index_map.end())
			{
				IC_val[mirror->second] = IC_val[l];
			}

			// Reset only the touched slots of tmp_row
			for (j = 0; j < f; j++)
			{
				tmp_row[filled_idx[j]] = 0.0;
			}
			continue; // Case 2 break
		}

		// We have reached the diagonal position
		if (IC_row[l] == IC_col[l])
		{
			IC_val[l] = sqrt(IC_val[l] - dia_sum);
			diagonal[IC_col[l]] = IC_val[l];
			// The next lower-triangle entry belongs to a new row
			dia_sum = 0.0;
		}
	}

	delete[] diagonal;
	delete[] tmp_row;
	delete[] row_st_idx;
	delete[] filled_idx;
	delete[] low_idx;
	index_map.clear();
	return;
}
/**
 * Solve U*x = B by back substitution, U being given as COO triplets.
 *
 * Rows are processed from N-1 down to 0 while the triplets are scanned backwards. For a
 * row, entries with col > row contribute to the sum and the entry with col == row is the
 * pivot; entries with col < row are ignored. The backward scan picks up the contributions
 * before the pivot only if each row's entries are stored in increasing column order.
 * A row without a diagonal entry keeps x[row] = 0.
 */
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
    for (int i = 0; i < N; i++)
    {
        x[i] = 0.0;
    }

    // Signed indices are required here: both loops count down to zero, and an unsigned
    // index makes `>= 0` always true and wraps around instead of ending the loop.
    int iter = nz_size - 1; // last triplet that has not been consumed yet
    double sum;
    for (int i = N-1; i >= 0; i--)
    {
        sum = 0.0;
        for (int j = iter; j >= 0; j--)
        {
            if (row[j] == i && col[j] > i)
            {
                sum += U[j] * x[col[j]];
            }
            else if (row[j] == i && col[j] == i)
            {
                x[i] = (B[i] - sum)/U[j];
                // Everything from j upwards is done; iter becomes -1 once triplet 0 is consumed.
                iter = j-1;
                break;
            }
        }
    }
    return;
}
/**
 * Solve L*x = B by forward substitution, L being given as COO triplets.
 *
 * Rows are handled from 0 to N-1 with a single forward scan of the triplets: entries of
 * the current row with col < row are accumulated, the entry with col == row is the pivot.
 */
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
    size_t r, k;

    // Start from a zero solution
    for (r = 0; r < N; r++) x[r] = 0.0;

    // First triplet that has not been consumed by a finished row
    size_t next = 0;
    for (r = 0; r < N; r++)
    {
        double acc = 0.0;
        k = next;
        while (k < nz_size)
        {
            if (row[k] == r)
            {
                if (col[k] < r)
                {
                    acc += L[k] * x[col[k]];
                }
                else if (col[k] == r)
                {
                    // Pivot found: finish this row and resume after it for the next one
                    x[r] = (B[r] - acc)/L[k];
                    next = k+1;
                    break;
                }
            }
            k++;
        }
    }
}
/**
 * Count the stored diagonal entries whose value is non-zero and report whether that count equals N.
 */
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size)
{
    size_t diag_count = 0;
    size_t k = 0;
    while (k < nz_size)
    {
        const bool on_diagonal = (row[k] == col[k]);
        // The value is only inspected for diagonal entries
        if (on_diagonal && M[k] != 0.0) diag_count++;
        k++;
    }
    return diag_count == N;
}

110
src/lib/preconditioner.h Normal file
View File

@ -0,0 +1,110 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _PRECONDITIONER_H
#define _PRECONDITIONER_H
#include "algebra.h"
/**
* @brief Return the number of non-zero elements in the lower triangular part (diagonal included) of the input matrix.
*
* @param row[in] Row index of the input sparse matrix.
* @param col[in] Column index of the input sparse matrix.
* @param nz_size[in] Number of non-zero elements.
* @param lnz_size[out] Number of non-zero elements in the lower triangle.
*/
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size);
/**
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
*
* @note Only the factorized lower triangular matrix is stored in the output matrix.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param val Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @param lnz_size Number of non-zero elements in the lower triangle.
* @param IC_row Row index of the factorized triangular sparse matrix.
* @param IC_col Column index of the factorized triangular sparse matrix.
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
*/
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
/**
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
*
* @note The factorized lower and upper triangular matrices are stored in the lower and upper triangular parts of the output matrix, respectively.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param val Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @param IC_row Row index of the factorized sparse matrix.
* @param IC_col Column index of the factorized sparse matrix.
* @param IC_val Non-zero values of the factorized sparse matrix.
*/
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
/**
* @brief Solve the linear system Ux = B, in which U is an upper triangular matrix.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param U Non-zero values of the input sparse matrix.
* @param B Right-hand side vector.
* @param x The returned solution.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
*/
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size);
/**
* @brief Solve the linear system Lx = B, in which L is a lower triangular matrix.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param L Non-zero values of the input sparse matrix.
* @param B Right-hand side vector.
* @param x The returned solution.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
*/
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size);
/**
* @brief Check whether a square matrix has full rank. The sparse matrix is stored in the COO format.
*
* @note The implementation in preconditioner.cpp counts the stored diagonal entries with a non-zero value and compares that count with N.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param M Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @return true The matrix has full rank.
* @return false The matrix does not have full rank.
*/
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size);
#endif // _PRECONDITIONER_H

View File

@ -0,0 +1,421 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "preconditioner_cuda.h"
#include "map"
/**
 * Count the entries located on or below the main diagonal (row >= col) and store the
 * count in *lnz_size.
 */
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size)
{
    size_t lower_count = 0;
    size_t k = 0;
    while (k < nz_size)
    {
        // An entry belongs to the lower triangle when its row is not above its column
        lower_count += (row[k] >= col[k]) ? 1 : 0;
        ++k;
    }
    *lnz_size = lower_count;
}
/**
 * Incomplete Cholesky factorization of a single-precision complex sparse matrix stored in
 * the COO format. Only the lower triangle (diagonal included) of the input is copied to
 * the output and factorized in place on its existing non-zero pattern.
 *
 * Products are formed with cuCmulf() without conjugation. The algorithm relies on the
 * entries being ordered by row and, inside a row, by column, so that a row is finalized
 * when its diagonal entry is reached.
 *
 * lnz_size is the number of lower-triangle entries; IC_row, IC_col and IC_val must hold
 * at least that many elements.
 */
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size,
    int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val)
{
    // Nothing to factorize. This also keeps the unconditional accesses to element 0 below in bounds.
    if (N <= 0 || nz_size <= 0 || lnz_size <= 0) return;

    // We use this to store diagonal elements of the factorized lower triangular matrix
    cuComplex *diagonal = new cuComplex [N];
    // A temporary (dense) row
    cuComplex *tmp_row = new cuComplex [N];
    // Columns of the non-zero elements currently stored in tmp_row
    int *filled_idx = new int [N];
    // Beginning index of each row in the output matrix
    int *row_st_idx = new int [N];

    size_t i, j, f;
    // Set initial values
    for (i = 0; i < N; i++)
    {
        diagonal[i].x = 0.0f; diagonal[i].y = 0.0f;
        tmp_row[i].x = 0.0f; tmp_row[i].y = 0.0f;
        filled_idx[i] = -1;
        row_st_idx[i] = -1;
    }

    // Copy elements in the lower triangle to the output matrix
    j = 0;
    for (i = 0; i < nz_size; i++)
    {
        if (row[i] >= col[i])
        {
            IC_row[j] = row[i];
            IC_col[j] = col[i];
            IC_val[j] = val[i];
            j++;
        }
    }

    // Get the beginning index of each row in the matrix
    j = 1;
    row_st_idx[0] = IC_row[0];
    size_t old_row = IC_row[0];
    for (i = 1; i < lnz_size; i++)
    {
        if (IC_row[i] > old_row)
        {
            row_st_idx[j] = i;
            old_row = IC_row[i];
            j++;
        }
    }

    // Calculate the first element
    IC_val[0] = clcg_Csqrt(IC_val[0]);
    diagonal[0] = IC_val[0];

    // Running sum of the squared off-diagonal values of the current row
    cuComplex dia_sum;
    dia_sum.x = 0.0f; dia_sum.y = 0.0f;

    // The first one is already calculated
    for (i = 1; i < lnz_size; i++)
    {
        // Calculate the first column if there is one
        if (IC_col[i] == 0)
        {
            IC_val[i] = cuCdivf(IC_val[i], IC_val[0]);
            dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));
            continue; // Case 1 break
        }

        // Calculate elements in the middle of a row
        if (IC_row[i] > IC_col[i])
        {
            // Scatter the already factorized part of row IC_col[i] into tmp_row
            f = 0;
            j = row_st_idx[IC_col[i]];
            while (IC_col[j] < IC_col[i])
            {
                tmp_row[IC_col[j]] = IC_val[j];
                filled_idx[f] = IC_col[j];
                f++;
                j++;
            }

            // Subtract the dot product of the two partial rows
            j = row_st_idx[IC_row[i]];
            while (IC_col[j] < IC_col[i])
            {
                IC_val[i] = clcg_Cdiff(IC_val[i], cuCmulf(IC_val[j], tmp_row[IC_col[j]]));
                j++;
            }

            IC_val[i] = cuCdivf(IC_val[i], diagonal[IC_col[i]]);
            dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));

            // Reset only the touched slots of tmp_row
            for (j = 0; j < f; j++)
            {
                tmp_row[filled_idx[j]].x = 0.0f; tmp_row[filled_idx[j]].y = 0.0f;
            }
            continue; // Case 2 break
        }

        // We have reached the diagonal position
        if (IC_row[i] == IC_col[i])
        {
            IC_val[i] = clcg_Csqrt(clcg_Cdiff(IC_val[i], dia_sum));
            diagonal[IC_col[i]] = IC_val[i];
            dia_sum.x = 0.0f; dia_sum.y = 0.0f;
        }
    }

    delete[] diagonal;
    delete[] tmp_row;
    delete[] row_st_idx;
    delete[] filled_idx;
    return;
}
/**
 * Incomplete Cholesky factorization of a double-precision complex sparse matrix stored in
 * the COO format. Only the lower triangle (diagonal included) of the input is copied to
 * the output and factorized in place on its existing non-zero pattern.
 *
 * Products are formed with cuCmul() without conjugation. The algorithm relies on the
 * entries being ordered by row and, inside a row, by column, so that a row is finalized
 * when its diagonal entry is reached.
 *
 * lnz_size is the number of lower-triangle entries; IC_row, IC_col and IC_val must hold
 * at least that many elements.
 */
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size,
    int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
{
    // Nothing to factorize. This also keeps the unconditional accesses to element 0 below in bounds.
    if (N <= 0 || nz_size <= 0 || lnz_size <= 0) return;

    // We use this to store diagonal elements of the factorized lower triangular matrix
    cuDoubleComplex *diagonal = new cuDoubleComplex [N];
    // A temporary (dense) row
    cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
    // Columns of the non-zero elements currently stored in tmp_row
    int *filled_idx = new int [N];
    // Beginning index of each row in the output matrix
    int *row_st_idx = new int [N];

    size_t i, j, f;
    // Set initial values
    for (i = 0; i < N; i++)
    {
        diagonal[i].x = 0.0; diagonal[i].y = 0.0;
        tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
        filled_idx[i] = -1;
        row_st_idx[i] = -1;
    }

    // Copy elements in the lower triangle to the output matrix
    j = 0;
    for (i = 0; i < nz_size; i++)
    {
        if (row[i] >= col[i])
        {
            IC_row[j] = row[i];
            IC_col[j] = col[i];
            IC_val[j] = val[i];
            j++;
        }
    }

    // Get the beginning index of each row in the matrix
    j = 1;
    row_st_idx[0] = IC_row[0];
    size_t old_row = IC_row[0];
    for (i = 1; i < lnz_size; i++)
    {
        if (IC_row[i] > old_row)
        {
            row_st_idx[j] = i;
            old_row = IC_row[i];
            j++;
        }
    }

    // Calculate the first element
    IC_val[0] = clcg_Zsqrt(IC_val[0]);
    diagonal[0] = IC_val[0];

    // Running sum of the squared off-diagonal values of the current row
    cuDoubleComplex dia_sum;
    dia_sum.x = 0.0; dia_sum.y = 0.0;

    // The first one is already calculated
    for (i = 1; i < lnz_size; i++)
    {
        // Calculate the first column if there is one
        if (IC_col[i] == 0)
        {
            IC_val[i] = cuCdiv(IC_val[i], IC_val[0]);
            dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));
            continue; // Case 1 break
        }

        // Calculate elements in the middle of a row
        if (IC_row[i] > IC_col[i])
        {
            // Scatter the already factorized part of row IC_col[i] into tmp_row
            f = 0;
            j = row_st_idx[IC_col[i]];
            while (IC_col[j] < IC_col[i])
            {
                tmp_row[IC_col[j]] = IC_val[j];
                filled_idx[f] = IC_col[j];
                f++;
                j++;
            }

            // Subtract the dot product of the two partial rows
            j = row_st_idx[IC_row[i]];
            while (IC_col[j] < IC_col[i])
            {
                IC_val[i] = clcg_Zdiff(IC_val[i], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
                j++;
            }

            IC_val[i] = cuCdiv(IC_val[i], diagonal[IC_col[i]]);
            dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));

            // Reset only the touched slots of tmp_row
            for (j = 0; j < f; j++)
            {
                tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
            }
            continue; // Case 2 break
        }

        // We have reached the diagonal position
        if (IC_row[i] == IC_col[i])
        {
            IC_val[i] = clcg_Zsqrt(clcg_Zdiff(IC_val[i], dia_sum));
            diagonal[IC_col[i]] = IC_val[i];
            dia_sum.x = 0.0; dia_sum.y = 0.0;
        }
    }

    delete[] diagonal;
    delete[] tmp_row;
    delete[] row_st_idx;
    delete[] filled_idx;
    return;
}
/**
 * Incomplete Cholesky factorization of a double-precision complex sparse matrix stored in
 * the COO format. The factor is computed on the existing non-zero pattern only. Every
 * value of the lower triangular factor is written in place and copied to the mirrored
 * upper-triangle entry (col, row) when the input holds such an entry.
 *
 * Products are formed with cuCmul() without conjugation. The algorithm relies on the
 * entries being ordered by row and, inside a row, by column, with element 0 being the
 * (0, 0) diagonal.
 *
 * IC_row, IC_col and IC_val must hold nz_size elements each.
 */
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
{
    // Nothing to factorize. This also keeps the unconditional accesses to element 0 below in bounds.
    if (N <= 0 || nz_size <= 0) return;

    // We use this to store diagonal elements of the factorized lower triangular matrix
    cuDoubleComplex *diagonal = new cuDoubleComplex [N];
    // A temporary (dense) row
    cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
    // Columns of the non-zero elements currently stored in tmp_row
    int *filled_idx = new int [N];
    // Beginning index of each row in the input matrix
    int *row_st_idx = new int [N];

    size_t i, j, f, l;
    // Set initial values
    for (i = 0; i < N; i++)
    {
        diagonal[i].x = 0.0; diagonal[i].y = 0.0;
        tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
        filled_idx[i] = -1;
        row_st_idx[i] = -1;
    }

    // Copy all elements to the output matrix
    for (i = 0; i < nz_size; i++)
    {
        IC_row[i] = row[i];
        IC_col[i] = col[i];
        IC_val[i] = val[i];
    }

    // Count the elements of the lower triangular part (diagonal included) and build a map
    // from the coordinate of every strictly-upper element to its index in the arrays.
    size_t order, L_nz = 0;
    std::map<size_t, size_t> index_map;
    std::map<size_t, size_t>::const_iterator mirror;
    for (i = 0; i < nz_size; i++)
    {
        if (row[i] >= col[i]) // Lower triangular part
        {
            L_nz++;
        }
        else // Only the upper triangular part needs to be mapped
        {
            // Widen before multiplying: N*row[i] evaluated in int overflows for large matrices.
            order = static_cast<size_t>(N)*row[i] + col[i];
            index_map[order] = i;
        }
    }

    // Indices of the elements that belong to the lower triangle
    j = 0;
    size_t *low_idx = new size_t [L_nz];
    for (i = 0; i < nz_size; i++)
    {
        if (row[i] >= col[i])
        {
            low_idx[j] = i;
            j++;
        }
    }

    // Get the beginning index of each row in the matrix
    j = 1;
    row_st_idx[0] = IC_row[0];
    size_t old_row = IC_row[0];
    for (i = 1; i < nz_size; i++)
    {
        if (IC_row[i] > old_row)
        {
            row_st_idx[j] = i;
            old_row = IC_row[i];
            j++;
        }
    }

    // Calculate the first element
    IC_val[0] = clcg_Zsqrt(IC_val[0]);
    diagonal[0] = IC_val[0];

    // Running sum of the squared off-diagonal values of the current row
    cuDoubleComplex dia_sum;
    dia_sum.x = 0.0; dia_sum.y = 0.0;

    // The first one is already calculated
    for (i = 1; i < L_nz; i++)
    {
        l = low_idx[i];

        // Calculate the first column if there is one
        if (IC_col[l] == 0)
        {
            IC_val[l] = cuCdiv(IC_val[l], IC_val[0]);
            dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));

            // Set value at the upper triangle: the mirror is (0, row), whose key is N*0 + row.
            // find() is used so that a missing mirror entry is skipped instead of clobbering IC_val[0].
            order = IC_row[l];
            mirror = index_map.find(order);
            if (mirror != index_map.end()) IC_val[mirror->second] = IC_val[l];
            continue; // Case 1 break
        }

        // Calculate elements in the middle of a row
        if (IC_row[l] > IC_col[l])
        {
            // Scatter the already factorized part of row IC_col[l] into tmp_row
            f = 0;
            j = row_st_idx[IC_col[l]];
            while (IC_col[j] < IC_col[l])
            {
                tmp_row[IC_col[j]] = IC_val[j];
                filled_idx[f] = IC_col[j];
                f++;
                j++;
            }

            // Subtract the dot product of the two partial rows
            j = row_st_idx[IC_row[l]];
            while (IC_col[j] < IC_col[l])
            {
                IC_val[l] = clcg_Zdiff(IC_val[l], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
                j++;
            }

            IC_val[l] = cuCdiv(IC_val[l], diagonal[IC_col[l]]);
            dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));

            // Set value at the upper triangle (mirror entry is (col, row))
            order = static_cast<size_t>(N)*IC_col[l] + IC_row[l];
            mirror = index_map.find(order);
            if (mirror != index_map.end()) IC_val[mirror->second] = IC_val[l];

            // Reset only the touched slots of tmp_row
            for (j = 0; j < f; j++)
            {
                tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
            }
            continue; // Case 2 break
        }

        // We have reached the diagonal position
        if (IC_row[l] == IC_col[l])
        {
            IC_val[l] = clcg_Zsqrt(clcg_Zdiff(IC_val[l], dia_sum));
            diagonal[IC_col[l]] = IC_val[l];
            dia_sum.x = 0.0; dia_sum.y = 0.0;
        }
    }

    delete[] diagonal;
    delete[] tmp_row;
    delete[] row_st_idx;
    delete[] filled_idx;
    delete[] low_idx;
    return;
}

View File

@ -0,0 +1,92 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _PRECONDITIONER_CUDA_H
#define _PRECONDITIONER_CUDA_H
#include "lcg_complex_cuda.h"
#ifdef LibLCG_CUDA
/**
* @brief Return the number of non-zero elements in the lower triangular part (diagonal included) of the input matrix.
*
* @param row[in] Row index of the input sparse matrix.
* @param col[in] Column index of the input sparse matrix.
* @param nz_size[in] Number of non-zero elements.
* @param lnz_size[out] Number of non-zero elements in the lower triangle.
*/
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size);
/**
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format (single-precision complex values).
*
* @note Only the factorized lower triangular matrix is stored in the output matrix.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param val Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @param lnz_size Number of non-zero elements in the lower triangle.
* @param IC_row Row index of the factorized triangular sparse matrix.
* @param IC_col Column index of the factorized triangular sparse matrix.
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
*/
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val);
/**
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format (double-precision complex values).
*
* @note Only the factorized lower triangular matrix is stored in the output matrix.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param val Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @param lnz_size Number of non-zero elements in the lower triangle.
* @param IC_row Row index of the factorized triangular sparse matrix.
* @param IC_col Column index of the factorized triangular sparse matrix.
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
*/
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
/**
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
*
* @note The factorized lower and upper triangular matrices are stored in the lower and upper triangular parts of the output matrix, respectively.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param val Non-zero values of the input sparse matrix.
* @param N Row/Column size of the sparse matrix.
* @param nz_size Number of non-zero elements.
* @param IC_row Row index of the factorized sparse matrix.
* @param IC_col Column index of the factorized sparse matrix.
* @param IC_val Non-zero values of the factorized sparse matrix.
*/
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
#endif // LibLCG_CUDA
#endif // _PRECONDITIONER_CUDA_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,159 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _PRECONDITIONER_EIGEN_H
#define _PRECONDITIONER_EIGEN_H
#include "complex"
#include "Eigen/Dense"
#include "Eigen/SparseCore"
/**
* @brief Perform the Cholesky decomposition and return the lower triangular matrix.
*
* @note This could serve as a direct solver.
*
* @param[in] A The input matrix. Must be full rank and symmetric (i.e. A = A^T).
* @param[out] L The output lower triangular matrix.
*/
void lcg_Cholesky(const Eigen::MatrixXd &A, Eigen::MatrixXd &L);
/**
* @brief Perform the Cholesky decomposition of a complex matrix and return the lower triangular matrix.
*
* @note This could serve as a direct solver.
*
* @param[in] A The input matrix. Must be full rank and symmetric (i.e. A = A^T).
* @param[out] L The output lower triangular matrix.
*/
void clcg_Cholesky(const Eigen::MatrixXcd &A, Eigen::MatrixXcd &L);
/**
* @brief Calculate the inverse of a lower triangular matrix (full rank only).
*
* @param[in] L The lower triangular matrix to invert.
* @param[out] Linv The inverted lower triangular matrix.
*/
void lcg_invert_lower_triangle(const Eigen::MatrixXd &L, Eigen::MatrixXd &Linv);
/**
* @brief Calculate the inverse of an upper triangular matrix (full rank only).
*
* @param[in] U The upper triangular matrix to invert.
* @param[out] Uinv The inverted upper triangular matrix.
*/
void lcg_invert_upper_triangle(const Eigen::MatrixXd &U, Eigen::MatrixXd &Uinv);
/**
* @brief Calculate the inverse of a complex lower triangular matrix (full rank only).
*
* @param[in] L The lower triangular matrix to invert.
* @param[out] Linv The inverted lower triangular matrix.
*/
void clcg_invert_lower_triangle(const Eigen::MatrixXcd &L, Eigen::MatrixXcd &Linv);
/**
* @brief Calculate the inverse of a complex upper triangular matrix (full rank only).
*
* @param[in] U The upper triangular matrix to invert.
* @param[out] Uinv The inverted upper triangular matrix.
*/
void clcg_invert_upper_triangle(const Eigen::MatrixXcd &U, Eigen::MatrixXcd &Uinv);
/**
* @brief Calculate the incomplete Cholesky decomposition and return the lower triangular matrix.
*
* @param[in] A The input sparse matrix. Must be full rank and symmetric (i.e. A = A^T).
* @param[out] L The output lower triangular matrix.
* @param[in] fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
*/
void lcg_incomplete_Cholesky(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, size_t fill = 0);
/**
* @brief Calculate the incomplete Cholesky decomposition of a complex sparse matrix and return the lower triangular matrix.
*
* @param[in] A The input sparse matrix. Must be full rank and symmetric (i.e. A = A^T).
* @param[out] L The output lower triangular matrix.
* @param[in] fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
*/
void clcg_incomplete_Cholesky(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, size_t fill = 0);
/**
* @brief Calculate the incomplete LU factorization.
*
* @param[in] A The input sparse matrix. Must be full rank.
* @param[out] L The output lower triangular matrix.
* @param[out] U The output upper triangular matrix.
* @param[in] fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
*/
void lcg_incomplete_LU(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, Eigen::SparseMatrix<double, Eigen::RowMajor> &U, size_t fill = 0);
/**
* @brief Calculate the incomplete LU factorization of a complex sparse matrix.
*
* @param[in] A The input sparse matrix. Must be full rank.
* @param[out] L The output lower triangular matrix.
* @param[out] U The output upper triangular matrix.
* @param[in] fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
*/
void clcg_incomplete_LU(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L,
Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, size_t fill = 0);
/**
* @brief Solve the linear system Lx = B, in which L is a lower triangular matrix.
*
* @param[in] L The input lower triangular matrix.
* @param[in] B The right-hand side vector.
* @param[out] X The solution vector.
*/
void lcg_solve_lower_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &L, const Eigen::VectorXd &B, Eigen::VectorXd &X);
/**
* @brief Solve the linear system Ux = B, in which U is an upper triangular matrix.
*
* @param[in] U The input upper triangular matrix.
* @param[in] B The right-hand side vector.
* @param[out] X The solution vector.
*/
void lcg_solve_upper_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &U, const Eigen::VectorXd &B, Eigen::VectorXd &X);
/**
* @brief Solve the complex linear system Lx = B, in which L is a lower triangular matrix.
*
* @param[in] L The input lower triangular matrix.
* @param[in] B The right-hand side vector.
* @param[out] X The solution vector.
*/
void clcg_solve_lower_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
/**
* @brief Solve the complex linear system Ux = B, in which U is an upper triangular matrix.
*
* @param[in] U The input upper triangular matrix.
* @param[in] B The right-hand side vector.
* @param[out] X The solution vector.
*/
void clcg_solve_upper_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
#endif // _PRECONDITIONER_EIGEN_H

311
src/lib/solver.cpp Normal file
View File

@ -0,0 +1,311 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "solver.h"
#include "ctime"
#include "iostream"
#include "config.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
// Build a solver with the library default parameters, a report interval of one
// iteration and progress reporting enabled.
LCG_Solver::LCG_Solver()
{
    silent_ = false;
    inter_ = 1;
    param_ = lcg_default_parameters();
}
// Progress callback: print the iteration number and the convergence value to std::clog
// on every inter_-th iteration, and also once the convergence value has dropped to
// param->epsilon or below. The line starts with '\r' so it overwrites the previous report.
// m and n_size are not used here. Always returns 0.
int LCG_Solver::Progress(const lcg_float* m, const lcg_float converge,
    const lcg_para *param, const int n_size, const int k)
{
    const bool on_interval = (inter_ > 0 && k%inter_ == 0);
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Switch the solver to silent mode (sets the silent_ flag).
void LCG_Solver::silent()
{
    silent_ = true;
}
// Store the report interval, in iterations, used by the progress callback.
void LCG_Solver::set_report_interval(unsigned int inter)
{
    inter_ = inter;
}
// Replace the solver parameters with a copy of in_param.
void LCG_Solver::set_lcg_parameter(const lcg_para &in_param)
{
    param_ = in_param;
}
// Run lcg_solver() on the system defined by _AxProduct.
//
// m, b, x_size and solver_id are forwarded unchanged to lcg_solver().
// In silent mode no progress callback is installed, nothing is timed and a negative
// return code is always reported through lcg_error_str(ret, true).
// Otherwise the call is timed, a "Solver: ... Time cost: ... ms" line is printed when
// er_throw is false, and the return code is reported through lcg_error_str(ret, er_throw)
// when verbose is set or when the code is negative.
void LCG_Solver::Minimize(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the solver
    // function is called through a function pointer.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = lcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Terminate the '\r'-style progress line before printing the summary
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_CG:
                std::clog << "Solver: CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_CGS:
                std::clog << "Solver: CGS. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_BICGSTAB:
                // Was "Times cost"; aligned with the wording of every other solver
                std::clog << "Solver: BICGSTAB. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_BICGSTAB2:
                std::clog << "Solver: BICGSTAB2. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    // Errors are always reported; the status of a successful run only when verbose is set
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the preconditioned minimizing process
 *
 * Same flow as Minimize(), but calls lcg_solver_preconditioned() and also
 * supplies the _MxProduct callback.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_Solver::MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    double end = omp_get_wtime();
    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the constrained minimizing process
 *
 * Same flow as Minimize(), but calls lcg_solver_constrained() and passes the
 * lower and upper bound arrays along.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param low       Lower bound of the solution vector
 * @param hig       Higher bound of the solution vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_Solver::MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
    const lcg_float *hig, int x_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_constrained(_AxProduct, nullptr, m, b, low, hig, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver_constrained(_AxProduct, _Progress, m, b, low, hig, x_size, &param_, this, solver_id);
    double end = omp_get_wtime();
    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = lcg_solver_constrained(_AxProduct, _Progress, m, b, low, hig, x_size, &param_, this, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_PG:
                std::clog << "Solver: PG-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_SPG:
                std::clog << "Solver: SPG-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
// Start from the parameters returned by clcg_default_parameters(), report
// progress on every iteration and leave console output enabled.
CLCG_Solver::CLCG_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
/**
 * @brief Default progress monitor of the complex solver.
 *
 * Writes the iteration count and the current convergence value to std::clog
 * (prefixed with a carriage return) when k is a multiple of the report
 * interval, or otherwise when converge is not larger than param->epsilon.
 * The arguments m and n_size are not used by this implementation.
 *
 * @return int Always 0
 */
int CLCG_Solver::Progress(const lcg_complex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int k)
{
    const bool on_interval = (inter_ > 0) && (k%inter_ == 0);
    // Short-circuit: param is only consulted when the interval test did not fire.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
void CLCG_Solver::silent()
{
silent_ = true;
return;
}
void CLCG_Solver::set_report_interval(unsigned int inter)
{
inter_ = inter;
return;
}
void CLCG_Solver::set_clcg_parameter(const clcg_para &in_param)
{
param_ = in_param;
return;
}
/**
 * @brief Run the minimizing process
 *
 * In silent mode the solver is called without a progress callback and a
 * negative return code is handed to clcg_error_str() with er_throw forced to
 * true. Otherwise the call is timed, a one-line summary is written to
 * std::clog (only when er_throw is false) and the return code is passed to
 * clcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_Solver::Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = clcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    double end = omp_get_wtime();
    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = clcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case CLCG_BICG:
                std::clog << "Solver: Bi-CG. Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_BICG_SYM:
                std::clog << "Solver: Bi-CG (symmetrically accelerated). Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_CGS:
                std::clog << "Solver: CGS. Times cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_TFQMR:
                std::clog << "Solver: TFQMR. Times cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Times cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}

285
src/lib/solver.h Normal file
View File

@ -0,0 +1,285 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _SOLVER_H
#define _SOLVER_H
#include "lcg.h"
#include "clcg.h"
/**
* @brief Linear conjugate gradient solver class
*/
class LCG_Solver
{
protected:
    lcg_para param_;     ///< Parameters handed to the solver routines
    unsigned int inter_; ///< Progress report interval, in iterations
    bool silent_;        ///< Flag raised by silent()

public:
    LCG_Solver();
    virtual ~LCG_Solver(){}

    /**
     * @brief Static adapter that forwards a C-style callback to the virtual AxProduct()
     *
     * @param instance Opaque pointer to the LCG_Solver object that receives the call
     * @param a[in]    Pointer of the multiplier
     * @param b[out]   Pointer of the product
     * @param num      Size of the array
     */
    static void _AxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
    {
        static_cast<LCG_Solver*>(instance)->AxProduct(a, b, num);
    }

    /**
     * @brief Virtual function of the product of A*x. Must be implemented by the derived class.
     *
     * @param a[in]  Pointer of the multiplier
     * @param b[out] Pointer of the product
     * @param num    Size of the array
     */
    virtual void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;

    /**
     * @brief Static adapter that forwards a C-style callback to the virtual MxProduct()
     *
     * @param instance Opaque pointer to the LCG_Solver object that receives the call
     * @param a[in]    Pointer of the multiplier
     * @param b[out]   Pointer of the product
     * @param num      Size of the array
     */
    static void _MxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
    {
        static_cast<LCG_Solver*>(instance)->MxProduct(a, b, num);
    }

    /**
     * @brief Virtual function of the product of M^-1*x. Must be implemented by the derived class.
     *
     * @param a[in]  Pointer of the multiplier
     * @param b[out] Pointer of the product
     * @param num    Size of the array
     */
    virtual void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;

    /**
     * @brief Static adapter that forwards a C-style callback to the virtual Progress()
     *
     * @param instance Opaque pointer to the LCG_Solver object that receives the call
     * @param m        Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param    Pointer of the parameters used in the algorithms
     * @param n_size   Size of the solution
     * @param k        Current iteration times
     * @return int     Status of the process
     */
    static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
        const lcg_para *param, const int n_size, const int k)
    {
        LCG_Solver* self = static_cast<LCG_Solver*>(instance);
        return self->Progress(m, converge, param, n_size, k);
    }

    /**
     * @brief Virtual function of the process monitoring. May be overridden by the derived class.
     *
     * @param m        Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param    Pointer of the parameters used in the algorithms
     * @param n_size   Size of the solution
     * @param k        Current iteration times
     * @return int     Status of the process
     */
    virtual int Progress(const lcg_float* m, const lcg_float converge,
        const lcg_para *param, const int n_size, const int k);

    /**
     * @brief Do not report any processes
     */
    void silent();

    /**
     * @brief Set the interval to run the process monitoring function
     *
     * @param inter The interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters of the algorithms
     *
     * @param in_param The input parameters
     */
    void set_lcg_parameter(const lcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param m         Pointer of the solution vector
     * @param b         Pointer of the targeting vector
     * @param x_size    Size of the solution vector
     * @param solver_id Solver type
     * @param verbose   Report more information of the full process
     * @param er_throw  Instead of showing error messages on screen, throw them out using std::exception
     */
    void Minimize(lcg_float *m, const lcg_float *b, int x_size,
        lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the preconditioned minimizing process
     *
     * @param m         Pointer of the solution vector
     * @param b         Pointer of the targeting vector
     * @param x_size    Size of the solution vector
     * @param solver_id Solver type
     * @param verbose   Report more information of the full process
     * @param er_throw  Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
        lcg_solver_enum solver_id = LCG_PCG, bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the constrained minimizing process
     *
     * @param m         Pointer of the solution vector
     * @param b         Pointer of the targeting vector
     * @param low       Lower bound of the solution vector
     * @param hig       Higher bound of the solution vector
     * @param x_size    Size of the solution vector
     * @param solver_id Solver type
     * @param verbose   Report more information of the full process
     * @param er_throw  Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
        const lcg_float *hig, int x_size, lcg_solver_enum solver_id = LCG_PG,
        bool verbose = true, bool er_throw = false);
};
/**
* @brief Complex linear conjugate gradient solver class
*/
class CLCG_Solver
{
protected:
    clcg_para param_;    ///< Parameters handed to the solver routines
    unsigned int inter_; ///< Progress report interval, in iterations
    bool silent_;        ///< Flag raised by silent()

public:
    CLCG_Solver();
    virtual ~CLCG_Solver(){}

    /**
     * @brief Static adapter that forwards a C-style callback to the virtual AxProduct()
     *
     * @param instance     Opaque pointer to the CLCG_Solver object that receives the call
     * @param x[in]        Pointer of the multiplier
     * @param prod_Ax[out] Pointer of the product
     * @param x_size       Size of the array
     * @param layout       Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate    Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    static void _AxProduct(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
        const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        static_cast<CLCG_Solver*>(instance)->AxProduct(x, prod_Ax, x_size, layout, conjugate);
    }

    /**
     * @brief Virtual function of the product of A*x. Must be implemented by the derived class.
     *
     * @param x[in]        Pointer of the multiplier
     * @param prod_Ax[out] Pointer of the product
     * @param x_size       Size of the array
     * @param layout       Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate    Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    virtual void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax,
        const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate) = 0;

    /**
     * @brief Static adapter that forwards a C-style callback to the virtual Progress()
     *
     * @param instance Opaque pointer to the CLCG_Solver object that receives the call
     * @param m        Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param    Pointer of the parameters used in the algorithms
     * @param n_size   Size of the solution
     * @param k        Current iteration times
     * @return int     Status of the process
     */
    static int _Progress(void* instance, const lcg_complex* m, const lcg_float converge,
        const clcg_para* param, const int n_size, const int k)
    {
        CLCG_Solver* self = static_cast<CLCG_Solver*>(instance);
        return self->Progress(m, converge, param, n_size, k);
    }

    /**
     * @brief Virtual function of the process monitoring. May be overridden by the derived class.
     *
     * @param m        Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param    Pointer of the parameters used in the algorithms
     * @param n_size   Size of the solution
     * @param k        Current iteration times
     * @return int     Status of the process
     */
    virtual int Progress(const lcg_complex* m, const lcg_float converge,
        const clcg_para* param, const int n_size, const int k);

    /**
     * @brief Do not report any processes
     */
    void silent();

    /**
     * @brief Set the interval to run the process monitoring function
     *
     * @param inter The interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters of the algorithms
     *
     * @param in_param The input parameters
     */
    void set_clcg_parameter(const clcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param m         Pointer of the solution vector
     * @param b         Pointer of the targeting vector
     * @param x_size    Size of the solution vector
     * @param solver_id Solver type
     * @param verbose   Report more information of the full process
     * @param er_throw  Instead of showing error messages on screen, throw them out using std::exception
     */
    void Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
        clcg_solver_enum solver_id = CLCG_CGS, bool verbose = true,
        bool er_throw = false);
};
#endif // _SOLVER_H

414
src/lib/solver_cuda.cu Normal file
View File

@ -0,0 +1,414 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "solver_cuda.h"
#include "cmath"
#include "ctime"
#include "iostream"
// Start from the parameters returned by lcg_default_parameters(), report
// progress on every iteration and leave console output enabled.
LCG_CUDA_Solver::LCG_CUDA_Solver()
    : param_(lcg_default_parameters()), inter_(1), silent_(false)
{
}
/**
 * @brief Default progress monitor of the CUDA solver.
 *
 * Writes the iteration count and the current convergence value to std::clog
 * (prefixed with a carriage return) when k is a multiple of the report
 * interval, or otherwise when converge is not larger than param->epsilon.
 * The arguments m, n_size and nz_size are not used by this implementation.
 *
 * @return int Always 0
 */
int LCG_CUDA_Solver::Progress(const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool on_interval = (inter_ > 0) && (k%inter_ == 0);
    // Short-circuit: param is only consulted when the interval test did not fire.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Raise the silent_ flag; the Minimize*() methods check it before reporting.
void LCG_CUDA_Solver::silent()
{
    this->silent_ = true;
}
// Remember the interval (in iterations) that Progress() uses for periodic reports.
void LCG_CUDA_Solver::set_report_interval(unsigned int inter)
{
    this->inter_ = inter;
}
// Replace the stored solver parameters with a copy of in_param.
void LCG_CUDA_Solver::set_lcg_parameter(const lcg_para &in_param)
{
    this->param_ = in_param;
}
/**
 * @brief Run the minimizing process
 *
 * In silent mode lcg_solver_cuda() is called without a progress callback and
 * a negative return code is handed to lcg_error_str() with er_throw forced to
 * true. Otherwise the call is timed with clock(), a one-line summary is
 * written to std::clog (only when er_throw is false) and the return code is
 * passed to lcg_error_str() when verbose is set or the code is negative.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = lcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_CG:
                std::clog << "Solver: CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case LCG_CGS:
                std::clog << "Solver: CGS. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the preconditioned minimizing process
 *
 * Same flow as Minimize(), but calls lcg_solver_preconditioned_cuda() and
 * also supplies the _MxProduct callback.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the constrained minimizing process
 *
 * Same flow as Minimize(), but calls lcg_solver_constrained_cuda() and passes
 * the low and hig bound arrays along.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param low        Lower bound of the solution vector
 * @param hig        Higher bound of the solution vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Non-zero size of the sparse kernel matrix
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
    const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id,
    bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_constrained_cuda(_AxProduct, nullptr, x, b, low, hig, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = lcg_solver_constrained_cuda(_AxProduct, _Progress, x, b, low, hig, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_PG:
                std::clog << "Solver: PG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
// Start from the parameters returned by clcg_default_parameters(), report
// progress on every iteration and leave console output enabled.
CLCG_CUDAF_Solver::CLCG_CUDAF_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
/**
 * @brief Default progress monitor of the single-precision complex CUDA solver.
 *
 * Writes the iteration count and the current convergence value to std::clog
 * (prefixed with a carriage return) when k is a multiple of the report
 * interval, or otherwise when converge is not larger than param->epsilon.
 * The arguments m, n_size and nz_size are not used by this implementation.
 *
 * @return int Always 0
 */
int CLCG_CUDAF_Solver::Progress(const cuComplex* m, const float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool on_interval = (inter_ > 0) && (k%inter_ == 0);
    // Short-circuit: param is only consulted when the interval test did not fire.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Raise the silent_ flag; the Minimize*() methods check it before reporting.
void CLCG_CUDAF_Solver::silent()
{
    this->silent_ = true;
}
// Remember the interval (in iterations) that Progress() uses for periodic reports.
void CLCG_CUDAF_Solver::set_report_interval(unsigned int inter)
{
    this->inter_ = inter;
}
// Replace the stored solver parameters with a copy of in_param.
void CLCG_CUDAF_Solver::set_clcg_parameter(const clcg_para &in_param)
{
    this->param_ = in_param;
}
/**
 * @brief Run the minimizing process (single-precision complex, CUDA)
 *
 * In silent mode clcg_solver_cuda() is called without a progress callback and
 * a negative return code is reported with er_throw forced to true. Otherwise
 * the call is timed with clock(), a one-line summary is written to std::clog
 * (only when er_throw is false) and the return code is reported when verbose
 * is set or the code is negative.
 *
 * Return codes are decoded with clcg_error_str(), as CLCG_Solver::Minimize
 * does for the complex solvers, rather than with the real-valued
 * lcg_error_str().
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Forwarded unchanged to clcg_solver_cuda()
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDAF_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    float costime = static_cast<float>(1000.0*(end-start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case CLCG_BICG:
                std::clog << "Solver: BI-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_BICG_SYM:
                std::clog << "Solver: BI-CG (symmetrically accelerated). Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the preconditioned minimizing process (single-precision complex, CUDA)
 *
 * Same flow as Minimize(), but calls clcg_solver_preconditioned_cuda() and
 * also supplies the _MxProduct callback. Return codes are decoded with
 * clcg_error_str(), as CLCG_Solver::Minimize does for the complex solvers.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Forwarded unchanged to clcg_solver_preconditioned_cuda()
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDAF_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    float costime = static_cast<float>(1000.0*(end-start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case CLCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
// Start from the parameters returned by clcg_default_parameters(), report
// progress on every iteration and leave console output enabled.
CLCG_CUDA_Solver::CLCG_CUDA_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
/**
 * @brief Default progress monitor of the double-precision complex CUDA solver.
 *
 * Writes the iteration count and the current convergence value to std::clog
 * (prefixed with a carriage return) when k is a multiple of the report
 * interval, or otherwise when converge is not larger than param->epsilon.
 * The arguments m, n_size and nz_size are not used by this implementation.
 *
 * @return int Always 0
 */
int CLCG_CUDA_Solver::Progress(const cuDoubleComplex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool on_interval = (inter_ > 0) && (k%inter_ == 0);
    // Short-circuit: param is only consulted when the interval test did not fire.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Raise the silent_ flag; the Minimize*() methods check it before reporting.
void CLCG_CUDA_Solver::silent()
{
    this->silent_ = true;
}
// Remember the interval (in iterations) that Progress() uses for periodic reports.
void CLCG_CUDA_Solver::set_report_interval(unsigned int inter)
{
    this->inter_ = inter;
}
// Replace the stored solver parameters with a copy of in_param.
void CLCG_CUDA_Solver::set_clcg_parameter(const clcg_para &in_param)
{
    this->param_ = in_param;
}
/**
 * @brief Run the minimizing process (double-precision complex, CUDA)
 *
 * In silent mode clcg_solver_cuda() is called without a progress callback and
 * a negative return code is reported with er_throw forced to true. Otherwise
 * the call is timed with clock(), a one-line summary is written to std::clog
 * (only when er_throw is false) and the return code is reported when verbose
 * is set or the code is negative.
 *
 * Return codes are decoded with clcg_error_str(), as CLCG_Solver::Minimize
 * does for the complex solvers, rather than with the real-valued
 * lcg_error_str().
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Forwarded unchanged to clcg_solver_cuda()
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case CLCG_BICG:
                std::clog << "Solver: BI-CG. Time cost: " << costime << " ms" << std::endl;
                break;
            case CLCG_BICG_SYM:
                std::clog << "Solver: BI-CG (symmetrically accelerated). Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
/**
 * @brief Run the preconditioned minimizing process (double-precision complex, CUDA)
 *
 * Same flow as Minimize(), but calls clcg_solver_preconditioned_cuda() and
 * also supplies the _MxProduct callback. Return codes are decoded with
 * clcg_error_str(), as CLCG_Solver::Minimize does for the complex solvers.
 *
 * @param cub_handle Handler of the CuBLAS library
 * @param cus_handle Handler of the CuSparse library
 * @param x          Pointer of the solution vector
 * @param b          Pointer of the targeting vector
 * @param n_size     Size of the solution vector
 * @param nz_size    Forwarded unchanged to clcg_solver_preconditioned_cuda()
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver is invoked through function pointers.
    clock_t start = clock();
    int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    clock_t end = clock();
    // Scale in floating point: 1000*(end-start) evaluated in clock_t can
    // overflow on platforms where clock_t is a 32-bit integer.
    lcg_float costime = 1000.0*(end-start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        std::clog << std::endl;
        switch (solver_id)
        {
            case CLCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}

545
src/lib/solver_cuda.h Normal file
View File

@ -0,0 +1,545 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _SOLVER_CUDA_H
#define _SOLVER_CUDA_H
#include "lcg_cuda.h"
#include "clcg_cuda.h"
#include "clcg_cudaf.h"
#ifdef LibLCG_CUDA
/**
* @brief Linear conjugate gradient solver class
*/
class LCG_CUDA_Solver
{
protected:
lcg_para param_;
unsigned int inter_;
bool silent_;
public:
LCG_CUDA_Solver();
virtual ~LCG_CUDA_Solver(){}
/**
* @brief Interface of the virtual function of the product of A*x
*
* @param instance User data sent to identify the function address
* @param cub_handle Handler of the CuBLAS library
* @param cus_handle Handler of the CuSparse library
* @param x[in] Pointer of the multiplier
* @param prod_Ax[out] Pointer of the product
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
*/
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    // instance is the opaque user pointer; recover the solver object and dispatch virtually.
    LCG_CUDA_Solver* self = static_cast<LCG_CUDA_Solver*>(instance);
    self->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size);
}
/**
* @brief Virtual function of the product of A*x
*
* @param cub_handle Handler of the CuBLAS library
* @param cus_handle Handler of the CuSparse library
* @param x[in] Pointer of the multiplier
* @param prod_Ax[out] Pointer of the product
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
*/
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size) = 0;
/**
* @brief Interface of the virtual function of the product of M^-1*x
*
* @param instance User data sent to identify the function address
* @param cub_handle Handler of the CuBLAS library
* @param cus_handle Handler of the CuSparse library
* @param x[in] Pointer of the multiplier
* @param prod_Mx[out] Pointer of the product
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
*/
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size)
{
    // Dispatch to the preconditioner product MxProduct(). The previous code
    // called AxProduct() here, so the preconditioner callback applied A
    // instead of M^-1.
    return reinterpret_cast<LCG_CUDA_Solver*>(instance)->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size);
}
/**
* @brief Virtual function of the product of M^-1*x
*
* @param cub_handle Handler of the CuBLAS library
* @param cus_handle Handler of the CuSparse library
* @param x[in] Pointer of the multiplier
* @param prod_Mx[out] Pointer of the product
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
*/
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size) = 0;
/**
* @brief Interface of the virtual function of the process monitoring
*
* @param instance User data sent to identify the function address
* @param m Pointer of the current solution
* @param converge Current value of the convergence
* @param param Pointer of the parameters used in the algorithms
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
* @param k Current iteration times
* @return int Status of the process
*/
static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    // instance is the opaque user pointer; recover the solver object and dispatch virtually.
    LCG_CUDA_Solver* self = static_cast<LCG_CUDA_Solver*>(instance);
    return self->Progress(m, converge, param, n_size, nz_size, k);
}
/**
* @brief Virtual function of the process monitoring
*
* @param m Pointer of the current solution
* @param converge Current value of the convergence
* @param param Pointer of the parameters used in the algorithms
* @param n_size Size of the solution
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm. It is passed for CUDA usage
* @param k Current iteration times
* @return int Status of the process
*/
virtual int Progress(const lcg_float* m, const lcg_float converge,
const lcg_para* param, const int n_size, const int nz_size, const int k);
/**
* @brief Switch off the reporting of the solving process
*/
void silent();
/**
* @brief Set the interval (counted in iterations) at which the process monitoring function reports
*
* @param inter The interval
*/
void set_report_interval(unsigned int inter);
/**
* @brief Set the parameters used by the algorithms
*
* @param in_param The input parameters
*/
void set_lcg_parameter(const lcg_para &in_param);
/**
* @brief Run the minimizing process (no preconditioner, no bound constraints)
*
* @param cub_handle Handle of the cuBLAS library
* @param cus_handle Handle of the cuSPARSE library
* @param x Pointer of the solution vector
* @param b Pointer of the targeting vector
* @param n_size Size of the solution vector
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm itself. It is passed along for CUDA usage
* @param solver_id Solver type (LCG_CG by default)
* @param verbose Report more information of the full process
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
*/
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
/**
* @brief Run the preconditioned minimizing process
*
* NOTE(review): the default solver_id here is LCG_CG, whereas
* LCG_EIGEN_Solver::MinimizePreconditioned() defaults to LCG_PCG. Confirm that
* the preconditioned CUDA solver accepts LCG_CG, otherwise callers should pass
* the solver type explicitly.
*
* @param cub_handle Handle of the cuBLAS library
* @param cus_handle Handle of the cuSPARSE library
* @param x Pointer of the solution vector
* @param b Pointer of the targeting vector
* @param n_size Size of the solution vector
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm itself. It is passed along for CUDA usage
* @param solver_id Solver type (LCG_CG by default)
* @param verbose Report more information of the full process
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
*/
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
/**
* @brief Run the constrained minimizing process, in which the solution is bounded by low and hig
*
* @param cub_handle Handle of the cuBLAS library
* @param cus_handle Handle of the cuSPARSE library
* @param x Pointer of the solution vector
* @param b Pointer of the targeting vector
* @param low Pointer of the lower bound of the solution vector
* @param hig Pointer of the higher bound of the solution vector
* @param n_size Size of the solution vector
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not needed by the algorithm itself. It is passed along for CUDA usage
* @param solver_id Solver type (LCG_PG by default)
* @param verbose Report more information of the full process
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
*/
void MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_PG,
bool verbose = true, bool er_throw = false);
};
/**
 * @brief Solver class of the complex linear conjugate gradient methods that
 * works on cuComplex vectors. Derive from it and implement AxProduct() and
 * MxProduct(); the static underscore-prefixed functions are trampolines that
 * forward a void* user-data pointer back to those virtual functions.
 */
class CLCG_CUDAF_Solver
{
protected:
    clcg_para param_;    // parameters of the algorithms
    unsigned int inter_; // interval of the process report
    bool silent_;        // flag that switches off the process report

public:
    CLCG_CUDAF_Solver();
    virtual ~CLCG_CUDAF_Solver(){}

    /**
     * @brief Static trampoline of the product of A*x. Forwards to the virtual AxProduct().
     *
     * @param instance      User data; it is cast back to a CLCG_CUDAF_Solver pointer
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Ax  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        CLCG_CUDAF_Solver* const self = static_cast<CLCG_CUDAF_Solver*>(instance);
        self->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
    }

    /**
     * @brief Pure virtual function of the product of A*x
     *
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Ax  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;

    /**
     * @brief Static trampoline of the product of M^-1*x. Forwards to the virtual MxProduct().
     *
     * @param instance      User data; it is cast back to a CLCG_CUDAF_Solver pointer
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Mx  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        CLCG_CUDAF_Solver* const self = static_cast<CLCG_CUDAF_Solver*>(instance);
        self->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
    }

    /**
     * @brief Pure virtual function of the product of M^-1*x
     *
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Mx  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
        const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;

    /**
     * @brief Static trampoline of the process monitoring. Forwards to the virtual Progress().
     *
     * @param instance  User data; it is cast back to a CLCG_CUDAF_Solver pointer
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param n_size    Size of the solution
     * @param nz_size   Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param k         Current iteration times
     * @return int      Whatever Progress() returns (status of the process)
     */
    static int _Progress(void* instance, const cuComplex* m, const float converge,
        const clcg_para* param, const int n_size, const int nz_size, const int k)
    {
        CLCG_CUDAF_Solver* const self = static_cast<CLCG_CUDAF_Solver*>(instance);
        return self->Progress(m, converge, param, n_size, nz_size, k);
    }

    /**
     * @brief Virtual function of the process monitoring. It is not pure virtual,
     * so overriding it is optional.
     *
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param n_size    Size of the solution
     * @param nz_size   Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param k         Current iteration times
     * @return int      Status of the process
     */
    virtual int Progress(const cuComplex* m, const float converge,
        const clcg_para* param, const int n_size, const int nz_size, const int k);

    /**
     * @brief Switch off the reporting of the solving process
     */
    void silent();

    /**
     * @brief Set the interval at which the process monitoring function reports
     *
     * @param inter The interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters used by the algorithms
     *
     * @param in_param The input parameters
     */
    void set_clcg_parameter(const clcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param cub_handle  Handle of the cuBLAS library
     * @param cus_handle  Handle of the cuSPARSE library
     * @param x           Pointer of the solution vector
     * @param b           Pointer of the targeting vector
     * @param n_size      Size of the solution vector
     * @param nz_size     Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param solver_id   Solver type (CLCG_BICG by default)
     * @param verbose     Report more information of the full process
     * @param er_throw    Instead of showing error messages on screen, throw them out using std::exception
     */
    void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
        const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the preconditioned minimizing process
     *
     * @param cub_handle  Handle of the cuBLAS library
     * @param cus_handle  Handle of the cuSPARSE library
     * @param x           Pointer of the solution vector
     * @param b           Pointer of the targeting vector
     * @param n_size      Size of the solution vector
     * @param nz_size     Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param solver_id   Solver type (CLCG_PCG by default)
     * @param verbose     Report more information of the full process
     * @param er_throw    Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
        const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
};
/**
 * @brief Solver class of the complex linear conjugate gradient methods that
 * works on cuDoubleComplex vectors. Derive from it and implement AxProduct()
 * and MxProduct(); the static underscore-prefixed functions are trampolines
 * that forward a void* user-data pointer back to those virtual functions.
 */
class CLCG_CUDA_Solver
{
protected:
    clcg_para param_;    // parameters of the algorithms
    unsigned int inter_; // interval of the process report
    bool silent_;        // flag that switches off the process report

public:
    CLCG_CUDA_Solver();
    virtual ~CLCG_CUDA_Solver(){}

    /**
     * @brief Static trampoline of the product of A*x. Forwards to the virtual AxProduct().
     *
     * @param instance      User data; it is cast back to a CLCG_CUDA_Solver pointer
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Ax  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        CLCG_CUDA_Solver* const self = static_cast<CLCG_CUDA_Solver*>(instance);
        self->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
    }

    /**
     * @brief Pure virtual function of the product of A*x
     *
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Ax  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;

    /**
     * @brief Static trampoline of the product of M^-1*x. Forwards to the virtual MxProduct().
     *
     * @param instance      User data; it is cast back to a CLCG_CUDA_Solver pointer
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Mx  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        CLCG_CUDA_Solver* const self = static_cast<CLCG_CUDA_Solver*>(instance);
        self->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
    }

    /**
     * @brief Pure virtual function of the product of M^-1*x
     *
     * @param cub_handle    Handle of the cuBLAS library
     * @param cus_handle    Handle of the cuSPARSE library
     * @param[in] x         cuSPARSE dense-vector descriptor of the multiplier
     * @param[out] prod_Mx  cuSPARSE dense-vector descriptor of the product
     * @param n_size        Size of the solution
     * @param nz_size       Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param oper_t        cuSPARSE operation type. Not needed by the algorithm itself; passed along for CUDA usage
     */
    virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
        const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;

    /**
     * @brief Static trampoline of the process monitoring. Forwards to the virtual Progress().
     *
     * @param instance  User data; it is cast back to a CLCG_CUDA_Solver pointer
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param n_size    Size of the solution
     * @param nz_size   Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param k         Current iteration times
     * @return int      Whatever Progress() returns (status of the process)
     */
    static int _Progress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
        const clcg_para* param, const int n_size, const int nz_size, const int k)
    {
        CLCG_CUDA_Solver* const self = static_cast<CLCG_CUDA_Solver*>(instance);
        return self->Progress(m, converge, param, n_size, nz_size, k);
    }

    /**
     * @brief Virtual function of the process monitoring. It is not pure virtual,
     * so overriding it is optional.
     *
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param n_size    Size of the solution
     * @param nz_size   Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param k         Current iteration times
     * @return int      Status of the process
     */
    virtual int Progress(const cuDoubleComplex* m, const lcg_float converge,
        const clcg_para* param, const int n_size, const int nz_size, const int k);

    /**
     * @brief Switch off the reporting of the solving process
     */
    void silent();

    /**
     * @brief Set the interval at which the process monitoring function reports
     *
     * @param inter The interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters used by the algorithms
     *
     * @param in_param The input parameters
     */
    void set_clcg_parameter(const clcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param cub_handle  Handle of the cuBLAS library
     * @param cus_handle  Handle of the cuSPARSE library
     * @param x           Pointer of the solution vector
     * @param b           Pointer of the targeting vector
     * @param n_size      Size of the solution vector
     * @param nz_size     Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param solver_id   Solver type (CLCG_BICG by default)
     * @param verbose     Report more information of the full process
     * @param er_throw    Instead of showing error messages on screen, throw them out using std::exception
     */
    void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
        const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the preconditioned minimizing process
     *
     * @param cub_handle  Handle of the cuBLAS library
     * @param cus_handle  Handle of the cuSPARSE library
     * @param x           Pointer of the solution vector
     * @param b           Pointer of the targeting vector
     * @param n_size      Size of the solution vector
     * @param nz_size     Non-zero size of the sparse kernel matrix. Not needed by the algorithm itself; passed along for CUDA usage
     * @param solver_id   Solver type (CLCG_PCG by default)
     * @param verbose     Report more information of the full process
     * @param er_throw    Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
        const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
};
#endif // LibLCG_CUDA
#endif // _SOLVER_CUDA_H

365
src/lib/solver_eigen.cpp Normal file
View File

@ -0,0 +1,365 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "solver_eigen.h"
#include "cmath"
#include "ctime"
#include "iostream"
#include "config.h"
#ifdef LibLCG_OPENMP
#include "omp.h"
#endif
// Start from the library's default parameters, report on every iteration
// and keep the progress reporting switched on.
LCG_EIGEN_Solver::LCG_EIGEN_Solver()
    : param_(lcg_default_parameters()), inter_(1), silent_(false)
{
}
// Default progress monitor. It rewrites one status line on std::clog when the
// iteration count hits the report interval, or otherwise when the convergence
// value has dropped to param->epsilon or below. It always returns 0.
int LCG_EIGEN_Solver::Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
    const int k)
{
    // An interval of zero disables the periodic report (and avoids a modulo by zero).
    const bool on_interval = (inter_ > 0) && (k%inter_ == 0);

    // Short-circuit keeps param untouched whenever the interval already triggered.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Switch the solver to silent mode: the Minimize*() functions then run without
// a progress callback and without the timing summary.
void LCG_EIGEN_Solver::silent()
{
    silent_ = true;
}
// Store the report interval used by Progress(); zero turns the periodic report off.
void LCG_EIGEN_Solver::set_report_interval(unsigned int inter)
{
    inter_ = inter;
}
// Replace the stored algorithm parameters with a copy of in_param.
void LCG_EIGEN_Solver::set_lcg_parameter(const lcg_para &in_param)
{
    param_ = in_param;
}
// Solve for m with lcg_solver_eigen() and report the outcome.
//   m         solution vector (updated in place by the solver)
//   b         targeting vector
//   solver_id solver type forwarded to lcg_solver_eigen()
//   verbose   report the solver status even when it is not an error
//   er_throw  forwarded to lcg_error_str(); also suppresses the timing summary
void LCG_EIGEN_Solver::Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback and no summary. Negative status codes
    // go to lcg_error_str() with its second argument forced to true.
    if (silent_)
    {
        const int status = lcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    const double t_begin = omp_get_wtime();
#else
    const clock_t t_begin = clock();
#endif
    const int status = lcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - t_begin);
#else
    const lcg_float costime = 1000*(clock() - t_begin)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Pick the summary prefix for the selected solver.
        const char *banner = "Solver: Unknown. Time cost: ";
        switch (solver_id)
        {
            case LCG_CG:        banner = "Solver: CG. Time cost: "; break;
            case LCG_CGS:       banner = "Solver: CGS. Time cost: "; break;
            case LCG_BICGSTAB:  banner = "Solver: BICGSTAB. Times cost: "; break;
            case LCG_BICGSTAB2: banner = "Solver: BICGSTAB2. Time cost: "; break;
            default: break;
        }
        // The leading newline ends the '\r'-rewritten progress line.
        std::clog << std::endl;
        std::clog << banner << costime << " ms" << std::endl;
    }

    // Errors are always reported; other statuses only in verbose mode.
    if (verbose || status < 0) lcg_error_str(status, er_throw);
}
// Solve for m with lcg_solver_preconditioned_eigen() and report the outcome.
//   m         solution vector (updated in place by the solver)
//   b         targeting vector
//   solver_id solver type forwarded to lcg_solver_preconditioned_eigen()
//   verbose   report the solver status even when it is not an error
//   er_throw  forwarded to lcg_error_str(); also suppresses the timing summary
//
// Fix: the build without LibLCG_OPENMP used to call lcg_solver_eigen() here,
// which dropped the _MxProduct preconditioner callback. The solver call is now
// shared by both builds; only the timer differs.
void LCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback and no summary. Negative status codes
    // go to lcg_error_str() with its second argument forced to true.
    if (silent_)
    {
        int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
#else
    clock_t start = clock();
#endif
    int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
#ifdef LibLCG_OPENMP
    double end = omp_get_wtime();
    lcg_float costime = 1000*(end-start);
#else
    clock_t end = clock();
    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // The leading newline ends the '\r'-rewritten progress line.
        std::clog << std::endl;
        switch (solver_id)
        {
            case LCG_PCG:
                std::clog << "Solver: PCG. Time cost: " << costime << " ms" << std::endl;
                break;
            default:
                std::clog << "Solver: Unknown. Time cost: " << costime << " ms" << std::endl;
                break;
        }
    }

    // Errors are always reported; other statuses only in verbose mode.
    if (verbose) lcg_error_str(ret, er_throw);
    else if (ret < 0) lcg_error_str(ret, er_throw);
    return;
}
// Solve for m with lcg_solver_constrained_eigen() and report the outcome.
//   m         solution vector (updated in place by the solver)
//   B         targeting vector
//   low, hig  lower and higher bounds forwarded to the solver
//   solver_id solver type forwarded to lcg_solver_constrained_eigen()
//   verbose   report the solver status even when it is not an error
//   er_throw  forwarded to lcg_error_str(); also suppresses the timing summary
void LCG_EIGEN_Solver::MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
    const Eigen::VectorXd &hig, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback and no summary. Negative status codes
    // go to lcg_error_str() with its second argument forced to true.
    if (silent_)
    {
        const int status = lcg_solver_constrained_eigen(_AxProduct, nullptr, m, B, low, hig, &param_, this, solver_id);
        if (status < 0) lcg_error_str(status, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    const double t_begin = omp_get_wtime();
#else
    const clock_t t_begin = clock();
#endif
    const int status = lcg_solver_constrained_eigen(_AxProduct, _Progress, m, B, low, hig, &param_, this, solver_id);
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - t_begin);
#else
    const lcg_float costime = 1000*(clock() - t_begin)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Pick the summary prefix for the selected solver.
        const char *banner = "Solver: Unknown. Time cost: ";
        switch (solver_id)
        {
            case LCG_PG:  banner = "Solver: PG-CG. Time cost: "; break;
            case LCG_SPG: banner = "Solver: SPG-CG. Time cost: "; break;
            default: break;
        }
        // The leading newline ends the '\r'-rewritten progress line.
        std::clog << std::endl;
        std::clog << banner << costime << " ms" << std::endl;
    }

    // Errors are always reported; other statuses only in verbose mode.
    if (verbose || status < 0) lcg_error_str(status, er_throw);
}
// Start from the library's default complex-solver parameters, report on every
// iteration and keep the progress reporting switched on.
CLCG_EIGEN_Solver::CLCG_EIGEN_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
// Default progress monitor. It rewrites one status line on std::clog when the
// iteration count hits the report interval, or otherwise when the convergence
// value has dropped to param->epsilon or below. It always returns 0.
int CLCG_EIGEN_Solver::Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
    const int k)
{
    // An interval of zero disables the periodic report (and avoids a modulo by zero).
    const bool on_interval = (inter_ > 0) && ((k%inter_) == 0);

    // Short-circuit keeps param untouched whenever the interval already triggered.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
// Switch the solver to silent mode: the Minimize*() functions then run without
// a progress callback and without the timing summary.
void CLCG_EIGEN_Solver::silent()
{
    silent_ = true;
}
// Replace the stored algorithm parameters with a copy of in_param.
void CLCG_EIGEN_Solver::set_clcg_parameter(const clcg_para &in_param)
{
    param_ = in_param;
}
// Store the report interval used by Progress(); zero turns the periodic report off.
void CLCG_EIGEN_Solver::set_report_interval(unsigned int inter)
{
    inter_ = inter;
}
// Solve for m with clcg_solver_eigen() and report the outcome.
//   m         solution vector (updated in place by the solver)
//   b         targeting vector
//   solver_id solver type forwarded to clcg_solver_eigen()
//   verbose   report the solver status even when it is not an error
//   er_throw  forwarded to clcg_error_str(); also suppresses the timing summary
void CLCG_EIGEN_Solver::Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback and no summary. Negative status codes
    // go to clcg_error_str() with its second argument forced to true.
    if (silent_)
    {
        const int status = clcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
        if (status < 0) clcg_error_str(status, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    const double t_begin = omp_get_wtime();
#else
    const clock_t t_begin = clock();
#endif
    const int status = clcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - t_begin);
#else
    const lcg_float costime = 1000*(clock() - t_begin)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Pick the summary prefix for the selected solver.
        const char *banner = "Solver: Unknown. Time cost: ";
        switch (solver_id)
        {
            case CLCG_BICG:     banner = "Solver: BI-CG. Time cost: "; break;
            case CLCG_BICG_SYM: banner = "Solver: BI-CG (symmetrically accelerated). Time cost: "; break;
            case CLCG_CGS:      banner = "Solver: CGS. Time cost: "; break;
            case CLCG_TFQMR:    banner = "Solver: TFQMR. Times cost: "; break;
            case CLCG_PCG:      banner = "Solver: PCG. Times cost: "; break;
            case CLCG_PBICG:    banner = "Solver: PBICG. Times cost: "; break;
            default: break;
        }
        // The leading newline ends the '\r'-rewritten progress line.
        std::clog << std::endl;
        std::clog << banner << costime << " ms" << std::endl;
    }

    // Errors are always reported; other statuses only in verbose mode.
    if (verbose || status < 0) clcg_error_str(status, er_throw);
}
// Solve for m with clcg_solver_preconditioned_eigen() and report the outcome.
//   m         solution vector (updated in place by the solver)
//   b         targeting vector
//   solver_id solver type forwarded to clcg_solver_preconditioned_eigen()
//   verbose   report the solver status even when it is not an error
//   er_throw  forwarded to clcg_error_str(); also suppresses the timing summary
void CLCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback and no summary. Negative status codes
    // go to clcg_error_str() with its second argument forced to true.
    if (silent_)
    {
        const int status = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (status < 0) clcg_error_str(status, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    const double t_begin = omp_get_wtime();
#else
    const clock_t t_begin = clock();
#endif
    const int status = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - t_begin);
#else
    const lcg_float costime = 1000*(clock() - t_begin)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Pick the summary prefix for the selected solver.
        const char *banner = "Solver: Unknown. Time cost: ";
        switch (solver_id)
        {
            case CLCG_PCG:   banner = "Solver: PCG. Times cost: "; break;
            case CLCG_PBICG: banner = "Solver: PBICG. Times cost: "; break;
            default: break;
        }
        // The leading newline ends the '\r'-rewritten progress line.
        std::clog << std::endl;
        std::clog << banner << costime << " ms" << std::endl;
    }

    // Errors are always reported; other statuses only in verbose mode.
    if (verbose || status < 0) clcg_error_str(status, er_throw);
}

308
src/lib/solver_eigen.h Normal file
View File

@ -0,0 +1,308 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _SOLVER_EIGEN_H
#define _SOLVER_EIGEN_H
#include "lcg_eigen.h"
#include "clcg_eigen.h"
/**
 * @brief Solver class of the linear conjugate gradient methods that works on
 * Eigen vectors. Derive from it and implement AxProduct() and MxProduct(); the
 * static underscore-prefixed functions are trampolines that forward a void*
 * user-data pointer back to those virtual functions.
 */
class LCG_EIGEN_Solver
{
protected:
    lcg_para param_;     // parameters of the algorithms
    unsigned int inter_; // interval of the process report
    bool silent_;        // flag that switches off the process report

public:
    LCG_EIGEN_Solver();
    virtual ~LCG_EIGEN_Solver(){}

    /**
     * @brief Static trampoline of the product of A*x. Forwards to the virtual AxProduct().
     *
     * @param instance      User data; it is cast back to a LCG_EIGEN_Solver pointer
     * @param[in] x         The multiplier
     * @param[out] prod_Ax  The product
     */
    static void _AxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
    {
        LCG_EIGEN_Solver* const self = static_cast<LCG_EIGEN_Solver*>(instance);
        self->AxProduct(x, prod_Ax);
    }

    /**
     * @brief Pure virtual function of the product of A*x
     *
     * @param[in] x         The multiplier
     * @param[out] prod_Ax  The product
     */
    virtual void AxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax) = 0;

    /**
     * @brief Static trampoline of the product of M^-1*x. Forwards to the virtual MxProduct().
     *
     * @param instance      User data; it is cast back to a LCG_EIGEN_Solver pointer
     * @param[in] x         The multiplier
     * @param[out] prod_Mx  The product
     */
    static void _MxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
    {
        LCG_EIGEN_Solver* const self = static_cast<LCG_EIGEN_Solver*>(instance);
        self->MxProduct(x, prod_Mx);
    }

    /**
     * @brief Pure virtual function of the product of M^-1*x
     *
     * @param[in] x         The multiplier
     * @param[out] prod_Mx  The product
     */
    virtual void MxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx) = 0;

    /**
     * @brief Static trampoline of the process monitoring. Forwards to the virtual Progress().
     *
     * @param instance  User data; it is cast back to a LCG_EIGEN_Solver pointer
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param k         Current iteration times
     * @return int      Whatever Progress() returns (status of the process)
     */
    static int _Progress(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
        const lcg_para *param, const int k)
    {
        LCG_EIGEN_Solver* const self = static_cast<LCG_EIGEN_Solver*>(instance);
        return self->Progress(m, converge, param, k);
    }

    /**
     * @brief Virtual function of the process monitoring. It is not pure virtual,
     * so overriding it is optional.
     *
     * @param m         Pointer of the current solution
     * @param converge  Current value of the convergence
     * @param param     Pointer of the parameters used in the algorithms
     * @param k         Current iteration times
     * @return int      Status of the process
     */
    virtual int Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
        const int k);

    /**
     * @brief Switch off the reporting of the solving process
     */
    void silent();

    /**
     * @brief Set the interval at which the process monitoring function reports
     *
     * @param inter The interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters used by the algorithms
     *
     * @param in_param The input parameters
     */
    void set_lcg_parameter(const lcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param m          The solution vector
     * @param b          The targeting vector
     * @param solver_id  Solver type (LCG_CG by default)
     * @param verbose    Report more information of the full process
     * @param er_throw   Instead of showing error messages on screen, throw them out using std::exception
     */
    void Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_CG,
        bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the preconditioned minimizing process
     *
     * @param m          The solution vector
     * @param b          The targeting vector
     * @param solver_id  Solver type (LCG_PCG by default)
     * @param verbose    Report more information of the full process
     * @param er_throw   Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_PCG,
        bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the constrained minimizing process
     *
     * @param m          The solution vector
     * @param B          The targeting vector
     * @param low        Lower bound of the solution vector
     * @param hig        Higher bound of the solution vector
     * @param solver_id  Solver type (LCG_PG by default)
     * @param verbose    Report more information of the full process
     * @param er_throw   Instead of showing error messages on screen, throw them out using std::exception
     */
    void MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
        const Eigen::VectorXd &hig, lcg_solver_enum solver_id = LCG_PG, bool verbose = true,
        bool er_throw = false);
};
/**
 * @brief Complex linear conjugate gradient solver class working on Eigen::VectorXcd.
 *
 * Derived classes must implement AxProduct() and MxProduct(), which are pure virtual.
 * The static _AxProduct(), _MxProduct() and _Progress() members are C-style trampolines:
 * they take the object as an opaque void pointer and dispatch to the matching virtual member.
 */
class CLCG_EIGEN_Solver
{
protected:
    clcg_para param_;    ///< Parameters of the algorithms (see set_clcg_parameter())
    unsigned int inter_; ///< NOTE(review): presumably the report interval set by set_report_interval() -- confirm in the implementation file
    bool silent_;        ///< NOTE(review): presumably toggled by silent() -- confirm in the implementation file

public:
    CLCG_EIGEN_Solver();
    virtual ~CLCG_EIGEN_Solver(){}

    /**
     * @brief Trampoline for the product of A*x
     *
     * @param instance User data; must point to the CLCG_EIGEN_Solver object to dispatch on
     * @param[in] x Multiplier
     * @param[out] prod_Ax Product
     * @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    static void _AxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
        lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        CLCG_EIGEN_Solver *self = reinterpret_cast<CLCG_EIGEN_Solver*>(instance);
        self->AxProduct(x, prod_Ax, layout, conjugate);
    }

    /**
     * @brief Product of A*x, to be implemented by the derived class
     *
     * @param[in] x Multiplier
     * @param[out] prod_Ax Product
     * @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    virtual void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
        lcg_matrix_e layout, clcg_complex_e conjugate) = 0;

    /**
     * @brief Trampoline for the product of M^-1*x
     *
     * @param instance User data; must point to the CLCG_EIGEN_Solver object to dispatch on
     * @param[in] x Multiplier
     * @param[out] prod_Mx Product
     * @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    static void _MxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
        lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        CLCG_EIGEN_Solver *self = reinterpret_cast<CLCG_EIGEN_Solver*>(instance);
        self->MxProduct(x, prod_Mx, layout, conjugate);
    }

    /**
     * @brief Product of M^-1*x, to be implemented by the derived class
     *
     * @param[in] x Multiplier
     * @param[out] prod_Mx Product
     * @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
     * @param conjugate Whether to use the conjugate of the kernel matrix. This is passed for the clcg_matvec() function
     */
    virtual void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
        lcg_matrix_e layout, clcg_complex_e conjugate) = 0;

    /**
     * @brief Trampoline for the process monitoring function
     *
     * @param instance User data; must point to the CLCG_EIGEN_Solver object to dispatch on
     * @param m Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param Pointer of the parameters used in the algorithms
     * @param k Current iteration times
     * @return int Status of the process, as returned by Progress()
     */
    static int _Progress(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
        const clcg_para *param, const int k)
    {
        CLCG_EIGEN_Solver *self = reinterpret_cast<CLCG_EIGEN_Solver*>(instance);
        return self->Progress(m, converge, param, k);
    }

    /**
     * @brief Virtual function of the process monitoring
     *
     * @param m Pointer of the current solution
     * @param converge Current value of the convergence
     * @param param Pointer of the parameters used in the algorithms
     * @param k Current iteration times
     * @return int Status of the process
     */
    virtual int Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
        const int k);

    /**
     * @brief Do not report any processes
     */
    void silent();

    /**
     * @brief Set the interval to run the process monitoring function
     *
     * @param inter the interval
     */
    void set_report_interval(unsigned int inter);

    /**
     * @brief Set the parameters of the algorithms
     *
     * @param in_param the input parameters
     */
    void set_clcg_parameter(const clcg_para &in_param);

    /**
     * @brief Run the minimizing process
     *
     * @param m Solution vector (passed by reference)
     * @param b Targeting vector (passed by const reference)
     * @param solver_id Solver type. Defaults to CLCG_CGS
     * @param verbose Report more information of the full process. Defaults to true
     * @param er_throw Instead of showing error messages on screen, throw them out using std::exception. Defaults to false
     */
    void Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_CGS,
        bool verbose = true, bool er_throw = false);

    /**
     * @brief Run the preconditioned minimizing process
     *
     * @param m Solution vector (passed by reference)
     * @param b Targeting vector (passed by const reference)
     * @param solver_id Solver type. Defaults to CLCG_PBICG
     * @param verbose Report more information of the full process. Defaults to true
     * @param er_throw Instead of showing error messages on screen, throw them out using std::exception. Defaults to false
     */
    void MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_PBICG,
        bool verbose = true, bool er_throw = false);
};
#endif // _SOLVER_EIGEN_H

253
src/lib/util.cpp Normal file
View File

@ -0,0 +1,253 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "iostream"
#include "exception"
#include "stdexcept"
#include "util.h"
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif
// Return a copy of the library-wide default parameter set (defparam).
lcg_para lcg_default_parameters()
{
    return defparam;
}
// Map a solver name (the enumerator spelled as a string, e.g. "LCG_CG") to its
// lcg_solver_enum value. Throws std::invalid_argument for any other string.
lcg_solver_enum lcg_select_solver(std::string slr_char)
{
    if (slr_char == "LCG_CG") return LCG_CG;
    if (slr_char == "LCG_PCG") return LCG_PCG;
    if (slr_char == "LCG_CGS") return LCG_CGS;
    if (slr_char == "LCG_BICGSTAB") return LCG_BICGSTAB;
    if (slr_char == "LCG_BICGSTAB2") return LCG_BICGSTAB2;
    if (slr_char == "LCG_PG") return LCG_PG;
    if (slr_char == "LCG_SPG") return LCG_SPG;

    throw std::invalid_argument("Invalid solver type.");
}
// Report the meaning of a return code of the lcg solvers.
//
// er_index  Return code; negative values are failures, non-negative values are successes.
// er_throw  false: print a coloured "Success! "/"Fail! " prefix, the message and a newline to std::cerr.
//           true : throw std::runtime_error carrying the message when er_index is negative;
//                  otherwise print only the bare message (no prefix, no newline) to std::cerr.
void lcg_error_str(int er_index, bool er_throw)
{
    const bool failed = (er_index < 0);

    // Coloured status prefix (print mode only).
    if (!er_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE),
            FOREGROUND_INTENSITY | (failed ? FOREGROUND_RED : FOREGROUND_GREEN));
        std::cerr << (failed ? "Fail! " : "Success! ");
#else
        std::cerr << (failed ? "\033[1m\033[31mFail! " : "\033[1m\033[32mSuccess! ");
#endif
    }

    // Translate the code into its message. LCG_UNKNOWN_ERROR needs no case of its own
    // because it carries the same text as the default branch.
    const char *msg;
    switch (er_index)
    {
        case LCG_SUCCESS:                  msg = "Iteration reached convergence."; break;
        case LCG_STOP:                     msg = "Iteration is stopped by the progress evaluation function."; break;
        case LCG_ALREADY_OPTIMIZIED:       msg = "The variables are already optimized."; break;
        case LCG_INVILAD_VARIABLE_SIZE:    msg = "The size of the variables is negative."; break;
        case LCG_INVILAD_MAX_ITERATIONS:   msg = "The maximal iteration times can't be negative."; break;
        case LCG_INVILAD_EPSILON:          msg = "The epsilon is not in the range (0, 1)."; break;
        case LCG_INVILAD_RESTART_EPSILON:  msg = "The restart threshold can't be negative."; break;
        case LCG_REACHED_MAX_ITERATIONS:   msg = "The maximal iteration has been reached."; break;
        case LCG_NULL_PRECONDITION_MATRIX: msg = "The precondition matrix can't be null."; break;
        case LCG_NAN_VALUE:                msg = "The model values are NaN."; break;
        case LCG_INVALID_POINTER:          msg = "Invalid pointer."; break;
        case LCG_INVALID_LAMBDA:           msg = "Invalid value for lambda."; break;
        case LCG_INVALID_SIGMA:            msg = "Invalid value for sigma."; break;
        case LCG_INVALID_BETA:             msg = "Invalid value for beta."; break;
        case LCG_INVALID_MAXIM:            msg = "Invalid value for maxi_m."; break;
        case LCG_SIZE_NOT_MATCH:           msg = "The sizes of solution and target do not match."; break;
        default:                           msg = "Unknown error."; break;
    }

    if (er_throw && failed) throw std::runtime_error(msg);
    std::cerr << msg;

    // Restore the console colour and finish the line (print mode only).
    if (!er_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        std::cerr << std::endl;
#else
        std::cerr << "\033[0m" << std::endl;
#endif
    }
}
// Return a copy of the library-wide default parameter set of the complex solvers (defparam2).
clcg_para clcg_default_parameters()
{
    return defparam2;
}
// Map a solver name (the enumerator spelled as a string, e.g. "CLCG_CGS") to its
// clcg_solver_enum value. Throws std::invalid_argument for any other string.
//
// Every enumerator of clcg_solver_enum is accepted. "CLCG_BICGSTAB", "CLCG_PCG" and
// "CLCG_PBICG" used to be rejected although they are declared in the enum, which made
// those solvers unreachable by name.
clcg_solver_enum clcg_select_solver(std::string slr_char)
{
    if (slr_char == "CLCG_BICG") return CLCG_BICG;
    if (slr_char == "CLCG_BICG_SYM") return CLCG_BICG_SYM;
    if (slr_char == "CLCG_CGS") return CLCG_CGS;
    if (slr_char == "CLCG_BICGSTAB") return CLCG_BICGSTAB;
    if (slr_char == "CLCG_TFQMR") return CLCG_TFQMR;
    if (slr_char == "CLCG_PCG") return CLCG_PCG;
    if (slr_char == "CLCG_PBICG") return CLCG_PBICG;

    throw std::invalid_argument("Invalid solver type.");
}
// Report the meaning of a return code of the clcg solvers.
//
// er_index  Return code; negative values are failures, non-negative values are successes.
// er_throw  false: print a coloured "Success! "/"Fail! " prefix, the message and a newline to std::cerr.
//           true : throw std::runtime_error carrying the message when er_index is negative;
//                  otherwise print only the bare message (no prefix, no newline) to std::cerr.
void clcg_error_str(int er_index, bool er_throw)
{
    const bool failed = (er_index < 0);

    // Coloured status prefix (print mode only).
    if (!er_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE),
            FOREGROUND_INTENSITY | (failed ? FOREGROUND_RED : FOREGROUND_GREEN));
        std::cerr << (failed ? "Fail! " : "Success! ");
#else
        std::cerr << (failed ? "\033[1m\033[31mFail! " : "\033[1m\033[32mSuccess! ");
#endif
    }

    // Translate the code into its message. CLCG_UNKNOWN_ERROR needs no case of its own
    // because it carries the same text as the default branch.
    const char *msg;
    switch (er_index)
    {
        case CLCG_SUCCESS:                msg = "Iteration reached convergence."; break;
        case CLCG_STOP:                   msg = "Iteration is stopped by the progress evaluation function."; break;
        case CLCG_ALREADY_OPTIMIZIED:     msg = "The variables are already optimized."; break;
        case CLCG_INVILAD_VARIABLE_SIZE:  msg = "The size of the variables is negative."; break;
        case CLCG_INVILAD_MAX_ITERATIONS: msg = "The maximal iteration times is negative."; break;
        case CLCG_INVILAD_EPSILON:        msg = "The epsilon is not in the range (0, 1)."; break;
        case CLCG_REACHED_MAX_ITERATIONS: msg = "The maximal iteration has been reached."; break;
        case CLCG_NAN_VALUE:              msg = "The model values are NaN."; break;
        case CLCG_INVALID_POINTER:        msg = "Invalid pointer."; break;
        case CLCG_SIZE_NOT_MATCH:         msg = "The sizes of the solution and target do not match."; break;
        case CLCG_UNKNOWN_SOLVER:         msg = "Unknown solver."; break;
        default:                          msg = "Unknown error."; break;
    }

    if (er_throw && failed) throw std::runtime_error(msg);
    std::cerr << msg;

    // Restore the console colour and finish the line (print mode only).
    if (!er_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        std::cerr << std::endl;
#else
        std::cerr << "\033[0m" << std::endl;
#endif
    }
}

308
src/lib/util.h Normal file
View File

@ -0,0 +1,308 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#ifndef _LCG_UTIL_H
#define _LCG_UTIL_H
#include "string"
#include "algebra.h"
/**
* @brief Types of method that could be recognized by the lcg_solver() function.
*
* No enumerator has an explicit value, so they are numbered consecutively from 0
* in declaration order.
*/
enum lcg_solver_enum
{
/**
* Conjugate gradient method.
*/
LCG_CG,
/**
* Preconditioned conjugate gradient method.
*/
LCG_PCG,
/**
* Conjugate gradient squared method.
*/
LCG_CGS,
/**
* Biconjugate gradient stabilized method.
*/
LCG_BICGSTAB,
/**
* Biconjugate gradient stabilized method with restart
* (see lcg_para::restart_epsilon).
*/
LCG_BICGSTAB2,
/**
* Conjugate gradient method with projected gradient for inequality constraints.
* This algorithm comes without non-monotonic linear search for the step length.
*/
LCG_PG,
/**
* Conjugate gradient method with spectral projected gradient for inequality constraints.
* This algorithm comes with non-monotonic linear search for the step length.
*/
LCG_SPG,
};
/**
* @brief Return values of the lcg_solver() function.
*
* Non-negative values denote success and negative values denote errors.
* LCG_SUCCESS and LCG_CONVERGENCE share the value 0. The error codes start
* at -1024 and increase by one in declaration order.
*/
enum lcg_return_enum
{
LCG_SUCCESS = 0, ///< The solver function terminated successfully.
LCG_CONVERGENCE = 0, ///< The iteration reached convergence (same value as LCG_SUCCESS).
LCG_STOP, ///< The iteration is stopped by the monitoring function.
LCG_ALREADY_OPTIMIZIED, ///< The initial solution is already optimized.
// A negative number means an error
LCG_UNKNOWN_ERROR = -1024, ///< Unknown error.
LCG_INVILAD_VARIABLE_SIZE, ///< The variable size is negative.
LCG_INVILAD_MAX_ITERATIONS, ///< The maximal iteration times is negative.
LCG_INVILAD_EPSILON, ///< Invalid epsilon (reported by lcg_error_str() as not in the range (0, 1)).
LCG_INVILAD_RESTART_EPSILON, ///< The restart epsilon is negative.
LCG_REACHED_MAX_ITERATIONS, ///< Iteration reached maximal limit.
LCG_NULL_PRECONDITION_MATRIX, ///< Null precondition matrix.
LCG_NAN_VALUE, ///< NaN value.
LCG_INVALID_POINTER, ///< Invalid pointer.
LCG_INVALID_LAMBDA, ///< Invalid range for lambda.
LCG_INVALID_SIGMA, ///< Invalid range for sigma.
LCG_INVALID_BETA, ///< Invalid range for beta.
LCG_INVALID_MAXIM, ///< Invalid range for maxi_m.
LCG_SIZE_NOT_MATCH, ///< Sizes of m and B do not match.
};
/**
* @brief Parameters of the conjugate gradient methods.
*
* The struct is aggregate-initialized by defparam, so the order of the members
* below must stay in step with that initializer.
*/
struct lcg_para
{
/**
* Maximal iteration times. The process will continue till the convergence is met
* if this option is set to zero (default).
*/
int max_iterations;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to be
* found. A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or
* sqrt(||g||)/N <= epsilon for the lcg_solver() function, where ||.|| denotes
* the Euclidean (L2) norm. The default value of epsilon is 1e-8.
*/
lcg_float epsilon;
/**
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
* The default value is 0 (false) which means the gradient based evaluating method is used.
* The AMD based method will be used if this variable is set to a non-zero value (true).
* This parameter is only applied to the non-constrained methods.
*/
int abs_diff;
/**
* Restart epsilon for the LCG_BICGSTAB2 algorithm. The default value is 1e-6
*/
lcg_float restart_epsilon;
/**
* Initial step length for the projected gradient method. The default is 1.0
*/
lcg_float step;
/**
* Multiplier for updating solutions with the spectral projected gradient method. The range of
* this variable is (0, 1). The default is given as 0.95
*/
lcg_float sigma;
/**
* Descending ratio for conducting the non-monotonic linear search. The range of
* this variable is (0, 1). The default is given as 0.9
*/
lcg_float beta;
/**
* The maximal record times of the objective values for the SPG method. The method uses the
* objective values from the most recent maxi_m times to perform the non-monotonic linear search.
* The default value is 10.
*/
int maxi_m;
};
/**
* Default parameters for the conjugate gradient methods, in lcg_para member order:
* max_iterations = 0, epsilon = 1e-8, abs_diff = 0, restart_epsilon = 1e-6,
* step = 1.0, sigma = 0.95, beta = 0.9, maxi_m = 10.
*/
static const lcg_para defparam = {0, 1e-8, 0, 1e-6, 1.0, 0.95, 0.9, 10};
/**
* @brief Return a lcg_para type instance with default values.
*
* Users can use this function to get default parameters' value for the conjugate gradient methods.
*
* @return A lcg_para type instance.
*/
lcg_para lcg_default_parameters();
/**
* @brief Select a type of solver according to the name
*
* @param[in] slr_char Name of the solver
*
* @return The lcg solver enum.
*/
lcg_solver_enum lcg_select_solver(std::string slr_char);
/**
* @brief Display or throw out a string explanation for the lcg_solver() function's return values.
*
* The function returns nothing: the explanation is either written to the standard
* error stream or carried by the thrown exception.
*
* @param[in] er_index The error index returned by the lcg_solver() function.
* @param[in] er_throw Throw the explanation as an exception instead of displaying it. Defaults to false.
*/
void lcg_error_str(int er_index, bool er_throw = false);
/**
* @brief Types of method that could be recognized by the clcg_solver() function.
*
* No enumerator has an explicit value, so they are numbered consecutively from 0
* in declaration order. CLCG_QMR is commented out and therefore takes no number.
*/
enum clcg_solver_enum
{
/**
* Jacob's Bi-Conjugate Gradient Method
*/
CLCG_BICG,
/**
* Bi-Conjugate Gradient Method accelerated for complex symmetric A
*/
CLCG_BICG_SYM,
/**
* Conjugate Gradient Squared Method with real coefficients.
*/
CLCG_CGS,
/**
* Biconjugate gradient stabilized method.
*/
CLCG_BICGSTAB,
/**
* Quasi-Minimal Residual Method (currently disabled)
*/
//CLCG_QMR,
/**
* Transpose Free Quasi-Minimal Residual Method
*/
CLCG_TFQMR,
/**
* Preconditioned conjugate gradient
*/
CLCG_PCG,
/**
* Preconditioned Bi-Conjugate Gradient Method
*/
CLCG_PBICG,
};
/**
* @brief Return values of the clcg_solver() function.
*
* Non-negative values denote success and negative values denote errors.
* CLCG_SUCCESS and CLCG_CONVERGENCE share the value 0. The error codes start
* at -1024 and increase by one in declaration order.
*/
enum clcg_return_enum
{
CLCG_SUCCESS = 0, ///< The solver function terminated successfully.
CLCG_CONVERGENCE = 0, ///< The iteration reached convergence (same value as CLCG_SUCCESS).
CLCG_STOP, ///< The iteration is stopped by the monitoring function.
CLCG_ALREADY_OPTIMIZIED, ///< The initial solution is already optimized.
// A negative number means an error
CLCG_UNKNOWN_ERROR = -1024, ///< Unknown error.
CLCG_INVILAD_VARIABLE_SIZE, ///< The variable size is negative.
CLCG_INVILAD_MAX_ITERATIONS, ///< The maximal iteration times is negative.
CLCG_INVILAD_EPSILON, ///< Invalid epsilon (reported by clcg_error_str() as not in the range (0, 1)).
CLCG_REACHED_MAX_ITERATIONS, ///< Iteration reached maximal limit.
CLCG_NAN_VALUE, ///< NaN value.
CLCG_INVALID_POINTER, ///< Invalid pointer.
CLCG_SIZE_NOT_MATCH, ///< Sizes of m and B do not match.
CLCG_UNKNOWN_SOLVER, ///< Unknown solver.
};
/**
* @brief Parameters of the complex conjugate gradient methods.
*
* The struct is aggregate-initialized by defparam2, so the order of the members
* below must stay in step with that initializer.
*/
struct clcg_para
{
/**
* Maximal iteration times. The process will continue till the convergence is met
* if this option is set to zero (default).
*/
int max_iterations;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to be found.
* A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or sqrt(||g||)/N
* <= epsilon for the lcg_solver() function, where ||.|| denotes the Euclidean (L2) norm.
* The default value of epsilon is 1e-8. For box-constrained methods, the convergence test
* is implemented using ||P(m-g) - m|| <= epsilon, in which P is the projector that
* transfers m into the constrained domain.
*/
lcg_float epsilon;
/**
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
* The default value is 0 (false) which means the gradient based evaluating method is used.
* The AMD based method will be used if this variable is set to a non-zero value (true).
* This parameter is only applied to the non-constrained methods.
*/
int abs_diff;
};
/**
* Default parameters for the complex conjugate gradient methods, in clcg_para member order:
* max_iterations = 0, epsilon = 1e-8, abs_diff = 0.
*/
static const clcg_para defparam2 = {0, 1e-8, 0};
/**
* @brief Return a clcg_para type instance with default values.
*
* Users can use this function to get default parameters' value for the complex conjugate gradient methods.
*
* @return A clcg_para type instance.
*/
clcg_para clcg_default_parameters();
/**
* @brief Select a type of solver according to the name
*
* @param[in] slr_char Name of the solver
*
* @return The clcg solver enum.
*/
clcg_solver_enum clcg_select_solver(std::string slr_char);
/**
* @brief Display or throw out a string explanation for the clcg_solver() function's return values.
*
* The function returns nothing: the explanation is either written to the standard
* error stream or carried by the thrown exception.
*
* @param[in] er_index The error index returned by the clcg_solver() function.
* @param[in] er_throw Throw the explanation as an exception instead of displaying it. Defaults to false.
*/
void clcg_error_str(int er_index, bool er_throw = false);
#endif // _LCG_UTIL_H

167
src/sample/sample1.cpp Normal file
View File

@ -0,0 +1,167 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "cmath"
#include "iostream"
#include "../lib/lcg.h"
// Number of rows of the kernel matrix (first index of kernel[][] in main()).
#define M 100
// Number of columns of the kernel matrix; also the length of the vectors handed to the solvers.
#define N 80
// Largest element-wise distance sqrt((a[i]-b[i])^2) over the first `size` entries.
// Returns -1 when `size` is not positive, because that is the starting value of the search.
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float largest = -1;
    int idx = 0;
    while (idx < size)
    {
        const lcg_float delta = a[idx] - b[idx];
        largest = lcg_max(sqrt(delta*delta), largest);
        ++idx;
    }
    return largest;
}
// Plain two-dimensional array used as the kernel matrix
lcg_float **kernel;
// Array holding intermediate results
lcg_float *tmp_arr;
// Preconditioning matrix (stored as a vector and applied element-wise in CalMx())
lcg_float *p;
// Compute the product of the kernel matrix and a vector, as required by the solvers.
// The global kernel is applied twice: first with MatNormal (x -> tmp_arr), then with
// MatTranspose (tmp_arr -> prod_Ax). `instance` is not used.
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
{
    (void)instance;

    lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal);
    lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose);
}
// Apply the preconditioner: prod_Mx[i] = p[i]*x[i] for the first n_s entries, where p is
// the global vector of preconditioning factors. `instance` is not used.
//
// The index is a signed int like n_s. Comparing a size_t index against the signed n_s
// would convert a negative n_s into a huge unsigned bound and run off the arrays.
void CalMx(void* instance, const lcg_float* x, lcg_float* prod_Mx, const int n_s)
{
    for (int i = 0; i < n_s; i++)
    {
        prod_Mx[i] = p[i]*x[i];
    }
    return;
}
// Progress monitor handed to the conjugate gradient solvers.
// Rewrites the current terminal line (leading '\r') with the iteration count and the
// convergence value, and always returns 0. Only `k` and `converge` are used.
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
{
    (void)instance;
    (void)m;
    (void)param;
    (void)n_s;

    std::clog << "\rIteration-times: " << k;
    std::clog << "\tconvergence: " << converge;
    return 0;
}
// Sample driver: builds a random M x N kernel, derives the right-hand side B from a known
// solution fm, then runs every real-valued solver on the same system starting from m = 0
// and prints the maximal difference between the recovered and the known solution.
int main(int argc, char const *argv[])
{
kernel = lcg_malloc(M, N);
tmp_arr = lcg_malloc(M);
p = lcg_malloc(N);
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
// Generate a reference (forward) solution
lcg_float *fm = lcg_malloc(N);
lcg_vecrnd(fm, 1.0, 2.0, N);
// Compute the B term of the conjugate gradient system (same two products as CalAx())
lcg_float *B = lcg_malloc(N);
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
/******************** Preparation finished ************************/
lcg_para self_para = lcg_default_parameters();
self_para.epsilon = 1e-7;
self_para.abs_diff = 0;
// Declare the solution vector
lcg_float *m = lcg_malloc(N);
lcg_vecset(m, 0.0, N);
// Build the preconditioning factors: p[i] is the reciprocal of the sum of squares of column i
lcg_float diag;
for (size_t i = 0; i < N; i++)
{
diag = 0.0;
for (size_t j = 0; j < M; j++)
{
diag += kernel[j][i]*kernel[j][i];
}
p[i] = 1.0/diag;
}
// Bounds of the constrained solution
lcg_float *low = lcg_malloc(N);
lcg_float *hig = lcg_malloc(N);
lcg_vecset(low, 1.0, N);
lcg_vecset(hig, 2.0, N);
int ret;
std::clog << "solver: cg" << std::endl;
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
// Reset the starting solution before each of the following runs
lcg_vecset(m, 0.0, N);
std::clog << "solver: pcg" << std::endl;
ret = lcg_solver_preconditioned(CalAx, CalMx, Prog, m, B, N, &self_para, NULL, LCG_PCG);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
std::clog << "solver: cgs" << std::endl;
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CGS);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
std::clog << "solver: bicgstab" << std::endl;
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
std::clog << "solver: bicgstab2" << std::endl;
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB2);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
std::clog << "solver: pg" << std::endl;
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_PG);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
std::clog << "solver: spg" << std::endl;
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_SPG);
std::clog << std::endl; lcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
// Release everything obtained from lcg_malloc()
lcg_free(kernel, M);
lcg_free(tmp_arr);
lcg_free(fm);
lcg_free(B);
lcg_free(m);
lcg_free(p);
lcg_free(low);
lcg_free(hig);
return 0;
}

318
src/sample/sample10.cu Normal file
View File

@ -0,0 +1,318 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/solver_cuda.h"
// Declare as global variables
// Complex constants 1+0i and 0+0i; their addresses are passed to cusparseSpMV() and
// cusparseZcsrsv2_solve() as scalar arguments.
cuDoubleComplex one = {1.0, 0.0};
cuDoubleComplex zero = {0.0, 0.0};
/**
 * @brief Load a sparse complex matrix in COO form and a right-hand-side vector from a binary file.
 *
 * File layout as consumed here: int N, int nz, then nz records of
 * (int row, int col, cuDoubleComplex value), then N cuDoubleComplex values of b.
 * The four output arrays are allocated with new[] and are owned by the caller.
 *
 * Open and read failures used to be ignored, so a missing or truncated file left
 * *pN and *pnz unwritten and the array sizes undefined. They now raise an exception.
 *
 * @param filePath Path of the binary input file
 * @param[out] pN Matrix dimension read from the file
 * @param[out] pnz Number of stored entries read from the file
 * @param[out] cooVal Newly allocated array of nz values
 * @param[out] cooRowIdx Newly allocated array of nz row indices
 * @param[out] cooColIdx Newly allocated array of nz column indices
 * @param[out] b Newly allocated array of N right-hand-side values
 *
 * @throws std::ios_base::failure if the file cannot be opened, is shorter than its
 *         header announces, or announces a negative size. Arrays allocated before the
 *         failure stay assigned to the output pointers.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    // Make the stream throw on failure; this also throws right here when the open failed.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));
    if (*pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read(): negative size in the file header");
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
    return;
}
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * File layout as consumed here: int N followed by N cuDoubleComplex values.
 * The output array is allocated with new[] and is owned by the caller.
 *
 * Open and read failures used to be ignored, so a missing or truncated file left
 * *pN unwritten and the array size undefined. They now raise an exception.
 *
 * @param filePath Path of the binary input file
 * @param[out] pN Vector length read from the file
 * @param[out] x Newly allocated array of N values
 *
 * @throws std::ios_base::failure if the file cannot be opened, is shorter than its
 *         header announces, or announces a negative size.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    // Make the stream throw on failure; this also throws right here when the open failed.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    if (*pN < 0)
    {
        throw std::ios_base::failure("readAnswer(): negative size in the file header");
    }

    *x = new cuDoubleComplex[*pN]{};
    in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
    return;
}
/**
 * @brief Error measure between two complex vectors: sqrt(sum_i |d_i|^2) / n,
 *        where d_i = clcg_Zdiff(a[i], b[i]).
 *
 * The index is a signed int like n. Comparing a size_t index against the signed n would
 * convert a negative n into a huge unsigned bound and read past the arrays.
 *
 * @param a First vector (host memory, at least n entries)
 * @param b Second vector (host memory, at least n entries)
 * @param n Number of entries; expected to be positive, since the result is divided by n
 * @return The error measure
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    lcg_float sq_sum = 0.0;
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
        sq_sum += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sq_sum)/n;
}
/**
 * @brief Sample solver: supplies the A*x and preconditioner products that
 *        CLCG_CUDA_Solver needs, using cuSPARSE on a CSR matrix held on the device.
 *
 * The device buffers and cuSPARSE descriptors are set up in solve(). The constructor
 * value-initializes every member so that nothing holds an indeterminate value before
 * solve() assigns it. In particular MxProduct() branches on use_incomplete_cholesky,
 * which used to be left uninitialized.
 */
class sample10 : public CLCG_CUDA_Solver
{
public:
    // Members are listed in declaration order.
    sample10()
        : use_incomplete_cholesky(false), N(0), nz(0),
          rowIdxA(nullptr), colIdxA(nullptr), A(nullptr), b(nullptr), ans_x(nullptr),
          d_buf(nullptr), smat_A(),
          d_rowIdxA(nullptr), d_rowPtrA(nullptr), d_colIdxA(nullptr),
          d_A(nullptr), d_pd(nullptr), d_ic(nullptr),
          descr_A(), descr_L(), icinfo_A(), info_L(), info_LT(),
          host_m(nullptr), dvec_tmp() {}
    virtual ~sample10(){}

    void solve(std::string inputPath, std::string answerPath);

    /**
     * @brief Product of A*x: prod_Ax = one * op(smat_A) * x + zero * prod_Ax via cusparseSpMV().
     *
     * @param cub_handle cuBLAS handle (unused here)
     * @param cus_handle cuSPARSE handle
     * @param x Dense vector descriptor of the multiplier
     * @param prod_Ax Dense vector descriptor receiving the product
     * @param n_size Unused here
     * @param nz_size Unused here
     * @param oper_t Operation applied to the matrix (op above)
     */
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
        cusparseOperation_t oper_t)
    {
        // Calculate the product of A*x
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
        return;
    }

    /**
     * @brief Preconditioner product, written into prod_Ax.
     *
     * With use_incomplete_cholesky set, two triangular solves are run with the factor
     * stored in d_ic: a non-transposed solve from x into d_pd, then a transposed solve
     * from d_pd into prod_Ax. Otherwise x and d_pd are combined by
     * clcg_vecDvecZ_element_wise().
     *
     * @param cub_handle cuBLAS handle (unused here)
     * @param cus_handle cuSPARSE handle
     * @param x Dense vector descriptor of the multiplier
     * @param prod_Ax Dense vector descriptor receiving the product
     * @param n_size Size passed to the triangular solves / the element-wise kernel
     * @param nz_size Number of non-zeros passed to the triangular solves
     * @param oper_t Unused here
     */
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
        cusparseOperation_t oper_t)
    {
        // Fetch the raw device pointers behind the two vector descriptors.
        void *d_x, *d_Ax;
        cusparseDnVecGetValues(x, &d_x);
        cusparseDnVecGetValues(prod_Ax, &d_Ax);

        if (use_incomplete_cholesky)
        {
            cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (cuDoubleComplex*) d_x, (cuDoubleComplex*) d_pd,
                CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
            cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (cuDoubleComplex*) d_pd, (cuDoubleComplex*) d_Ax,
                CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
        }
        else
        {
            clcg_vecDvecZ_element_wise((cuDoubleComplex*) d_x, d_pd, (cuDoubleComplex*) d_Ax, n_size);
        }
        return;
    }

private:
    bool use_incomplete_cholesky; // Selects the branch taken by MxProduct()
    int N, nz;                    // Matrix dimension and number of stored entries
    int *rowIdxA, *colIdxA;       // Host COO indices filled by read()
    cuDoubleComplex *A, *b;       // Host matrix values and right-hand side filled by read()
    cuDoubleComplex *ans_x;       // Host reference solution filled by readAnswer()
    void *d_buf;                  // Work buffer handed to the cuSPARSE calls
    cusparseSpMatDescr_t smat_A;
    int *d_rowIdxA; // COO
    int *d_rowPtrA; // CSR
    int *d_colIdxA;
    cuDoubleComplex *d_A;
    cuDoubleComplex *d_pd;        // Diagonal entries; also scratch between the two triangular solves
    cuDoubleComplex *d_ic;        // Copy of the matrix values used as the factor in MxProduct()
    cusparseMatDescr_t descr_A;
    cusparseMatDescr_t descr_L;
    csric02Info_t icinfo_A;
    csrsv2Info_t info_L;
    csrsv2Info_t info_LT;
    cuDoubleComplex *host_m;
    cusparseDnVecDescr_t dvec_tmp; // NOTE(review): solve() declares a local with the same name that hides this member -- confirm which one is intended
};
void sample10::solve(std::string inputPath, std::string answerPath)
{
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
readAnswer(answerPath, &N, &ans_x);
std::clog << "N = " << N << std::endl;
std::clog << "nz = " << nz << std::endl;
// Create handles
cublasHandle_t cubHandle;
cusparseHandle_t cusHandle;
cublasCreate(&cubHandle);
cusparseCreate(&cusHandle);
// Allocate GPU memory & copy matrix/vector to device
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
cudaMalloc(&d_colIdxA, nz * sizeof(int));
cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
// Convert matrix A from COO format to CSR format
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
// Create sparse matrix
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
// This is just used to get bufferSize;
cusparseDnVecDescr_t dvec_tmp;
cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);
size_t bufferSize_B;
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
// --- Start of the preconditioning part ---
// Get the diagonal elemenets
clcg_smZcsr_get_diagonal(d_rowPtrA, d_colIdxA, d_A, N, d_pd);
// Copy A
cudaMalloc(&d_ic, nz * sizeof(cuDoubleComplex));
cudaMemcpy(d_ic, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);
// create descriptor for matrix A
cusparseCreateMatDescr(&descr_A);
// initialize properties of matrix A
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_SYMMETRIC);
cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
// create descriptor for matrix L
cusparseCreateMatDescr(&descr_L);
// initialize properties of matrix L
cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT);
cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
// Create empty info objects for incomplete-cholesky factorization
cusparseCreateCsric02Info(&icinfo_A);
cusparseCreateCsrsv2Info(&info_L);
cusparseCreateCsrsv2Info(&info_LT);
int bufferSize, bufferSize_A, bufferSize_L, bufferSize_LT;
bufferSize = bufferSize_B;
// Compute buffer size in computing ic factorization
cusparseZcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
d_colIdxA, icinfo_A, &bufferSize_A);
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT);
bufferSize = max(max(max(bufferSize, bufferSize_A), bufferSize_L), bufferSize_LT);
cudaMalloc(&d_buf, bufferSize);
// Perform incomplete-choleskey factorization: analysis phase
cusparseZcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
// Perform incomplete-choleskey factorization: solve phase
cusparseZcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
// --- End of the preconditioning part ---
// Declare an initial solution
host_m = new cuDoubleComplex[N];
clcg_para self_para = clcg_default_parameters();
self_para.epsilon = 1e-6;
// Preconditioning with Diagonal elements
for (size_t i = 0; i < N; i++)
{
host_m[i].x = 0.0; host_m[i].y = 0.0;
}
use_incomplete_cholesky = false;
MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
// Preconditioning with incomplete-Cholesky factorization
for (size_t i = 0; i < N; i++)
{
host_m[i].x = 0.0; host_m[i].y = 0.0;
}
use_incomplete_cholesky = true;
MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
// Free Host memory
delete[] A;
delete[] rowIdxA;
delete[] colIdxA;
delete[] b;
delete[] ans_x;
delete[] host_m;
// Free Device memory
cudaFree(d_A);
cudaFree(d_rowIdxA);
cudaFree(d_rowPtrA);
cudaFree(d_colIdxA);
cudaFree(d_pd);
cudaFree(d_ic);
cusparseDestroyDnVec(dvec_tmp);
cusparseDestroySpMat(smat_A);
cudaFree(d_buf);
cusparseDestroyMatDescr(descr_A);
cusparseDestroyMatDescr(descr_L);
cusparseDestroyCsric02Info(icinfo_A);
cusparseDestroyCsrsv2Info(info_L);
cusparseDestroyCsrsv2Info(info_LT);
// Free handles
cublasDestroy(cubHandle);
cusparseDestroy(cusHandle);
return;
}
int main(int argc, char **argv)
{
std::string inputPath = "data/case_10K_cA";
std::string answerPath = "data/case_10K_cB";
sample10 sp;
sp.set_report_interval(0);
sp.solve(inputPath, answerPath);
return 0;
}

299
src/sample/sample11.cu Normal file
View File

@ -0,0 +1,299 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/clcg_cuda.h"
/**
 * @brief Read a sparse complex linear system from a binary file.
 *
 * File layout, as consumed below: int N, int nz, then nz records of
 * (int row, int col, cuDoubleComplex value), then N cuDoubleComplex values of
 * the right-hand side.
 *
 * The four output arrays are allocated with new[] and must be released by the
 * caller with delete[].
 *
 * @param filePath  Path of the binary input file.
 * @param pN        Receives the system size.
 * @param pnz       Receives the number of stored entries.
 * @param cooVal    Receives the entry values (nz elements).
 * @param cooRowIdx Receives the entry row indices (nz elements).
 * @param cooColIdx Receives the entry column indices (nz elements).
 * @param b         Receives the right-hand side (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative size. Arrays allocated before the failure
 *         are not released.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    // Fail loudly instead of allocating from uninitialised sizes: this throws
    // right away when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));
    if (*pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read: negative size in the header of " + filePath);
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    // Entries are stored interleaved: row, column, value.
    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    // An empty right-hand side has nothing to read.
    if (*pN > 0)
    {
        in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Read a reference solution vector from a binary file.
 *
 * File layout, as consumed below: int N followed by N cuDoubleComplex values.
 * The output array is allocated with new[] and must be released by the caller
 * with delete[].
 *
 * @param filePath Path of the binary answer file.
 * @param pN       Receives the vector length.
 * @param x        Receives the vector (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative length.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    // Throws immediately when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    if (*pN < 0)
    {
        throw std::ios_base::failure("readAnswer: negative size in the header of " + filePath);
    }

    *x = new cuDoubleComplex[*pN]{};
    if (*pN > 0)
    {
        in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Error measure between two complex vectors.
 *
 * Computes sqrt(sum_i |d_i|^2) / n, where d_i = clcg_Zdiff(a[i], b[i])
 * (presumably a[i] - b[i] - confirm against the library).
 *
 * @param a First vector (n elements).
 * @param b Second vector (n elements).
 * @param n Number of elements.
 * @return The measure above, or 0 when n <= 0 (avoids 0/0 and an
 *         out-of-range loop for negative n).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sumSq = 0.0;
    // Signed index: the original size_t index was compared against an int.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex d = clcg_Zdiff(a[i], b[i]);
        sumSq += (d.x*d.x + d.y*d.y);
    }
    return sqrt(sumSq)/n;
}
// File-scope state shared between main() and the solver callbacks
// (cudaAx, cudaMx_ILU), which receive no usable user pointer.

// Complex scalars 1+0i and 0+0i. They are initialised statically so that they
// already hold the right values when main() passes them to
// cusparseSpMV_bufferSize, i.e. before cudaAx has run for the first time.
cuDoubleComplex one = {1.0, 0.0};
cuDoubleComplex zero = {0.0, 0.0};

// Device workspace shared by the SpMV, the factorization and the triangular solves.
void *d_buf = nullptr;
// Generic-API CSR descriptor of A.
cusparseSpMatDescr_t smat_A = nullptr;

int *d_rowIdxA = nullptr; // COO row indices
int *d_rowPtrA = nullptr; // CSR row pointers
int *d_colIdxA = nullptr; // column indices

cuDoubleComplex *d_A = nullptr;  // matrix values
cuDoubleComplex *d_pd = nullptr; // length-N scratch vector (intermediate of the two solves)
cuDoubleComplex *d_iu = nullptr; // copy of the values, overwritten by the ILU factorization

// Legacy matrix descriptors and info objects of the ILU preconditioner.
cusparseMatDescr_t descr_A = 0;
cusparseMatDescr_t descr_L = 0;
cusparseMatDescr_t descr_U = 0;
csrilu02Info_t info_ILU = 0;
csrsv2Info_t info_L = 0;
csrsv2Info_t info_U = 0;
/**
 * @brief Matrix-vector product callback: prod_Ax = op(A) * x.
 *
 * Resets the file-scope scalars `one` and `zero` to 1+0i and 0+0i, then runs
 * cuSPARSE's generic SpMV on smat_A with d_buf as workspace.
 * instance, cub_handle, n_size and nz_size are not used here.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
    cusparseOperation_t oper_t)
{
    // Refresh alpha (= 1) and beta (= 0) on every call.
    zero.x = 0.0;
    zero.y = 0.0;
    one.x = 1.0;
    one.y = 0.0;

    // CUSPARSE_SPMV_ALG_DEFAULT is used here in place of the older CUSPARSE_MV_ALG_DEFAULT.
    cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax,
        CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
}
void cudaMx_ILU(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
cusparseOperation_t oper_t)
{
void *d_x, *d_Ax;
cusparseDnVecGetValues(x, &d_x);
cusparseDnVecGetValues(prod_Ax, &d_Ax);
one.x = 1.0; one.y = 0.0;
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, (cuDoubleComplex*) d_x, (cuDoubleComplex*) d_pd,
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, (cuDoubleComplex*) d_pd, (cuDoubleComplex*) d_Ax,
CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
return;
}
/**
 * @brief Progress callback: prints one line once the convergence value is at
 *        or below param->epsilon, and stays silent otherwise.
 *
 * @param converge Current convergence value reported by the solver.
 * @param param    Solver parameters; only epsilon is read.
 * @param k        Iteration counter printed in the message.
 * @return Always 0.
 */
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k)
{
    // Written as a negated <= so that a NaN value stays silent, as before.
    if (!(converge <= param->epsilon))
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    return 0;
}
int main(int argc, char **argv)
{
std::string inputPath = "data/case_1M_cA";
std::string answerPath = "data/case_1M_cB";
int N;
int nz;
cuDoubleComplex *A;
int *rowIdxA;
int *colIdxA;
cuDoubleComplex *b;
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
cuDoubleComplex *ans_x;
readAnswer(answerPath, &N, &ans_x);
std::clog << "N = " << N << std::endl;
std::clog << "nz = " << nz << std::endl;
// Create handles
cublasHandle_t cubHandle;
cusparseHandle_t cusHandle;
cublasCreate(&cubHandle);
cusparseCreate(&cusHandle);
// Allocate GPU memory & copy matrix/vector to device
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
cudaMalloc(&d_colIdxA, nz * sizeof(int));
cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
// Convert matrix A from COO format to CSR format
cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
// Create sparse matrix
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
// This is just used to get bufferSize;
cusparseDnVecDescr_t dvec_tmp;
cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);
size_t bufferSize_B;
cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B);
// --- Start of the preconditioning part ---
// Copy A
cudaMalloc(&d_iu, nz * sizeof(cuDoubleComplex));
cudaMemcpy(d_iu, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);
int bufferSize, bufferSize_A, bufferSize_L, bufferSize_U;
bufferSize = bufferSize_B;
// create descriptor for matrix A
cusparseCreateMatDescr(&descr_A);
cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);
// create descriptor for matrix L
cusparseCreateMatDescr(&descr_L);
// initialize properties of matrix L
cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
// create descriptor for matrix U
cusparseCreateMatDescr(&descr_U);
cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);
// Create empty info objects for incomplete-cholesky factorization
cusparseCreateCsrilu02Info(&info_ILU);
cusparseCreateCsrsv2Info(&info_L);
cusparseCreateCsrsv2Info(&info_U);
// Compute buffer size in computing ic factorization
cusparseZcsrilu02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
d_colIdxA, info_ILU, &bufferSize_A);
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, &bufferSize_U);
bufferSize = max(max(max(bufferSize, bufferSize_A), bufferSize_L), bufferSize_U);
cudaMalloc(&d_buf, bufferSize);
// Perform incomplete-choleskey factorization: analysis phase
cusparseZcsrilu02_analysis(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA,
d_colIdxA, info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
// Perform incomplete-choleskey factorization: solve phase
cusparseZcsrilu02(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA, d_colIdxA,
info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
// --- End of the preconditioning part ---
// Declare an initial solution
clcg_para self_para = clcg_default_parameters();
self_para.epsilon = 1e-6;
self_para.abs_diff = 0;
int ret;
cuDoubleComplex *host_m = new cuDoubleComplex[N];
// Preconditioning with incomplete-LU factorization
for (size_t i = 0; i < N; i++)
{
host_m[i].x = 0.0; host_m[i].y = 0.0;
}
ret = clcg_solver_preconditioned_cuda(cudaAx, cudaMx_ILU, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_PCG);
lcg_error_str(ret);
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
// Free Host memory
delete[] A;
delete[] rowIdxA;
delete[] colIdxA;
delete[] b;
delete[] ans_x;
delete[] host_m;
// Free Device memory
cudaFree(d_A);
cudaFree(d_rowIdxA);
cudaFree(d_rowPtrA);
cudaFree(d_colIdxA);
cudaFree(d_pd);
cudaFree(d_iu);
cusparseDestroyDnVec(dvec_tmp);
cusparseDestroySpMat(smat_A);
cudaFree(d_buf);
cusparseDestroyMatDescr(descr_A);
cusparseDestroyMatDescr(descr_L);
cusparseDestroyMatDescr(descr_U);
cusparseDestroyCsrilu02Info(info_ILU);
cusparseDestroyCsrsv2Info(info_L);
cusparseDestroyCsrsv2Info(info_U);
// Free handles
cublasDestroy(cubHandle);
cusparseDestroy(cusHandle);
return 0;
}

306
src/sample/sample12.cu Normal file
View File

@ -0,0 +1,306 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/solver_cuda.h"
#include "../lib/preconditioner_cuda.h"
// File-scope complex scalars 1+0i and 0+0i, passed by address to the cuSPARSE
// calls in this sample.
cuDoubleComplex one{1.0, 0.0};
cuDoubleComplex zero{0.0, 0.0};
/**
 * @brief Read a sparse complex linear system from a binary file.
 *
 * File layout, as consumed below: int N, int nz, then nz records of
 * (int row, int col, cuDoubleComplex value), then N cuDoubleComplex values of
 * the right-hand side.
 *
 * The four output arrays are allocated with new[] and must be released by the
 * caller with delete[].
 *
 * @param filePath  Path of the binary input file.
 * @param pN        Receives the system size.
 * @param pnz       Receives the number of stored entries.
 * @param cooVal    Receives the entry values (nz elements).
 * @param cooRowIdx Receives the entry row indices (nz elements).
 * @param cooColIdx Receives the entry column indices (nz elements).
 * @param b         Receives the right-hand side (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative size. Arrays allocated before the failure
 *         are not released.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    // Fail loudly instead of allocating from uninitialised sizes: this throws
    // right away when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));
    if (*pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read: negative size in the header of " + filePath);
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    // Entries are stored interleaved: row, column, value.
    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    // An empty right-hand side has nothing to read.
    if (*pN > 0)
    {
        in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Read a reference solution vector from a binary file.
 *
 * File layout, as consumed below: int N followed by N cuDoubleComplex values.
 * The output array is allocated with new[] and must be released by the caller
 * with delete[].
 *
 * @param filePath Path of the binary answer file.
 * @param pN       Receives the vector length.
 * @param x        Receives the vector (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative length.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    // Throws immediately when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    if (*pN < 0)
    {
        throw std::ios_base::failure("readAnswer: negative size in the header of " + filePath);
    }

    *x = new cuDoubleComplex[*pN]{};
    if (*pN > 0)
    {
        in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Error measure between two complex vectors.
 *
 * Computes sqrt(sum_i |d_i|^2) / n, where d_i = clcg_Zdiff(a[i], b[i])
 * (presumably a[i] - b[i] - confirm against the library).
 *
 * @param a First vector (n elements).
 * @param b Second vector (n elements).
 * @param n Number of elements.
 * @return The measure above, or 0 when n <= 0 (avoids 0/0 and an
 *         out-of-range loop for negative n).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sumSq = 0.0;
    // Signed index: the original size_t index was compared against an int.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex d = clcg_Zdiff(a[i], b[i]);
        sumSq += (d.x*d.x + d.y*d.y);
    }
    return sqrt(sumSq)/n;
}
class sample12 : public CLCG_CUDA_Solver
{
public:
sample12(){}
virtual ~sample12(){}
void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
const int n_size, const int nz_size, cusparseOperation_t oper_t)
{
// Calculate the product of A*x
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
return;
}
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
const int n_size, const int nz_size, cusparseOperation_t oper_t)
{
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, x, dvec_p,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, prod_Ax,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
return;
}
private:
int N, nz;
int *rowIdxA, *colIdxA;
cuDoubleComplex *A, *b;
cuDoubleComplex *ans_x;
int *IC_row, *IC_col;
cuDoubleComplex *IC_val;
void *d_buf, *d_buf2;
cusparseSpMatDescr_t smat_A;
cusparseSpMatDescr_t smat_IC;
cusparseSpSVDescr_t descr_L, descr_LT;
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
cuDoubleComplex *d_A;
cuDoubleComplex *d_p;
cusparseDnVecDescr_t dvec_p;
int *d_rowIdxIC; // COO
int *d_rowPtrIC; // CSR
int *d_colIdxIC;
cuDoubleComplex *d_IC;
cuDoubleComplex *host_m;
cuDoubleComplex *d_t;
cusparseDnVecDescr_t dvec_tmp;
};
/**
 * @brief Load a complex sparse system, solve it with CLCG_PCG using an
 *        incomplete-Cholesky preconditioner, and print the error against the
 *        reference answer.
 *
 * The factor is computed on the host by clcg_incomplete_Cholesky_cuda_full,
 * uploaded, converted to CSR and analysed twice with the generic SpSV API
 * (non-transposed and transposed).
 *
 * @param inputPath  Binary file holding the matrix and the right-hand side.
 * @param answerPath Binary file holding the reference solution.
 * @param cub_handle cuBLAS handle owned by the caller.
 * @param cus_handle cuSPARSE handle owned by the caller.
 */
void sample12::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Host-side incomplete-Cholesky factor, stored with the same nz as A.
    IC_row = new int [nz];
    IC_col = new int [nz];
    IC_val = new cuDoubleComplex [nz];

    clcg_incomplete_Cholesky_cuda_full(rowIdxA, colIdxA, A, N, nz, IC_row, IC_col, IC_val);

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));
    cudaMalloc(&d_p, N * sizeof(cuDoubleComplex));
    cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);

    cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);

    cudaMalloc(&d_IC, nz * sizeof(cuDoubleComplex));
    cudaMalloc(&d_rowIdxIC, nz * sizeof(int));
    cudaMalloc(&d_rowPtrIC, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxIC, nz * sizeof(int));

    cudaMemcpy(d_IC, IC_val, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxIC, IC_row, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxIC, IC_col, nz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // Convert matrix L from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxIC, nz, N, d_rowPtrIC, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_IC, N, N, nz, d_rowPtrIC, d_colIdxIC, d_IC, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // NOTE(review): no fill-mode / diag-type attribute is set on smat_IC, so
    // the triangular solves rely on the cuSPARSE defaults - confirm they match
    // the layout produced by clcg_incomplete_Cholesky_cuda_full.

    // Scratch vector, used for the size queries and the analysis calls
    cudaMalloc(&d_t, N * sizeof(cuDoubleComplex));
    cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);

    size_t bufferSize_B = 0;
    cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

    // --- Start of the preconditioning part ---

    cusparseSpSV_createDescr(&descr_L);
    cusparseSpSV_createDescr(&descr_LT);

    size_t bufferSize, bufferSize_L = 0, bufferSize_LT = 0;
    bufferSize = bufferSize_B;
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);
    bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);

    // Three separate workspaces. The buffer handed to cusparseSpSV_analysis is
    // kept by the SpSV descriptor and reused by cusparseSpSV_solve, so it must
    // not double as the SpMV scratch space that AxProduct passes (d_buf).
    //   d_buf   : SpMV workspace (AxProduct)
    //   d_buf2  : SpSV workspace of descr_L
    //   d_bufLT : SpSV workspace of descr_LT (local; this function outlives the solve)
    void *d_bufLT = nullptr;
    cudaMalloc(&d_buf, bufferSize);
    cudaMalloc(&d_buf2, bufferSize);
    cudaMalloc(&d_bufLT, bufferSize);

    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_tmp, dvec_p,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf2);
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, dvec_tmp,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_bufLT);

    // --- End of the preconditioning part ---

    // NOTE(review): self_para is configured here but never handed to the
    // solver in this function, so these settings may not take effect - confirm
    // how parameters are meant to reach MinimizePreconditioned.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    // Zero initial guess
    host_m = new cuDoubleComplex[N];
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    // Preconditioning with incomplete-Cholesky factorization
    MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    if (rowIdxA != nullptr) delete[] rowIdxA;
    if (colIdxA != nullptr) delete[] colIdxA;
    if (A != nullptr) delete[] A;
    if (b != nullptr) delete[] b;
    if (ans_x != nullptr) delete[] ans_x;
    if (IC_row != nullptr) delete[] IC_row;
    if (IC_col != nullptr) delete[] IC_col;
    if (IC_val != nullptr) delete[] IC_val;
    if (host_m != nullptr) delete[] host_m;

    // Free Device memory and descriptors
    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroyDnVec(dvec_p);

    cudaFree(d_buf);
    cudaFree(d_buf2);
    cudaFree(d_bufLT);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_A);
    cudaFree(d_p);
    cudaFree(d_t);

    cudaFree(d_rowIdxIC);
    cudaFree(d_rowPtrIC);
    cudaFree(d_colIdxIC);
    cudaFree(d_IC);

    cusparseDestroySpMat(smat_A);
    cusparseDestroySpMat(smat_IC);
    cusparseSpSV_destroyDescr(descr_L);
    cusparseSpSV_destroyDescr(descr_LT);
    return;
}
int main(int argc, char **argv)
{
std::string inputPath = "data/case_1M_cA";
std::string answerPath = "data/case_1M_cB";
cublasHandle_t cubHandle;
cusparseHandle_t cusHandle;
cublasCreate(&cubHandle);
cusparseCreate(&cusHandle);
sample12 sp;
sp.set_report_interval(0);
sp.solve(inputPath, answerPath, cubHandle, cusHandle);
cublasDestroy(cubHandle);
cusparseDestroy(cusHandle);
return 0;
}

305
src/sample/sample13.cu Normal file
View File

@ -0,0 +1,305 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/solver_cuda.h"
#include "../lib/preconditioner_cuda.h"
// File-scope complex scalars 1+0i and 0+0i, passed by address to the cuSPARSE
// calls in this sample.
cuDoubleComplex one{1.0, 0.0};
cuDoubleComplex zero{0.0, 0.0};
/**
 * @brief Read a sparse complex linear system from a binary file.
 *
 * File layout, as consumed below: int N, int nz, then nz records of
 * (int row, int col, cuDoubleComplex value), then N cuDoubleComplex values of
 * the right-hand side.
 *
 * The four output arrays are allocated with new[] and must be released by the
 * caller with delete[].
 *
 * @param filePath  Path of the binary input file.
 * @param pN        Receives the system size.
 * @param pnz       Receives the number of stored entries.
 * @param cooVal    Receives the entry values (nz elements).
 * @param cooRowIdx Receives the entry row indices (nz elements).
 * @param cooColIdx Receives the entry column indices (nz elements).
 * @param b         Receives the right-hand side (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative size. Arrays allocated before the failure
 *         are not released.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    // Fail loudly instead of allocating from uninitialised sizes: this throws
    // right away when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));
    if (*pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read: negative size in the header of " + filePath);
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    // Entries are stored interleaved: row, column, value.
    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    // An empty right-hand side has nothing to read.
    if (*pN > 0)
    {
        in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Read a reference solution vector from a binary file.
 *
 * File layout, as consumed below: int N followed by N cuDoubleComplex values.
 * The output array is allocated with new[] and must be released by the caller
 * with delete[].
 *
 * @param filePath Path of the binary answer file.
 * @param pN       Receives the vector length.
 * @param x        Receives the vector (N elements).
 *
 * @throws std::ios_base::failure if the file cannot be opened, is truncated,
 *         or announces a negative length.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    // Throws immediately when the open failed, and later on any short read.
    in.exceptions(std::ios::failbit | std::ios::badbit);

    in.read((char*)pN, sizeof(int));
    if (*pN < 0)
    {
        throw std::ios_base::failure("readAnswer: negative size in the header of " + filePath);
    }

    *x = new cuDoubleComplex[*pN]{};
    if (*pN > 0)
    {
        in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));
    }
    return;
}
/**
 * @brief Error measure between two complex vectors.
 *
 * Computes sqrt(sum_i |d_i|^2) / n, where d_i = clcg_Zdiff(a[i], b[i])
 * (presumably a[i] - b[i] - confirm against the library).
 *
 * @param a First vector (n elements).
 * @param b Second vector (n elements).
 * @param n Number of elements.
 * @return The measure above, or 0 when n <= 0 (avoids 0/0 and an
 *         out-of-range loop for negative n).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sumSq = 0.0;
    // Signed index: the original size_t index was compared against an int.
    for (int i = 0; i < n; i++)
    {
        const cuDoubleComplex d = clcg_Zdiff(a[i], b[i]);
        sumSq += (d.x*d.x + d.y*d.y);
    }
    return sqrt(sumSq)/n;
}
class sample13 : public CLCG_CUDA_Solver
{
public:
sample13(){}
virtual ~sample13(){}
void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
const int n_size, const int nz_size, cusparseOperation_t oper_t)
{
// Calculate the product of A*x
cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_tuf);
return;
}
void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
const int n_size, const int nz_size, cusparseOperation_t oper_t)
{
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
return;
}
private:
int N, nz, lnz;
int *rowIdxA, *colIdxA;
cuDoubleComplex *A, *b;
cuDoubleComplex *ans_x;
int *L_row, *L_col;
cuDoubleComplex *L_val;
void *d_tuf, *d_tuf2;
cusparseSpMatDescr_t smat_A;
cusparseSpMatDescr_t smat_L;
cusparseSpSVDescr_t descr_L, descr_LT;
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
cuDoubleComplex *d_A;
cuDoubleComplex *d_t;
cuDoubleComplex *d_p;
cusparseDnVecDescr_t dvec_p;
int *d_rowIdxL; // COO
int *d_rowPtrL; // CSR
int *d_colIdxL;
cuDoubleComplex *d_L;
cuDoubleComplex *host_m;
cusparseDnVecDescr_t dvec_tmp;
};
void sample13::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
readAnswer(answerPath, &N, &ans_x);
clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);
std::clog << "N = " << N << std::endl;
std::clog << "nz = " << nz << std::endl;
std::clog << "lnz = " << lnz << std::endl;
L_row = new int [lnz];
L_col = new int [lnz];
L_val = new cuDoubleComplex [lnz];
clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, A, N, nz, lnz, L_row, L_col, L_val);
/*
for (size_t i = 0; i < lnz; i++)
{
std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
}
*/
// Allocate GPU memory & copy matrix/vector to device
cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
cudaMalloc(&d_rowIdxA, nz * sizeof(int));
cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
cudaMalloc(&d_colIdxA, nz * sizeof(int));
cudaMalloc(&d_t, N * sizeof(cuDoubleComplex));
cudaMalloc(&d_p, N * sizeof(cuDoubleComplex));
cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);
cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
cudaMalloc(&d_L, lnz * sizeof(cuDoubleComplex));
cudaMalloc(&d_rowIdxL, lnz * sizeof(int));
cudaMalloc(&d_rowPtrL, (N + 1) * sizeof(int));
cudaMalloc(&d_colIdxL, lnz * sizeof(int));
cudaMemcpy(d_L, L_val, lnz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cudaMemcpy(d_rowIdxL, L_row, lnz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_colIdxL, L_col, lnz * sizeof(int), cudaMemcpyHostToDevice);
// Convert matrix A from COO format to CSR format
cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
// Create sparse matrix
cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
// Convert matrix L from COO format to CSR format
cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);
// Create sparse matrix
cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
// Specify Lower fill mode.
cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
// Specify Non-Unit diagonal type.
cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
// This is just used to get bufferSize;
cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);
size_t bufferSize_B;
cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);
// --- Start of the preconditioning part ---
cusparseSpSV_createDescr(&descr_L);
cusparseSpSV_createDescr(&descr_LT);
size_t bufferSize, bufferSize_L, bufferSize_LT;
bufferSize = bufferSize_B;
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);
bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);
cudaMalloc(&d_tuf, bufferSize);
cudaMalloc(&d_tuf2, bufferSize);
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_tuf);
cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_tuf2);
// --- End of the preconditioning part ---
// Declare an initial solution
clcg_para self_para = clcg_default_parameters();
self_para.epsilon = 1e-6;
self_para.abs_diff = 0;
// Preconditioning with incomplete-chelosky factorization
host_m = clcg_malloc_cuda(N);
clcg_vecset_cuda(host_m, zero, N);
MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);
std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;
// Free Host memory
if (rowIdxA != nullptr) delete[] rowIdxA;
if (colIdxA != nullptr) delete[] colIdxA;
if (A != nullptr) delete[] A;
if (b != nullptr) delete[] b;
if (ans_x != nullptr) delete[] ans_x;
if (L_row != nullptr) delete[] L_row;
if (L_col != nullptr) delete[] L_col;
if (L_val != nullptr) delete[] L_val;
clcg_free_cuda(host_m);
cusparseDestroyDnVec(dvec_tmp);
cusparseDestroyDnVec(dvec_p);
cudaFree(d_tuf);
cudaFree(d_tuf2);
cudaFree(d_rowIdxA);
cudaFree(d_rowPtrA);
cudaFree(d_colIdxA);
cudaFree(d_A);
cudaFree(d_t);
cudaFree(d_p);
cudaFree(d_rowIdxL);
cudaFree(d_rowPtrL);
cudaFree(d_colIdxL);
cudaFree(d_L);
cusparseDestroySpMat(smat_A);
cusparseDestroySpMat(smat_L);
cusparseSpSV_destroyDescr(descr_L);
cusparseSpSV_destroyDescr(descr_LT);
return;
}
/**
 * Entry point of the sample: creates the cuBLAS and cuSPARSE handles, runs
 * sample13::solve on data/case_10K_cA and data/case_10K_cB, then destroys
 * the handles.
 *
 * @return 0 on success, 1 if either library handle could not be created.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_10K_cA";
    std::string answerPath = "data/case_10K_cB";

    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    // Every later library call needs valid handles; stop instead of
    // continuing with an uninitialized one.
    if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuBLAS handle" << std::endl;
        return 1;
    }
    if (cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuSPARSE handle" << std::endl;
        cublasDestroy(cubHandle);
        return 1;
    }

    sample13 sp;
    sp.set_report_interval(0);
    sp.solve(inputPath, answerPath, cubHandle, cusHandle);

    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}

327
src/sample/sample14.cu Normal file
View File

@ -0,0 +1,327 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/solver_cuda.h"
#include "../lib/preconditioner_cuda.h"
// Declare as global variables
// Single-precision complex constants (1,0) and (0,0). Their addresses are
// passed as the scalar arguments of the cusparseSpMV / cusparseSpSV calls
// in this file.
cuComplex one = {1.0, 0.0};
cuComplex zero = {0.0, 0.0};
/**
 * Read a complex sparse linear system from a binary file.
 *
 * Data is read in this order: int N, int nz, then nz records of
 * (int row, int column, cuDoubleComplex value), then N cuDoubleComplex
 * right-hand-side entries.
 *
 * The four output arrays are allocated with new[] and must be released by
 * the caller with delete[]. All outputs are reset (0 / nullptr) before any
 * I/O happens, so after a failure every output pointer is either nullptr or
 * an array that is still safe to delete[].
 *
 * @param filePath   path of the binary input file
 * @param pN         receives the number of right-hand-side entries
 * @param pnz        receives the number of stored matrix entries
 * @param cooVal     receives the nz matrix values
 * @param cooRowIdx  receives the nz row indices
 * @param cooColIdx  receives the nz column indices
 * @param b          receives the N right-hand-side values
 * @throws std::ios_base::failure if the file cannot be opened, ends before
 *         all data has been read, or stores a negative size.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    *pN = 0;
    *pnz = 0;
    *cooVal = nullptr;
    *cooRowIdx = nullptr;
    *cooColIdx = nullptr;
    *b = nullptr;

    // Let the stream report open failures and short reads by throwing
    // instead of silently leaving the outputs untouched.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    in.open(filePath, std::ios::binary);

    int n = 0, nnz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nnz, sizeof(int));
    if (n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: negative size stored in " + filePath);
    }

    *cooVal = new cuDoubleComplex[nnz]{};
    *cooRowIdx = new int[nnz]{};
    *cooColIdx = new int[nnz]{};
    *b = new cuDoubleComplex[n]{};
    *pN = n;
    *pnz = nnz;

    for (int i = 0; i < nnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    in.read((char*)(*b), sizeof(cuDoubleComplex)*n);
    return;
}
/**
 * Read a reference solution vector from a binary file.
 *
 * Data is read in this order: int N, then N cuDoubleComplex values.
 * The array is allocated with new[]; the caller releases it with delete[].
 * Note that *pN is overwritten with the size stored in this file.
 *
 * @param filePath  path of the binary answer file
 * @param pN        receives the number of entries
 * @param x         receives the N values
 * @throws std::ios_base::failure if the file cannot be opened, ends before
 *         all data has been read, or stores a negative size.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    *pN = 0;
    *x = nullptr;

    // Open failures and short reads are reported by exception.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    in.open(filePath, std::ios::binary);

    int n = 0;
    in.read((char*)&n, sizeof(int));
    if (n < 0)
    {
        throw std::ios_base::failure("readAnswer: negative size stored in " + filePath);
    }

    *x = new cuDoubleComplex[n]{};
    *pN = n;
    in.read((char*)(*x), sizeof(cuDoubleComplex)*n);
    return;
}
/**
 * Error measure between two single-precision complex vectors.
 *
 * For every index the result of clcg_Cdiff(a[i], b[i]) (presumably the
 * difference a[i] - b[i]; see the library header) contributes x*x + y*y to
 * a running sum. The function returns sqrt(sum) / n.
 *
 * @param a  first vector, n entries
 * @param b  second vector, n entries
 * @param n  number of entries
 */
float avg_error(cuComplex *a, cuComplex *b, int n)
{
    float sum_sq = 0.0;
    for (size_t k = 0; k < n; ++k)
    {
        const cuComplex d = clcg_Cdiff(a[k], b[k]);
        sum_sq += (d.x*d.x + d.y*d.y);
    }
    return sqrt(sum_sq)/n;
}
/**
 * Single-precision complex sample solver built on CLCG_CUDAF_Solver.
 *
 * solve() loads the system, builds the cuSPARSE descriptors stored in the
 * private members and calls MinimizePreconditioned(); AxProduct() and
 * MxProduct() are the matrix and preconditioner products that use those
 * descriptors.
 *
 * All members carry default initializers so that an object whose solve()
 * has not run (or stopped early) never exposes indeterminate pointers.
 */
class sample14 : public CLCG_CUDAF_Solver
{
public:
    sample14(){}
    virtual ~sample14(){}

    void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

    // Calculate the product of A*x (or op(A)*x for the requested oper_t)
    // with cusparseSpMV, writing the result to prod_Ax.
    // NOTE(review): d_buf is also the workspace given to
    // cusparseSpSV_analysis for descr_L in solve(); confirm against the
    // cuSPARSE documentation that sharing it with SpMV is safe.
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
        return;
    }

    // Apply the preconditioner with two triangular solves on smat_L:
    // first the non-transposed solve from x into dvec_p, then the
    // transposed solve from dvec_p into prod_Ax. oper_t is not used here.
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
            CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);
        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
            CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
        return;
    }

private:
    // Sizes: N and nz come from the input files, lnz from the
    // incomplete-Cholesky size query in solve().
    int N = 0, nz = 0, lnz = 0;

    // Host copy of the system as read from file (double precision)
    int *rowIdxA = nullptr, *colIdxA = nullptr;
    cuDoubleComplex *A = nullptr, *b = nullptr;
    cuDoubleComplex *ans_x = nullptr;

    // Host single-precision copies filled in solve()
    cuComplex *Af = nullptr, *bf = nullptr;
    cuComplex *ans_xf = nullptr;

    // Host COO arrays of the factor L
    int *L_row = nullptr, *L_col = nullptr;
    cuComplex *L_val = nullptr;

    // Device workspaces: d_buf for SpMV and the descr_L analysis,
    // d_buf2 for the descr_LT analysis
    void *d_buf = nullptr, *d_buf2 = nullptr;

    cusparseSpMatDescr_t smat_A = nullptr;
    cusparseSpMatDescr_t smat_L = nullptr;
    cusparseSpSVDescr_t descr_L = nullptr, descr_LT = nullptr;

    int *d_rowIdxA = nullptr; // COO
    int *d_rowPtrA = nullptr; // CSR
    int *d_colIdxA = nullptr;
    cuComplex *d_A = nullptr;
    cuComplex *d_t = nullptr;
    cuComplex *d_p = nullptr;
    cusparseDnVecDescr_t dvec_p = nullptr;

    int *d_rowIdxL = nullptr; // COO
    int *d_rowPtrL = nullptr; // CSR
    int *d_colIdxL = nullptr;
    cuComplex *d_L = nullptr;

    // Host solution vector handed to MinimizePreconditioned()
    cuComplex *host_m = nullptr;
    cusparseDnVecDescr_t dvec_tmp = nullptr;
};
/**
 * Load the linear system and the reference answer, build an incomplete
 * Cholesky preconditioner, solve with the preconditioned solver in single
 * precision and print the error against the reference answer.
 *
 * @param inputPath   binary file with the matrix (COO) and right-hand side
 * @param answerPath  binary file with the reference solution
 * @param cub_handle  cuBLAS handle forwarded to MinimizePreconditioned()
 * @param cus_handle  cuSPARSE handle used for all sparse operations
 */
void sample14::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    // Note: readAnswer() overwrites N with the size stored in the answer file.
    readAnswer(answerPath, &N, &ans_x);

    clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;
    std::clog << "lnz = " << lnz << std::endl;

    // Convert the system from double to single precision.
    // A holds nz entries while b and ans_x hold only N entries, so the
    // vectors are sized and converted separately from the matrix values
    // (looping all three up to nz would read past the end of b and ans_x).
    Af = new cuComplex [nz];
    bf = new cuComplex [N];
    ans_xf = new cuComplex [N];

    for (int i = 0; i < nz; i++)
    {
        Af[i].x = static_cast<float>(A[i].x); Af[i].y = static_cast<float>(A[i].y);
    }

    for (int i = 0; i < N; i++)
    {
        bf[i].x = static_cast<float>(b[i].x); bf[i].y = static_cast<float>(b[i].y);
        ans_xf[i].x = static_cast<float>(ans_x[i].x); ans_xf[i].y = static_cast<float>(ans_x[i].y);
    }

    L_row = new int [lnz];
    L_col = new int [lnz];
    L_val = new cuComplex [lnz];

    clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, Af, N, nz, lnz, L_row, L_col, L_val);
/*
    for (size_t i = 0; i < lnz; i++)
    {
        std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
    }
*/
    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(cuComplex));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));
    cudaMalloc(&d_t, N * sizeof(cuComplex));
    cudaMalloc(&d_p, N * sizeof(cuComplex));
    cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_32F);

    cudaMemcpy(d_A, Af, nz * sizeof(cuComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_t, bf, N * sizeof(cuComplex), cudaMemcpyHostToDevice);

    cudaMalloc(&d_L, lnz * sizeof(cuComplex));
    cudaMalloc(&d_rowIdxL, lnz * sizeof(int));
    cudaMalloc(&d_rowPtrL, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxL, lnz * sizeof(int));

    cudaMemcpy(d_L, L_val, lnz * sizeof(cuComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxL, L_row, lnz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxL, L_col, lnz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);

    // Convert matrix L from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);

    // Specify Lower fill mode.
    cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));

    // Specify Non-Unit diagonal type.
    cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));

    // This is just used to get bufferSize;
    cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_32F);

    size_t bufferSize_B = 0;
    cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

    // --- Start of the preconditioning part ---
    cusparseSpSV_createDescr(&descr_L);
    cusparseSpSV_createDescr(&descr_LT);

    size_t bufferSize = 0, bufferSize_L = 0, bufferSize_LT = 0;
    bufferSize = bufferSize_B;
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);

    // Both workspaces get the largest of the three requested sizes.
    bufferSize = max(max(bufferSize, bufferSize_L), bufferSize_LT);
    cudaMalloc(&d_buf, bufferSize);
    cudaMalloc(&d_buf2, bufferSize);

    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
        CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf);
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
        CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_buf2);
    // --- End of the preconditioning part ---

    // NOTE(review): self_para is configured here but never handed to the
    // solver inside this function -- confirm whether a setter call is missing.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    // Declare an initial solution (all zeros)
    host_m = new cuComplex[N];
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    // Preconditioning with incomplete-Cholesky factorization
    MinimizePreconditioned(cub_handle, cus_handle, host_m, bf, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_xf, N) << std::endl;

    // Free Host memory
    if (rowIdxA != nullptr) delete[] rowIdxA;
    if (colIdxA != nullptr) delete[] colIdxA;
    if (A != nullptr) delete[] A;
    if (b != nullptr) delete[] b;
    if (ans_x != nullptr) delete[] ans_x;
    if (Af != nullptr) delete[] Af;
    if (bf != nullptr) delete[] bf;
    if (ans_xf != nullptr) delete[] ans_xf;
    if (L_row != nullptr) delete[] L_row;
    if (L_col != nullptr) delete[] L_col;
    if (L_val != nullptr) delete[] L_val;
    if (host_m != nullptr) delete[] host_m;

    // Free device memory and cuSPARSE descriptors
    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroyDnVec(dvec_p);

    cudaFree(d_buf);
    cudaFree(d_buf2);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_A);
    cudaFree(d_t);
    cudaFree(d_p);
    cudaFree(d_rowIdxL);
    cudaFree(d_rowPtrL);
    cudaFree(d_colIdxL);
    cudaFree(d_L);

    cusparseDestroySpMat(smat_A);
    cusparseDestroySpMat(smat_L);
    cusparseSpSV_destroyDescr(descr_L);
    cusparseSpSV_destroyDescr(descr_LT);
    return;
}
/**
 * Entry point of the sample: creates the cuBLAS and cuSPARSE handles, runs
 * sample14::solve on data/case_1K_cA and data/case_1K_cB, then destroys
 * the handles.
 *
 * @return 0 on success, 1 if either library handle could not be created.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_1K_cA";
    std::string answerPath = "data/case_1K_cB";

    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    // Every later library call needs valid handles; stop instead of
    // continuing with an uninitialized one.
    if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuBLAS handle" << std::endl;
        return 1;
    }
    if (cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuSPARSE handle" << std::endl;
        cublasDestroy(cubHandle);
        return 1;
    }

    sample14 sp;
    sp.set_report_interval(100);
    sp.solve(inputPath, answerPath, cubHandle, cusHandle);

    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}

223
src/sample/sample15.cu Normal file
View File

@ -0,0 +1,223 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/lcg_cuda.h"
/**
 * Read a real sparse linear system from a binary file.
 *
 * Data is read in this order: int N, int nz, then nz records of
 * (int row, int column, double value), then N doubles of the right-hand
 * side.
 *
 * The four output arrays are allocated with new[] and must be released by
 * the caller with delete[]. All outputs are reset (0 / nullptr) before any
 * I/O happens, so after a failure every output pointer is either nullptr or
 * an array that is still safe to delete[].
 *
 * @param filePath   path of the binary input file
 * @param pN         receives the number of right-hand-side entries
 * @param pnz        receives the number of stored matrix entries
 * @param cooVal     receives the nz matrix values
 * @param cooRowIdx  receives the nz row indices
 * @param cooColIdx  receives the nz column indices
 * @param b          receives the N right-hand-side values
 * @throws std::ios_base::failure if the file cannot be opened, ends before
 *         all data has been read, or stores a negative size.
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
    int **cooRowIdx, int **cooColIdx, double **b)
{
    *pN = 0;
    *pnz = 0;
    *cooVal = nullptr;
    *cooRowIdx = nullptr;
    *cooColIdx = nullptr;
    *b = nullptr;

    // Let the stream report open failures and short reads by throwing
    // instead of silently leaving the outputs untouched.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    in.open(filePath, std::ios::binary);

    int n = 0, nnz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nnz, sizeof(int));
    if (n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: negative size stored in " + filePath);
    }

    *cooVal = new double[nnz]{};
    *cooRowIdx = new int[nnz]{};
    *cooColIdx = new int[nnz]{};
    *b = new double[n]{};
    *pN = n;
    *pnz = nnz;

    for (int i = 0; i < nnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(double));
    }

    in.read((char*)(*b), sizeof(double)*n);
    return;
}
/**
 * Read a reference solution vector from a binary file.
 *
 * Data is read in this order: int N, then N doubles.
 * The array is allocated with new[]; the caller releases it with delete[].
 * Note that *pN is overwritten with the size stored in this file.
 *
 * @param filePath  path of the binary answer file
 * @param pN        receives the number of entries
 * @param x         receives the N values
 * @throws std::ios_base::failure if the file cannot be opened, ends before
 *         all data has been read, or stores a negative size.
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    *pN = 0;
    *x = nullptr;

    // Open failures and short reads are reported by exception.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    in.open(filePath, std::ios::binary);

    int n = 0;
    in.read((char*)&n, sizeof(int));
    if (n < 0)
    {
        throw std::ios_base::failure("readAnswer: negative size stored in " + filePath);
    }

    *x = new double[n]{};
    *pN = n;
    in.read((char*)(*x), sizeof(double)*n);
    return;
}
/**
 * Error measure between two real vectors: the square root of the summed
 * squared element differences, divided by n.
 *
 * @param a  first vector, n entries
 * @param b  second vector, n entries
 * @param n  number of entries
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    lcg_float sum_sq = 0.0;
    for (size_t k = 0; k < n; ++k)
    {
        const lcg_float d = a[k] - b[k];
        sum_sq += d*d;
    }
    return sqrt(sum_sq)/n;
}
// Declare as global variables
// They are file-scope because the cudaAx() callback below reads them and
// main() sets them up.
// Scalars whose addresses are passed to cusparseSpMV in cudaAx()
lcg_float one = 1.0;
lcg_float zero = 0.0;
// Device workspace passed to cusparseSpMV as its external buffer
void *d_buf;
// Descriptor of the CSR matrix A, created in main()
cusparseSpMatDescr_t smat_A;
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
// Device copy of the matrix values
double *d_A;
// Legacy matrix descriptor and csric02 info object; main() only uses them
// for the cusparseDcsric02_bufferSize query
cusparseMatDescr_t descr_A = 0;
csric02Info_t icinfo_A = 0;
/**
 * Callback that calculates the product of A*x for the solver.
 *
 * Issues one cusparseSpMV on the file-scope matrix smat_A with the scalars
 * one and zero, writing the result to prod_Ax. instance, cub_handle, n_size
 * and nz_size are not used.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    const cusparseOperation_t op = CUSPARSE_OPERATION_NON_TRANSPOSE;
    const cusparseSpMVAlg_t alg = CUSPARSE_SPMV_ALG_DEFAULT;

    cusparseSpMV(cus_handle, op, &one, smat_A, x, &zero, prod_Ax, CUDA_R_64F, alg, d_buf);
}
/**
 * Progress callback: prints the iteration count and the convergence value
 * only once the convergence value is no larger than param->epsilon.
 *
 * @return always 0
 */
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool reached = (converge <= param->epsilon);
    if (!reached)
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    return 0;
}
/**
 * Entry point of the sample: loads data/case_1M_A and data/case_1M_B,
 * uploads the matrix to the device as CSR, and solves the system twice with
 * lcg_solver_cuda (LCG_CG, then LCG_CGS), printing the error against the
 * reference answer after each solve.
 *
 * @return 0 on success, 1 if a library handle could not be created.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_1M_A";
    std::string answerPath = "data/case_1M_B";

    // Create handles first; every later library call needs valid handles.
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;
    if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuBLAS handle" << std::endl;
        return 1;
    }
    if (cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuSPARSE handle" << std::endl;
        cublasDestroy(cubHandle);
        return 1;
    }

    int N;
    int nz;
    double *A;
    int *rowIdxA;
    int *colIdxA;
    double *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    double *ans_x;
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(double));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));

    cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);

    // create descriptor for matrix A
    cusparseCreateMatDescr(&descr_A);

    // initialize properties of matrix A
    cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);

    int bufferSize = 0;
    cusparseCreateCsric02Info(&icinfo_A);
    cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
        d_colIdxA, icinfo_A, &bufferSize);

    // d_buf is what cudaAx() passes to cusparseSpMV as its external buffer,
    // so it must be at least as large as cusparseSpMV_bufferSize reports.
    // The query only needs vector descriptors of the right length; a
    // temporary device allocation provides them.
    size_t spmvBufferSize = 0;
    double *d_probe = nullptr;
    cusparseDnVecDescr_t dvec_x = nullptr;
    cusparseDnVecDescr_t dvec_y = nullptr;
    cudaMalloc(&d_probe, 2 * (size_t)N * sizeof(double));
    cusparseCreateDnVec(&dvec_x, N, d_probe, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_y, N, d_probe + N, CUDA_R_64F);
    cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_x, &zero, dvec_y, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, &spmvBufferSize);
    cusparseDestroyDnVec(dvec_x);
    cusparseDestroyDnVec(dvec_y);
    cudaFree(d_probe);

    // Keep the previously used size as a lower bound.
    size_t bufferBytes = bufferSize > 0 ? (size_t)bufferSize : 0;
    if (spmvBufferSize > bufferBytes) bufferBytes = spmvBufferSize;
    cudaMalloc(&d_buf, bufferBytes);

    // Solver parameters
    lcg_para self_para = lcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    int ret;
    // Declare an initial solution
    double *host_m = new double[N];

    // Solve with CG
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Solve with CGS
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Free Device memory
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);

    cusparseDestroySpMat(smat_A);
    cudaFree(d_buf);

    cusparseDestroyMatDescr(descr_A);
    cusparseDestroyCsric02Info(icinfo_A);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);

    return 0;
}

170
src/sample/sample2.cpp Normal file
View File

@ -0,0 +1,170 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "iostream"
#include "random"
#include "../lib/solver.h"
#define M 1000
#define N 800
// Fold sqrt((a[i]-b[i])^2) over the first `size` entries with lcg_max,
// starting from -1; the start value is returned unchanged when size <= 0.
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float largest = -1;
    for (int k = 0; k < size; ++k)
    {
        const lcg_float delta = a[k] - b[k];
        largest = lcg_max(sqrt(delta*delta), largest);
    }
    return largest;
}
// Test problem built on LCG_Solver: holds an M-by-N kernel matrix and
// supplies the products the solver needs.
class TESTFUNC : public LCG_Solver
{
public:
    TESTFUNC();
    ~TESTFUNC();

    // Compute the B term (right-hand side) of the conjugate gradient system
    void cal_partb(lcg_float *B, const lcg_float *x);

    // Ax product used by the conjugate gradient solver: two successive
    // lcg_matvec calls with the kernel, first MatNormal into tmp_arr and
    // then MatTranspose into b.
    void AxProduct(const lcg_float* a, lcg_float* b, const int num)
    {
        lcg_matvec(kernel, a, tmp_arr, M, num, MatNormal);
        lcg_matvec(kernel, tmp_arr, b, M, num, MatTranspose);
    }

    // Preconditioner product: element-wise b[k] = p[k] * a[k]
    void MxProduct(const lcg_float* a, lcg_float* b, const int num)
    {
        for (size_t k = 0; k < num; ++k)
        {
            b[k] = p[k]*a[k];
        }
    }

private:
    // Plain two-dimensional array used as the kernel matrix
    lcg_float **kernel;
    // Array for intermediate results
    lcg_float *tmp_arr;
    // Preconditioning matrix (stored as a vector of diagonal entries)
    lcg_float *p;
};
// Allocate the kernel, the scratch vector and the preconditioner, fill the
// kernel through lcg_vecrnd with the bounds -1.0 and 1.0, and set each
// p[col] to the reciprocal of the sum of squares of kernel column `col`.
TESTFUNC::TESTFUNC()
{
    kernel = lcg_malloc(M, N);
    tmp_arr = lcg_malloc(M);
    p = lcg_malloc(N);

    lcg_vecrnd(kernel, -1.0, 1.0, M, N);
    lcg_vecset(p, 1.0, N);

    for (size_t col = 0; col < N; ++col)
    {
        lcg_float col_sq = 0.0;
        for (size_t row = 0; row < M; ++row)
        {
            col_sq += kernel[row][col]*kernel[row][col];
        }
        p[col] = 1.0/col_sq;
    }
}
// Release the three arrays allocated in the constructor.
TESTFUNC::~TESTFUNC()
{
    lcg_free(p);
    lcg_free(tmp_arr);
    lcg_free(kernel, M);
}
// Build the B term from x with the same two lcg_matvec calls AxProduct
// uses, but with the full column count N.
void TESTFUNC::cal_partb(lcg_float *B, const lcg_float *x)
{
    // first pass: kernel and x into tmp_arr (MatNormal)
    lcg_matvec(kernel, x, tmp_arr, M, N, MatNormal);
    // second pass: kernel and tmp_arr into B (MatTranspose)
    lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
    return;
}
int main(int argc, char const *argv[])
{
// 生成一组正演解
double *fm = lcg_malloc(N);
lcg_vecrnd(fm, 1.0, 2.0, N);
TESTFUNC test;
// 计算共轭梯度B项
double *B = lcg_malloc(N);
test.cal_partb(B, fm);
/********************准备工作完成************************/
lcg_para self_para = lcg_default_parameters();
self_para.epsilon = 1e-6;
self_para.abs_diff = 0;
test.set_lcg_parameter(self_para);
// 声明一组解
lcg_float *m = lcg_malloc(N);
lcg_vecset(m, 0.0, N);
// 约束解的范围
lcg_float *low = lcg_malloc(N);
lcg_float *hig = lcg_malloc(N);
lcg_vecset(low, 1.0, N);
lcg_vecset(hig, 2.0, N);
test.Minimize(m, B, N, LCG_CG);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.MinimizePreconditioned(m, B, N);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.Minimize(m, B, N, LCG_CGS);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.Minimize(m, B, N, LCG_BICGSTAB);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.Minimize(m, B, N, LCG_BICGSTAB2);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.MinimizeConstrained(m, B, low, hig, N, LCG_PG);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_vecset(m, 0.0, N);
test.MinimizeConstrained(m, B, low, hig, N, LCG_SPG);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
lcg_free(fm);
lcg_free(B);
lcg_free(m);
lcg_free(low);
lcg_free(hig);
return 0;
}

129
src/sample/sample3.cpp Normal file
View File

@ -0,0 +1,129 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "iostream"
#include "../lib/clcg.h"
#define N 100
// Fold clcg_module of the element differences a[i]-b[i] over the first
// `size` entries with lcg_max, starting from -1; the start value is
// returned unchanged when size <= 0.
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    for (int k = 0; k < size; ++k)
    {
        lcg_complex delta = a[k] - b[k];
        largest = lcg_max(clcg_module(&delta), largest);
    }
    return largest;
}
// A plain two-dimensional array serves as the kernel matrix.
// It is allocated and filled in main() and read by CalAx().
lcg_complex **kernel;
// Compute the product of the kernel matrix and a vector.
// Forwards to clcg_matvec with N rows and x_size columns; instance is unused.
void CalAx(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
    const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
    const int n_rows = N;
    clcg_matvec(kernel, x, prod_Ax, n_rows, x_size, layout, conjugate);
}
// Progress monitor for the conjugate gradient solver: rewrites the current
// terminal line with the iteration count and convergence value.
// Always returns 0.
int Prog(void* instance, const lcg_complex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int k)
{
    std::clog << "\rIteration-times: " << k;
    std::clog << "\tconvergence: " << converge;
    return 0;
}
int main(int argc, char const *argv[])
{
srand(time(0));
kernel = clcg_malloc(N, N);
clcg_vecrnd(kernel, lcg_complex(-1.0, -1.0), lcg_complex(1.0, 1.0), N, N);
// 设置核矩阵为一个对称阵
for (int i = 0; i < N; i++)
{
for (int j = i; j < N; j++)
{
kernel[j][i] = kernel[i][j];
}
}
// 生成一组正演解
lcg_complex *fm = clcg_malloc(N);
clcg_vecrnd(fm, lcg_complex(1.0, 1.0), lcg_complex(2.0, 2.0), N);
// 计算共轭梯度B项
lcg_complex *B = clcg_malloc(N);
clcg_matvec(kernel, fm, B, N, N, MatNormal, NonConjugate);
/********************准备工作完成************************/
clcg_para self_para = clcg_default_parameters();
self_para.abs_diff = 0;
self_para.epsilon = 1e-8;
// 声明一组解
lcg_complex *m = clcg_malloc(N);
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
int ret;
std::clog << "solver: bicg" << std::endl;
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG);
std::clog << std::endl; clcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
std::clog << "solver: bicg-symmetric" << std::endl;
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG_SYM);
std::clog << std::endl; clcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
std::clog << "solver: cgs" << std::endl;
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_CGS);
std::clog << std::endl; clcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
std::clog << "solver: bicgstab" << std::endl;
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICGSTAB);
std::clog << std::endl; clcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
std::clog << "solver: tfqmr" << std::endl;
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_TFQMR);
std::clog << std::endl; clcg_error_str(ret);
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
clcg_free(kernel, N);
clcg_free(fm);
clcg_free(B);
clcg_free(m);
return 0;
}

199
src/sample/sample4.cpp Normal file
View File

@ -0,0 +1,199 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "../lib/solver.h"
#include "ctime"
#include "random"
#include "iostream"
#include "fstream"
#include "iomanip"
#include "complex"
/**
 * Read a complex sparse linear system from a binary file.
 *
 * Data is read in this order: int N, int nz, then nz records of
 * (int row, int column, std::complex<double> value), then N
 * std::complex<double> right-hand-side entries. Each complex value is read
 * into a std::complex<double> and copied into lcg_complex through its
 * real()/imag() setters.
 *
 * The four output arrays are allocated with new[] and must be released by
 * the caller with delete[]. All outputs are reset (0 / nullptr) before any
 * I/O happens, so after a failure every output pointer is either nullptr or
 * an array that is still safe to delete[].
 *
 * @throws std::ios_base::failure if the file cannot be opened, ends before
 *         all data has been read, or stores a negative size.
 */
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
    int **cooRowIdx, int **cooColIdx, lcg_complex **b)
{
    *pN = 0;
    *pnz = 0;
    *cooVal = nullptr;
    *cooRowIdx = nullptr;
    *cooColIdx = nullptr;
    *b = nullptr;

    // Let the stream report open failures and short reads by throwing
    // instead of silently leaving the outputs untouched.
    std::ifstream in;
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    in.open(filePath, std::ios::binary);

    int n = 0, nnz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nnz, sizeof(int));
    if (n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: negative size stored in " + filePath);
    }

    *cooVal = new lcg_complex[nnz]{};
    *cooRowIdx = new int[nnz]{};
    *cooColIdx = new int[nnz]{};
    *b = new lcg_complex[n]{};
    *pN = n;
    *pnz = nnz;

    std::complex<double> std_c;
    for (int i = 0; i < nnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&std_c, sizeof(std_c));
        (*cooVal)[i].real(std_c.real());
        (*cooVal)[i].imag(std_c.imag());
    }

    for (int i = 0; i < n; i++)
    {
        in.read((char*)&std_c, sizeof(std_c));
        (*b)[i].real(std_c.real());
        (*b)[i].imag(std_c.imag());
    }
    return;
}
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
{
std::ifstream in(filePath, std::ios::binary);
in.read((char*)pN, sizeof(int));
*x = new lcg_complex[*pN]{};
std::complex<double> std_c;
for (size_t i = 0; i < *pN; i++)
{
in.read((char*)&std_c, sizeof(std_c));
(*x)[i].real(std_c.real());
(*x)[i].imag(std_c.imag());
}
return;
}
// Fold clcg_module of the element differences a[i]-b[i] over the first
// `size` entries with lcg_max, starting from -1; the start value is
// returned unchanged when size <= 0.
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    for (int k = 0; k < size; ++k)
    {
        lcg_complex delta = a[k] - b[k];
        largest = lcg_max(clcg_module(&delta), largest);
    }
    return largest;
}
// Test problem built on CLCG_Solver: stores the system matrix as a dense
// n_size-by-n_size array and supplies the Ax product the solver needs.
class TESTFUNC : public CLCG_Solver
{
public:
    TESTFUNC(int n);
    ~TESTFUNC();

    // Fill the dense kernel from COO triplets
    void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);

    // Ax product used by the conjugate gradient solver; forwards to
    // clcg_matvec with x_size used for both matrix dimensions.
    void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax, const int x_size,
        lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        clcg_matvec(kernel, x, prod_Ax, x_size, x_size, layout, conjugate);
    }

private:
    // Plain two-dimensional array used as the kernel matrix
    lcg_complex **kernel;
    // Edge length of the square kernel
    int n_size;
};
// Allocate an n-by-n kernel and remember its size. Members are initialized
// in declaration order (kernel, then n_size).
TESTFUNC::TESTFUNC(int n)
    : kernel(clcg_malloc(n, n)), n_size(n)
{
}
// Release the kernel matrix, passing the same n_size that was given to
// clcg_malloc in the constructor.
TESTFUNC::~TESTFUNC()
{
clcg_free(kernel, n_size);
}
/**
 * Fill the dense kernel matrix from COO triplets.
 *
 * The kernel is first reset to zero, then every triplet is written to
 * kernel[row][col]; a later triplet with the same position overwrites an
 * earlier one. Triplets whose row or column lies outside [0, n_size) are
 * reported on std::cerr and skipped instead of being written out of bounds.
 *
 * @param row_id   row indices, nz_size entries
 * @param col_id   column indices, nz_size entries
 * @param val      values, nz_size entries
 * @param nz_size  number of triplets
 */
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
{
    // Start from an all-zero dense matrix.
    for (int i = 0; i < n_size; i++)
    {
        for (int j = 0; j < n_size; j++)
        {
            kernel[i][j] = lcg_complex(0.0, 0.0);
        }
    }

    for (int i = 0; i < nz_size; i++)
    {
        const int r = row_id[i];
        const int c = col_id[i];
        // The indices come from an input file; never trust them as array
        // subscripts without a range check.
        if (r < 0 || r >= n_size || c < 0 || c >= n_size)
        {
            std::cerr << "set_kernel: skipping entry " << i
                << " with out-of-range index (" << r << ", " << c << ")" << std::endl;
            continue;
        }
        kernel[r][c] = val[i];
    }
    return;
}
/**
 * @brief Solve the complex test system with several solvers and report, for
 *        each, the maximal difference to the reference answer.
 *
 * @return 0 on success, 1 when the answer file does not match the system size.
 */
int main(int argc, char const *argv[])
{
    std::string inputPath = "data/case_1K_cA";
    std::string answerPath = "data/case_1K_cB";

    int N;
    int nz;
    lcg_complex *A;
    int *rowIdxA;
    int *colIdxA;
    lcg_complex *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    // Read the answer length into its own variable: b holds N entries, so an
    // answer file of a different length must not silently replace N.
    int N_ans;
    lcg_complex *ans_x;
    readAnswer(answerPath, &N_ans, &ans_x);
    if (N_ans != N)
    {
        std::cerr << "answer length (" << N_ans << ") does not match system size ("
            << N << ")" << std::endl;
        delete[] A;
        delete[] rowIdxA;
        delete[] colIdxA;
        delete[] b;
        delete[] ans_x;
        return 1;
    }

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    TESTFUNC test(N);
    test.set_kernel(rowIdxA, colIdxA, A, nz);

    /******************** preparation finished ************************/
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-8;
    self_para.abs_diff = 0;
    test.set_clcg_parameter(self_para);

    // Solution vector, reset to zero before every solver run.
    lcg_complex *m = clcg_malloc(N);

    const decltype(CLCG_BICG) solvers[] = {CLCG_BICG, CLCG_BICG_SYM, CLCG_CGS, CLCG_TFQMR};
    for (const auto solver : solvers)
    {
        clcg_vecset(m, lcg_complex(0.0, 0.0), N);
        test.Minimize(m, b, N, solver);
        std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
    }

    clcg_free(m);
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    return 0;
}

155
src/sample/sample5.cpp Normal file
View File

@ -0,0 +1,155 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "../lib/lcg_eigen.h"
#include "iostream"
#include "Eigen/Dense"
#define M 1000
#define N 800
/**
 * @brief Largest element-wise distance between two real vectors.
 *
 * Each distance is evaluated as sqrt(d*d) with d = a[i] - b[i]. The loop runs
 * over a.size() entries, so `b` must be at least as long as `a`.
 *
 * @return The largest distance, or -1 when `a` is empty.
 */
lcg_float max_diff(const Eigen::VectorXd &a, const Eigen::VectorXd &b)
{
    lcg_float largest = -1;
    for (int idx = 0; idx < a.size(); ++idx)
    {
        const double gap = a[idx] - b[idx];
        largest = lcg_max(sqrt(gap*gap), largest);
    }
    return largest;
}
// Kernel matrix: a dense M-by-N matrix filled by Eigen's Random().
Eigen::MatrixXd kernel = Eigen::MatrixXd::Random(M, N);
// Scratch vector of length M holding the intermediate product kernel * x.
Eigen::VectorXd tmp_arr(M);
// Element-wise weights applied by CalMx(); all ones here.
Eigen::VectorXd p = Eigen::VectorXd::Constant(N, 1.0);
/**
 * @brief Product callback: prod_Ax = kernel^T * (kernel * x).
 *
 * The inner product kernel * x is stored in the file-level scratch vector
 * tmp_arr. `instance` is not used.
 */
void CalAx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
{
    // First stage: project x with the M-by-N kernel.
    tmp_arr = kernel * x;

    // Second stage: map the intermediate result back with the transpose.
    prod_Ax = kernel.transpose() * tmp_arr;
}
/**
 * @brief Preconditioner callback: prod_Mx[i] = p[i] * x[i] with the file-level
 *        weight vector p. `instance` is not used.
 */
void CalMx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
{
    prod_Mx = (p.array() * x.array()).matrix();
}
/**
 * @brief Progress callback for the conjugate gradient solvers.
 *
 * Rewrites the current terminal line (leading '\r') on std::clog with the
 * iteration number `k` and the value `converge`. The other arguments are
 * not used.
 *
 * @return Always 0.
 */
int Prog(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
    const lcg_para *param, const int k)
{
    std::clog << "\rIteration-times: " << k;
    std::clog << "\tconvergence: " << converge;
    return 0;
}
/**
 * @brief Build a synthetic system from a known model and solve it with each of
 *        the Eigen-based solvers, reporting status, error and run time.
 */
int main(int argc, char const *argv[])
{
    // Synthetic model: a random vector rescaled as (r + 1) * 0.5 * Range + LO.
    lcg_float LO = 1.0, HI = 2.0, Range = HI - LO;
    Eigen::VectorXd fm = Eigen::VectorXd::Random(N);
    fm = (fm + Eigen::VectorXd::Constant(N, 1.0))*0.5*Range;
    fm = (fm + Eigen::VectorXd::Constant(N, LO));

    // Right-hand side B = kernel^T * (kernel * fm).
    Eigen::VectorXd B(N);
    tmp_arr = kernel * fm;
    B = kernel.transpose() * tmp_arr;

    /******************** preparation finished ************************/
    lcg_para self_para = lcg_default_parameters();
    self_para.epsilon = 1e-5;
    self_para.abs_diff = 0;

    // Solution vector and the box constraints used by the PG/SPG solvers.
    Eigen::VectorXd m = Eigen::VectorXd::Zero(N);
    Eigen::VectorXd low = Eigen::VectorXd::Constant(N, LO);
    Eigen::VectorXd hig = Eigen::VectorXd::Constant(N, HI);

    clock_t start, end;
    int ret;

    // Shared reporting step: solver status, error against fm and elapsed time.
    auto report = [&]()
    {
        std::clog << std::endl; lcg_error_str(ret);
        std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
        std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
    };

    std::clog << "solver: cg" << std::endl;
    start = clock();
    ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CG);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: pcg" << std::endl;
    start = clock();
    ret = lcg_solver_preconditioned_eigen(CalAx, CalMx, Prog, m, B, &self_para, NULL, LCG_PCG);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: cgs" << std::endl;
    start = clock();
    ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CGS);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: bicgstab" << std::endl;
    start = clock();
    ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: bicgstab2" << std::endl;
    start = clock();
    ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB2);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: pg" << std::endl;
    start = clock();
    ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_PG);
    end = clock();
    report();

    m.setZero();
    std::clog << "solver: spg" << std::endl;
    start = clock();
    ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_SPG);
    end = clock();
    report();

    return 0;
}

235
src/sample/sample6.cpp Normal file
View File

@ -0,0 +1,235 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "iostream"
#include "fstream"
#include "complex"
#include "../lib/lcg_complex.h"
#include "../lib/solver_eigen.h"
#include "Eigen/Sparse"
typedef Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor> spmat_cd; // Note: Eigen's default sparse storage order is column-major, so row-major is requested explicitly
typedef Eigen::Triplet<lcg_complex> triplt_cd;
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
int **cooRowIdx, int **cooColIdx, lcg_complex **b)
{
std::ifstream in(filePath, std::ios::binary);
in.read((char*)pN, sizeof(int));
in.read((char*)pnz, sizeof(int));
*cooVal = new lcg_complex[*pnz]{};
*cooRowIdx = new int[*pnz]{};
*cooColIdx = new int[*pnz]{};
*b = new lcg_complex[*pN]{};
std::complex<double> std_c;
for (int i = 0; i < *pnz; ++i)
{
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
in.read((char*)&(*cooColIdx)[i], sizeof(int));
in.read((char*)&std_c, sizeof(std_c));
(*cooVal)[i].real(std_c.real());
(*cooVal)[i].imag(std_c.imag());
}
for (int i = 0; i < *pN; i++)
{
in.read((char*)&std_c, sizeof(std_c));
(*b)[i].real(std_c.real());
(*b)[i].imag(std_c.imag());
}
return;
}
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
{
std::ifstream in(filePath, std::ios::binary);
in.read((char*)pN, sizeof(int));
*x = new lcg_complex[*pN]{};
std::complex<double> std_c;
for (size_t i = 0; i < *pN; i++)
{
in.read((char*)&std_c, sizeof(std_c));
(*x)[i].real(std_c.real());
(*x)[i].imag(std_c.imag());
}
return;
}
/**
 * @brief Largest modulus of the element-wise difference of two complex vectors.
 *
 * The previous version accumulated re^2 + im^2, i.e. the squared modulus, which
 * did not match the "maximal difference" label printed by main(). The modulus
 * itself is now returned.
 *
 * The loop runs over a.size() entries, so `b` must be at least as long as `a`.
 *
 * @return max_i |a[i] - b[i]|, or -1 when `a` is empty.
 */
lcg_float max_diff(const Eigen::VectorXcd &a, const Eigen::VectorXcd &b)
{
    lcg_float max = -1;
    for (int i = 0; i < a.size(); i++)
    {
        const std::complex<lcg_float> t = a[i] - b[i];
        max = lcg_max(std::abs(t), max);
    }
    return max;
}
/**
 * @brief Sample solver backed by an Eigen sparse matrix and a diagonal
 *        (element-wise) preconditioner vector.
 */
class TESTFUNC : public CLCG_EIGEN_Solver
{
public:
    TESTFUNC(int n);
    ~TESTFUNC();

    // Build the sparse kernel from (row, column, value) triplets.
    void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);
    // Fill P with the reciprocals of the kernel's diagonal entries.
    void set_p();

    // Matrix-vector product used by the conjugate gradient solvers.
    // NOTE(review): `layout` is not consulted here; only `conjugate` selects
    // between kernel and its element-wise conjugate -- confirm this is intended.
    void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
        lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        if (conjugate != Conjugate)
        {
            prod_Ax = kernel * x;
        }
        else
        {
            prod_Ax = kernel.conjugate() * x;
        }
    }

    // Preconditioner product: element-wise multiplication by P. Neither
    // `layout` nor `conjugate` is used.
    void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
        lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        prod_Mx = P.cwiseProduct(x);
    }

private:
    spmat_cd kernel;     // sparse system matrix
    Eigen::VectorXcd P;  // preconditioner weights, one per row
    int n_size;          // matrix dimension
};
// Size the n-by-n sparse kernel (cleared with setZero) and the length-n
// preconditioner vector.
TESTFUNC::TESTFUNC(int n)
    : n_size(n)
{
    P.resize(n);
    kernel.resize(n, n);
    kernel.setZero();
}
// Shrink the sparse kernel to 0-by-0 before the members are destroyed.
TESTFUNC::~TESTFUNC()
{
kernel.resize(0, 0);
}
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
{
std::vector<triplt_cd> val_triplt;
for (size_t i = 0; i < nz_size; i++)
{
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
}
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
return;
}
void TESTFUNC::set_p()
{
for (size_t i = 0; i < n_size; i++)
{
P[i] = 1.0/kernel.coeff(i, i);
}
return;
}
/**
 * @brief Solve the sparse complex test system with the plain and the
 *        preconditioned solvers and report the error of each against the
 *        reference answer.
 *
 * @return 0 on success, 1 when the answer file does not match the system size.
 */
int main(int argc, char const *argv[])
{
    std::string inputPath = "data/case_10K_cA";
    std::string answerPath = "data/case_10K_cB";

    int N;
    int nz;
    lcg_complex *A;
    int *rowIdxA;
    int *colIdxA;
    lcg_complex *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    // Keep the answer length separate: b holds N entries, so an answer file of
    // a different length must not silently replace N.
    int N_ans;
    lcg_complex *ans_x;
    readAnswer(answerPath, &N_ans, &ans_x);
    if (N_ans != N)
    {
        std::cerr << "answer length (" << N_ans << ") does not match system size ("
            << N << ")" << std::endl;
        delete[] A;
        delete[] rowIdxA;
        delete[] colIdxA;
        delete[] b;
        delete[] ans_x;
        return 1;
    }

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    TESTFUNC test(N);
    test.set_kernel(rowIdxA, colIdxA, A, nz);
    test.set_p();

    // Copy the right-hand side and the reference answer into Eigen vectors.
    Eigen::VectorXcd B, ANS;
    B.resize(N);
    ANS.resize(N);
    for (int i = 0; i < N; i++)
    {
        B[i] = b[i];
        ANS[i] = ans_x[i];
    }

    /******************** preparation finished ************************/
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-16;
    self_para.abs_diff = 0;
    test.set_clcg_parameter(self_para);
    test.set_report_interval(10);

    // Solution vector, reset to zero before every solver run.
    Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));

    const decltype(CLCG_BICG) plain_solvers[] = {CLCG_BICG, CLCG_BICG_SYM, CLCG_CGS, CLCG_TFQMR};
    for (const auto solver : plain_solvers)
    {
        m.setZero();
        test.Minimize(m, B, solver);
        std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
    }

    const decltype(CLCG_PCG) preconditioned_solvers[] = {CLCG_PCG, CLCG_PBICG};
    for (const auto solver : preconditioned_solvers)
    {
        m.setZero();
        test.MinimizePreconditioned(m, B, solver);
        std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
    }

    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    return 0;
}

233
src/sample/sample7.cpp Normal file
View File

@ -0,0 +1,233 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include "iostream"
#include "fstream"
#include "../lib/solver_eigen.h"
#include "../lib/preconditioner_eigen.h"
// Shorthand for the double-precision complex scalar used throughout this sample.
typedef std::complex<double> complex_d;
// Row-major sparse matrix of complex doubles.
typedef Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> spmat_cd;
// (row, column, value) triplet used to assemble the sparse matrix.
typedef Eigen::Triplet<complex_d> triplt_cd;
// Dense complex column vector.
typedef Eigen::VectorXcd vector_cd;
void read(std::string filePath, int *pN, int *pnz, complex_d **cooVal,
int **cooRowIdx, int **cooColIdx, complex_d **b)
{
std::ifstream in(filePath, std::ios::binary);
in.read((char*)pN, sizeof(int));
in.read((char*)pnz, sizeof(int));
*cooVal = new complex_d[*pnz]{};
*cooRowIdx = new int[*pnz]{};
*cooColIdx = new int[*pnz]{};
*b = new complex_d[*pN]{};
for (int i = 0; i < *pnz; ++i)
{
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
in.read((char*)&(*cooColIdx)[i], sizeof(int));
in.read((char*)&(*cooVal)[i], sizeof(complex_d));
}
in.read((char*)(*b), sizeof(complex_d)*(*pN));
return;
}
void readAnswer(std::string filePath, int *pN, complex_d **x)
{
std::ifstream in(filePath, std::ios::binary);
in.read((char*)pN, sizeof(int));
*x = new complex_d[*pN]{};
in.read((char*)(*x), sizeof(complex_d)*(*pN));
return;
}
double max_diff(const vector_cd &a, const vector_cd &b)
{
double max = -1;
complex_d t;
for (int i = 0; i < a.size(); i++)
{
t = a[i] - b[i];
max = lcg_max(std::sqrt(std::norm(t)), max);
}
return max;
}
/**
 * @brief Sample solver backed by an Eigen sparse matrix and a preconditioner
 *        applied through two triangular solves.
 */
class TESTFUNC : public CLCG_EIGEN_Solver
{
public:
    TESTFUNC(int n);
    ~TESTFUNC();

    // Build the sparse kernel from (row, column, value) triplets.
    void set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size);
    // Compute the triangular factors used by MxProduct().
    void set_preconditioner();

    // Matrix-vector product used by the conjugate gradient solvers.
    // NOTE(review): `layout` is not consulted here; only `conjugate` selects
    // between kernel and its element-wise conjugate -- confirm this is intended.
    void AxProduct(const vector_cd &x, vector_cd &prod_Ax, lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        if (conjugate != Conjugate)
        {
            prod_Ax = kernel * x;
        }
        else
        {
            prod_Ax = kernel.conjugate() * x;
        }
    }

    // Preconditioner product. Alternatives kept for reference:
    //   no preconditioning:        prod_Mx = x;
    //   diagonal preconditioning:  prod_Mx = p.cwiseProduct(x);
    // Active variant (ILUT/IC): solve with l_tri into the scratch vector p,
    // then with u_tri into prod_Mx.
    void MxProduct(const vector_cd &x, vector_cd &prod_Mx, lcg_matrix_e layout, clcg_complex_e conjugate)
    {
        clcg_solve_lower_triangle(l_tri, x, p);
        clcg_solve_upper_triangle(u_tri, p, prod_Mx);
    }

private:
    // Sparse system matrix and its lower/upper triangular factors.
    spmat_cd kernel, l_tri, u_tri;
    // Scratch vector between the two triangular solves.
    vector_cd p;
    // Matrix dimension.
    int n_size;
};
// Size the n-by-n sparse kernel (cleared with setZero) and the length-n
// scratch vector.
TESTFUNC::TESTFUNC(int n)
    : n_size(n)
{
    p.resize(n);
    kernel.resize(n, n);
    kernel.setZero();
}
// Shrink every Eigen member to size zero before the members are destroyed.
TESTFUNC::~TESTFUNC()
{
    p.resize(0);
    u_tri.resize(0, 0);
    l_tri.resize(0, 0);
    kernel.resize(0, 0);
}
void TESTFUNC::set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size)
{
std::vector<triplt_cd> val_triplt;
for (size_t i = 0; i < nz_size; i++)
{
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
}
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
return;
}
// Prepare the data used by MxProduct(). Four variants are listed; only
// variant 3 is active, the others are kept as commented-out alternatives.
void TESTFUNC::set_preconditioner()
{
// 1. Preconditioning using the reciprocal of the kernel's diagonal
/*
for (size_t i = 0; i < n_size; i++)
{
p[i] = 1.0/kernel.coeff(i, i);
}
*/
// 2. Preconditioning using the incomplete LU decomposition
//incomplete_LU(kernel, l_tri, u_tri);
// 3. Preconditioning using the incomplete Cholesky decomposition:
//    l_tri receives the factor and u_tri is set to its (non-conjugated) transpose.
clcg_incomplete_Cholesky(kernel, l_tri);
u_tri = l_tri.transpose();
// 4. Preconditioning using compressed incomplete decompositions
/*
vector_cd one = Eigen::VectorXcd::Ones(n_size);
vector_cd x = Eigen::VectorXcd::Zero(n_size);
solve_lower_triangle(l_tri, one, x);
solve_upper_triangle(u_tri, x, p);
*/
return;
}
int main(int argc, char const *argv[]) try
{
std::string inputPath = "data/case_1K_cA";
std::string answerPath = "data/case_1K_cB";
int N;
int nz;
complex_d *A;
int *rowIdxA;
int *colIdxA;
complex_d *b;
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
complex_d *ans_x;
readAnswer(answerPath, &N, &ans_x);
std::clog << "N = " << N << std::endl;
std::clog << "nz = " << nz << std::endl;
TESTFUNC test(N);
test.set_kernel(rowIdxA, colIdxA, A, nz);
test.set_preconditioner();
vector_cd B, ANS;
B.resize(N);
ANS.resize(N);
for (size_t i = 0; i < N; i++)
{
B[i] = b[i];
ANS[i] = ans_x[i];
}
/********************准备工作完成************************/
clcg_para self_para = clcg_default_parameters();
self_para.epsilon = 1e-12;
self_para.abs_diff = 0;
test.set_clcg_parameter(self_para);
test.set_report_interval(10);
Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));
test.MinimizePreconditioned(m, B, CLCG_PCG);
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
m.setZero();
test.MinimizePreconditioned(m, B, CLCG_PBICG);
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
ANS.resize(0);
B.resize(0);
m.resize(0);
return 0;
}
catch (std::exception &e)
{
std::cerr << e.what() << std::endl;
}

312
src/sample/sample8.cu Normal file
View File

@ -0,0 +1,312 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/lcg_cuda.h"
/**
 * @brief Read a sparse real system (matrix triplets and right-hand side) from
 *        a binary file.
 *
 * Layout consumed: `int` N, `int` nz, then nz records of
 * (`int` row, `int` column, `double` value), then N `double`
 * right-hand-side values.
 *
 * @param filePath  Path of the binary file.
 * @param pN        Receives N.
 * @param pnz       Receives nz.
 * @param cooVal    Receives a `new[]` array of nz values.
 * @param cooRowIdx Receives a `new[]` array of nz row indices.
 * @param cooColIdx Receives a `new[]` array of nz column indices.
 * @param b         Receives a `new[]` array of N right-hand-side values.
 *
 * The caller releases all four arrays with `delete[]`.
 *
 * @throws std::ios_base::failure if the file cannot be opened, a size is
 *         negative, or the file is shorter than its header announces. No
 *         output argument is modified in that case.
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
    int **cooRowIdx, int **cooColIdx, double **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file " + filePath);
    }

    int n = 0, nz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nz, sizeof(int));
    if (!in || n < 0 || nz < 0)
    {
        throw std::ios_base::failure("read: invalid header in " + filePath);
    }

    double *vals = new double[nz]{};
    int *rows = new int[nz]{};
    int *cols = new int[nz]{};
    double *rhs = new double[n]{};

    // A failed read leaves the stream in a failed state, so the remaining
    // reads are no-ops and a single check at the end is sufficient.
    for (int i = 0; i < nz; ++i)
    {
        in.read((char*)&rows[i], sizeof(int));
        in.read((char*)&cols[i], sizeof(int));
        in.read((char*)&vals[i], sizeof(double));
    }
    in.read((char*)rhs, sizeof(double)*n);

    if (!in)
    {
        delete[] vals;
        delete[] rows;
        delete[] cols;
        delete[] rhs;
        throw std::ios_base::failure("read: unexpected end of file in " + filePath);
    }

    *pN = n;
    *pnz = nz;
    *cooVal = vals;
    *cooRowIdx = rows;
    *cooColIdx = cols;
    *b = rhs;
    return;
}
/**
 * @brief Read a reference solution vector from a binary file.
 *
 * Layout consumed: one `int` (element count) followed by that many `double`
 * values.
 *
 * @param filePath Path of the binary file.
 * @param pN       Receives the number of elements that were read.
 * @param x        Receives a `new[]`-allocated array of *pN values; the caller
 *                 releases it with `delete[]`.
 *
 * @throws std::ios_base::failure if the file cannot be opened, the stored count
 *         is negative, or the file ends early. Neither *pN nor *x is modified
 *         in that case.
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file " + filePath);
    }

    int count = 0;
    if (!in.read((char*)&count, sizeof(int)) || count < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid element count in " + filePath);
    }

    double *values = new double[count]{};
    if (!in.read((char*)values, sizeof(double)*count))
    {
        delete[] values;
        throw std::ios_base::failure("readAnswer: unexpected end of file in " + filePath);
    }

    *pN = count;
    *x = values;
    return;
}
/**
 * @brief Error measure between two real arrays: sqrt(sum_i (a[i]-b[i])^2) / n.
 *
 * @param a First array, at least n elements.
 * @param b Second array, at least n elements.
 * @param n Number of elements to compare.
 * @return The measure above, or 0 when n is not positive (the previous version
 *         divided by zero for n == 0 and ran out of bounds for n < 0 because
 *         the size_t counter was compared against a negative int).
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sum_sq = 0.0;
    for (int i = 0; i < n; i++)
    {
        const lcg_float diff = a[i] - b[i];
        sum_sq += diff*diff;
    }
    return sqrt(sum_sq)/n;
}
// Declare as global variables
// Host-side scalars passed by address as alpha (one) and beta (zero) to
// cusparseSpMV, and as alpha (one) to cusparseDcsrsv2_solve.
lcg_float one = 1.0;
lcg_float zero = 0.0;
// Device work buffer shared by the SpMV, csric02 and csrsv2 calls; allocated in main().
void *d_buf;
// Generic-API descriptor of the system matrix A in CSR form.
cusparseSpMatDescr_t smat_A;
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
// Device copy of the non-zero values of A.
double *d_A;
// Device vector of length N: intermediate result between the two triangular
// solves in cudaMx(); also backs the temporary dense-vector descriptor in main().
double *d_pd;
// Device copy of A's values that cusparseDcsric02 overwrites with the
// incomplete-Cholesky factor.
double *d_ic;
// Legacy matrix descriptors and analysis/info objects for the factorization
// (A) and the two triangular solves (L and its transpose).
cusparseMatDescr_t descr_A = 0;
cusparseMatDescr_t descr_L = 0;
csric02Info_t icinfo_A = 0;
csrsv2Info_t info_L = 0;
csrsv2Info_t info_LT = 0;
/**
 * @brief Product callback: prod_Ax = A * x, computed on the device with
 *        cusparseSpMV using the file-level descriptor smat_A and buffer d_buf.
 *
 * `instance`, `cub_handle`, `n_size` and `nz_size` are not used. The callback
 * cannot return a status, so a failing cuSPARSE call is reported on std::cerr
 * instead of being silently dropped.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    // Calculate the product of A*x
    const cusparseStatus_t status = cusparseSpMV(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        x, &zero, prod_Ax, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
    if (status != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "cudaAx: cusparseSpMV failed with status " << (int) status << std::endl;
    }
    return;
}
/**
 * @brief Preconditioner callback: applies the incomplete-Cholesky factor stored
 *        in d_ic through two triangular solves.
 *
 * First solves with the factor (NON_TRANSPOSE) from x into the file-level
 * scratch vector d_pd, then with its transpose from d_pd into the output
 * vector (the parameter is named prod_Ax but receives the preconditioned
 * result).
 *
 * `instance` and `cub_handle` are not used. The callback cannot return a
 * status; the first failing cuSPARSE call stops the sequence, so later calls
 * never run on pointers that were not obtained, and is reported on std::cerr.
 */
void cudaMx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    void *d_x = nullptr, *d_Ax = nullptr;

    cusparseStatus_t status = cusparseDnVecGetValues(x, &d_x);
    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDnVecGetValues(prod_Ax, &d_Ax);
    }

    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
            n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (double*) d_x, (double*) d_pd,
            CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    }

    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE,
            n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (double*) d_pd, (double*) d_Ax,
            CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    }

    if (status != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "cudaMx: cuSPARSE call failed with status " << (int) status << std::endl;
    }
    return;
}
/**
 * @brief Progress callback: prints the iteration number and the convergence
 *        value on std::clog once `converge` is at or below param->epsilon.
 *
 * The other arguments are not used.
 *
 * @return Always 0.
 */
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool reached = (converge <= param->epsilon);
    if (!reached)
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    return 0;
}
// Status checks used by main() only. Each one prints the failing line and
// makes main() return 1; device resources are then left to process teardown.
#define SAMPLE8_CHECK_CUDA(call)                                                   \
    do {                                                                           \
        const cudaError_t status_ = (call);                                        \
        if (status_ != cudaSuccess) {                                              \
            std::cerr << "CUDA error at line " << __LINE__ << ": "                 \
                << cudaGetErrorString(status_) << std::endl;                       \
            return 1;                                                              \
        }                                                                          \
    } while (0)

#define SAMPLE8_CHECK_CUSPARSE(call)                                               \
    do {                                                                           \
        const cusparseStatus_t status_ = (call);                                   \
        if (status_ != CUSPARSE_STATUS_SUCCESS) {                                  \
            std::cerr << "cuSPARSE error at line " << __LINE__ << ": status "      \
                << (int) status_ << std::endl;                                     \
            return 1;                                                              \
        }                                                                          \
    } while (0)

#define SAMPLE8_CHECK_CUBLAS(call)                                                 \
    do {                                                                           \
        const cublasStatus_t status_ = (call);                                     \
        if (status_ != CUBLAS_STATUS_SUCCESS) {                                    \
            std::cerr << "cuBLAS error at line " << __LINE__ << ": status "        \
                << (int) status_ << std::endl;                                     \
            return 1;                                                              \
        }                                                                          \
    } while (0)

/**
 * @brief Solve a sparse real system on the GPU with CG, CGS and
 *        incomplete-Cholesky preconditioned CG, and report the error of each
 *        against the reference answer.
 *
 * @return 0 on success; 1 when the answer file does not match the system size
 *         or a CUDA / cuBLAS / cuSPARSE call fails during set-up.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_10K_A";
    std::string answerPath = "data/case_10K_B";

    int N;
    int nz;
    double *A;
    int *rowIdxA;
    int *colIdxA;
    double *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    // Keep the answer length separate: b holds N entries, so an answer file of
    // a different length must not silently replace N.
    int N_ans;
    double *ans_x;
    readAnswer(answerPath, &N_ans, &ans_x);
    if (N_ans != N)
    {
        std::cerr << "answer length (" << N_ans << ") does not match system size ("
            << N << ")" << std::endl;
        delete[] A;
        delete[] rowIdxA;
        delete[] colIdxA;
        delete[] b;
        delete[] ans_x;
        return 1;
    }

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Create handles
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;
    SAMPLE8_CHECK_CUBLAS(cublasCreate(&cubHandle));
    SAMPLE8_CHECK_CUSPARSE(cusparseCreate(&cusHandle));

    // Allocate GPU memory & copy matrix/vector to device
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_A, nz * sizeof(double)));
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_rowIdxA, nz * sizeof(int)));
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int)));
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_colIdxA, nz * sizeof(int)));
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_pd, N * sizeof(double)));

    SAMPLE8_CHECK_CUDA(cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice));
    SAMPLE8_CHECK_CUDA(cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice));
    SAMPLE8_CHECK_CUDA(cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice));

    // Convert matrix A from COO format to CSR format
    SAMPLE8_CHECK_CUSPARSE(cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO));

    // Create sparse matrix
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));

    // This is just used to get bufferSize;
    cusparseDnVecDescr_t dvec_tmp;
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_R_64F));

    size_t bufferSize_B;
    SAMPLE8_CHECK_CUSPARSE(cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize_B));

    // --- Start of the preconditioning part ---

    // Copy A
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_ic, nz * sizeof(lcg_float)));
    SAMPLE8_CHECK_CUDA(cudaMemcpy(d_ic, d_A, nz * sizeof(lcg_float), cudaMemcpyDeviceToDevice));

    int bufferSize_A, bufferSize_L, bufferSize_LT;

    // create descriptor for matrix A
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateMatDescr(&descr_A));

    // initialize properties of matrix A
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO));

    // create descriptor for matrix L
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateMatDescr(&descr_L));

    // initialize properties of matrix L
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT));
    SAMPLE8_CHECK_CUSPARSE(cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO));

    // Create empty info objects for incomplete-cholesky factorization
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateCsric02Info(&icinfo_A));
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&info_L));
    SAMPLE8_CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&info_LT));

    // Compute buffer size in computing ic factorization
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
        d_colIdxA, icinfo_A, &bufferSize_A));
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L));
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT));

    // One buffer large enough for every user. Kept as size_t so the SpMV
    // requirement is not narrowed to int.
    size_t bufferSize = bufferSize_B;
    if (bufferSize_A > 0 && (size_t) bufferSize_A > bufferSize) bufferSize = (size_t) bufferSize_A;
    if (bufferSize_L > 0 && (size_t) bufferSize_L > bufferSize) bufferSize = (size_t) bufferSize_L;
    if (bufferSize_LT > 0 && (size_t) bufferSize_LT > bufferSize) bufferSize = (size_t) bufferSize_LT;
    SAMPLE8_CHECK_CUDA(cudaMalloc(&d_buf, bufferSize));

    // Perform incomplete-choleskey factorization: analysis phase
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
        d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf));
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf));
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf));

    // Perform incomplete-choleskey factorization: solve phase
    SAMPLE8_CHECK_CUSPARSE(cusparseDcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
        icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf));

    // A zero pivot makes the factor unusable as a preconditioner. CG and CGS
    // do not depend on it, so this is reported and the run continues.
    int zero_pivot = -1;
    if (cusparseXcsric02_zeroPivot(cusHandle, icinfo_A, &zero_pivot) == CUSPARSE_STATUS_ZERO_PIVOT)
    {
        std::cerr << "incomplete Cholesky: zero pivot at position " << zero_pivot
            << "; the PCG result below is unreliable" << std::endl;
    }

    // --- End of the preconditioning part ---

    // Declare an initial solution
    lcg_para self_para = lcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    int ret;
    double *host_m = new double[N];

    // Solve with CG
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Solve with CGS
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Solve with PCG
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_preconditioned_cuda(cudaAx, cudaMx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_PCG);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Free Device memory (best-effort teardown, statuses intentionally not checked)
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_pd);
    cudaFree(d_ic);

    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroySpMat(smat_A);
    cudaFree(d_buf);
    cusparseDestroyMatDescr(descr_A);
    cusparseDestroyMatDescr(descr_L);
    cusparseDestroyCsric02Info(icinfo_A);
    cusparseDestroyCsrsv2Info(info_L);
    cusparseDestroyCsrsv2Info(info_LT);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);

    return 0;
}

#undef SAMPLE8_CHECK_CUDA
#undef SAMPLE8_CHECK_CUSPARSE
#undef SAMPLE8_CHECK_CUBLAS

221
src/sample/sample9.cu Normal file
View File

@ -0,0 +1,221 @@
/******************************************************
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
*
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* LibLCG is distributed under a dual licensing scheme. You can
* redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License (LGPL) as published by the Free Software Foundation,
* either version 2 of the License, or (at your option) any later version.
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from
* using the LibLCG, please consider the option to obtain a commercial
* license for a fee. These licenses are offered by the LibLCG developing
* team. As a rule, licenses are provided "as-is", unlimited in time for
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
* Please do not forget to include some description of your company and the
* realm of its activities. Also add information on how to contact you by
* electronic and paper mail.
******************************************************/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cmath>
#include "../lib/clcg_cuda.h"
/**
 * @brief Read a sparse complex system (matrix triplets and right-hand side)
 *        from a binary file.
 *
 * Layout consumed: `int` N, `int` nz, then nz records of
 * (`int` row, `int` column, `cuDoubleComplex` value), then N
 * `cuDoubleComplex` right-hand-side values.
 *
 * @param filePath  Path of the binary file.
 * @param pN        Receives N.
 * @param pnz       Receives nz.
 * @param cooVal    Receives a `new[]` array of nz values.
 * @param cooRowIdx Receives a `new[]` array of nz row indices.
 * @param cooColIdx Receives a `new[]` array of nz column indices.
 * @param b         Receives a `new[]` array of N right-hand-side values.
 *
 * The caller releases all four arrays with `delete[]`.
 *
 * @throws std::ios_base::failure if the file cannot be opened, a size is
 *         negative, or the file is shorter than its header announces. No
 *         output argument is modified in that case.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file " + filePath);
    }

    int n = 0, nz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nz, sizeof(int));
    if (!in || n < 0 || nz < 0)
    {
        throw std::ios_base::failure("read: invalid header in " + filePath);
    }

    cuDoubleComplex *vals = new cuDoubleComplex[nz]{};
    int *rows = new int[nz]{};
    int *cols = new int[nz]{};
    cuDoubleComplex *rhs = new cuDoubleComplex[n]{};

    // A failed read leaves the stream in a failed state, so the remaining
    // reads are no-ops and a single check at the end is sufficient.
    for (int i = 0; i < nz; ++i)
    {
        in.read((char*)&rows[i], sizeof(int));
        in.read((char*)&cols[i], sizeof(int));
        in.read((char*)&vals[i], sizeof(cuDoubleComplex));
    }
    in.read((char*)rhs, sizeof(cuDoubleComplex)*n);

    if (!in)
    {
        delete[] vals;
        delete[] rows;
        delete[] cols;
        delete[] rhs;
        throw std::ios_base::failure("read: unexpected end of file in " + filePath);
    }

    *pN = n;
    *pnz = nz;
    *cooVal = vals;
    *cooRowIdx = rows;
    *cooColIdx = cols;
    *b = rhs;
    return;
}
/**
 * @brief Read a reference solution vector from a binary file.
 *
 * Layout consumed: one `int` (element count) followed by that many
 * `cuDoubleComplex` values.
 *
 * @param filePath Path of the binary file.
 * @param pN       Receives the number of elements that were read.
 * @param x        Receives a `new[]`-allocated array of *pN values; the caller
 *                 releases it with `delete[]`.
 *
 * @throws std::ios_base::failure if the file cannot be opened, the stored count
 *         is negative, or the file ends early. Neither *pN nor *x is modified
 *         in that case.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file " + filePath);
    }

    int count = 0;
    if (!in.read((char*)&count, sizeof(int)) || count < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid element count in " + filePath);
    }

    cuDoubleComplex *values = new cuDoubleComplex[count]{};
    if (!in.read((char*)values, sizeof(cuDoubleComplex)*count))
    {
        delete[] values;
        throw std::ios_base::failure("readAnswer: unexpected end of file in " + filePath);
    }

    *pN = count;
    *x = values;
    return;
}
/**
 * @brief Error measure between two complex arrays:
 *        sqrt(sum_i |a[i]-b[i]|^2) / n, with the difference taken by clcg_Zdiff.
 *
 * @param a First array, at least n elements.
 * @param b Second array, at least n elements.
 * @param n Number of elements to compare.
 * @return The measure above, or 0 when n is not positive (the previous version
 *         divided by zero for n == 0 and ran out of bounds for n < 0 because
 *         the size_t counter was compared against a negative int).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float sum_sq = 0.0;
    cuDoubleComplex tmp;
    for (int i = 0; i < n; i++)
    {
        tmp = clcg_Zdiff(a[i], b[i]);
        sum_sq += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(sum_sq)/n;
}
// Declare as global variables
// Host-side scalars 1+0i and 0+0i passed by address as alpha/beta to
// cusparseSpMV and cusparseSpMV_bufferSize. They are initialised here because
// main() passes them to cusparseSpMV_bufferSize before cudaAx() (which also
// assigns them) has ever run.
cuDoubleComplex one = {1.0, 0.0}, zero = {0.0, 0.0};
// Device work buffer for cusparseSpMV; allocated in main().
void *d_buf;
// Generic-API descriptor of the system matrix A in CSR form.
cusparseSpMatDescr_t smat_A;
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
// Device copy of the non-zero values of A.
cuDoubleComplex *d_A;
// Device vector of length N backing the temporary dense-vector descriptor in main().
cuDoubleComplex *d_B;
/**
 * @brief Product callback: prod_Ax = op(A) * x, computed on the device with
 *        cusparseSpMV, where `oper_t` selects the operation applied to A.
 *
 * Uses the file-level descriptor smat_A and work buffer d_buf. `instance`,
 * `cub_handle`, `n_size` and `nz_size` are not used. The callback cannot
 * return a status, so a failing cuSPARSE call is reported on std::cerr instead
 * of being silently dropped.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
    cusparseOperation_t oper_t)
{
    // Make sure alpha = 1 and beta = 0 regardless of what the globals held.
    one.x = 1.0; one.y = 0.0;
    zero.x = 0.0; zero.y = 0.0;

    // Calculate the product of A*x
    const cusparseStatus_t status = cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax,
        CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
    if (status != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "cudaAx: cusparseSpMV failed with status " << (int) status << std::endl;
    }
    return;
}
/**
 * @brief Progress callback: prints the iteration number and the convergence
 *        value on std::clog once `converge` is at or below param->epsilon.
 *
 * The other arguments are not used.
 *
 * @return Always 0.
 */
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool reached = (converge <= param->epsilon);
    if (!reached)
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    return 0;
}
// Print a diagnostic when a CUDA runtime call fails; returns true on success.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) return true;
    std::cerr << "CUDA runtime error (" << what << "): " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Print a diagnostic when a cuSPARSE call fails; returns true on success.
static bool cusparseCallOk(cusparseStatus_t status, const char *what)
{
    if (status == CUSPARSE_STATUS_SUCCESS) return true;
    std::cerr << "cuSPARSE error (" << what << "): " << cusparseGetErrorString(status) << std::endl;
    return false;
}

// Print a diagnostic when a cuBLAS call fails; returns true on success.
static bool cublasCallOk(cublasStatus_t status, const char *what)
{
    if (status == CUBLAS_STATUS_SUCCESS) return true;
    std::cerr << "cuBLAS error (" << what << "): status " << static_cast<int>(status) << std::endl;
    return false;
}

// Set every entry of a host vector of length n to 0 + 0i.
static void resetSolution(cuDoubleComplex *m, int n)
{
    for (int i = 0; i < n; i++)
    {
        m[i].x = 0.0; m[i].y = 0.0;
    }
}

// Sample driver: loads a complex sparse system from "data/case_1K_cA" and a
// reference solution from "data/case_1K_cB", uploads the matrix to the GPU,
// solves the system with the CLCG_BICG and CLCG_BICG_SYM methods through
// clcg_solver_cuda(), and prints the averaged error against the reference.
//
// Returns 0 on success and 1 if a CUDA / cuSPARSE / cuBLAS setup call fails.
// On such a failure the program exits immediately and leaves resource
// reclamation to process teardown.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    std::string inputPath = "data/case_1K_cA";
    std::string answerPath = "data/case_1K_cB";

    // Load the matrix (values + row/column indices) and the right-hand side
    int N, nz;
    int *rowIdxA, *colIdxA;
    cuDoubleComplex *A, *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    // Load the reference solution
    cuDoubleComplex *ans_x;
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Create handles
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;
    if (!cublasCallOk(cublasCreate(&cubHandle), "cublasCreate")) return 1;
    if (!cusparseCallOk(cusparseCreate(&cusHandle), "cusparseCreate")) return 1;

    // Allocate GPU memory & copy matrix/vector to device
    if (!cudaCallOk(cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex)), "cudaMalloc d_A")) return 1;
    if (!cudaCallOk(cudaMalloc(&d_rowIdxA, nz * sizeof(int)), "cudaMalloc d_rowIdxA")) return 1;
    if (!cudaCallOk(cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int)), "cudaMalloc d_rowPtrA")) return 1;
    if (!cudaCallOk(cudaMalloc(&d_colIdxA, nz * sizeof(int)), "cudaMalloc d_colIdxA")) return 1;
    if (!cudaCallOk(cudaMalloc(&d_B, N * sizeof(cuDoubleComplex)), "cudaMalloc d_B")) return 1;

    if (!cudaCallOk(cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice), "cudaMemcpy d_A")) return 1;
    if (!cudaCallOk(cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy d_rowIdxA")) return 1;
    if (!cudaCallOk(cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy d_colIdxA")) return 1;

    // Convert matrix A from COO format to CSR format
    // NOTE(review): this presumably requires the COO entries to be sorted by row - confirm against read().
    if (!cusparseCallOk(cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO),
        "cusparseXcoo2csr")) return 1;

    // Create sparse matrix
    if (!cusparseCallOk(cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F), "cusparseCreateCsr")) return 1;

    // This is just used to get bufferSize;
    cusparseDnVecDescr_t dvec_tmp;
    if (!cusparseCallOk(cusparseCreateDnVec(&dvec_tmp, N, d_B, CUDA_C_64F), "cusparseCreateDnVec")) return 1;

    // Make sure the SpMV scalars hold their real values before they are
    // handed to the buffer-size query (cudaAx() has not run yet at this point).
    one.x = 1.0; one.y = 0.0;
    zero.x = 0.0; zero.y = 0.0;

    // Query the workspace for both operations the solver may request and keep
    // the larger one. The algorithm enum matches the one used in cudaAx().
    size_t bufferSize_B = 0, bufferSize_B2 = 0;
    if (!cusparseCallOk(cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B),
        "cusparseSpMV_bufferSize (non-transpose)")) return 1;
    if (!cusparseCallOk(cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B2),
        "cusparseSpMV_bufferSize (conjugate-transpose)")) return 1;
    if (bufferSize_B2 > bufferSize_B) bufferSize_B = bufferSize_B2;

    if (!cudaCallOk(cudaMalloc(&d_buf, bufferSize_B), "cudaMalloc d_buf")) return 1;

    // Solver parameters
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    int ret;
    // Declare an initial solution
    cuDoubleComplex *host_m = new cuDoubleComplex[N];

    // Solve with BICG
    resetSolution(host_m, N);
    ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG);
    lcg_error_str(ret);
    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Solve with BICG_SYM
    resetSolution(host_m, N);
    ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG_SYM);
    lcg_error_str(ret);
    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Destroy the descriptors first: they reference the device buffers below
    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroySpMat(smat_A);

    // Free Device memory
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_B);
    cudaFree(d_buf);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}