initial upload
This commit is contained in:
parent
c7e8487a02
commit
834df92696
10
.gitignore
vendored
10
.gitignore
vendored
@ -1,4 +1,3 @@
|
||||
# ---> C++
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
@ -32,3 +31,12 @@
|
||||
*.out
|
||||
*.app
|
||||
|
||||
# folder preferences and build folder
|
||||
.DS_Store
|
||||
build/
|
||||
pack/
|
||||
.vscode/
|
||||
out/
|
||||
*.sh
|
||||
case_*
|
||||
config.h
|
30
CMakeLists.txt
Normal file
30
CMakeLists.txt
Normal file
@ -0,0 +1,30 @@
|
||||
cmake_minimum_required(VERSION 3.15.2)
|
||||
# 设置工程名称
|
||||
project(LibLCG VERSION 3.1 LANGUAGES CXX)
|
||||
# 添加配置文件编写的函数
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
message(STATUS "Platform: " ${CMAKE_HOST_SYSTEM_NAME})
|
||||
# CMake默认的安装路径 Windows下为C:/Program\ Files/${Project_Name} Linux/Unix下为/usr/local
|
||||
message(STATUS "Install prefix: " ${CMAKE_INSTALL_PREFIX})
|
||||
# CMake默认的编译类型为空
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
# 添加编译选项
|
||||
option(LibLCG_OPENMP "Use OpenMP" ON) # Set OFF to disable the functionality
|
||||
option(LibLCG_EIGEN "Use Eigen" ON)
|
||||
option(LibLCG_STD_COMPLEX "Use STD complex" ON)
|
||||
option(LibLCG_CUDA "Use CUDA" ON)
|
||||
message(STATUS "Use OpenMP: " ${LibLCG_OPENMP})
|
||||
message(STATUS "Use Eigen: " ${LibLCG_EIGEN})
|
||||
message(STATUS "Use STD complex: " ${LibLCG_STD_COMPLEX})
|
||||
message(STATUS "Use CUDA: " ${LibLCG_CUDA})
|
||||
|
||||
# 加入一个头文件配置,让cmake对源码进行操作
|
||||
configure_file(
|
||||
"${PROJECT_SOURCE_DIR}/config.h.in"
|
||||
"${PROJECT_SOURCE_DIR}/src/lib/config.h"
|
||||
)
|
||||
|
||||
# 添加源文件地址
|
||||
add_subdirectory(src/)
|
524
LICENSE
Normal file
524
LICENSE
Normal file
@ -0,0 +1,524 @@
|
||||
LibLCG License
|
||||
--------------
|
||||
|
||||
LibLCG is distributed under a dual licensing scheme. You can
|
||||
redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
General Public License (LGPL) as published by the Free Software
|
||||
Foundation, either version 2 of the License, or (at your option) any
|
||||
later version. A copy of the GNU Lesser General Public License is
|
||||
reproduced below.
|
||||
|
||||
If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
using the LibLCG, please consider the option to obtain a commercial
|
||||
license for a fee. These licenses are offered by the LibLCG developing
|
||||
team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
a one time fee. Please send corresponding requests to:
|
||||
yizhang-geo@zju.edu.cn. Please do not forget to include some
|
||||
description of your company and the realm of its activities. Also add
|
||||
information on how to contact you by electronic and paper mail.
|
||||
|
||||
=====================================================================
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 2.1, February 1999
|
||||
|
||||
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
[This is the first released version of the Lesser GPL. It also counts
|
||||
as the successor of the GNU Library Public License, version 2, hence
|
||||
the version number 2.1.]
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
Licenses are intended to guarantee your freedom to share and change
|
||||
free software--to make sure the software is free for all its users.
|
||||
|
||||
This license, the Lesser General Public License, applies to some
|
||||
specially designated software packages--typically libraries--of the
|
||||
Free Software Foundation and other authors who decide to use it. You
|
||||
can use it too, but we suggest you first think carefully about whether
|
||||
this license or the ordinary General Public License is the better
|
||||
strategy to use in any particular case, based on the explanations below.
|
||||
|
||||
When we speak of free software, we are referring to freedom of use,
|
||||
not price. Our General Public Licenses are designed to make sure that
|
||||
you have the freedom to distribute copies of free software (and charge
|
||||
for this service if you wish); that you receive source code or can get
|
||||
it if you want it; that you can change the software and use pieces of
|
||||
it in new free programs; and that you are informed that you can do
|
||||
these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
distributors to deny you these rights or to ask you to surrender these
|
||||
rights. These restrictions translate to certain responsibilities for
|
||||
you if you distribute copies of the library or if you modify it.
|
||||
|
||||
For example, if you distribute copies of the library, whether gratis
|
||||
or for a fee, you must give the recipients all the rights that we gave
|
||||
you. You must make sure that they, too, receive or can get the source
|
||||
code. If you link other code with the library, you must provide
|
||||
complete object files to the recipients, so that they can relink them
|
||||
with the library after making changes to the library and recompiling
|
||||
it. And you must show them these terms so they know their rights.
|
||||
|
||||
We protect your rights with a two-step method: (1) we copyright the
|
||||
library, and (2) we offer you this license, which gives you legal
|
||||
permission to copy, distribute and/or modify the library.
|
||||
|
||||
To protect each distributor, we want to make it very clear that
|
||||
there is no warranty for the free library. Also, if the library is
|
||||
modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
restrictive license from a patent holder. Therefore, we insist that
|
||||
any patent license obtained for a version of the library must be
|
||||
consistent with the full freedom of use specified in this license.
|
||||
|
||||
Most GNU software, including some libraries, is covered by the
|
||||
ordinary GNU General Public License. This license, the GNU Lesser
|
||||
General Public License, applies to certain designated libraries, and
|
||||
is quite different from the ordinary General Public License. We use
|
||||
this license for certain libraries in order to permit linking those
|
||||
libraries into non-free programs.
|
||||
|
||||
When a program is linked with a library, whether statically or using
|
||||
a shared library, the combination of the two is legally speaking a
|
||||
combined work, a derivative of the original library. The ordinary
|
||||
General Public License therefore permits such linking only if the
|
||||
entire combination fits its criteria of freedom. The Lesser General
|
||||
Public License permits more lax criteria for linking other code with
|
||||
the library.
|
||||
|
||||
We call this license the "Lesser" General Public License because it
|
||||
does Less to protect the user's freedom than the ordinary General
|
||||
Public License. It also provides other free software developers Less
|
||||
of an advantage over competing non-free programs. These disadvantages
|
||||
are the reason we use the ordinary General Public License for many
|
||||
libraries. However, the Lesser license provides advantages in certain
|
||||
special circumstances.
|
||||
|
||||
For example, on rare occasions, there may be a special need to
|
||||
encourage the widest possible use of a certain library, so that it becomes
|
||||
a de-facto standard. To achieve this, non-free programs must be
|
||||
allowed to use the library. A more frequent case is that a free
|
||||
library does the same job as widely used non-free libraries. In this
|
||||
case, there is little to gain by limiting the free library to free
|
||||
software only, so we use the Lesser General Public License.
|
||||
|
||||
In other cases, permission to use a particular library in non-free
|
||||
programs enables a greater number of people to use a large body of
|
||||
free software. For example, permission to use the GNU C Library in
|
||||
non-free programs enables many more people to use the whole GNU
|
||||
operating system, as well as its variant, the GNU/Linux operating
|
||||
system.
|
||||
|
||||
Although the Lesser General Public License is Less protective of the
|
||||
users' freedom, it does ensure that the user of a program that is
|
||||
linked with the Library has the freedom and the wherewithal to run
|
||||
that program using a modified version of the Library.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License Agreement applies to any software library or other
|
||||
program which contains a notice placed by the copyright holder or
|
||||
other authorized party saying it may be distributed under the terms of
|
||||
this Lesser General Public License (also called "this License").
|
||||
Each licensee is addressed as "you".
|
||||
|
||||
A "library" means a collection of software functions and/or data
|
||||
prepared so as to be conveniently linked with application programs
|
||||
(which use some of those functions and data) to form executables.
|
||||
|
||||
The "Library", below, refers to any such software library or work
|
||||
which has been distributed under these terms. A "work based on the
|
||||
Library" means either the Library or any derivative work under
|
||||
copyright law: that is to say, a work containing the Library or a
|
||||
portion of it, either verbatim or with modifications and/or translated
|
||||
straightforwardly into another language. (Hereinafter, translation is
|
||||
included without limitation in the term "modification".)
|
||||
|
||||
"Source code" for a work means the preferred form of the work for
|
||||
making modifications to it. For a library, complete source code means
|
||||
all the source code for all modules it contains, plus any associated
|
||||
interface definition files, plus the scripts used to control compilation
|
||||
and installation of the library.
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running a program using the Library is not restricted, and output from
|
||||
such a program is covered only if its contents constitute a work based
|
||||
on the Library (independent of the use of the Library in a tool for
|
||||
writing it). Whether that is true depends on what the Library does
|
||||
and what the program that uses the Library does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Library's
|
||||
complete source code as you receive it, in any medium, provided that
|
||||
you conspicuously and appropriately publish on each copy an
|
||||
appropriate copyright notice and disclaimer of warranty; keep intact
|
||||
all the notices that refer to this License and to the absence of any
|
||||
warranty; and distribute a copy of this License along with the
|
||||
Library.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) The modified work must itself be a software library.
|
||||
|
||||
b) You must cause the files modified to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
c) You must cause the whole of the work to be licensed at no
|
||||
charge to all third parties under the terms of this License.
|
||||
|
||||
d) If a facility in the modified Library refers to a function or a
|
||||
table of data to be supplied by an application program that uses
|
||||
the facility, other than as an argument passed when the facility
|
||||
is invoked, then you must make a good faith effort to ensure that,
|
||||
in the event an application does not supply such function or
|
||||
table, the facility still operates, and performs whatever part of
|
||||
its purpose remains meaningful.
|
||||
|
||||
(For example, a function in a library to compute square roots has
|
||||
a purpose that is entirely well-defined independent of the
|
||||
application. Therefore, Subsection 2d requires that any
|
||||
application-supplied function or table used by this function must
|
||||
be optional: if the application does not supply it, the square
|
||||
root function must still compute square roots.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Library,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Library, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote
|
||||
it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Library.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Library
|
||||
with the Library (or with a work based on the Library) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may opt to apply the terms of the ordinary GNU General Public
|
||||
License instead of this License to a given copy of the Library. To do
|
||||
this, you must alter all the notices that refer to this License, so
|
||||
that they refer to the ordinary GNU General Public License, version 2,
|
||||
instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
|
||||
This option is useful when you wish to copy part of the code of
|
||||
the Library into a program that is not a library.
|
||||
|
||||
4. You may copy and distribute the Library (or a portion or
|
||||
derivative of it, under Section 2) in object code or executable form
|
||||
under the terms of Sections 1 and 2 above provided that you accompany
|
||||
it with the complete corresponding machine-readable source code, which
|
||||
must be distributed under the terms of Sections 1 and 2 above on a
|
||||
medium customarily used for software interchange.
|
||||
|
||||
If distribution of object code is made by offering access to copy
|
||||
from a designated place, then offering equivalent access to copy the
|
||||
source code from the same place satisfies the requirement to
|
||||
distribute the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
5. A program that contains no derivative of any portion of the
|
||||
Library, but is designed to work with the Library by being compiled or
|
||||
linked with it, is called a "work that uses the Library". Such a
|
||||
work, in isolation, is not a derivative work of the Library, and
|
||||
therefore falls outside the scope of this License.
|
||||
|
||||
However, linking a "work that uses the Library" with the Library
|
||||
creates an executable that is a derivative of the Library (because it
|
||||
contains portions of the Library), rather than a "work that uses the
|
||||
library". The executable is therefore covered by this License.
|
||||
Section 6 states terms for distribution of such executables.
|
||||
|
||||
When a "work that uses the Library" uses material from a header file
|
||||
that is part of the Library, the object code for the work may be a
|
||||
derivative work of the Library even though the source code is not.
|
||||
Whether this is true is especially significant if the work can be
|
||||
linked without the Library, or if the work is itself a library. The
|
||||
threshold for this to be true is not precisely defined by law.
|
||||
|
||||
If such an object file uses only numerical parameters, data
|
||||
structure layouts and accessors, and small macros and small inline
|
||||
functions (ten lines or less in length), then the use of the object
|
||||
file is unrestricted, regardless of whether it is legally a derivative
|
||||
work. (Executables containing this object code plus portions of the
|
||||
Library will still fall under Section 6.)
|
||||
|
||||
Otherwise, if the work is a derivative of the Library, you may
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
under terms of your choice, provided that the terms permit
|
||||
modification of the work for the customer's own use and reverse
|
||||
engineering for debugging such modifications.
|
||||
|
||||
You must give prominent notice with each copy of the work that the
|
||||
Library is used in it and that the Library and its use are covered by
|
||||
this License. You must supply a copy of this License. If the work
|
||||
during execution displays copyright notices, you must include the
|
||||
copyright notice for the Library among them, as well as a reference
|
||||
directing the user to the copy of this License. Also, you must do one
|
||||
of these things:
|
||||
|
||||
a) Accompany the work with the complete corresponding
|
||||
machine-readable source code for the Library including whatever
|
||||
changes were used in the work (which must be distributed under
|
||||
Sections 1 and 2 above); and, if the work is an executable linked
|
||||
with the Library, with the complete machine-readable "work that
|
||||
uses the Library", as object code and/or source code, so that the
|
||||
user can modify the Library and then relink to produce a modified
|
||||
executable containing the modified Library. (It is understood
|
||||
that the user who changes the contents of definitions files in the
|
||||
Library will not necessarily be able to recompile the application
|
||||
to use the modified definitions.)
|
||||
|
||||
b) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (1) uses at run time a
|
||||
copy of the library already present on the user's computer system,
|
||||
rather than copying library functions into the executable, and (2)
|
||||
will operate properly with a modified version of the library, if
|
||||
the user installs one, as long as the modified version is
|
||||
interface-compatible with the version that the work was made with.
|
||||
|
||||
c) Accompany the work with a written offer, valid for at
|
||||
least three years, to give the same user the materials
|
||||
specified in Subsection 6a, above, for a charge no more
|
||||
than the cost of performing this distribution.
|
||||
|
||||
d) If distribution of the work is made by offering access to copy
|
||||
from a designated place, offer equivalent access to copy the above
|
||||
specified materials from the same place.
|
||||
|
||||
e) Verify that the user has already received a copy of these
|
||||
materials or that you have already sent this user a copy.
|
||||
|
||||
For an executable, the required form of the "work that uses the
|
||||
Library" must include any data and utility programs needed for
|
||||
reproducing the executable from it. However, as a special exception,
|
||||
the materials to be distributed need not include anything that is
|
||||
normally distributed (in either source or binary form) with the major
|
||||
components (compiler, kernel, and so on) of the operating system on
|
||||
which the executable runs, unless that component itself accompanies
|
||||
the executable.
|
||||
|
||||
It may happen that this requirement contradicts the license
|
||||
restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
library, provided that the separate distribution of the work based on
|
||||
the Library and of the other library facilities is otherwise
|
||||
permitted, and provided that you do these two things:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work
|
||||
based on the Library, uncombined with any other library
|
||||
facilities. This must be distributed under the terms of the
|
||||
Sections above.
|
||||
|
||||
b) Give prominent notice with the combined library of the fact
|
||||
that part of it is a work based on the Library, and explaining
|
||||
where to find the accompanying uncombined form of the same work.
|
||||
|
||||
8. You may not copy, modify, sublicense, link with, or distribute
|
||||
the Library except as expressly provided under this License. Any
|
||||
attempt otherwise to copy, modify, sublicense, link with, or
|
||||
distribute the Library is void, and will automatically terminate your
|
||||
rights under this License. However, parties who have received copies,
|
||||
or rights, from you under this License will not have their licenses
|
||||
terminated so long as such parties remain in full compliance.
|
||||
|
||||
9. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Library or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Library (or any work based on the
|
||||
Library), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Library or works based on it.
|
||||
|
||||
10. Each time you redistribute the Library (or any work based on the
|
||||
Library), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute, link with or modify the Library
|
||||
subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Library at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Library by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Library.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under any
|
||||
particular circumstance, the balance of the section is intended to apply,
|
||||
and the section as a whole is intended to apply in other circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
12. If the distribution and/or use of the Library is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Library under this License may add
|
||||
an explicit geographical distribution limitation excluding those countries,
|
||||
so that distribution is permitted only in or among countries not thus
|
||||
excluded. In such case, this License incorporates the limitation as if
|
||||
written in the body of this License.
|
||||
|
||||
13. The Free Software Foundation may publish revised and/or new
|
||||
versions of the Lesser General Public License from time to time.
|
||||
Such new versions will be similar in spirit to the present version,
|
||||
but may differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Library
|
||||
specifies a version number of this License which applies to it and
|
||||
"any later version", you have the option of following the terms and
|
||||
conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
copyrighted by the Free Software Foundation, write to the Free
|
||||
Software Foundation; we sometimes make exceptions for this. Our
|
||||
decision will be guided by the two goals of preserving the free status
|
||||
of all derivatives of our free software and of promoting the sharing
|
||||
and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
|
||||
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
|
||||
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
|
||||
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
|
||||
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
||||
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
|
||||
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
||||
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
||||
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
|
||||
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
||||
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
||||
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
||||
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
|
||||
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
possible use to the public, we recommend making it free software that
|
||||
everyone can redistribute and change. You can do so by permitting
|
||||
redistribution under these terms (or, alternatively, under the terms of the
|
||||
ordinary General Public License).
|
||||
|
||||
To apply these terms, attach the following notices to the library. It is
|
||||
safest to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least the
|
||||
"copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the library's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
|
||||
USA
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the library, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||
library `Frob' (a library for tweaking knobs) written by James Random
|
||||
Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1990
|
||||
Ty Coon, President of Vice
|
||||
|
||||
That's all there is to it!
|
20
LibLCGConfig.cmake.in
Normal file
20
LibLCGConfig.cmake.in
Normal file
@ -0,0 +1,20 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set(@PROJECT_NAME@_Version "@PROJECT_VERSION@")
|
||||
set_and_check(@PROJECT_NAME@_INSTALL_PREFIX "${PACKAGE_PREFIX_DIR}")
|
||||
set_and_check(@PROJECT_NAME@_INC_DIR "${PACKAGE_PREFIX_DIR}/include")
|
||||
set_and_check(@PROJECT_NAME@_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")
|
||||
set_and_check(@PROJECT_NAME@_LIB_DIR "${PACKAGE_PREFIX_DIR}/lib")
|
||||
set_and_check(@PROJECT_NAME@_LIBRARY_DIR "${PACKAGE_PREFIX_DIR}/lib")
|
||||
|
||||
set(@PROJECT_NAME@_LIB lcg)
|
||||
set(@PROJECT_NAME@_LIBRARY lcg)
|
||||
set(@PROJECT_NAME@_FOUND 1)
|
||||
|
||||
set(@PROJECT_NAME@_OPENMP @LibLCG_OPENMP@)
|
||||
set(@PROJECT_NAME@_EIGEN @LibLCG_EIGEN@)
|
||||
set(@PROJECT_NAME@_STD_COMPLEX @LibLCG_STD_COMPLEX@)
|
||||
set(@PROJECT_NAME@_CUDA @LibLCG_CUDA@)
|
||||
|
||||
# include target information
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
225
README.md
225
README.md
@ -1,2 +1,225 @@
|
||||
# liblcg
|
||||
# C++ Library of the Linear Conjugate Gradient Methods (LibLCG) 说明文档
|
||||
|
||||
张壹(yizhang-geo@zju.edu.cn)
|
||||
|
||||
_浙江大学地球科学学院·地球物理研究所_
|
||||
|
||||
**此说明仅覆盖算法库的简单介绍及使用,更详细的内容请查看代码注释。如果还有问题,请发邮件联系我。同时也欢迎有兴趣的同学加入开发团队!**
|
||||
|
||||
## 简介
|
||||
|
||||
liblcg 是一个高效的、可扩展的 C++ 线性共轭梯度算法库,在原生数据结构接口的基础上,同时提供基于Eigen3和CUDA的算法接口,可以方便地实现基于CPU或GPU并行的加速计算,其中基于Eigen3的算法包含了稠密与稀疏矩阵的实现,而基于CUDA的算法主要为稀疏矩阵的实现。liblcg 包含多种实数与复数域的共轭梯度算法与其他一些迭代求解方法。目前已有的方法包括共轭梯度法、预优的共轭梯度算法、共轭梯度平方算法、双稳共轭梯度算法、BB步共轭梯度投影法与SPG共轭梯度投影法;复数域的双共轭梯度法、共轭梯度平方法、预优的共轭梯度法与TFQMR法。共轭梯度法广泛应用于无约束与不等式约束的线性最优化问题,拥有优良的收敛与计算效率。
|
||||
|
||||
共轭梯度算法可用于求解如下形式的线性方程组:
|
||||
|
||||
```
|
||||
Ax = B
|
||||
```
|
||||
|
||||
其中,A 是一个 N 阶的方阵、x 为 N\*1 大小的待求解的模型向量,B 为 N\*1 大小的需拟合的目标向量。需要注意的是,不同种类的共轭梯度算法对A可能有不同的要求,比如必须是正定的,或者对称的。不同算法的具体要求可以查阅其他参考文献或者查看代码中的注释。
|
||||
|
||||
## 安装
|
||||
|
||||
算法库使用 CMake 工具进行构建,可在不同操作平台生成相应的Makefile或工程文件。
|
||||
|
||||
### 编译选项
|
||||
|
||||
算法库目前可用的编译选项有:
|
||||
* LibLCG_OPENMP:是否使用OpenMP进行加速,需要安装OpenMP。默认为ON。
|
||||
* LibLCG_EIGEN:是否编译基于Eigen的算法与接口,需要安装Eigen。默认为ON。
|
||||
* LibLCG_STD_COMPLEX:是否使用std::complex\<double\>作为复数的默认类型。默认为ON。
|
||||
* LibLCG_CUDA:是否编译基于CUDA的算法与接口,需要安装CUDA。默认为ON。
|
||||
|
||||
用户可以使用cmake命令中的-D选项对编译选项进行设置,比如关闭LibLCG_EIGEN:
|
||||
|
||||
```shell
|
||||
cmake -DLibLCG_EIGEN=OFF
|
||||
```
|
||||
|
||||
### Linux 与 MacOS
|
||||
|
||||
liblcg的默认安装路径为 /usr/local。头文件与动态库分别安装于 include 与 lib 文件夹。具体的编译与安装步骤如下:
|
||||
|
||||
1. 下载安装CMake软件;
|
||||
2. 下载安装GCC编译器(常见系统已内置);
|
||||
3. 在源文件路径内使用如下命令进行编译与安装:
|
||||
|
||||
```shell
|
||||
mkdir build && cd build && cmake .. && make install
|
||||
```
|
||||
|
||||
### Windows
|
||||
|
||||
#### MinGW 和 GCC
|
||||
|
||||
Windows系统不包含GNU编译环境,用户需自行下载并配置。方法如下:
|
||||
|
||||
1. 下载MinGW安装文件,并选择gcc、pthreads与make相关软件包安装;
|
||||
2. 下载安装CMake软件;
|
||||
3. 添加CMake与MinGW可执行文件路径至Windows环境变量;
|
||||
4. 在源文件路径内使用如下命令进行编译与安装:
|
||||
|
||||
```shell
|
||||
mkdir build && cd build && cmake .. -G "MinGW Makefiles" && make install
|
||||
```
|
||||
|
||||
默认的安装路径为C:/Program\\ Files。头文件与动态库分别安装于 include 与 lib 文件夹。
|
||||
|
||||
**注意:用户需要手动添加头文件与动态库地址到计算机的环境变量中。**
|
||||
|
||||
#### Visual Studio
|
||||
|
||||
用户可使用CMake工具构建VS工程文件并编译使用动态库。方法如下:
|
||||
|
||||
1. 下载安装 Visual Studio 软件;
|
||||
2. 下载安装CMake软件;
|
||||
3. 在源文件路径内使用如下命令生成VS工程文件:
|
||||
|
||||
```shell
|
||||
mkdir build && cd build && cmake .. -G "Visual Studio 16 2019"
|
||||
```
|
||||
|
||||
_注:如需生成其他版本的VS工程文件,请使用-G命令查看相应的识别码。_
|
||||
|
||||
4. 使用 Visual Studio 打开.sln工程文件并编译动态库。
|
||||
|
||||
## 使用与编译
|
||||
|
||||
用户使用库函数时需在源文件中引入相应的头文件,如:
|
||||
|
||||
```cpp
|
||||
#include "lcg/lcg.h"
|
||||
```
|
||||
|
||||
编译可执行文件时需链接lcg动态库。以g++为例:
|
||||
|
||||
```shell
|
||||
g++ example.cpp -llcg -o example_out
|
||||
```
|
||||
|
||||
## 快速开始
|
||||
|
||||
要使用liblcg求解线性方程组Ax=B,用户需要定义Ax乘积的计算函数(回调函数),该函数的功能为计算不同的x所对应的乘积Ax。以实数类型的共轭梯度算法为例,其回调函数的接口定义为:
|
||||
|
||||
```cpp
|
||||
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_size);
|
||||
```
|
||||
|
||||
其中,`x`为输入的向量,`prod_Ax`为返回的乘积向量,`n_size`为这两个向量的长度。注意此处参数列表中并不包含矩阵A,这意味着A必须为全局或者类变量。这样设计的主要原因是在某些复杂最优化问题的编程中,计算并存储A并不实际或者划算,此时一般采用的策略是存储相关变量且仅计算Ax的乘积,所以矩阵A并不总是存在。
|
||||
|
||||
用户在定义Ax计算函数后即可调用求解函数 lcg_solver() 对线性方程组进行求解。以无约束的求解函数为例,其声明如下:
|
||||
|
||||
```cpp
|
||||
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
|
||||
```
|
||||
|
||||
其中:
|
||||
1. `lcg_axfunc_ptr Afp` 为正演计算的回调函数;
|
||||
2. `lcg_progress_ptr Pfp` 监控迭代过程的回调函数(非必须,无需监控时使用 nullptr 参数即可);
|
||||
3. `lcg_float* m` 初始解向量,迭代取得的解也保存于此数组;
|
||||
4. `const lcg_float* B` Ax = B 中的 B 项;
|
||||
5. `const int n_size` 解向量的大小;
|
||||
6. `const lcg_para* param` 迭代使用的参数,此参数为 nullptr 即使用默认参数;
|
||||
7. `void* instance` 传入的实例对象, 此函数在类中使用即为类的 this 指针, 在普通函数中使用时即为 nullptr;
|
||||
8. `lcg_solver_enum solver_id` 求解函数使用的求解方法(默认为 `LCG_CGS`),具体的方法代号可查看对应的头文件。
|
||||
|
||||
### 一个简单的例子
|
||||
|
||||
```cpp
|
||||
#include "cmath"
|
||||
#include "iostream"
|
||||
#include "lcg/lcg.h"
|
||||
|
||||
#define M 100
|
||||
#define N 80
|
||||
|
||||
// 返回两个数组元素之间的最大差值
|
||||
// Returns the largest element-wise absolute difference between a and b over
// their first `size` entries (sqrt of the squared difference is the absolute value).
// The running maximum starts at -1, so -1 is returned when the loop body never runs.
// NOTE(review): lcg_max is presumably a two-argument maximum -- confirm in the library header.
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
|
||||
{
|
||||
lcg_float max = -1;
|
||||
for (int i = 0; i < size; i++)
|
||||
{
|
||||
max = lcg_max(sqrt((a[i] - b[i])*(a[i] - b[i])), max);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
// 普通二维数组做核矩阵
|
||||
lcg_float **kernel;
|
||||
// 中间结果数组
|
||||
lcg_float *tmp_arr;
|
||||
|
||||
// 计算核矩阵乘向量的乘积 lcg_solver的回调函数
|
||||
// Callback passed to lcg_solver: writes the product A*x into prod_Ax.
// Uses the global `kernel` matrix and the global scratch array `tmp_arr`;
// the `instance` argument is not used.
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
|
||||
{
|
||||
// Note: the effective system matrix is kernel^T * kernel, of size N*N
|
||||
lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal); // tmp_arr = kernel * x
|
||||
lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose); // prod_Ax = kernel^T * tmp_arr
|
||||
return;
|
||||
}
|
||||
|
||||
// 定义监控函数 lcg_solver的回调函数
|
||||
// 这个函数显示当前的迭代次数与收敛值
|
||||
// Progress callback passed to lcg_solver: prints the iteration count k and the
// current convergence value on a single, carriage-return-refreshed line of std::clog.
// Always returns 0.
// NOTE(review): a non-zero return presumably asks the solver to stop -- confirm in the library header.
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sample driver: builds a random M x N kernel, derives B = kernel^T * kernel * fm from a
// known solution fm, solves for m with lcg_solver (LCG_CG) starting from zero, and prints
// the largest difference between fm and m.
int main(int argc, char const *argv[])
|
||||
{
|
||||
// Allocate the kernel matrix (M x N) and the intermediate array (length M)
|
||||
kernel = lcg_malloc(M, N);
|
||||
tmp_arr = lcg_malloc(M);
|
||||
|
||||
// Fill the kernel matrix with initial values (lcg_vecrnd called with bounds -1.0 and 1.0)
|
||||
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
|
||||
|
||||
// Generate a reference ("true") solution
|
||||
lcg_float *fm = lcg_malloc(N);
|
||||
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||
|
||||
// Compute the right-hand side B = kernel^T * (kernel * fm)
|
||||
lcg_float *B = lcg_malloc(N);
|
||||
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
|
||||
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
|
||||
|
||||
// Set the conjugate gradient parameters
|
||||
lcg_para self_para = lcg_default_parameters();
|
||||
self_para.epsilon = 1e-5;
|
||||
self_para.abs_diff = 0;
|
||||
|
||||
// Declare the solution vector and start from zero
|
||||
lcg_float *m = lcg_malloc(N);
|
||||
lcg_vecset(m, 0.0, N);
|
||||
|
||||
// Solve the linear system with the standard conjugate gradient method (LCG_CG)
|
||||
// The callback functions are handed to the solver
|
||||
// The callbacks are global functions, so the instance argument is NULL
|
||||
int ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
// Release the arrays
|
||||
lcg_free(kernel, M);
|
||||
lcg_free(tmp_arr);
|
||||
lcg_free(fm);
|
||||
lcg_free(B);
|
||||
lcg_free(m);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
**完整的例子储存在[sample](src/sample)文件夹内。**
|
||||
|
||||
## 类模版
|
||||
|
||||
liblcg为不同类型的共轭梯度算法定义了通用的求解类模版,包含了类中函数的指针代理及通用的监控函数实现,用户可直接继承并使用。需要注意的是这些类模版中定义了纯虚的函数接口,用户需要全部实现。其中没用到的定义成空函数就行了。以实数的求解类模版为例,需要实现的接口函数包括:
|
||||
|
||||
```cpp
|
||||
void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
|
||||
void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0
|
||||
```
|
||||
|
||||
其中`AxProduct`是Ax的计算函数,`MxProduct`是预优过程的计算函数,即M^-1x。
|
4
config.h.in
Normal file
4
config.h.in
Normal file
@ -0,0 +1,4 @@
|
||||
#cmakedefine LibLCG_OPENMP
|
||||
#cmakedefine LibLCG_EIGEN
|
||||
#cmakedefine LibLCG_STD_COMPLEX
|
||||
#cmakedefine LibLCG_CUDA
|
11
data/README
Normal file
11
data/README
Normal file
@ -0,0 +1,11 @@
|
||||
case_*_A: Full symmetric matrix
|
||||
|
||||
[ N (int) | nz (int) ]
|
||||
[ RowIdx (int) | ColIdx (int) | Val (double) ] * nz
|
||||
[ b (double) * N ]
|
||||
[ d (double) * N ] (complex matrix only)
|
||||
|
||||
case_*_B: Vector
|
||||
|
||||
[ N (int) ]
|
||||
[ x (double) * N]
|
BIN
data/cases.7z
Normal file
BIN
data/cases.7z
Normal file
Binary file not shown.
105
data/get_cdat.cpp
Normal file
105
data/get_cdat.cpp
Normal file
@ -0,0 +1,105 @@
|
||||
#include "../src/lib/lcg_complex.h"
|
||||
#include "iostream"
|
||||
#include "fstream"
|
||||
#include "vector"
|
||||
|
||||
#include "Eigen/Sparse"
|
||||
|
||||
#define random(x) (rand()%x)
|
||||
|
||||
typedef Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor> spmat_cd; // 注意Eigen默认的稀疏矩阵排序为列优先
|
||||
typedef Eigen::Triplet<lcg_complex> triplt_cd;
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
int N = 1000000;
|
||||
int nz = 1013000;
|
||||
|
||||
lcg_complex *v = new lcg_complex[nz];
|
||||
lcg_complex *x = new lcg_complex[N];
|
||||
lcg_complex *b = new lcg_complex[N];
|
||||
|
||||
lcg_complex one(1.0, 1.0), none(-1.0, -1.0), zero(0.0, 0.0);
|
||||
|
||||
clcg_vecrnd(v, 1.0*one, 10.0*one, nz);
|
||||
clcg_vecrnd(x, 1.0*one, 2.0*one, N);
|
||||
clcg_vecset(b, zero, N);
|
||||
|
||||
std::vector<triplt_cd> val_triplt;
|
||||
val_triplt.reserve(2*(nz-N) + N);
|
||||
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
val_triplt.push_back(triplt_cd(i, i, v[i]));
|
||||
b[i] += v[i]*x[i];
|
||||
}
|
||||
|
||||
srand((int)time(0));
|
||||
|
||||
int r, c;
|
||||
size_t j = N;
|
||||
while (j < nz)
|
||||
{
|
||||
r = random(N);
|
||||
c = random(N);
|
||||
if (r != c)
|
||||
{
|
||||
val_triplt.push_back(triplt_cd(r, c, v[j]));
|
||||
val_triplt.push_back(triplt_cd(c, r, v[j]));
|
||||
|
||||
b[r] += v[j]*x[c];
|
||||
b[c] += v[j]*x[r];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
spmat_cd A;
|
||||
A.resize(N, N);
|
||||
A.setZero();
|
||||
|
||||
A.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||
|
||||
std::ofstream Aout, Bout;
|
||||
Aout.open("case_1M_cA", std::ios::binary);
|
||||
Bout.open("case_1M_cB", std::ios::binary);
|
||||
|
||||
lcg_complex tmp;
|
||||
|
||||
nz = A.nonZeros();
|
||||
|
||||
Aout.write((char*)&N, sizeof(int));
|
||||
Aout.write((char*)&nz, sizeof(int));
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
for (Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor>::InnerIterator it(A, i); it; ++it) // 列循环
|
||||
{
|
||||
r = it.row();
|
||||
c = it.col();
|
||||
tmp = it.value();
|
||||
|
||||
Aout.write((char*)&r, sizeof(int));
|
||||
Aout.write((char*)&c, sizeof(int));
|
||||
Aout.write((char*)&tmp, sizeof(lcg_complex));
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
tmp = b[i];
|
||||
Aout.write((char*)&tmp, sizeof(lcg_complex));
|
||||
}
|
||||
Aout.close();
|
||||
|
||||
Bout.write((char*)&N, sizeof(int));
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
tmp = x[i];
|
||||
Bout.write((char*)&tmp, sizeof(lcg_complex));
|
||||
}
|
||||
Bout.close();
|
||||
|
||||
delete[] v;
|
||||
delete[] x;
|
||||
delete[] b;
|
||||
return 0;
|
||||
}
|
577
doxy/doxygen.sty
Normal file
577
doxy/doxygen.sty
Normal file
@ -0,0 +1,577 @@
|
||||
% stylesheet for doxygen 1.8.17
|
||||
\NeedsTeXFormat{LaTeX2e}
|
||||
\ProvidesPackage{doxygen}
|
||||
|
||||
% Packages used by this style file
|
||||
\RequirePackage{alltt}
|
||||
%%\RequirePackage{array} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||
\RequirePackage{calc}
|
||||
\RequirePackage{float}
|
||||
%%\RequirePackage{ifthen} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||
\RequirePackage{verbatim}
|
||||
\RequirePackage[table]{xcolor}
|
||||
\RequirePackage{longtable_doxygen}
|
||||
\RequirePackage{tabu_doxygen}
|
||||
\RequirePackage{fancyvrb}
|
||||
\RequirePackage{tabularx}
|
||||
\RequirePackage{multirow}
|
||||
\RequirePackage{hanging}
|
||||
\RequirePackage{ifpdf}
|
||||
\RequirePackage{adjustbox}
|
||||
\RequirePackage{amssymb}
|
||||
\RequirePackage{stackengine}
|
||||
\RequirePackage[normalem]{ulem} % for strikeout, but don't modify emphasis
|
||||
|
||||
%---------- Internal commands used in this style file ----------------
|
||||
|
||||
\newcommand{\ensurespace}[1]{%
|
||||
\begingroup%
|
||||
\setlength{\dimen@}{#1}%
|
||||
\vskip\z@\@plus\dimen@%
|
||||
\penalty -100\vskip\z@\@plus -\dimen@%
|
||||
\vskip\dimen@%
|
||||
\penalty 9999%
|
||||
\vskip -\dimen@%
|
||||
\vskip\z@skip% hide the previous |\vskip| from |\addvspace|
|
||||
\endgroup%
|
||||
}
|
||||
|
||||
\newcommand{\DoxyHorRuler}[1]{%
|
||||
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
|
||||
\ifthenelse{#1=0}%
|
||||
{%
|
||||
\hrule%
|
||||
}%
|
||||
{%
|
||||
\hrulefilll%
|
||||
}%
|
||||
}
|
||||
\newcommand{\DoxyLabelFont}{}
|
||||
\newcommand{\entrylabel}[1]{%
|
||||
{%
|
||||
\parbox[b]{\labelwidth-4pt}{%
|
||||
\makebox[0pt][l]{\DoxyLabelFont#1}%
|
||||
\vspace{1.5\baselineskip}%
|
||||
}%
|
||||
}%
|
||||
}
|
||||
|
||||
\newenvironment{DoxyDesc}[1]{%
|
||||
\ensurespace{4\baselineskip}%
|
||||
\begin{list}{}{%
|
||||
\settowidth{\labelwidth}{20pt}%
|
||||
%\setlength{\parsep}{0pt}%
|
||||
\setlength{\itemsep}{0pt}%
|
||||
\setlength{\leftmargin}{\labelwidth+\labelsep}%
|
||||
\renewcommand{\makelabel}{\entrylabel}%
|
||||
}%
|
||||
\item[#1]%
|
||||
}{%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
\newsavebox{\xrefbox}
|
||||
\newlength{\xreflength}
|
||||
\newcommand{\xreflabel}[1]{%
|
||||
\sbox{\xrefbox}{#1}%
|
||||
\setlength{\xreflength}{\wd\xrefbox}%
|
||||
\ifthenelse{\xreflength>\labelwidth}{%
|
||||
\begin{minipage}{\textwidth}%
|
||||
\setlength{\parindent}{0pt}%
|
||||
\hangindent=15pt\bfseries #1\vspace{1.2\itemsep}%
|
||||
\end{minipage}%
|
||||
}{%
|
||||
\parbox[b]{\labelwidth}{\makebox[0pt][l]{\textbf{#1}}}%
|
||||
}%
|
||||
}
|
||||
|
||||
%---------- Commands used by doxygen LaTeX output generator ----------
|
||||
|
||||
% Used by <pre> ... </pre>
|
||||
\newenvironment{DoxyPre}{%
|
||||
\small%
|
||||
\begin{alltt}%
|
||||
}{%
|
||||
\end{alltt}%
|
||||
\normalsize%
|
||||
}
|
||||
% Necessary for redefining not defined characters, i.e. "Replacement Character" in tex output.
|
||||
\newlength{\CodeWidthChar}
|
||||
\newlength{\CodeHeightChar}
|
||||
\settowidth{\CodeWidthChar}{?}
|
||||
\settoheight{\CodeHeightChar}{?}
|
||||
% Necessary for hanging indent
|
||||
\newlength{\DoxyCodeWidth}
|
||||
|
||||
\newcommand\DoxyCodeLine[1]{\hangpara{\DoxyCodeWidth}{1}{#1}\par}
|
||||
|
||||
\newcommand\NiceSpace{%
|
||||
\discretionary{}{\kern\fontdimen2\font}{\kern\fontdimen2\font}%
|
||||
}
|
||||
|
||||
% Used by @code ... @endcode
|
||||
\newenvironment{DoxyCode}[1]{%
|
||||
\par%
|
||||
\scriptsize%
|
||||
\normalfont\ttfamily%
|
||||
\rightskip0pt plus 1fil%
|
||||
\settowidth{\DoxyCodeWidth}{000000}%
|
||||
\settowidth{\CodeWidthChar}{?}%
|
||||
\settoheight{\CodeHeightChar}{?}%
|
||||
\setlength{\parskip}{0ex plus 0ex minus 0ex}%
|
||||
\ifthenelse{\equal{#1}{0}}
|
||||
{
|
||||
{\lccode`~32 \lowercase{\global\let~}\NiceSpace}\obeyspaces%
|
||||
}
|
||||
{
|
||||
{\lccode`~32 \lowercase{\global\let~}}\obeyspaces%
|
||||
}
|
||||
|
||||
}{%
|
||||
\normalfont%
|
||||
\normalsize%
|
||||
\settowidth{\CodeWidthChar}{?}%
|
||||
\settoheight{\CodeHeightChar}{?}%
|
||||
}
|
||||
|
||||
% Redefining not defined characters, i.e. "Replacement Character" in tex output.
|
||||
\def\ucr{\adjustbox{width=\CodeWidthChar,height=\CodeHeightChar}{\stackinset{c}{}{c}{-.2pt}{%
|
||||
\textcolor{white}{\sffamily\bfseries\small ?}}{%
|
||||
\rotatebox{45}{$\blacksquare$}}}}
|
||||
|
||||
% Used by @example, @include, @includelineno and @dontinclude
|
||||
\newenvironment{DoxyCodeInclude}[1]{%
|
||||
\DoxyCode{#1}%
|
||||
}{%
|
||||
\endDoxyCode%
|
||||
}
|
||||
|
||||
% Used by @verbatim ... @endverbatim
|
||||
\newenvironment{DoxyVerb}{%
|
||||
\footnotesize%
|
||||
\verbatim%
|
||||
}{%
|
||||
\endverbatim%
|
||||
\normalsize%
|
||||
}
|
||||
|
||||
% Used by @verbinclude
|
||||
\newenvironment{DoxyVerbInclude}{%
|
||||
\DoxyVerb%
|
||||
}{%
|
||||
\endDoxyVerb%
|
||||
}
|
||||
|
||||
% Used by numbered lists (using '-#' or <ol> ... </ol>)
|
||||
\newenvironment{DoxyEnumerate}{%
|
||||
\enumerate%
|
||||
}{%
|
||||
\endenumerate%
|
||||
}
|
||||
|
||||
% Used by bullet lists (using '-', @li, @arg, or <ul> ... </ul>)
|
||||
\newenvironment{DoxyItemize}{%
|
||||
\itemize%
|
||||
}{%
|
||||
\enditemize%
|
||||
}
|
||||
|
||||
% Used by description lists (using <dl> ... </dl>)
|
||||
\newenvironment{DoxyDescription}{%
|
||||
\description%
|
||||
}{%
|
||||
\enddescription%
|
||||
}
|
||||
|
||||
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
|
||||
% (only if caption is specified)
|
||||
\newenvironment{DoxyImage}{%
|
||||
\begin{figure}[H]%
|
||||
\begin{center}%
|
||||
}{%
|
||||
\end{center}%
|
||||
\end{figure}%
|
||||
}
|
||||
|
||||
% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc
|
||||
% (only if no caption is specified)
|
||||
\newenvironment{DoxyImageNoCaption}{%
|
||||
\begin{center}%
|
||||
}{%
|
||||
\end{center}%
|
||||
}
|
||||
|
||||
% Used by @image
|
||||
% (only if inline is specified)
|
||||
\newenvironment{DoxyInlineImage}{%
|
||||
}{%
|
||||
}
|
||||
|
||||
% Used by @attention
|
||||
\newenvironment{DoxyAttention}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @author and @authors
|
||||
\newenvironment{DoxyAuthor}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @date
|
||||
\newenvironment{DoxyDate}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @invariant
|
||||
\newenvironment{DoxyInvariant}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @note
|
||||
\newenvironment{DoxyNote}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @post
|
||||
\newenvironment{DoxyPostcond}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @pre
|
||||
\newenvironment{DoxyPrecond}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @copyright
|
||||
\newenvironment{DoxyCopyright}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @remark
|
||||
\newenvironment{DoxyRemark}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @return and @returns
|
||||
\newenvironment{DoxyReturn}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @since
|
||||
\newenvironment{DoxySince}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @see
|
||||
\newenvironment{DoxySeeAlso}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @version
|
||||
\newenvironment{DoxyVersion}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @warning
|
||||
\newenvironment{DoxyWarning}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by @internal
|
||||
\newenvironment{DoxyInternal}[1]{%
|
||||
\paragraph*{#1}%
|
||||
}{%
|
||||
}
|
||||
|
||||
% Used by @par and @paragraph
|
||||
\newenvironment{DoxyParagraph}[1]{%
|
||||
\begin{DoxyDesc}{#1}%
|
||||
}{%
|
||||
\end{DoxyDesc}%
|
||||
}
|
||||
|
||||
% Used by parameter lists
|
||||
\newenvironment{DoxyParams}[2][]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\ifthenelse{\equal{#1}{}}%
|
||||
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|}}% name + description
|
||||
{\ifthenelse{\equal{#1}{1}}%
|
||||
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + name + desc
|
||||
{\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + type + name + desc
|
||||
}
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used for fields of simple structs
|
||||
\newenvironment{DoxyFields}[1]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|X[-1,l]|}%
|
||||
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used for fields simple class style enums
|
||||
\newenvironment{DoxyEnumFields}[1]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used for parameters within a detailed function description
|
||||
\newenvironment{DoxyParamCaption}{%
|
||||
\renewcommand{\item}[2][]{\\ \hspace*{2.0cm} ##1 {\em ##2}}%
|
||||
}{%
|
||||
}
|
||||
|
||||
% Used by return value lists
|
||||
\newenvironment{DoxyRetVals}[1]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used by exception lists
|
||||
\newenvironment{DoxyExceptions}[1]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used by template parameter lists
|
||||
\newenvironment{DoxyTemplParams}[1]{%
|
||||
\tabulinesep=1mm%
|
||||
\par%
|
||||
\begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endfirsthead%
|
||||
\multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]%
|
||||
\hline%
|
||||
\endhead%
|
||||
}{%
|
||||
\end{longtabu*}%
|
||||
\vspace{6pt}%
|
||||
}
|
||||
|
||||
% Used for member lists
|
||||
\newenvironment{DoxyCompactItemize}{%
|
||||
\begin{itemize}%
|
||||
\setlength{\itemsep}{-3pt}%
|
||||
\setlength{\parsep}{0pt}%
|
||||
\setlength{\topsep}{0pt}%
|
||||
\setlength{\partopsep}{0pt}%
|
||||
}{%
|
||||
\end{itemize}%
|
||||
}
|
||||
|
||||
% Used for member descriptions
|
||||
\newenvironment{DoxyCompactList}{%
|
||||
\begin{list}{}{%
|
||||
\setlength{\leftmargin}{0.5cm}%
|
||||
\setlength{\itemsep}{0pt}%
|
||||
\setlength{\parsep}{0pt}%
|
||||
\setlength{\topsep}{0pt}%
|
||||
\renewcommand{\makelabel}{\hfill}%
|
||||
}%
|
||||
}{%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
% Used for reference lists (@bug, @deprecated, @todo, etc.)
|
||||
\newenvironment{DoxyRefList}{%
|
||||
\begin{list}{}{%
|
||||
\setlength{\labelwidth}{10pt}%
|
||||
\setlength{\leftmargin}{\labelwidth}%
|
||||
\addtolength{\leftmargin}{\labelsep}%
|
||||
\renewcommand{\makelabel}{\xreflabel}%
|
||||
}%
|
||||
}{%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
% Used by @bug, @deprecated, @todo, etc.
|
||||
\newenvironment{DoxyRefDesc}[1]{%
|
||||
\begin{list}{}{%
|
||||
\renewcommand\makelabel[1]{\textbf{##1}}%
|
||||
\settowidth\labelwidth{\makelabel{#1}}%
|
||||
\setlength\leftmargin{\labelwidth+\labelsep}%
|
||||
}%
|
||||
}{%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
% Used by parameter lists and simple sections
|
||||
\newenvironment{Desc}
|
||||
{\begin{list}{}{%
|
||||
\settowidth{\labelwidth}{20pt}%
|
||||
\setlength{\parsep}{0pt}%
|
||||
\setlength{\itemsep}{0pt}%
|
||||
\setlength{\leftmargin}{\labelwidth+\labelsep}%
|
||||
\renewcommand{\makelabel}{\entrylabel}%
|
||||
}
|
||||
}{%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
% Used by tables
|
||||
\newcommand{\PBS}[1]{\let\temp=\\#1\let\\=\temp}%
|
||||
\newenvironment{TabularC}[1]%
|
||||
{\tabulinesep=1mm
|
||||
\begin{longtabu*}spread 0pt [c]{*#1{|X[-1]}|}}%
|
||||
{\end{longtabu*}\par}%
|
||||
|
||||
\newenvironment{TabularNC}[1]%
|
||||
{\begin{tabu}spread 0pt [l]{*#1{|X[-1]}|}}%
|
||||
{\end{tabu}\par}%
|
||||
|
||||
% Used for member group headers
|
||||
\newenvironment{Indent}{%
|
||||
\begin{list}{}{%
|
||||
\setlength{\leftmargin}{0.5cm}%
|
||||
}%
|
||||
\item[]\ignorespaces%
|
||||
}{%
|
||||
\unskip%
|
||||
\end{list}%
|
||||
}
|
||||
|
||||
% Used when hyperlinks are turned off
|
||||
\newcommand{\doxyref}[3]{%
|
||||
\textbf{#1} (\textnormal{#2}\,\pageref{#3})%
|
||||
}
|
||||
|
||||
% Used to link to a table when hyperlinks are turned on
|
||||
\newcommand{\doxytablelink}[2]{%
|
||||
\ref{#1}%
|
||||
}
|
||||
|
||||
% Used to link to a table when hyperlinks are turned off
|
||||
\newcommand{\doxytableref}[3]{%
|
||||
\ref{#3}%
|
||||
}
|
||||
|
||||
% Used by @addindex
|
||||
\newcommand{\lcurly}{\{}
|
||||
\newcommand{\rcurly}{\}}
|
||||
|
||||
% Colors used for syntax highlighting
|
||||
\definecolor{comment}{rgb}{0.5,0.0,0.0}
|
||||
\definecolor{keyword}{rgb}{0.0,0.5,0.0}
|
||||
\definecolor{keywordtype}{rgb}{0.38,0.25,0.125}
|
||||
\definecolor{keywordflow}{rgb}{0.88,0.5,0.0}
|
||||
\definecolor{preprocessor}{rgb}{0.5,0.38,0.125}
|
||||
\definecolor{stringliteral}{rgb}{0.0,0.125,0.25}
|
||||
\definecolor{charliteral}{rgb}{0.0,0.5,0.5}
|
||||
\definecolor{vhdldigit}{rgb}{1.0,0.0,1.0}
|
||||
\definecolor{vhdlkeyword}{rgb}{0.43,0.0,0.43}
|
||||
\definecolor{vhdllogic}{rgb}{1.0,0.0,0.0}
|
||||
\definecolor{vhdlchar}{rgb}{0.0,0.0,0.0}
|
||||
|
||||
% Color used for table heading
|
||||
\newcommand{\tableheadbgcolor}{lightgray}%
|
||||
|
||||
% Version of hypertarget with correct landing location
|
||||
\newcommand{\Hypertarget}[1]{\Hy@raisedlink{\hypertarget{#1}{}}}
|
||||
|
||||
% possibility to have sections etc. be within the margins
|
||||
% unfortunately had to copy part of book.cls and add \raggedright
|
||||
\makeatletter
|
||||
\newcommand\doxysection{\@startsection {section}{1}{\z@}%
|
||||
{-3.5ex \@plus -1ex \@minus -.2ex}%
|
||||
{2.3ex \@plus.2ex}%
|
||||
{\raggedright\normalfont\Large\bfseries}}
|
||||
\newcommand\doxysubsection{\@startsection{subsection}{2}{\z@}%
|
||||
{-3.25ex\@plus -1ex \@minus -.2ex}%
|
||||
{1.5ex \@plus .2ex}%
|
||||
{\raggedright\normalfont\large\bfseries}}
|
||||
\newcommand\doxysubsubsection{\@startsection{subsubsection}{3}{\z@}%
|
||||
{-3.25ex\@plus -1ex \@minus -.2ex}%
|
||||
{1.5ex \@plus .2ex}%
|
||||
{\raggedright\normalfont\normalsize\bfseries}}
|
||||
\newcommand\doxyparagraph{\@startsection{paragraph}{4}{\z@}%
|
||||
{3.25ex \@plus1ex \@minus.2ex}%
|
||||
{-1em}%
|
||||
{\raggedright\normalfont\normalsize\bfseries}}
|
||||
\newcommand\doxysubparagraph{\@startsection{subparagraph}{5}{\parindent}%
|
||||
{3.25ex \@plus1ex \@minus .2ex}%
|
||||
{-1em}%
|
||||
{\raggedright\normalfont\normalsize\bfseries}}
|
||||
\makeatother
|
||||
% Define caption that is also suitable in a table
|
||||
\makeatletter
|
||||
\def\doxyfigcaption{%
|
||||
\refstepcounter{figure}%
|
||||
\@dblarg{\@caption{figure}}}
|
||||
\makeatother
|
12
doxy/footer.tex
Normal file
12
doxy/footer.tex
Normal file
@ -0,0 +1,12 @@
|
||||
% Latex footer for doxygen 1.8.17
|
||||
%--- End generated contents ---
|
||||
|
||||
% Index
|
||||
\backmatter
|
||||
\newpage
|
||||
\phantomsection
|
||||
\clearemptydoublepage
|
||||
\addcontentsline{toc}{chapter}{\indexname}
|
||||
\printindex
|
||||
|
||||
\end{document}
|
174
doxy/header.tex
Normal file
174
doxy/header.tex
Normal file
@ -0,0 +1,174 @@
|
||||
% Latex header for doxygen 1.8.17
|
||||
\let\mypdfximage\pdfximage\def\pdfximage{\immediate\mypdfximage}\documentclass[twoside]{book}
|
||||
|
||||
%% moved from doxygen.sty due to workaround for LaTex 2019 version and unmaintained tabu package
|
||||
\usepackage{ifthen}
|
||||
\ifx\requestedLaTeXdate\undefined
|
||||
\usepackage{array}
|
||||
\else
|
||||
\usepackage{array}[=2016-10-06]
|
||||
\fi
|
||||
%%
|
||||
% Packages required by doxygen
|
||||
\usepackage{fixltx2e}
|
||||
\usepackage{calc}
|
||||
\usepackage{doxygen}
|
||||
\usepackage{graphicx}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{makeidx}
|
||||
\usepackage{multicol}
|
||||
\usepackage{multirow}
|
||||
\PassOptionsToPackage{warn}{textcomp}
|
||||
\usepackage{textcomp}
|
||||
\usepackage[nointegrals]{wasysym}
|
||||
\usepackage[table]{xcolor}
|
||||
\usepackage{ifpdf,ifxetex}
|
||||
|
||||
% Font selection
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[scaled=.90]{helvet}
|
||||
\usepackage{courier}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{sectsty}
|
||||
\renewcommand{\familydefault}{\sfdefault}
|
||||
\allsectionsfont{%
|
||||
\fontseries{bc}\selectfont%
|
||||
\color{darkgray}%
|
||||
}
|
||||
\renewcommand{\DoxyLabelFont}{%
|
||||
\fontseries{bc}\selectfont%
|
||||
\color{darkgray}%
|
||||
}
|
||||
\newcommand{\+}{\discretionary{\mbox{\scriptsize$\hookleftarrow$}}{}{}}
|
||||
|
||||
% Arguments of doxygenemoji:
|
||||
% 1) ':<text>:' form of the emoji, already "LaTeX"-escaped
|
||||
% 2) file with the name of the emoji without the .png extension
|
||||
% in case image exist use this otherwise use the ':<text>:' form
|
||||
\newcommand{\doxygenemoji}[2]{%
|
||||
\IfFileExists{./#2.png}{\raisebox{-0.1em}{\includegraphics[height=0.9em]{./#2.png}}}{#1}%
|
||||
}
|
||||
% Page & text layout
|
||||
\usepackage{geometry}
|
||||
\geometry{%
|
||||
a4paper,%
|
||||
top=2.5cm,%
|
||||
bottom=2.5cm,%
|
||||
left=2.5cm,%
|
||||
right=2.5cm%
|
||||
}
|
||||
\tolerance=750
|
||||
\hfuzz=15pt
|
||||
\hbadness=750
|
||||
\setlength{\emergencystretch}{15pt}
|
||||
\setlength{\parindent}{0cm}
|
||||
\newcommand{\doxynormalparskip}{\setlength{\parskip}{3ex plus 2ex minus 2ex}}
|
||||
\newcommand{\doxytocparskip}{\setlength{\parskip}{1ex plus 0ex minus 0ex}}
|
||||
\doxynormalparskip
|
||||
\makeatletter
|
||||
\renewcommand{\paragraph}{%
|
||||
\@startsection{paragraph}{4}{0ex}{-1.0ex}{1.0ex}{%
|
||||
\normalfont\normalsize\bfseries\SS@parafont%
|
||||
}%
|
||||
}
|
||||
\renewcommand{\subparagraph}{%
|
||||
\@startsection{subparagraph}{5}{0ex}{-1.0ex}{1.0ex}{%
|
||||
\normalfont\normalsize\bfseries\SS@subparafont%
|
||||
}%
|
||||
}
|
||||
\makeatother
|
||||
|
||||
\makeatletter
|
||||
\newcommand\hrulefilll{\leavevmode\leaders\hrule\hskip 0pt plus 1filll\kern\z@}
|
||||
\makeatother
|
||||
|
||||
% Headers & footers
|
||||
\usepackage{fancyhdr}
|
||||
\pagestyle{fancyplain}
|
||||
\fancyhead[LE]{\fancyplain{}{\bfseries\thepage}}
|
||||
\fancyhead[CE]{\fancyplain{}{}}
|
||||
\fancyhead[RE]{\fancyplain{}{\bfseries\leftmark}}
|
||||
\fancyhead[LO]{\fancyplain{}{\bfseries\rightmark}}
|
||||
\fancyhead[CO]{\fancyplain{}{}}
|
||||
\fancyhead[RO]{\fancyplain{}{\bfseries\thepage}}
|
||||
\fancyfoot[LE]{\fancyplain{}{}}
|
||||
\fancyfoot[CE]{\fancyplain{}{}}
|
||||
\fancyfoot[RE]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
|
||||
\fancyfoot[LO]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }}
|
||||
\fancyfoot[CO]{\fancyplain{}{}}
|
||||
\fancyfoot[RO]{\fancyplain{}{}}
|
||||
\renewcommand{\footrulewidth}{0.4pt}
|
||||
\renewcommand{\chaptermark}[1]{%
|
||||
\markboth{#1}{}%
|
||||
}
|
||||
\renewcommand{\sectionmark}[1]{%
|
||||
\markright{\thesection\ #1}%
|
||||
}
|
||||
|
||||
% Indices & bibliography
|
||||
\usepackage{natbib}
|
||||
\usepackage[titles]{tocloft}
|
||||
\setcounter{tocdepth}{3}
|
||||
\setcounter{secnumdepth}{5}
|
||||
\makeindex
|
||||
|
||||
\usepackage{newunicodechar}
|
||||
\newunicodechar{⁻}{${}^{-}$}% Superscript minus
|
||||
\newunicodechar{²}{${}^{2}$}% Superscript two
|
||||
\newunicodechar{³}{${}^{3}$}% Superscript three
|
||||
|
||||
% Hyperlinks (required, but should be loaded last)
|
||||
\ifpdf
|
||||
\usepackage[pdftex,pagebackref=true]{hyperref}
|
||||
\else
|
||||
\ifxetex
|
||||
\usepackage[pagebackref=true]{hyperref}
|
||||
\else
|
||||
\usepackage[ps2pdf,pagebackref=true]{hyperref}
|
||||
\fi
|
||||
\fi
|
||||
|
||||
\hypersetup{%
|
||||
colorlinks=true,%
|
||||
linkcolor=blue,%
|
||||
citecolor=blue,%
|
||||
unicode%
|
||||
}
|
||||
|
||||
% Custom commands
|
||||
\newcommand{\clearemptydoublepage}{%
|
||||
\newpage{\pagestyle{empty}\cleardoublepage}%
|
||||
}
|
||||
|
||||
\usepackage{caption}
|
||||
\captionsetup{labelsep=space,justification=centering,font={bf},singlelinecheck=off,skip=4pt,position=top}
|
||||
|
||||
\usepackage{etoc}
|
||||
\etocsettocstyle{\doxytocparskip}{\doxynormalparskip}
|
||||
\renewcommand{\numberline}[1]{#1~}
|
||||
%===== C O N T E N T S =====
|
||||
|
||||
\begin{document}
|
||||
|
||||
% Titlepage & ToC
|
||||
\hypersetup{pageanchor=false,
|
||||
bookmarksnumbered=true,
|
||||
pdfencoding=unicode
|
||||
}
|
||||
\pagenumbering{alph}
|
||||
\begin{titlepage}
|
||||
\vspace*{7cm}
|
||||
\begin{center}%
|
||||
{\Large C++ Library of the Linear Conjugate Gradient Methods (LibLCG)}\\
|
||||
\vspace*{1cm}
|
||||
{\large Yi Zhang}\\
|
||||
\end{center}
|
||||
\end{titlepage}
|
||||
\clearemptydoublepage
|
||||
\pagenumbering{roman}
|
||||
\tableofcontents
|
||||
\clearemptydoublepage
|
||||
\pagenumbering{arabic}
|
||||
\hypersetup{pageanchor=true}
|
||||
|
||||
%--- Begin generated contents ---
|
BIN
refman.pdf
Normal file
BIN
refman.pdf
Normal file
Binary file not shown.
181
src/CMakeLists.txt
Normal file
181
src/CMakeLists.txt
Normal file
@ -0,0 +1,181 @@
|
||||
# Collect every source file found in lib/ into LCGLIB_SRC
aux_source_directory(lib LCGLIB_SRC)

# Leave the Eigen back-end out of the build when Eigen support is switched off
if(NOT LibLCG_EIGEN)
    list(REMOVE_ITEM LCGLIB_SRC
        "lib/algebra_eigen.cpp"
        "lib/lcg_eigen.cpp"
        "lib/clcg_eigen.cpp"
        "lib/solver_eigen.cpp"
        "lib/preconditioner_eigen.cpp")
endif()

# Leave the CUDA back-end out of the build when CUDA support is switched off
if(NOT LibLCG_CUDA)
    list(REMOVE_ITEM LCGLIB_SRC
        "lib/algebra_cuda.cu"
        "lib/lcg_complex_cuda.cu"
        "lib/lcg_cuda.cu"
        "lib/clcg_cuda.cu"
        "lib/clcg_cuda_f.cu"
        "lib/solver_cuda.cu"
        "lib/preconditioner_cuda.cu")
endif()
|
||||
|
||||
# Compile options.
# CMAKE_CXX_STANDARD only initialises the CXX_STANDARD property of targets that
# are created AFTER it is set, so it must precede the add_library() calls below;
# otherwise the libraries are built with the compiler's default standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")

# Library build.
# Target names must be unique, so the shared and the static library cannot be
# declared under the same target name. There is no need to add the "lib" prefix
# or a file suffix to the target name; CMake adds them on its own.
add_library(lcg SHARED ${LCGLIB_SRC})
# Static flavour of the same sources
add_library(lcg_static STATIC ${LCGLIB_SRC})
# Give the static library the same output name as the shared one
set_target_properties(lcg_static PROPERTIES OUTPUT_NAME "lcg")
# Target properties intended to let both libraries be produced side by side
set_target_properties(lcg PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set_target_properties(lcg_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(LibLCG_CUDA)
    set_target_properties(lcg PROPERTIES CUDA_ARCHITECTURES 70)
    set_target_properties(lcg_static PROPERTIES CUDA_ARCHITECTURES 70)
endif()
# Version number of the shared library
set_target_properties(lcg PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
# Output location of the library files
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
|
||||
|
||||
# Eigen back-end: header-only, so only the include path is needed
if(LibLCG_EIGEN)
    find_package(Eigen3 REQUIRED)
    if(EIGEN3_FOUND)
        message(STATUS "Eigen3 Found.")
        include_directories(${EIGEN3_INCLUDE_DIR})
    endif()
endif()

# CUDA back-end: enable the language and link cuBLAS, cuSPARSE and cuSOLVER
if(LibLCG_CUDA)
    enable_language(CUDA)
    find_package(CUDA REQUIRED)
    if(CUDA_FOUND)
        message(STATUS "CUDA Found.")
        include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
        find_library(CUBLAS_LIBRARY cublas ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        find_library(CUSPARSE_LIBRARY cusparse ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        find_library(CUSOLVER_LIBRARY cusolver ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
        target_link_libraries(lcg PUBLIC
            ${CUBLAS_LIBRARY} ${CUSPARSE_LIBRARY} ${CUSOLVER_LIBRARY})
        target_link_libraries(lcg_static
            ${CUBLAS_LIBRARY} ${CUSPARSE_LIBRARY} ${CUSOLVER_LIBRARY})
    endif()
endif()

if(LibLCG_OPENMP)
    # Add the OpenMP compile and link options
    find_package(OpenMP REQUIRED)
    if (OpenMP_CXX_FOUND)
        message(STATUS "OpenMP Found.")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
        target_link_libraries(lcg PUBLIC OpenMP::OpenMP_CXX)
        target_link_libraries(lcg_static OpenMP::OpenMP_CXX)
    endif()
endif()
|
||||
|
||||
# Where the CMake package files end up, relative to the install prefix
set(CONFIG_FILE_PATH lib/cmake/${PROJECT_NAME})

# Generate <Project>Config.cmake from its template
configure_package_config_file(${PROJECT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
    ${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
    INSTALL_DESTINATION ${CONFIG_FILE_PATH}
    NO_CHECK_REQUIRED_COMPONENTS_MACRO)

# Generate <Project>ConfigVersion.cmake
write_basic_package_version_file(${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
    VERSION ${PROJECT_VERSION}
    COMPATIBILITY SameMajorVersion)

# Library install rules
if(WIN32)
    # On Windows only the two libraries are installed (no export set, no package files)
    install(TARGETS lcg DESTINATION lib)
    install(TARGETS lcg_static DESTINATION lib)
else()
    install(TARGETS lcg lcg_static
        EXPORT ${PROJECT_NAME}Targets
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib)
    install(EXPORT ${PROJECT_NAME}Targets
        DESTINATION ${CONFIG_FILE_PATH})
    install(FILES
        ${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
        ${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
        DESTINATION ${CONFIG_FILE_PATH})
endif()

# Header install rules: core headers are always installed
install(FILES
    lib/config.h
    lib/algebra.h
    lib/lcg_complex.h
    lib/util.h
    lib/lcg.h
    lib/clcg.h
    lib/solver.h
    lib/preconditioner.h
    DESTINATION include/lcg)

# CUDA headers
if(LibLCG_CUDA)
    install(FILES
        lib/algebra_cuda.h
        lib/lcg_complex_cuda.h
        lib/lcg_cuda.h
        lib/clcg_cuda.h
        lib/clcg_cudaf.h
        lib/solver_cuda.h
        lib/preconditioner_cuda.h
        DESTINATION include/lcg)
endif()

# Eigen headers
if(LibLCG_EIGEN)
    install(FILES
        lib/algebra_eigen.h
        lib/lcg_eigen.h
        lib/clcg_eigen.h
        lib/solver_eigen.h
        lib/preconditioner_eigen.h
        DESTINATION include/lcg)
endif()
|
||||
|
||||
# Example programs.
# Output location of the executables
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

# add_sample(<target name> <source file inside sample/>)
# Builds one example executable and links it against the shared library.
macro(add_sample name file)
    # Executable built from the given sample source
    add_executable(${name} sample/${file})
    # Search path for the shared library once installed
    # (has no use on Windows and is simply ignored there)
    set_property(TARGET ${name} PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib)
    # Link the shared library
    target_link_libraries(${name} PUBLIC lcg)
    # CUDA settings
    if(LibLCG_CUDA)
        set_property(TARGET ${name} PROPERTY CUDA_ARCHITECTURES 70)
    endif()
endmacro()
|
||||
|
||||
# Samples that only need the core library
foreach(sample_id 1 2 3 4)
    add_sample(lcg_sample${sample_id} sample${sample_id}.cpp)
endforeach()

# Samples that need Eigen (sample6 additionally needs std::complex support)
if(LibLCG_EIGEN)
    add_sample(lcg_sample5 sample5.cpp)
    add_sample(lcg_sample7 sample7.cpp)
    if(LibLCG_STD_COMPLEX)
        add_sample(lcg_sample6 sample6.cpp)
    endif()
endif()

# Samples that need CUDA
if(LibLCG_CUDA)
    # The followings are not working for now due to CUDA 12+ compatibility issues. Check more later
    #add_sample(lcg_sample8 sample8.cu)
    #add_sample(lcg_sample9 sample9.cu)
    #add_sample(lcg_sample10 sample10.cu)
    #add_sample(lcg_sample11 sample11.cu)
    #add_sample(lcg_sample12 sample12.cu)
    #add_sample(lcg_sample13 sample13.cu)
    #add_sample(lcg_sample14 sample14.cu)
    add_sample(lcg_sample15 sample15.cu)
endif()
|
222
src/lib/algebra.cpp
Normal file
222
src/lib/algebra.cpp
Normal file
@ -0,0 +1,222 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
#include "cstdlib"
#include "ctime"
#include "random"

#include "algebra.h"
|
||||
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
/**
 * Absolute value of a real number.
 *
 * Values that compare >= 0.0 are handed back untouched; everything else is
 * multiplied by -1.0.
 */
lcg_float lcg_abs(lcg_float a)
{
    return (a >= 0.0) ? a : -1.0*a;
}
|
||||
|
||||
/**
 * Larger of two real numbers; a is preferred when both compare equal.
 */
lcg_float lcg_max(lcg_float a, lcg_float b)
{
    return (a >= b) ? a : b;
}
|
||||
|
||||
/**
 * Smaller of two real numbers; a is preferred when both compare equal.
 */
lcg_float lcg_min(lcg_float a, lcg_float b)
{
    return (a <= b) ? a : b;
}
|
||||
|
||||
/**
 * Clamp a value into the box [low, hig].
 *
 * The high boundary is tested first. When a boundary is inclusive
 * (low_bound / hig_bound is true) the boundary value itself is returned.
 * When it is exclusive, a value strictly inside the box is returned: the
 * boundary shifted by 1e-16, or, if that shift is lost to rounding (which
 * happens for most boundaries of magnitude >= 1), the closest representable
 * number on the inner side of the boundary.
 *
 * @param[in] low        low boundary
 * @param[in] hig        high boundary
 * @param[in] a          input value
 * @param[in] low_bound  whether the low boundary value is included
 * @param[in] hig_bound  whether the high boundary value is included
 *
 * @return box constrained value
 */
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a, 
    bool low_bound, bool hig_bound)
{
    if (a >= hig)
    {
        if (hig_bound) return hig;

        lcg_float inside = hig - 1e-16;
        // 1e-16 is below the spacing of doubles around most values >= 1, so the
        // subtraction may give back hig itself; step to the next lower number.
        if (!(inside < hig)) inside = std::nextafter(hig, -HUGE_VAL);
        return inside;
    }

    if (a <= low)
    {
        if (low_bound) return low;

        lcg_float inside = low + 1e-16;
        // Same rounding problem on the low side: step to the next higher number.
        if (!(inside > low)) inside = std::nextafter(low, HUGE_VAL);
        return inside;
    }

    return a;
}
|
||||
|
||||
/**
 * Allocate an array of n lcg_float values with new[].
 *
 * The elements are not initialised. Release the array with lcg_free().
 */
lcg_float* lcg_malloc(int n)
{
    return new lcg_float [n];
}
|
||||
|
||||
/**
 * Allocate an m-by-n array as m separately allocated rows of n lcg_float
 * values each.
 *
 * The elements are not initialised. Release the array with lcg_free(x, m).
 */
lcg_float** lcg_malloc(int m, int n)
{
    lcg_float **rows = new lcg_float* [m];

    int r = 0;
    while (r < m)
    {
        rows[r] = new lcg_float [n];
        ++r;
    }
    return rows;
}
|
||||
|
||||
/**
 * Release an array obtained from lcg_malloc(int).
 *
 * The pointer is received by value, so the caller's copy is NOT reset and
 * must not be used after this call.
 *
 * @param x Pointer of the array. A null pointer is accepted.
 */
void lcg_free(lcg_float* x)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] x;
    return;
}
|
||||
|
||||
/**
 * Release a 2D array obtained from lcg_malloc(int, int).
 *
 * Every one of the m rows is released first, then the row table itself.
 * The pointer is received by value, so the caller's copy is NOT reset and
 * must not be used after this call.
 *
 * @param x Pointer of the array. A null pointer is accepted.
 * @param m Row size of the array.
 */
void lcg_free(lcg_float **x, int m)
{
    // The rows can only be reached through the table, so a null table means
    // there is nothing to release.
    if (x == nullptr) return;

    for (int i = 0; i < m; i++)
    {
        delete[] x[i];
    }
    delete[] x;
    return;
}
|
||||
|
||||
/**
 * Assign the value b to the first size elements of the vector a.
 * Nothing is written when size is not positive.
 */
void lcg_vecset(lcg_float *a, lcg_float b, int size)
{
    int k = 0;
    while (k < size)
    {
        a[k] = b;
        ++k;
    }
}
|
||||
|
||||
/**
 * Assign the value b to every element of the m-by-n array a,
 * visiting the rows one after another.
 */
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n)
{
    int r = 0;
    while (r < m)
    {
        int c = 0;
        while (c < n)
        {
            a[r][c] = b;
            ++c;
        }
        ++r;
    }
}
|
||||
|
||||
/**
 * Seed rand() from the wall clock the first time any lcg_vecrnd() overload runs.
 *
 * Re-seeding with time() on every call restarts the generator with the same
 * seed whenever two calls fall into the same second, which makes them
 * produce identical "random" data. Seeding once avoids that.
 */
static void lcg_vecrnd_seed_once()
{
    // Initialisation of a function-local static happens exactly once (C++11).
    static const bool seeded = (srand(static_cast<unsigned int>(time(nullptr))), true);
    (void) seeded;
}

/**
 * Fill a vector with random values drawn from rand() and scaled into [l, h].
 *
 * @param a    pointer of the vector
 * @param l    the lower bound of random values
 * @param h    the higher bound of random values
 * @param size size of the vector
 */
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size)
{
    lcg_vecrnd_seed_once();
    for (int i = 0; i < size; i++)
    {
        // rand()/RAND_MAX lies in [0, 1]; stretch it to the requested interval
        a[i] = (h-l)*rand()*1.0/RAND_MAX + l;
    }
    return;
}

/**
 * Fill an m-by-n array with random values drawn from rand() and scaled into [l, h].
 *
 * @param a pointer of the array
 * @param l the lower bound of random values
 * @param h the higher bound of random values
 * @param m row size of the array
 * @param n column size of the array
 */
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n)
{
    lcg_vecrnd_seed_once();
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            a[i][j] = (h-l)*rand()*1.0/RAND_MAX + l;
        }
    }
    return;
}
|
||||
|
||||
/**
 * Squared L2 norm of a vector, i.e. the sum of the squares of its elements.
 *
 * @param a pointer of the vector
 * @param n size of the vector; a non-positive size yields 0
 *
 * @return squared L2 norm
 */
double lcg_squaredl2norm(lcg_float *a, int n)
{
    lcg_float sum = 0;
    // Signed index: comparing an unsigned index with a negative n would turn
    // n into a huge bound and read far beyond the array.
    for (int i = 0; i < n; i++)
    {
        sum += a[i]*a[i];
    }
    return sum;
}
|
||||
|
||||
/**
 * Dot product of two real vectors.
 *
 * ret is cleared first and then accumulates a[k]*b[k] for k = 0 .. size-1,
 * in that order.
 */
void lcg_dot(lcg_float &ret, const lcg_float *a, 
    const lcg_float *b, int size)
{
    ret = 0.0;

    int k = 0;
    while (k < size)
    {
        ret += a[k]*b[k];
        ++k;
    }
}
|
||||
|
||||
/**
 * Product of a dense real matrix (or its transpose) and a vector.
 *
 * layout == MatNormal : Ax[r] = sum over c of A[r][c]*x[c]   (Ax holds m_size values)
 * any other layout    : Ax[c] = sum over r of A[r][c]*x[r]   (Ax holds n_size values)
 *
 * The loop over the output elements carries an OpenMP "parallel for" pragma.
 */
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax, 
    int m_size, int n_size, lcg_matrix_e layout)
{
    int r, c;

    if (layout != MatNormal)
    {
        // Transposed product: one output element per column of A
#pragma omp parallel for private (r, c) schedule(guided)
        for (c = 0; c < n_size; c++)
        {
            Ax[c] = 0.0;
            for (r = 0; r < m_size; r++)
            {
                Ax[c] += A[r][c]*x[r];
            }
        }
    }
    else
    {
        // Plain product: one output element per row of A
#pragma omp parallel for private (r, c) schedule(guided)
        for (r = 0; r < m_size; r++)
        {
            Ax[r] = 0.0;
            for (c = 0; c < n_size; c++)
            {
                Ax[r] += A[r][c]*x[c];
            }
        }
    }
    return;
}
|
||||
|
||||
/**
 * Product of a sparse matrix stored in the COO format and a vector.
 *
 * pre_position == false : p[row[i]] += Mat[i]*V[col[i]]  (p holds M values)
 * pre_position == true  : p[col[i]] += Mat[i]*V[row[i]]  (p holds N values)
 *
 * The output is cleared before accumulation. The indices stored in row and
 * col are used as they are, without any range check.
 *
 * @param row          Row index of every non-zero entry
 * @param col          Column index of every non-zero entry
 * @param Mat          Values of the non-zero entries
 * @param V            Multiplier vector
 * @param p            Output product
 * @param M            Row number of the sparse matrix
 * @param N            Column number of the sparse matrix
 * @param nz_size      Number of non-zero entries
 * @param pre_position Selects which of the two products above is computed
 */
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position)
{
    // The two cases differ only in which index array addresses the output
    // and how long the output is.
    const int out_size = pre_position ? N : M;
    const int *out_idx = pre_position ? col : row;
    const int *in_idx  = pre_position ? row : col;

    // Signed indices: an unsigned index compared with a negative size would
    // turn the size into a huge bound.
    for (int i = 0; i < out_size; i++)
    {
        p[i] = 0.0;
    }

    for (int i = 0; i < nz_size; i++)
    {
        p[out_idx[i]] += Mat[i]*V[in_idx[i]];
    }
    return;
}
|
219
src/lib/algebra.h
Normal file
219
src/lib/algebra.h
Normal file
@ -0,0 +1,219 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _ALGEBRA_H
|
||||
#define _ALGEBRA_H
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/**
|
||||
* @brief Matrix layouts.
|
||||
*/
|
||||
enum lcg_matrix_e
|
||||
{
|
||||
MatNormal,
|
||||
MatTranspose,
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Conjugate types for a complex number.
|
||||
*/
|
||||
enum clcg_complex_e
|
||||
{
|
||||
NonConjugate,
|
||||
Conjugate,
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A simple definition of the float type we use here.
|
||||
* Easy to change in the future. Right now it is just an alias of double
|
||||
*/
|
||||
typedef double lcg_float;
|
||||
|
||||
/**
|
||||
* @brief Return absolute value
|
||||
*
|
||||
* @param[in] a input value
|
||||
*
|
||||
* @return The absolute value
|
||||
*/
|
||||
lcg_float lcg_abs(lcg_float a);
|
||||
|
||||
/**
|
||||
* @brief Return the bigger value
|
||||
*
|
||||
* @param[in] a input value
|
||||
* @param[in] b input value
|
||||
*
|
||||
* @return The bigger value
|
||||
*/
|
||||
lcg_float lcg_max(lcg_float a, lcg_float b);
|
||||
|
||||
/**
|
||||
* @brief Return the smaller value
|
||||
*
|
||||
* @param[in] a input value
|
||||
* @param[in] b input value
|
||||
*
|
||||
* @return The smaller value
|
||||
*/
|
||||
lcg_float lcg_min(lcg_float a, lcg_float b);
|
||||
|
||||
/**
|
||||
* @brief Set the input value within a box constraint
|
||||
*
|
||||
* @param low low boundary
* @param hig high boundary
* @param a input value
|
||||
* @param low_bound Whether to include the low boundary value
|
||||
* @param hig_bound Whether to include the high boundary value
|
||||
*
|
||||
* @return box constrained value
|
||||
*/
|
||||
lcg_float lcg_set2box(lcg_float low, lcg_float hig, lcg_float a,
|
||||
bool low_bound = true, bool hig_bound = true);
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for a one-dimensional lcg_float array.
|
||||
*
|
||||
* @param[in] n Size of the lcg_float array.
|
||||
*
|
||||
* @return Pointer of the array's location.
|
||||
*/
|
||||
lcg_float* lcg_malloc(int n);
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for a two-dimensional lcg_float array.
*
* @param[in] m Row size of the lcg_float array.
* @param[in] n Column size of the lcg_float array.
|
||||
*
|
||||
* @return Pointer of the array's location.
|
||||
*/
|
||||
lcg_float** lcg_malloc(int m, int n);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the lcg_float type array.
|
||||
*
|
||||
* @param x Pointer of the array.
|
||||
*/
|
||||
void lcg_free(lcg_float* x);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the 2D lcg_float type array.
|
||||
*
|
||||
* @param x Pointer of the array.
* @param m Row size of the array.
|
||||
*/
|
||||
void lcg_free(lcg_float **x, int m);
|
||||
|
||||
/**
|
||||
* @brief set a vector's value
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] b initial value
|
||||
* @param[in] size vector size
|
||||
*/
|
||||
void lcg_vecset(lcg_float *a, lcg_float b, int size);
|
||||
|
||||
/**
|
||||
* @brief set a 2d vector's value
|
||||
*
|
||||
* @param a pointer of the matrix
|
||||
* @param[in] b initial value
|
||||
* @param[in] m row size of the matrix
|
||||
* @param[in] n column size of the matrix
|
||||
*/
|
||||
void lcg_vecset(lcg_float **a, lcg_float b, int m, int n);
|
||||
|
||||
/**
|
||||
* @brief set a vector using random values
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] l the lower bound of random values
|
||||
* @param[in] h the higher bound of random values
|
||||
* @param[in] size size of the vector
|
||||
*/
|
||||
void lcg_vecrnd(lcg_float *a, lcg_float l, lcg_float h, int size);
|
||||
|
||||
/**
|
||||
* @brief set a 2D vector using random values
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] l the lower bound of random values
|
||||
* @param[in] h the higher bound of random values
|
||||
* @param[in] m row size of the vector
|
||||
* @param[in] n column size of the vector
|
||||
*/
|
||||
void lcg_vecrnd(lcg_float **a, lcg_float l, lcg_float h, int m, int n);
|
||||
|
||||
/**
|
||||
* @brief calculate the squared L2 norm of the input vector
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param n size of the vector
|
||||
* @return squared L2 norm of the vector
|
||||
*/
|
||||
double lcg_squaredl2norm(lcg_float *a, int n);
|
||||
|
||||
/**
|
||||
* @brief calculate dot product of two real vectors
|
||||
*
|
||||
* @param[out] ret dot product of the two vectors
* @param[in] a pointer of the vector a
* @param[in] b pointer of the vector b
* @param[in] size size of the vector
|
||||
*/
|
||||
void lcg_dot(lcg_float &ret, const lcg_float *a, const lcg_float *b, int size);
|
||||
|
||||
/**
|
||||
* @brief calculate product of a real matrix and a vector
|
||||
*
|
||||
* Different configurations:
|
||||
* layout=MatNormal -> A
* layout=MatTranspose -> A^T
|
||||
*
|
||||
* @param A matrix A
|
||||
* @param[in] x vector x
|
||||
* @param Ax product of Ax
|
||||
* @param[in] m_size row size of A
|
||||
* @param[in] n_size column size of A
|
||||
* @param[in] layout layout of A used for multiplication. Must be MatNormal or MatTranspose
|
||||
*/
|
||||
void lcg_matvec(lcg_float **A, const lcg_float *x, lcg_float *Ax, int m_size, int n_size,
|
||||
lcg_matrix_e layout = MatNormal);
|
||||
|
||||
/**
|
||||
* @brief Calculate the product of a sparse matrix multiplied by a vector. The matrix is stored in the COO format.
*
* @param row Row index of the input sparse matrix.
* @param col Column index of the input sparse matrix.
* @param Mat Non-zero values of the input sparse matrix.
* @param V Multiplier vector
* @param p Output product
* @param M Row number of the sparse matrix
* @param N Column number of the sparse matrix
* @param nz_size Non-zero size of the matrix
* @param pre_position If true, the multiplier is seen as a row vector. Otherwise, it is treated as a column vector.
|
||||
*/
|
||||
void lcg_matvec_coo(const int *row, const int *col, const lcg_float *Mat, const lcg_float *V, lcg_float *p, int M, int N, int nz_size, bool pre_position = false);
|
||||
|
||||
#endif //_ALGEBRA_H
|
110
src/lib/algebra_cuda.cu
Normal file
110
src/lib/algebra_cuda.cu
Normal file
@ -0,0 +1,110 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "algebra_cuda.h"
|
||||
|
||||
|
||||
/**
 * Kernel: clamp a[i] into [low[i], hig[i]] for every i < n.
 *
 * Uses a 1D index (blockIdx.x * blockDim.x + threadIdx.x), one element per
 * thread; threads whose index is >= n do nothing. The high boundary is
 * applied first, then the low boundary.
 *
 * NOTE(review): low_bound / hig_bound only switch the comparison between
 * ">=" / "<=" and ">" / "<". In both cases the element is replaced by the
 * boundary value itself, so the stored result is numerically the same for
 * either flag setting.
 */
__global__ void lcg_set2box_cuda_device(const lcg_float *low, const lcg_float *hig, lcg_float *a, 
    int n, bool low_bound, bool hig_bound)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    lcg_float *cell = a + idx;

    const bool above = hig_bound ? (*cell >= hig[idx]) : (*cell > hig[idx]);
    if (above) *cell = hig[idx];

    const bool below = low_bound ? (*cell <= low[idx]) : (*cell < low[idx]);
    if (below) *cell = low[idx];
}
|
||||
|
||||
/**
 * Kernel: extract the diagonal of a square sparse matrix stored in the CSR format.
 *
 * Uses a 1D index (blockIdx.x * blockDim.x + threadIdx.x), one matrix row
 * per thread; threads whose index is >= A_len do nothing. Each thread scans
 * the stored entries of its row, A_ptr[row] .. A_ptr[row + 1] - 1, for the
 * entry whose column equals the row number.
 *
 * A row without a stored diagonal entry yields 0.0, so every element of
 * A_diag is defined after the kernel has run.
 *
 * @param[in] A_ptr  Row index pointer (A_len + 1 values are read)
 * @param[in] A_col  Column index of every stored entry
 * @param[in] A_val  Value of every stored entry
 * @param[in] A_len  Dimension of the matrix
 * @param     A_diag Output diagonal elements (A_len values)
 */
__global__ void lcg_smDcsr_get_diagonal_device(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    const int row_end = A_ptr[row + 1];

    // An entry that is not stored is structurally zero.
    lcg_float diag = 0.0;
    for (int k = A_ptr[row]; k < row_end; k++)
    {
        if (A_col[k] == row)
        {
            diag = A_val[k];
            break;
        }
    }
    A_diag[row] = diag;
    return;
}
|
||||
|
||||
/**
 * Kernel: element-wise product, c[i] = a[i] * b[i] for every i < n.
 *
 * Uses a 1D index, one element per thread; threads whose index is >= n do nothing.
 */
__global__ void lcg_vecMvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = a[idx] * b[idx];
}
|
||||
|
||||
/**
 * Kernel: element-wise quotient, c[i] = a[i] / b[i] for every i < n.
 *
 * Uses a 1D index, one element per thread; threads whose index is >= n do
 * nothing. The divisor is not checked for zero.
 */
__global__ void lcg_vecDvecD_element_wise_device(const lcg_float *a, const lcg_float *b, lcg_float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = a[idx] / b[idx];
}
|
||||
|
||||
/**
 * Host wrapper: launch lcg_set2box_cuda_device over n elements.
 *
 * The launch uses blocks of 1024 threads and as many blocks as are needed to
 * cover n elements. The pointers are handed to the kernel unchanged, so they
 * must be dereferenceable on the device. The launch goes to the default
 * stream and is not synchronised or error-checked here.
 *
 * @param[in] low       low boundary of every element
 * @param[in] hig       high boundary of every element
 * @param     a         values to constrain, modified in place
 * @param[in] n         number of elements; nothing is launched when n <= 0
 * @param[in] low_bound forwarded to the kernel
 * @param[in] hig_bound forwarded to the kernel
 */
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a, 
    int n, bool low_bound, bool hig_bound)
{
    // A grid of zero blocks is an invalid launch configuration.
    if (n <= 0) return;

    const int blockSize = 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize;
    lcg_set2box_cuda_device<<<numBlocks, blockSize>>>(low, hig, a, n, low_bound, hig_bound);
    return;
}
|
||||
|
||||
/**
 * Host wrapper: launch lcg_smDcsr_get_diagonal_device, one thread per matrix row.
 *
 * The pointers are handed to the kernel unchanged, so they must be
 * dereferenceable on the device. The launch goes to the default stream and
 * is not synchronised or error-checked here.
 *
 * @param[in] A_ptr   Row index pointer
 * @param[in] A_col   Column index
 * @param[in] A_val   Non-zero values of the matrix
 * @param[in] A_len   Dimension of the matrix; nothing is launched when A_len <= 0
 * @param     A_diag  Output diagonal elements
 * @param[in] bk_size CUDA block size; a non-positive value falls back to 1024
 */
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size)
{
    // A grid of zero blocks is an invalid launch configuration.
    if (A_len <= 0) return;

    // Guards the division below against a zero or negative block size.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (A_len + blockSize - 1) / blockSize;
    lcg_smDcsr_get_diagonal_device<<<numBlocks, blockSize>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
|
||||
|
||||
/**
 * Host wrapper: launch lcg_vecMvecD_element_wise_device (c[i] = a[i] * b[i]).
 *
 * The pointers are handed to the kernel unchanged, so they must be
 * dereferenceable on the device. The launch goes to the default stream and
 * is not synchronised or error-checked here.
 *
 * @param[in] a       Pointer of the input array
 * @param[in] b       Pointer of the input array
 * @param     c       Pointer of the output array
 * @param[in] n       Length of the arrays; nothing is launched when n <= 0
 * @param[in] bk_size CUDA block size; a non-positive value falls back to 1024
 */
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    // A grid of zero blocks is an invalid launch configuration.
    if (n <= 0) return;

    // Guards the division below against a zero or negative block size.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize;
    lcg_vecMvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
||||
|
||||
/**
 * Host wrapper: launch lcg_vecDvecD_element_wise_device (c[i] = a[i] / b[i]).
 *
 * The pointers are handed to the kernel unchanged, so they must be
 * dereferenceable on the device. The launch goes to the default stream and
 * is not synchronised or error-checked here.
 *
 * @param[in] a       Pointer of the input array (dividend)
 * @param[in] b       Pointer of the input array (divisor)
 * @param     c       Pointer of the output array
 * @param[in] n       Length of the arrays; nothing is launched when n <= 0
 * @param[in] bk_size CUDA block size; a non-positive value falls back to 1024
 */
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size)
{
    // A grid of zero blocks is an invalid launch configuration.
    if (n <= 0) return;

    // Guards the division below against a zero or negative block size.
    const int blockSize = (bk_size > 0) ? bk_size : 1024;
    const int numBlocks = (n + blockSize - 1) / blockSize;
    lcg_vecDvecD_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
88
src/lib/algebra_cuda.h
Normal file
88
src/lib/algebra_cuda.h
Normal file
@ -0,0 +1,88 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _ALGEBRA_CUDA_H
|
||||
#define _ALGEBRA_CUDA_H
|
||||
|
||||
#include "algebra.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
/**
|
||||
* @brief Set the input value within a box constraint
|
||||
*
|
||||
* @param low low boundary of every element
* @param hig high boundary of every element
* @param a input values, constrained in place
* @param n length of the arrays
* @param low_bound Whether to include the low boundary value
* @param hig_bound Whether to include the high boundary value
*
* @note The function returns nothing; the box constrained values are written back to a.
|
||||
*/
|
||||
void lcg_set2box_cuda(const lcg_float *low, const lcg_float *hig, lcg_float *a,
|
||||
int n, bool low_bound = true, bool hig_bound = true);
|
||||
|
||||
/**
|
||||
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] A_ptr Row index pointer
|
||||
* @param[in] A_col Column index
|
||||
* @param[in] A_val Non-zero values of the matrix
|
||||
* @param[in] A_len Dimension of the matrix
|
||||
* @param A_diag Output diagonal elements
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void lcg_smDcsr_get_diagonal(const int *A_ptr, const int *A_col, const lcg_float *A_val, const int A_len, lcg_float *A_diag, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise multiplication between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void lcg_vecMvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise division between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void lcg_vecDvecD_element_wise(const lcg_float *a, const lcg_float *b, lcg_float *c, int n, int bk_size = 1024);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif //_ALGEBRA_CUDA_H
|
32
src/lib/algebra_eigen.cpp
Normal file
32
src/lib/algebra_eigen.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "algebra_eigen.h"
|
||||
|
||||
/**
 * Clamp every element of m into [low[k], hig[k]] using lcg_set2box() with
 * its default boundary flags.
 *
 * NOTE(review): m is received by value, so the clamped elements are written
 * to a local copy and are not visible to the caller. Passing m by reference
 * would need the declaration in algebra_eigen.h to change as well.
 */
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m)
{
    int k = 0;
    while (k < m.size())
    {
        m[k] = lcg_set2box(low[k], hig[k], m[k]);
        ++k;
    }
}
|
43
src/lib/algebra_eigen.h
Normal file
43
src/lib/algebra_eigen.h
Normal file
@ -0,0 +1,43 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _ALGEBRA_EIGEN_H
|
||||
#define _ALGEBRA_EIGEN_H
|
||||
|
||||
#include "algebra.h"
|
||||
|
||||
#ifdef LibLCG_EIGEN
|
||||
|
||||
#include "Eigen/Dense"
|
||||
|
||||
/**
|
||||
* @brief Set the input value within a box constraint
|
||||
*
|
||||
* @param low Lower limits of the box constraint (element i is used for m[i])
|
||||
* @param hig Upper limits of the box constraint (element i is used for m[i])
|
||||
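* @param m Values to be constrained. Note: this argument is passed by value, so the constrained values are written to a local copy and are not visible to the caller.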
|
||||
*/
|
||||
void lcg_set2box_eigen(const Eigen::VectorXd &low, const Eigen::VectorXd &hig, Eigen::VectorXd m);
|
||||
|
||||
#endif // LibLCG_EIGEN
|
||||
|
||||
#endif // _ALGEBRA_EIGEN_H
|
837
src/lib/clcg.cpp
Normal file
837
src/lib/clcg.cpp
Normal file
@ -0,0 +1,837 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "clcg.h"
|
||||
|
||||
#include "cmath"
|
||||
|
||||
#include "config.h"
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
typedef int (*clcg_solver_ptr)(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
|
||||
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance);
|
||||
|
||||
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||
const int n_size, const clcg_para* param, void* instance);
|
||||
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||
const int n_size, const clcg_para* param, void* instance);
|
||||
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||
const int n_size, const clcg_para* param, void* instance);
|
||||
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||
const int n_size, const clcg_para* param, void* instance);
|
||||
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
|
||||
const int n_size, const clcg_para* param, void* instance);
|
||||
|
||||
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
|
||||
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
|
||||
clcg_solver_enum solver_id)
|
||||
{
|
||||
clcg_solver_ptr cg_solver;
|
||||
switch (solver_id)
|
||||
{
|
||||
case CLCG_BICG:
|
||||
cg_solver = clbicg;
|
||||
break;
|
||||
case CLCG_BICG_SYM:
|
||||
cg_solver = clbicg_symmetric;
|
||||
break;
|
||||
case CLCG_CGS:
|
||||
cg_solver = clcgs;
|
||||
break;
|
||||
case CLCG_BICGSTAB:
|
||||
cg_solver = clbicgstab;
|
||||
break;
|
||||
case CLCG_TFQMR:
|
||||
cg_solver = cltfqmr;
|
||||
break;
|
||||
default:
|
||||
cg_solver = clcgs;
|
||||
break;
|
||||
}
|
||||
|
||||
return cg_solver(Afp, Pfp, m, B, n_size, param, instance);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Biconjugate gradient iteration for the complex linear system A*m = B.
 *
 * Two residual/direction pairs are advanced together: (r1k, d1k) with the
 * product A*d1k and (r2k, d2k) with the product requested from Afp using
 * MatTranspose and Conjugate.
 *
 * Convergence measure: sqrt(rk_square)/n_size when para.abs_diff is set,
 * otherwise rk_square/r0_square, where r0_square is the starting value of
 * rk_square raised to at least 1.0.
 *
 * @param Afp      Callback that computes the matrix-vector product.
 * @param Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param m        Initial solution on entry, updated in place.
 * @param B        Right-hand side vector.
 * @param n_size   Length of m and B.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User data forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, CLCG_NAN_VALUE, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER
 *         codes when the arguments are rejected.
 */
int clbicg(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// set BiCG parameters (library defaults when the caller passes none)
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i;
	lcg_complex *r1k = nullptr, *r2k = nullptr, *d1k = nullptr, *d2k = nullptr;
	lcg_complex *Ax = nullptr;
	r1k = clcg_malloc(n_size); r2k = clcg_malloc(n_size);
	d1k = clcg_malloc(n_size); d2k = clcg_malloc(n_size);
	Ax = clcg_malloc(n_size);

	lcg_complex ak, Ad1d2, r1r2_next, betak;

	// r1 = B - A*m; r2 starts as the element-wise conjugate of r1
	Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		d1k[i] = r1k[i] = B[i] - Ax[i];
		d2k[i] = r2k[i] = clcg_conjugate(&r1k[i]);
	}

	lcg_complex r1r2;
	clcg_inner(r1r2, r2k, r1k, n_size);

	lcg_float r0_square, rk_square;
	lcg_complex rk_mod;
	clcg_inner(rk_mod, r1k, r1k, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	if (r0_square < 1.0) r0_square = 1.0;

	// All locals are declared before the first goto so no jump crosses an initialization.
	int ret, t = 0;
	lcg_float residual;

	// Start-up test: use only the criterion selected by para.abs_diff. (Previously a
	// failed absolute test fell through to the relative one.)
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		// NOTE(review): the LCG_* code is kept from the original; confirm whether a CLCG_* equivalent is intended.
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	while(1)
	{
		if (para.abs_diff) residual = sqrt(rk_square)/n_size;
		else residual = rk_square/r0_square;

		if (Pfp != nullptr)
		{
			if (Pfp(instance, m, residual, &para, n_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		// max_iterations == 0 disables the iteration limit
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		Afp(instance, d1k, Ax, n_size, MatNormal, NonConjugate);
		clcg_inner(Ad1d2, d2k, Ax, n_size);
		ak = r1r2/Ad1d2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			m[i] = m[i] + ak*d1k[i];
			r1k[i] = r1k[i] - ak*Ax[i];
		}

		clcg_inner(rk_mod, r1k, r1k, n_size);
		rk_square = clcg_square(&rk_mod);

		// Ax is reused for the transposed/conjugated product needed by the second residual
		Afp(instance, d2k, Ax, n_size, MatTranspose, Conjugate);

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			r2k[i] = r2k[i] - clcg_conjugate(&ak)*Ax[i];
		}

		// A value that differs from itself is NaN
		for (i = 0; i < n_size; i++)
		{
			if (m[i] != m[i])
			{
				ret = CLCG_NAN_VALUE; goto func_ends;
			}
		}

		clcg_inner(r1r2_next, r2k, r1k, n_size);
		betak = r1r2_next/r1r2;
		r1r2 = r1r2_next;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			d1k[i] = r1k[i] + betak*d1k[i];
			d2k[i] = r2k[i] + clcg_conjugate(&betak)*d2k[i];
		}
	}

	func_ends:
	{
		clcg_free(r1k);
		clcg_free(r2k);
		clcg_free(d1k);
		clcg_free(d2k);
		clcg_free(Ax);
	}

	return ret;
}
|
||||
|
||||
/**
 * @brief Biconjugate gradient variant that needs only the plain product A*x.
 *
 * A single residual/direction pair (rk, dk) is kept. The step lengths are
 * formed from clcg_dot() products, while the convergence measure is based on
 * clcg_inner(rk, rk).
 * NOTE(review): the name suggests A is expected to be symmetric -- confirm with the library docs.
 *
 * Convergence measure: sqrt(rk_square)/n_size when para.abs_diff is set,
 * otherwise rk_square/r0_square, where r0_square is the starting value of
 * rk_square raised to at least 1.0.
 *
 * @param Afp      Callback that computes the matrix-vector product.
 * @param Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param m        Initial solution on entry, updated in place.
 * @param B        Right-hand side vector.
 * @param n_size   Length of m and B.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User data forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, CLCG_NAN_VALUE, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER
 *         codes when the arguments are rejected.
 */
int clbicg_symmetric(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// set solver parameters (library defaults when the caller passes none)
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i;
	lcg_complex *rk = nullptr, *dk = nullptr;
	lcg_complex *Ax = nullptr;
	rk = clcg_malloc(n_size); dk = clcg_malloc(n_size);
	Ax = clcg_malloc(n_size);

	lcg_complex ak, rkrk2, betak, dkAx;

	// r = B - A*m, and the first search direction equals the residual
	Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		dk[i] = rk[i] = B[i] - Ax[i];
	}

	lcg_complex rkrk;
	clcg_dot(rkrk, rk, rk, n_size);

	lcg_float r0_square, rk_square;
	lcg_complex rk_mod;
	clcg_inner(rk_mod, rk, rk, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	if (r0_square < 1.0) r0_square = 1.0;

	// All locals are declared before the first goto so no jump crosses an initialization.
	int ret, t = 0;
	lcg_float residual;

	// Start-up test: use only the criterion selected by para.abs_diff. (Previously a
	// failed absolute test fell through to the relative one.)
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		// NOTE(review): the LCG_* code is kept from the original; confirm whether a CLCG_* equivalent is intended.
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	while(1)
	{
		if (para.abs_diff) residual = sqrt(rk_square)/n_size;
		else residual = rk_square/r0_square;

		if (Pfp != nullptr)
		{
			if (Pfp(instance, m, residual, &para, n_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		// max_iterations == 0 disables the iteration limit
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		Afp(instance, dk, Ax, n_size, MatNormal, NonConjugate);
		clcg_dot(dkAx, dk, Ax, n_size);
		ak = rkrk/dkAx;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			m[i] = m[i] + ak*dk[i];
			rk[i] = rk[i] - ak*Ax[i];
		}

		clcg_inner(rk_mod, rk, rk, n_size);
		rk_square = clcg_square(&rk_mod);

		// A value that differs from itself is NaN
		for (i = 0; i < n_size; i++)
		{
			if (m[i] != m[i])
			{
				ret = CLCG_NAN_VALUE; goto func_ends;
			}
		}

		clcg_dot(rkrk2, rk, rk, n_size);
		betak = rkrk2/rkrk;
		rkrk = rkrk2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			dk[i] = rk[i] + betak*dk[i];
		}
	}

	func_ends:
	{
		clcg_free(rk);
		clcg_free(dk);
		clcg_free(Ax);
	}

	return ret;
}
|
||||
|
||||
/**
 * @brief Conjugate gradient squared (CGS) iteration for the complex linear system A*m = B.
 *
 * Only the plain product A*x is requested from Afp. A shadow vector rbar0 is
 * drawn with clcg_vecrnd() and redrawn until |<rbar0, rk>| is at least 1e-8.
 *
 * Convergence measure: sqrt(rk_square)/n_size when para.abs_diff is set,
 * otherwise rk_square/r0_square, where r0_square is the starting value of
 * rk_square raised to at least 1.0.
 *
 * @param Afp      Callback that computes the matrix-vector product.
 * @param Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param m        Initial solution on entry, updated in place.
 * @param B        Right-hand side vector.
 * @param n_size   Length of m and B.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User data forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, CLCG_NAN_VALUE, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER
 *         codes when the arguments are rejected.
 */
int clcgs(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// set CGS parameters (library defaults when the caller passes none)
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i;
	lcg_complex *rk = nullptr, *rbar0 = nullptr, *pk = nullptr;
	lcg_complex *Ax = nullptr, *uk = nullptr, *qk = nullptr, *wk = nullptr; // w_k = u_{k-1} + q_k
	rk = clcg_malloc(n_size); rbar0 = clcg_malloc(n_size);
	pk = clcg_malloc(n_size); Ax = clcg_malloc(n_size);
	uk = clcg_malloc(n_size); qk = clcg_malloc(n_size);
	wk = clcg_malloc(n_size);

	lcg_complex ak, rhok, rhok2, sigma, betak;

	// r = B - A*m; p and u both start as the residual
	Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
	}

	lcg_float r0_square, rk_square;
	lcg_complex rk_mod;
	clcg_inner(rk_mod, rk, rk, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	if (r0_square < 1.0) r0_square = 1.0;

	// All locals are declared before the first goto so no jump crosses an initialization.
	int ret, t = 0;
	lcg_float residual;

	// Start-up test, done BEFORE drawing rbar0: with a zero residual the product
	// <rbar0, rk> is zero for every draw and the redraw loop below would never end.
	// Only the criterion selected by para.abs_diff is used.
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		// NOTE(review): the LCG_* code is kept from the original; confirm whether a CLCG_* equivalent is intended.
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	// Redraw the shadow vector until it is not (numerically) orthogonal to the residual
	do
	{
		clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
		clcg_inner(rhok, rbar0, rk, n_size);
	} while (clcg_module(&rhok) < 1e-8);

	while(1)
	{
		if (para.abs_diff) residual = sqrt(rk_square)/n_size;
		else residual = rk_square/r0_square;

		if (Pfp != nullptr)
		{
			if (Pfp(instance, m, residual, &para, n_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		// max_iterations == 0 disables the iteration limit
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		Afp(instance, pk, Ax, n_size, MatNormal, NonConjugate); // vk = Apk
		clcg_inner(sigma, rbar0, Ax, n_size);
		ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			qk[i] = uk[i] - ak*Ax[i];
			wk[i] = uk[i] + qk[i];
		}

		// Ax is reused for A*wk
		Afp(instance, wk, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			m[i] = m[i] + ak*wk[i];
			rk[i] = rk[i] - ak*Ax[i];
		}

		clcg_inner(rk_mod, rk, rk, n_size);
		rk_square = clcg_square(&rk_mod);

		// A value that differs from itself is NaN
		for (i = 0; i < n_size; i++)
		{
			if (m[i] != m[i])
			{
				ret = CLCG_NAN_VALUE; goto func_ends;
			}
		}

		clcg_inner(rhok2, rbar0, rk, n_size);
		betak = rhok2/rhok;
		rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			uk[i] = rk[i] + betak*qk[i];
			pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
		}
	}

	func_ends:
	{
		clcg_free(rk);
		clcg_free(rbar0);
		clcg_free(pk);
		clcg_free(Ax);
		clcg_free(uk);
		clcg_free(qk);
		clcg_free(wk);
	}

	return ret;
}
|
||||
|
||||
/**
 * @brief Stabilized biconjugate gradient (BiCGSTAB) iteration for the complex linear system A*m = B.
 *
 * Only the plain product A*x is requested from Afp. A shadow vector rbar0 is
 * drawn with clcg_vecrnd() and redrawn until |<rbar0, rk>| is at least 1e-8.
 *
 * Convergence measure: sqrt(rk_square)/n_size when para.abs_diff is set,
 * otherwise rk_square/r0_square, where r0_square is the starting value of
 * rk_square raised to at least 1.0.
 *
 * @param Afp      Callback that computes the matrix-vector product.
 * @param Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param m        Initial solution on entry, updated in place.
 * @param B        Right-hand side vector.
 * @param n_size   Length of m and B.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User data forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, CLCG_NAN_VALUE, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER
 *         codes when the arguments are rejected.
 */
int clbicgstab(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// set BICGSTAB parameters (library defaults when the caller passes none)
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i;
	lcg_complex *rk = nullptr, *rbar0 = nullptr, *pk = nullptr, *sk = nullptr;
	lcg_complex *Ap = nullptr, *As = nullptr;
	rk = clcg_malloc(n_size); rbar0 = clcg_malloc(n_size);
	pk = clcg_malloc(n_size); sk = clcg_malloc(n_size);
	Ap = clcg_malloc(n_size); As = clcg_malloc(n_size);

	lcg_complex ak, rhok, rhok2, sigma, omega, betak, Ass, AsAs;

	// r = B - A*m; the first search direction equals the residual
	Afp(instance, m, Ap, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		pk[i] = rk[i] = B[i] - Ap[i];
	}

	lcg_float r0_square, rk_square;
	lcg_complex rk_mod;
	clcg_inner(rk_mod, rk, rk, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	if (r0_square < 1.0) r0_square = 1.0;

	// All locals are declared before the first goto so no jump crosses an initialization.
	int ret, t = 0;
	lcg_float residual;

	// Start-up test, done BEFORE drawing rbar0: with a zero residual the product
	// <rbar0, rk> is zero for every draw and the redraw loop below would never end.
	// Only the criterion selected by para.abs_diff is used.
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		// NOTE(review): the LCG_* code is kept from the original; confirm whether a CLCG_* equivalent is intended.
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	// Redraw the shadow vector until it is not (numerically) orthogonal to the residual
	do
	{
		clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
		clcg_inner(rhok, rbar0, rk, n_size);
	} while (clcg_module(&rhok) < 1e-8);

	while(1)
	{
		if (para.abs_diff) residual = sqrt(rk_square)/n_size;
		else residual = rk_square/r0_square;

		if (Pfp != nullptr)
		{
			if (Pfp(instance, m, residual, &para, n_size, t))
			{
				ret = CLCG_STOP; goto func_ends;
			}
		}

		if (residual <= para.epsilon)
		{
			ret = CLCG_CONVERGENCE; goto func_ends;
		}

		// max_iterations == 0 disables the iteration limit
		if (para.max_iterations > 0 && t+1 > para.max_iterations)
		{
			ret = LCG_REACHED_MAX_ITERATIONS;
			break;
		}

		t++;

		Afp(instance, pk, Ap, n_size, MatNormal, NonConjugate);
		clcg_inner(sigma, rbar0, Ap, n_size);
		ak = rhok/sigma;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			sk[i] = rk[i] - ak*Ap[i];
		}

		Afp(instance, sk, As, n_size, MatNormal, NonConjugate);
		clcg_inner(Ass, As, sk, n_size);
		clcg_inner(AsAs, As, As, n_size);
		omega = Ass/AsAs;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			m[i] = m[i] + ak*pk[i] + omega*sk[i];
			rk[i] = sk[i] - omega*As[i];
		}

		clcg_inner(rk_mod, rk, rk, n_size);
		rk_square = clcg_square(&rk_mod);

		// A value that differs from itself is NaN
		for (i = 0; i < n_size; i++)
		{
			if (m[i] != m[i])
			{
				ret = CLCG_NAN_VALUE; goto func_ends;
			}
		}

		clcg_inner(rhok2, rbar0, rk, n_size);
		betak = rhok2*ak/(rhok*omega);
		rhok = rhok2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			pk[i] = rk[i] + betak*(pk[i] - omega*Ap[i]);
		}
	}

	func_ends:
	{
		clcg_free(rk);
		clcg_free(rbar0);
		clcg_free(pk);
		clcg_free(sk);
		clcg_free(Ap);
		clcg_free(As);
	}

	return ret;
}
|
||||
|
||||
/**
 * @brief Transpose-free quasi-minimal residual (TFQMR) iteration for the complex linear system A*m = B.
 *
 * Only the plain product A*x is requested from Afp. Each pass of the outer
 * loop performs two solution updates (j = 1, 2); the iteration counter, the
 * progress callback and the stopping tests are evaluated once per update.
 * A shadow vector rbar0 is drawn with clcg_vecrnd() and redrawn until
 * |<rbar0, rk>| is at least 1e-8.
 *
 * Convergence measure: sqrt(rk_square)/n_size when para.abs_diff is set,
 * otherwise rk_square/r0_square, where r0_square is the starting value of
 * rk_square raised to at least 1.0.
 *
 * @param Afp      Callback that computes the matrix-vector product.
 * @param Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param m        Initial solution on entry, updated in place.
 * @param B        Right-hand side vector.
 * @param n_size   Length of m and B.
 * @param param    Solver parameters; defparam2 is used when nullptr.
 * @param instance User data forwarded to both callbacks.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, CLCG_NAN_VALUE, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the CLCG_INVILAD_* / CLCG_INVALID_POINTER
 *         codes when the arguments are rejected.
 */
int cltfqmr(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m, const lcg_complex* B,
	const int n_size, const clcg_para* param, void* instance)
{
	// set TFQMR parameters (library defaults when the caller passes none)
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	if (m == nullptr) return CLCG_INVALID_POINTER;
	if (B == nullptr) return CLCG_INVALID_POINTER;

	int i, j;
	lcg_complex *pk = nullptr, *uk = nullptr;
	lcg_complex *vk = nullptr, *dk = nullptr;
	lcg_complex *rbar0 = nullptr, *rk = nullptr;
	lcg_complex *Ax = nullptr, *qk = nullptr;
	lcg_complex *uqk = nullptr;
	pk = clcg_malloc(n_size); uk = clcg_malloc(n_size);
	vk = clcg_malloc(n_size); dk = clcg_malloc(n_size);
	rbar0 = clcg_malloc(n_size); rk = clcg_malloc(n_size);
	Ax = clcg_malloc(n_size); qk = clcg_malloc(n_size);
	uqk = clcg_malloc(n_size);

	// r = B - A*m; p and u start as the residual, d starts at zero
	Afp(instance, m, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
	for (i = 0; i < n_size; i++)
	{
		pk[i] = uk[i] = rk[i] = B[i] - Ax[i];
		clcg_set(&dk[i], 0.0, 0.0);
	}

	lcg_complex rho, rk_mod, rk_mod2;
	lcg_float r0_square, rk_square;
	clcg_inner(rk_mod, rk, rk, n_size);
	r0_square = rk_square = clcg_square(&rk_mod);
	if (r0_square < 1.0) r0_square = 1.0;

	// All locals are declared before the first goto so no jump crosses an initialization.
	lcg_float theta = 0.0, omega = clcg_module(&rk_mod);
	lcg_float residual, tao = omega;
	lcg_complex sigma, alpha, betak, rho2, sign, eta(0.0, 0.0);
	int ret, t = 0;

	// Start-up test, done BEFORE drawing rbar0: with a zero residual the product
	// <rbar0, rk> is zero for every draw and the redraw loop below would never end.
	// Only the criterion selected by para.abs_diff is used.
	if (para.abs_diff) residual = sqrt(rk_square)/n_size;
	else residual = rk_square/r0_square;

	if (residual <= para.epsilon)
	{
		// NOTE(review): the LCG_* code is kept from the original; confirm whether a CLCG_* equivalent is intended.
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, m, residual, &para, n_size, 0);
		}
		goto func_ends;
	}

	// Redraw the shadow vector until it is not (numerically) orthogonal to the residual
	do
	{
		clcg_vecrnd(rbar0, lcg_complex(1.0, 0.0), lcg_complex(2.0, 0.0), n_size);
		clcg_inner(rho, rbar0, rk, n_size);
	} while (clcg_module(&rho) < 1e-8);

	while(1)
	{
		Afp(instance, pk, vk, n_size, MatNormal, NonConjugate);

		clcg_inner(sigma, rbar0, vk, n_size);
		alpha = rho/sigma;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			qk[i] = uk[i] - alpha*vk[i];
			uqk[i] = uk[i] + qk[i];
		}

		Afp(instance, uqk, Ax, n_size, MatNormal, NonConjugate);

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			rk[i] = rk[i] - alpha*Ax[i];
		}

		clcg_inner(rk_mod2, rk, rk, n_size);

		for (j = 1; j <= 2; j++)
		{
			if (para.abs_diff) residual = sqrt(rk_square)/n_size;
			else residual = rk_square/r0_square;

			if (Pfp != nullptr)
			{
				if (Pfp(instance, m, residual, &para, n_size, t))
				{
					ret = CLCG_STOP; goto func_ends;
				}
			}

			if (residual <= para.epsilon)
			{
				ret = CLCG_CONVERGENCE; goto func_ends;
			}

			// max_iterations == 0 disables the iteration limit
			if (para.max_iterations > 0 && t+1 > para.max_iterations)
			{
				// Leave the solver entirely: a plain `break` would only exit this inner
				// j-loop and the enclosing while(1) would keep iterating.
				ret = LCG_REACHED_MAX_ITERATIONS;
				goto func_ends;
			}

			t++;

			sign = theta*theta*(eta/alpha);

			if (j == 1)
			{
				omega = sqrt(clcg_module(&rk_mod)*clcg_module(&rk_mod2));

#pragma omp parallel for private (i) schedule(guided)
				for (i = 0; i < n_size; i++)
				{
					dk[i] = uk[i] + sign*dk[i];
				}
			}
			else
			{
				omega = clcg_module(&rk_mod2);

#pragma omp parallel for private (i) schedule(guided)
				for (i = 0; i < n_size; i++)
				{
					dk[i] = qk[i] + sign*dk[i];
				}
			}

			theta = omega/tao;
			tao = omega/sqrt(1.0+theta*theta);
			eta = (1.0/(1.0+theta*theta))*alpha;

#pragma omp parallel for private (i) schedule(guided)
			for (i = 0; i < n_size; i++)
			{
				m[i] = m[i] + eta*dk[i];
			}

			// A value that differs from itself is NaN
			for (i = 0; i < n_size; i++)
			{
				if (m[i] != m[i])
				{
					ret = CLCG_NAN_VALUE; goto func_ends;
				}
			}
		}

		rk_mod = rk_mod2;
		rk_square = clcg_square(&rk_mod);

		clcg_inner(rho2, rbar0, rk, n_size);
		betak = rho2/rho;
		rho = rho2;

#pragma omp parallel for private (i) schedule(guided)
		for (i = 0; i < n_size; i++)
		{
			uk[i] = rk[i] + betak*qk[i];
			pk[i] = uk[i] + betak*(qk[i] + betak*pk[i]);
		}
	}

	func_ends:
	{
		clcg_free(pk);
		clcg_free(uk);
		clcg_free(vk);
		clcg_free(dk);
		clcg_free(rbar0);
		clcg_free(rk);
		clcg_free(Ax);
		clcg_free(qk);
		clcg_free(uqk);
	}

	return ret;
}
|
78
src/lib/clcg.h
Normal file
78
src/lib/clcg.h
Normal file
@ -0,0 +1,78 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _CLCG_H
|
||||
#define _CLCG_H
|
||||
|
||||
#include "lcg_complex.h"
|
||||
#include "util.h"
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the complex product of a N*N matrix 'A' multiplied
|
||||
* by a complex vertical vector 'x'.
|
||||
*
|
||||
* @param instance The user data sent for the clcg_solver() functions by the client.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param prod_Ax Product of A multiplied by x.
|
||||
* @param x_size Size of x and column/row numbers of A.
|
||||
* @param layout Whether to use the transpose of A for calculation.
|
||||
* @param conjugate Whether to use the conjugate of A for calculation.
|
||||
*/
|
||||
typedef void (*clcg_axfunc_ptr)(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminating the iteration
|
||||
* if necessary.
|
||||
*
|
||||
* @param instance The user data sent for the clcg_solver() functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
|
||||
* @param n_size The size of the variables
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*clcg_progress_ptr)(void* instance, const lcg_complex* m,
|
||||
const lcg_float converge, const clcg_para* param, const int n_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined complex conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the clcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is CLCG_BICG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int clcg_solver(clcg_axfunc_ptr Afp, clcg_progress_ptr Pfp, lcg_complex* m,
|
||||
const lcg_complex* B, const int n_size, const clcg_para* param, void* instance,
|
||||
clcg_solver_enum solver_id = CLCG_BICG);
|
||||
|
||||
#endif // _CLCG_H
|
529
src/lib/clcg_cuda.cu
Normal file
529
src/lib/clcg_cuda.cu
Normal file
@ -0,0 +1,529 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "clcg_cuda.h"
|
||||
|
||||
|
||||
// Common signature of the un-preconditioned double-precision complex solvers
// that clcg_solver_cuda() can dispatch to.
using cuda_solver_ptr = int (*)(
    clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration; the solver is defined further down in this file.
int clbicg(
    clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration; the solver is defined further down in this file.
int clbicg_symmetric(
    clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
/**
 * Entry point for the un-preconditioned double-precision complex solvers.
 *
 * Forwards every argument unchanged to the solver selected by solver_id:
 * CLCG_BICG -> clbicg(), CLCG_BICG_SYM -> clbicg_symmetric().
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id names neither of the two solvers above.
 */
int clcg_solver_cuda(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m, const cuDoubleComplex* B,
    const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
    cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    if (solver_id == CLCG_BICG)
    {
        return clbicg(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    if (solver_id == CLCG_BICG_SYM)
    {
        return clbicg_symmetric(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    // Any other id is not handled by this dispatcher.
    return CLCG_UNKNOWN_SOLVER;
}
|
||||
|
||||
// Common signature of the preconditioned double-precision complex solvers
// that clcg_solver_preconditioned_cuda() can dispatch to.
using cuda_precondtioned_solver_ptr = int (*)(
    clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration; the solver is defined further down in this file.
int clpcg(
    clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
/**
 * Entry point for the preconditioned double-precision complex solvers.
 *
 * Forwards every argument unchanged to clpcg() when solver_id is CLCG_PCG.
 *
 * @return The status returned by clpcg(), or CLCG_UNKNOWN_SOLVER for any
 *         other solver_id.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    // CLCG_PCG is the only preconditioned solver this dispatcher knows.
    if (solver_id != CLCG_PCG)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return clpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||
|
||||
/**
 * Bi-conjugate-gradient iteration for a complex double-precision system A*m = B,
 * executed with cuBLAS on device copies of the vectors.
 *
 * @param Afp        Callback computing A*x (CUSPARSE_OPERATION_NON_TRANSPOSE) or
 *                   A^H*x (CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE) on dense-vector descriptors.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device
 *                   copy of the solution; a non-zero return inside the loop stops the solver.
 * @param m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param B          Host array of n_size elements: right-hand side.
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam2 is used when nullptr.
 * @param instance   User data forwarded unchanged to Afp and Pfp.
 * @param cub_handle cuBLAS handle (must not be null).
 * @param cus_handle cuSPARSE handle (must not be null); forwarded to Afp.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the parameter-error codes
 *         returned by the checks at the top of the function.
 *
 * NOTE(review): return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are
 * not inspected here; no allocation-failure status is visible in this file.
 */
int clbicg(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when supplied, otherwise the library defaults
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    // Afp is invoked unconditionally below; reject a null callback instead of crashing
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // NOTE(review): the handle checks use the LCG_ prefixed code while the checks
    // above use CLCG_ codes; kept as in the original, confirm this is intended.
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // Device work vectors
    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *r1k = nullptr, *r2k = nullptr;
    cuDoubleComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&r1k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&r2k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d1k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d2k, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for every vector handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, conj_ak, Ad1d2, r1r2_next, betak, conj_betak;

    // Ax = A * m0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(r1k, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1); // r0 -= Ax
    cudaMemcpy(d1k, r1k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

    // Shadow residual and shadow direction start from the conjugate of r0
    clcg_vecZ_conjugate(r1k, r2k, n_size);
    cudaMemcpy(d2k, r2k, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);

    cuDoubleComplex r1r2;
    cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);

    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

    // Reference norm of the relative criterion, clamped to at least 1
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;

    // Evaluate ONLY the criterion selected by para.abs_diff. The previous
    // if / else-if chain fell through to the relative test when the absolute
    // test failed, which could report "already optimized" by the wrong measure.
    lcg_float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            // The callback's return value is deliberately ignored on this path
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Ax = A * d1
        Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        // ak = (r2^H r1) / (d2^H A d1)
        cublasZdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
        ak = cuCdiv(r1r2, Ad1d2);
        nak = cuCmul(none, ak);
        conj_ak = cuConj(nak); // -conj(ak), step used for the shadow residual

        cublasZaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1);  // m  += ak * d1
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1);  // r1 -= ak * A d1

        cublasDznrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);

        // Ax = A^H * d2
        Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);

        cublasZaxpy_v2(cub_handle, n_size, &conj_ak, Ax, 1, r2k, 1); // r2 -= conj(ak) * A^H d2

        cublasZdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
        betak = cuCdiv(r1r2_next, r1r2);
        conj_betak = cuConj(betak);
        r1r2 = r1r2_next;

        // d1 = r1 + betak * d1
        cublasZscal_v2(cub_handle, n_size, &betak, d1k, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);

        // d2 = r2 + conj(betak) * d2
        cublasZscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
    }

    func_ends:
    {
        // Copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(r1k);
        cudaFree(r2k);
        cudaFree(d1k);
        cudaFree(d2k);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_d1k);
        cusparseDestroyDnVec(dvec_d2k);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
||||
|
||||
/**
 * Bi-conjugate-gradient variant that uses a single residual/direction pair and
 * unconjugated dot products (cublasZdotu), for a complex double-precision
 * system A*m = B. Vectors are processed on device copies with cuBLAS.
 * NOTE(review): the name suggests A is expected to be complex symmetric; confirm.
 *
 * @param Afp        Callback computing A*x on dense-vector descriptors; always called
 *                   with CUSPARSE_OPERATION_NON_TRANSPOSE here.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device
 *                   copy of the solution; a non-zero return inside the loop stops the solver.
 * @param m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param B          Host array of n_size elements: right-hand side.
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam2 is used when nullptr.
 * @param instance   User data forwarded unchanged to Afp and Pfp.
 * @param cub_handle cuBLAS handle (must not be null).
 * @param cus_handle cuSPARSE handle (must not be null); forwarded to Afp.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the parameter-error codes
 *         returned by the checks at the top of the function.
 *
 * NOTE(review): return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are
 * not inspected here; no allocation-failure status is visible in this file.
 */
int clbicg_symmetric(clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when supplied, otherwise the library defaults
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    // Afp is invoked unconditionally below; reject a null callback instead of crashing
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // NOTE(review): the handle checks use the LCG_ prefixed code while the checks
    // above use CLCG_ codes; kept as in the original, confirm this is intended.
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // Device work vectors
    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for every vector handed to Afp
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, rkrk2, betak, dkAx;

    // Ax = A * m0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
    cudaMemcpy(dk, rk, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // d0 = r0

    // Unconjugated product r^T r
    cuDoubleComplex rkrk;
    cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // Reference norm of the relative criterion, clamped to at least 1
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;

    // Evaluate ONLY the criterion selected by para.abs_diff. The previous
    // if / else-if chain fell through to the relative test when the absolute
    // test failed, which could report "already optimized" by the wrong measure.
    lcg_float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            // The callback's return value is deliberately ignored on this path
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Ax = A * d
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        // ak = (r^T r) / (d^T A d)
        cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdiv(rkrk, dkAx);
        nak = cuCmul(none, ak);

        cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);  // m += ak * d
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);  // r -= ak * A d

        cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        cublasZdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
        betak = cuCdiv(rkrk2, rkrk);
        rkrk = rkrk2;

        // d = r + betak * d
        cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
    }

    func_ends:
    {
        // Copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
||||
|
||||
/**
 * Preconditioned conjugate-gradient iteration for a complex double-precision
 * system A*m = B, using unconjugated dot products (cublasZdotu). Vectors are
 * processed on device copies with cuBLAS.
 *
 * @param Afp        Callback computing A*x on dense-vector descriptors; always called
 *                   with CUSPARSE_OPERATION_NON_TRANSPOSE here.
 * @param Mfp        Callback applying the preconditioner to a vector, same calling
 *                   convention as Afp.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device
 *                   copy of the solution; a non-zero return inside the loop stops the solver.
 * @param m          Host array of n_size elements: initial guess on entry, solution on exit.
 * @param B          Host array of n_size elements: right-hand side.
 * @param n_size     Number of unknowns.
 * @param nz_size    Forwarded unchanged to Afp, Mfp and Pfp.
 * @param param      Solver parameters; defparam2 is used when nullptr.
 * @param instance   User data forwarded unchanged to the callbacks.
 * @param cub_handle cuBLAS handle (must not be null).
 * @param cus_handle cuSPARSE handle (must not be null); forwarded to Afp and Mfp.
 *
 * @return CLCG_CONVERGENCE, CLCG_STOP, LCG_ALREADY_OPTIMIZIED,
 *         LCG_REACHED_MAX_ITERATIONS, or one of the parameter-error codes
 *         returned by the checks at the top of the function.
 *
 * NOTE(review): return codes of the CUDA runtime, cuBLAS and cuSPARSE calls are
 * not inspected here; no allocation-failure status is visible in this file.
 */
int clpcg(clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp, cuDoubleComplex* m,
    const cuDoubleComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when supplied, otherwise the library defaults
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    // Both callbacks are invoked unconditionally below; reject null instead of crashing
    if (Afp == nullptr) return CLCG_INVALID_POINTER;
    if (Mfp == nullptr) return CLCG_INVALID_POINTER;
    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    // NOTE(review): the handle checks use the LCG_ prefixed code while the checks
    // above use CLCG_ codes; kept as in the original, confirm this is intended.
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // Device work vectors
    cuDoubleComplex *d_m = nullptr, *d_B = nullptr;
    cuDoubleComplex *rk = nullptr, *dk = nullptr, *sk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&d_B, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&rk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&dk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&sk, n_size * sizeof(cuDoubleComplex));
    cudaMalloc(&Ax, n_size * sizeof(cuDoubleComplex));

    // Copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // Dense-vector descriptors for every vector handed to Afp / Mfp
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_dk, dvec_sk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_sk, n_size, sk, CUDA_C_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_64F);

    cuDoubleComplex one, none;
    one.x = 1.0; one.y = 0.0;
    none.x = -1.0; none.y = 0.0;
    cuDoubleComplex ak, nak, d_old, betak, dkAx;

    // Ax = A * m0
    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice); // r0 = B
    cublasZaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax

    // d0 = M * r0
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_dk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // Unconjugated product r^T (M r)
    cuDoubleComplex d_new;
    cublasZdotu_v2(cub_handle, n_size, rk, 1, dk, 1, &d_new);

    lcg_float rk_mod;
    cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // Reference norm of the relative criterion, clamped to at least 1
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;

    // Evaluate ONLY the criterion selected by para.abs_diff. The previous
    // if / else-if chain fell through to the relative test when the absolute
    // test failed, which could report "already optimized" by the wrong measure.
    lcg_float residual = para.abs_diff ? rk_mod/n_size : rk_mod*rk_mod/(r0_mod*r0_mod);
    if (residual <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            // The callback's return value is deliberately ignored on this path
            Pfp(instance, d_m, residual, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // Ax = A * d
        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        // ak = (r^T M r) / (d^T A d)
        cublasZdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdiv(d_new, dkAx);
        nak = cuCmul(none, ak);

        cublasZaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);  // m += ak * d
        cublasZaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);  // r -= ak * A d

        cublasDznrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        // s = M * r
        Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_sk, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        d_old = d_new;
        cublasZdotu_v2(cub_handle, n_size, rk, 1, sk, 1, &d_new);

        betak = cuCdiv(d_new, d_old);

        // d = s + betak * d
        cublasZscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasZaxpy_v2(cub_handle, n_size, &one, sk, 1, dk, 1);
    }

    func_ends:
    {
        // Copy the solution back to host memory
        cudaMemcpy(m, d_m, n_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(sk);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_rk);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_sk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
109
src/lib/clcg_cuda.h
Normal file
109
src/lib/clcg_cuda.h
Normal file
@ -0,0 +1,109 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _CLCG_CUDA_H
|
||||
#define _CLCG_CUDA_H
|
||||
|
||||
#include "util.h"
|
||||
#include "lcg_complex_cuda.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <cusparse_v2.h>
|
||||
|
||||
/**
 * @brief Callback type for the product of an N*N matrix 'A' and a column vector 'x'.
 * Both vectors are passed as cuSPARSE dense-vector descriptors, so the data lives on the GPU device.
 *
 * @param instance    User data handed to the clcg_solver_cuda() functions by the client.
 * @param cub_handle  Handle of the cublas object.
 * @param cus_handle  Handle of the cusparse object.
 * @param x           Multiplier of the Ax product.
 * @param prod_Ax     Product of A multiplied by x.
 * @param n_size      Size of x and column/row numbers of A.
 * @param nz_size     Value forwarded unchanged from the solver call
 *                    (presumably the number of non-zero entries of A; confirm).
 * @param oper_t      Operation to apply to A. The solvers pass
 *                    CUSPARSE_OPERATION_NON_TRANSPOSE or CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
 */
using clcg_axfunc_cuda_ptr = void (*)(
    void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
    const int n_size, const int nz_size, cusparseOperation_t oper_t);
|
||||
|
||||
/**
 * @brief Callback type for monitoring the progress and terminating the iteration
 * if necessary. Note that m is hosted on the GPU device.
 *
 * @param instance    User data handed to the clcg_solver_cuda() functions by the client.
 * @param m           The current solution (device pointer).
 * @param converge    The current value evaluating the iteration progress.
 * @param param       The parameter set the solver is running with.
 * @param n_size      The size of the variables.
 * @param nz_size     Value forwarded unchanged from the solver call.
 * @param k           The iteration count.
 *
 * @retval int Zero to continue the optimization process. Returning a
 * non-zero value will terminate the optimization process.
 */
using clcg_progress_cuda_ptr = int (*)(
    void* instance, const cuDoubleComplex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
 * @brief Combined entry point of the un-preconditioned complex conjugate gradient solvers.
 * m and B are host arrays: the solvers copy them to the GPU device, iterate there,
 * and copy the solution back into m.
 *
 * @param[in] Afp         Callback function for calculating the product of 'Ax'.
 * @param[in] Pfp         Callback function for monitoring the iteration progress.
 * @param     m           Initial solution vector on entry, solution on return.
 * @param     B           Objective vector of the linear system.
 * @param[in] n_size      Size of the solution vector and objective vector.
 * @param[in] nz_size     Value forwarded unchanged to the callbacks.
 * @param     param       Parameter setup for the conjugate gradient methods.
 * @param     instance    User data forwarded to the callbacks. This variable is either
 *                        'this' for class member functions or 'NULL' for global functions.
 * @param     cub_handle  Handle of the cublas object.
 * @param     cus_handle  Handle of the cusparse object.
 * @param     solver_id   Solver type used to solve the linear system. The default value is CLCG_BICG.
 *
 * @return Status of the function.
 */
int clcg_solver_cuda(
    clcg_axfunc_cuda_ptr Afp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B,
    const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    clcg_solver_enum solver_id = CLCG_BICG);
|
||||
|
||||
/**
 * @brief Combined entry point of the preconditioned complex conjugate gradient solvers.
 * m and B are host arrays: the solvers copy them to the GPU device, iterate there,
 * and copy the solution back into m.
 *
 * @param[in] Afp         Callback function for calculating the product of 'Ax'.
 * @param[in] Mfp         Callback function for calculating the product of 'Mx' for preconditioning.
 * @param[in] Pfp         Callback function for monitoring the iteration progress.
 * @param     m           Initial solution vector on entry, solution on return.
 * @param     B           Objective vector of the linear system.
 * @param[in] n_size      Size of the solution vector and objective vector.
 * @param[in] nz_size     Value forwarded unchanged to the callbacks.
 * @param     param       Parameter setup for the conjugate gradient methods.
 * @param     instance    User data forwarded to the callbacks. This variable is either
 *                        'this' for class member functions or 'NULL' for global functions.
 * @param     cub_handle  Handle of the cublas object.
 * @param     cus_handle  Handle of the cusparse object.
 * @param     solver_id   Solver type used to solve the linear system. The default value is CLCG_PCG.
 *
 * @return Status of the function.
 */
int clcg_solver_preconditioned_cuda(
    clcg_axfunc_cuda_ptr Afp, clcg_axfunc_cuda_ptr Mfp, clcg_progress_cuda_ptr Pfp,
    cuDoubleComplex* m, const cuDoubleComplex* B,
    const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    clcg_solver_enum solver_id = CLCG_PCG);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _CLCG_CUDA_H
|
529
src/lib/clcg_cudaf.cu
Normal file
529
src/lib/clcg_cudaf.cu
Normal file
@ -0,0 +1,529 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "clcg_cudaf.h"
|
||||
|
||||
|
||||
// Common signature of the un-preconditioned single-precision complex solvers
// that clcg_solver_cuda() can dispatch to.
using cuda_solver_ptr = int (*)(
    clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration; the solver is defined further down in this file.
int clbicg(
    clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration only; the definition is not in this part of the file.
int clbicg_symmetric(
    clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
/**
 * Entry point for the un-preconditioned single-precision complex solvers.
 *
 * Forwards every argument unchanged to the solver selected by solver_id:
 * CLCG_BICG -> clbicg(), CLCG_BICG_SYM -> clbicg_symmetric().
 *
 * @return The status returned by the selected solver, or CLCG_UNKNOWN_SOLVER
 *         when solver_id names neither of the two solvers above.
 */
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
    const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
    cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    if (solver_id == CLCG_BICG)
    {
        return clbicg(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    if (solver_id == CLCG_BICG_SYM)
    {
        return clbicg_symmetric(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
    }

    // Any other id is not handled by this dispatcher.
    return CLCG_UNKNOWN_SOLVER;
}
|
||||
|
||||
// Common signature of the preconditioned single-precision complex solvers
// that clcg_solver_preconditioned_cuda() can dispatch to.
using cuda_precondtioned_solver_ptr = int (*)(
    clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
    cuComplex* m, const cuComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

// Forward declaration only; the definition is not in this part of the file.
int clpcg(
    clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
    cuComplex* m, const cuComplex* B, const int n_size, const int nz_size,
    const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
/**
 * Entry point for the preconditioned single-precision complex solvers.
 *
 * Forwards every argument unchanged to clpcg() when solver_id is CLCG_PCG.
 *
 * @return The status returned by clpcg(), or CLCG_UNKNOWN_SOLVER for any
 *         other solver_id.
 */
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
    cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id)
{
    // CLCG_PCG is the only preconditioned solver this dispatcher knows.
    if (solver_id != CLCG_PCG)
    {
        return CLCG_UNKNOWN_SOLVER;
    }

    return clpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||
|
||||
int clbicg(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
|
||||
const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||
{
|
||||
// set CGS parameters
|
||||
clcg_para para = (param != nullptr) ? (*param) : defparam2;
|
||||
|
||||
//check parameters
|
||||
if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
|
||||
if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
|
||||
if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
|
||||
|
||||
if (m == nullptr) return CLCG_INVALID_POINTER;
|
||||
if (B == nullptr) return CLCG_INVALID_POINTER;
|
||||
if (cub_handle == nullptr) return LCG_INVALID_POINTER;
|
||||
if (cus_handle == nullptr) return LCG_INVALID_POINTER;
|
||||
|
||||
cuComplex *d_m = nullptr, *d_B = nullptr;
|
||||
cuComplex *r1k = nullptr, *r2k = nullptr;
|
||||
cuComplex *d1k = nullptr, *d2k = nullptr, *Ax = nullptr;
|
||||
cudaMalloc(&d_m, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&d_B, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&r1k, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&r2k, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&d1k, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&d2k, n_size * sizeof(cuComplex));
|
||||
cudaMalloc(&Ax, n_size * sizeof(cuComplex));
|
||||
|
||||
// Copy initial solutions
|
||||
cudaMemcpy(d_m, m, n_size * sizeof(cuComplex), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_B, B, n_size * sizeof(cuComplex), cudaMemcpyHostToDevice);
|
||||
|
||||
cusparseDnVecDescr_t dvec_m, dvec_d1k, dvec_d2k, dvec_Ax;
|
||||
cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
|
||||
cusparseCreateDnVec(&dvec_d1k, n_size, d1k, CUDA_C_32F);
|
||||
cusparseCreateDnVec(&dvec_d2k, n_size, d2k, CUDA_C_32F);
|
||||
cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);
|
||||
|
||||
cuComplex one, none;
|
||||
one.x = 1.0; one.y = 0.0;
|
||||
none.x = -1.0; none.y = 0.0;
|
||||
cuComplex ak, nak, conj_ak, Ad1d2, r1r2_next, betak, conj_betak;
|
||||
|
||||
Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||
|
||||
// r0 = B - Ax
|
||||
cudaMemcpy(r1k, d_B, n_size * sizeof(cuComplex), cudaMemcpyDeviceToDevice); // r0 = B
|
||||
cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, r1k, 1); // r0 -= Ax
|
||||
cudaMemcpy(d1k, r1k, n_size * sizeof(cuComplex), cudaMemcpyDeviceToDevice); // d0 = r0
|
||||
|
||||
clcg_vecC_conjugate(r1k, r2k, n_size);
|
||||
cudaMemcpy(d2k, r2k, n_size * sizeof(cuComplex), cudaMemcpyDeviceToDevice);
|
||||
|
||||
cuComplex r1r2;
|
||||
cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2);
|
||||
|
||||
float rk_mod;
|
||||
cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);
|
||||
|
||||
float r0_mod = rk_mod;
|
||||
if (r0_mod < 1.0) r0_mod = 1.0;
|
||||
|
||||
int ret, t = 0;
|
||||
if (para.abs_diff && rk_mod/n_size <= para.epsilon)
|
||||
{
|
||||
ret = LCG_ALREADY_OPTIMIZIED;
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
Pfp(instance, d_m, rk_mod/n_size, ¶, n_size, nz_size, 0);
|
||||
}
|
||||
goto func_ends;
|
||||
}
|
||||
else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
|
||||
{
|
||||
ret = LCG_ALREADY_OPTIMIZIED;
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), ¶, n_size, nz_size, 0);
|
||||
}
|
||||
goto func_ends;
|
||||
}
|
||||
|
||||
float residual;
|
||||
while(1)
|
||||
{
|
||||
if (para.abs_diff) residual = rk_mod/n_size;
|
||||
else residual = rk_mod*rk_mod/(r0_mod*r0_mod);
|
||||
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
if (Pfp(instance, d_m, residual, ¶, n_size, nz_size, t))
|
||||
{
|
||||
ret = CLCG_STOP; goto func_ends;
|
||||
}
|
||||
}
|
||||
|
||||
if (residual <= para.epsilon)
|
||||
{
|
||||
ret = CLCG_CONVERGENCE; goto func_ends;
|
||||
}
|
||||
|
||||
if (para.max_iterations > 0 && t+1 > para.max_iterations)
|
||||
{
|
||||
ret = LCG_REACHED_MAX_ITERATIONS;
|
||||
break;
|
||||
}
|
||||
|
||||
t++;
|
||||
|
||||
Afp(instance, cub_handle, cus_handle, dvec_d1k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
|
||||
|
||||
cublasCdotc_v2(cub_handle, n_size, d2k, 1, Ax, 1, &Ad1d2);
|
||||
ak = cuCdivf(r1r2, Ad1d2);
|
||||
nak = cuCmulf(none, ak);
|
||||
conj_ak = cuConjf(nak);
|
||||
|
||||
cublasCaxpy_v2(cub_handle, n_size, &ak, d1k, 1, d_m, 1);
|
||||
cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, r1k, 1);
|
||||
|
||||
cublasScnrm2_v2(cub_handle, n_size, r1k, 1, &rk_mod);
|
||||
|
||||
Afp(instance, cub_handle, cus_handle, dvec_d2k, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE);
|
||||
|
||||
cublasCaxpy_v2(cub_handle, n_size, &conj_ak, Ax, 1, r2k, 1);
|
||||
|
||||
cublasCdotc_v2(cub_handle, n_size, r2k, 1, r1k, 1, &r1r2_next);
|
||||
betak = cuCdivf(r1r2_next, r1r2);
|
||||
conj_betak = cuConjf(betak);
|
||||
r1r2 = r1r2_next;
|
||||
|
||||
cublasCscal_v2(cub_handle, n_size, &betak, d1k, 1);
|
||||
cublasCaxpy_v2(cub_handle, n_size, &one, r1k, 1, d1k, 1);
|
||||
|
||||
cublasCscal_v2(cub_handle, n_size, &conj_betak, d2k, 1);
|
||||
cublasCaxpy_v2(cub_handle, n_size, &one, r2k, 1, d2k, 1);
|
||||
}
|
||||
|
||||
func_ends:
|
||||
{
|
||||
// Copy to host memories
|
||||
cudaMemcpy(m, d_m, n_size * sizeof(cuComplex), cudaMemcpyDeviceToHost);
|
||||
|
||||
cudaFree(d_m);
|
||||
cudaFree(d_B);
|
||||
cudaFree(r1k);
|
||||
cudaFree(r2k);
|
||||
cudaFree(d1k);
|
||||
cudaFree(d2k);
|
||||
cudaFree(Ax);
|
||||
cusparseDestroyDnVec(dvec_m);
|
||||
cusparseDestroyDnVec(dvec_d1k);
|
||||
cusparseDestroyDnVec(dvec_d2k);
|
||||
cusparseDestroyDnVec(dvec_Ax);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * @brief Bi-conjugate gradient iteration for complex symmetric systems (single precision),
 *        built on cuBLAS vector kernels and a user supplied matrix-vector product.
 *
 * The routine allocates its own device work buffers, stages m and B into them with
 * cudaMemcpyHostToDevice, iterates, and copies the solution back into m with
 * cudaMemcpyDeviceToHost before returning. All inner products are taken with
 * cublasCdotu_v2, i.e. without conjugation.
 *
 * @param[in]     Afp        Callback computing the product A*x on cuSPARSE dense-vector descriptors.
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the device copy of
 *                           the current solution; a non-zero return value stops the iteration.
 * @param[in,out] m          Initial solution on entry, final solution on exit.
 * @param[in]     B          Right-hand side vector.
 * @param[in]     n_size     Number of unknowns; must be positive.
 * @param[in]     nz_size    Forwarded untouched to Afp and Pfp.
 * @param[in]     param      Solver parameters; defparam2 is used when nullptr.
 * @param[in]     instance   User data forwarded untouched to Afp and Pfp.
 * @param[in]     cub_handle cuBLAS handle; must not be nullptr.
 * @param[in]     cus_handle cuSPARSE handle; must not be nullptr.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_INVILAD_MAX_ITERATIONS, CLCG_INVILAD_EPSILON,
 *         CLCG_INVALID_POINTER or LCG_INVALID_POINTER on invalid arguments; otherwise
 *         LCG_ALREADY_OPTIMIZIED, CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clbicg_symmetric(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Fall back to the library defaults when the caller supplies no parameter set.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Validate the arguments before any device resource is touched.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    if (m == nullptr) return CLCG_INVALID_POINTER;
    if (B == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // Every buffer stores n_size single-precision complex values, so all of them are
    // sized with cuComplex (not cuDoubleComplex) to match the copies below.
    const size_t vec_bytes = static_cast<size_t>(n_size) * sizeof(cuComplex);

    cuComplex *d_m = nullptr, *d_B = nullptr;
    cuComplex *rk = nullptr, *dk = nullptr, *Ax = nullptr;
    cudaMalloc(&d_m, vec_bytes);
    cudaMalloc(&d_B, vec_bytes);
    cudaMalloc(&rk, vec_bytes);
    cudaMalloc(&dk, vec_bytes);
    cudaMalloc(&Ax, vec_bytes);

    // Stage the initial solution and the right-hand side on the device.
    cudaMemcpy(d_m, m, vec_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, vec_bytes, cudaMemcpyHostToDevice);

    // Descriptors through which Afp reads its input and writes its product.
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_C_32F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_C_32F);

    cuComplex one, none;
    one.x = 1.0f; one.y = 0.0f;
    none.x = -1.0f; none.y = 0.0f;
    cuComplex ak, nak, rkrk2, betak, dkAx;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, vec_bytes, cudaMemcpyDeviceToDevice); // r0 = B
    cublasCaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
    cudaMemcpy(dk, rk, vec_bytes, cudaMemcpyDeviceToDevice); // d0 = r0

    // Unconjugated product r.r drives the step lengths.
    cuComplex rkrk;
    cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk);

    // Euclidean norm of the residual drives the stopping tests.
    float rk_mod;
    cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

    // The reference norm is clamped to at least one.
    float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    if (para.abs_diff && rk_mod/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod*rk_mod/(r0_mod*r0_mod) <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod*rk_mod/(r0_mod*r0_mod), &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    float residual;
    while(1)
    {
        if (para.abs_diff) residual = rk_mod/n_size;
        else residual = rk_mod*rk_mod/(r0_mod*r0_mod);

        // Report progress first so the callback also sees the converged state.
        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Ax, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

        // ak = (r.r) / (d.Ad)
        cublasCdotu_v2(cub_handle, n_size, dk, 1, Ax, 1, &dkAx);
        ak = cuCdivf(rkrk, dkAx);
        nak = cuCmulf(none, ak);

        cublasCaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1);  // m += ak*d
        cublasCaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1);  // r -= ak*Ad

        cublasScnrm2_v2(cub_handle, n_size, rk, 1, &rk_mod);

        // betak = (r_new.r_new) / (r_old.r_old)
        cublasCdotu_v2(cub_handle, n_size, rk, 1, rk, 1, &rkrk2);
        betak = cuCdivf(rkrk2, rkrk);
        rkrk = rkrk2;

        // d = r + betak*d
        cublasCscal_v2(cub_handle, n_size, &betak, dk, 1);
        cublasCaxpy_v2(cub_handle, n_size, &one, rk, 1, dk, 1);
    }

    func_ends:
    {
        // Copy to host memories
        cudaMemcpy(m, d_m, vec_bytes, cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(dk);
        cudaFree(Ax);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
||||
|
||||
/**
 * @brief Preconditioned conjugate gradient iteration for complex systems (single precision),
 *        built on cuBLAS vector kernels and user supplied A*x and M*x products.
 *
 * m and B are staged into device buffers with cudaMemcpyHostToDevice and the solution is
 * copied back into m with cudaMemcpyDeviceToHost before returning. Inner products are
 * taken with cublasCdotu_v2, i.e. without conjugation.
 *
 * @param[in]     Afp        Callback computing the product A*x.
 * @param[in]     Mfp        Callback applying the preconditioner to a vector.
 * @param[in]     Pfp        Optional progress callback (may be nullptr). It receives the device copy of
 *                           the current solution; a non-zero return value stops the iteration.
 * @param[in,out] m          Initial solution on entry, final solution on exit.
 * @param[in]     B          Right-hand side vector.
 * @param[in]     n_size     Number of unknowns; must be positive.
 * @param[in]     nz_size    Forwarded untouched to the callbacks.
 * @param[in]     param      Solver parameters; defparam2 is used when nullptr.
 * @param[in]     instance   User data forwarded untouched to the callbacks.
 * @param[in]     cub_handle cuBLAS handle; must not be nullptr.
 * @param[in]     cus_handle cuSPARSE handle; must not be nullptr.
 *
 * @return An argument-error code when validation fails; otherwise LCG_ALREADY_OPTIMIZIED,
 *         CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clpcg(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp, cuComplex* m,
    const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
    cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    // Argument validation.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;
    if (m == nullptr || B == nullptr) return CLCG_INVALID_POINTER;
    if (cub_handle == nullptr || cus_handle == nullptr) return LCG_INVALID_POINTER;

    const size_t bytes = n_size * sizeof(cuComplex);

    // Device work buffers: solution, right-hand side, residual, search direction,
    // preconditioned residual and the A*x product.
    cuComplex *sol_dev = nullptr, *rhs_dev = nullptr;
    cuComplex *res = nullptr, *dir = nullptr, *prec = nullptr, *prod = nullptr;
    cudaMalloc(&sol_dev, bytes);
    cudaMalloc(&rhs_dev, bytes);
    cudaMalloc(&res, bytes);
    cudaMalloc(&dir, bytes);
    cudaMalloc(&prec, bytes);
    cudaMalloc(&prod, bytes);

    cudaMemcpy(sol_dev, m, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(rhs_dev, B, bytes, cudaMemcpyHostToDevice);

    cusparseDnVecDescr_t sol_vec, res_vec, dir_vec, prec_vec, prod_vec;
    cusparseCreateDnVec(&sol_vec, n_size, sol_dev, CUDA_C_32F);
    cusparseCreateDnVec(&res_vec, n_size, res, CUDA_C_32F);
    cusparseCreateDnVec(&dir_vec, n_size, dir, CUDA_C_32F);
    cusparseCreateDnVec(&prec_vec, n_size, prec, CUDA_C_32F);
    cusparseCreateDnVec(&prod_vec, n_size, prod, CUDA_C_32F);

    cuComplex plus_one, minus_one;
    plus_one.x = 1.0f;   plus_one.y = 0.0f;
    minus_one.x = -1.0f; minus_one.y = 0.0f;

    // Initial residual: res = B - A*m.
    Afp(instance, cub_handle, cus_handle, sol_vec, prod_vec, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
    cudaMemcpy(res, rhs_dev, bytes, cudaMemcpyDeviceToDevice);
    cublasCaxpy_v2(cub_handle, n_size, &minus_one, prod, 1, res, 1);

    // Initial search direction: dir = M(res).
    Mfp(instance, cub_handle, cus_handle, res_vec, dir_vec, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

    cuComplex rz_new, rz_old, alpha, neg_alpha, beta, dAd;
    cublasCdotu_v2(cub_handle, n_size, res, 1, dir, 1, &rz_new);

    float res_norm;
    cublasScnrm2_v2(cub_handle, n_size, res, 1, &res_norm);

    // Reference norm, clamped to at least one.
    float ref_norm = res_norm;
    if (ref_norm < 1.0) ref_norm = 1.0;

    int ret;
    if (para.abs_diff && res_norm/n_size <= para.epsilon)
    {
        // The starting point already satisfies the absolute criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, sol_dev, res_norm/n_size, &para, n_size, nz_size, 0);
        }
    }
    else if (res_norm*res_norm/(ref_norm*ref_norm) <= para.epsilon)
    {
        // The starting point already satisfies the relative criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, sol_dev, res_norm*res_norm/(ref_norm*ref_norm), &para, n_size, nz_size, 0);
        }
    }
    else
    {
        int iter = 0;
        for (;;)
        {
            const float residual = para.abs_diff ? res_norm/n_size
                                                 : res_norm*res_norm/(ref_norm*ref_norm);

            // The monitor is consulted before the convergence test.
            if (Pfp != nullptr && Pfp(instance, sol_dev, residual, &para, n_size, nz_size, iter))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // A zero max_iterations means "no limit".
            if (para.max_iterations > 0 && iter+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            ++iter;

            // alpha = (res.M res) / (dir.A dir)
            Afp(instance, cub_handle, cus_handle, dir_vec, prod_vec, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);
            cublasCdotu_v2(cub_handle, n_size, dir, 1, prod, 1, &dAd);
            alpha = cuCdivf(rz_new, dAd);
            neg_alpha = cuCmulf(minus_one, alpha);

            cublasCaxpy_v2(cub_handle, n_size, &alpha, dir, 1, sol_dev, 1);  // m   += alpha*dir
            cublasCaxpy_v2(cub_handle, n_size, &neg_alpha, prod, 1, res, 1); // res -= alpha*A dir

            cublasScnrm2_v2(cub_handle, n_size, res, 1, &res_norm);

            // prec = M(res)
            Mfp(instance, cub_handle, cus_handle, res_vec, prec_vec, n_size, nz_size, CUSPARSE_OPERATION_NON_TRANSPOSE);

            rz_old = rz_new;
            cublasCdotu_v2(cub_handle, n_size, res, 1, prec, 1, &rz_new);

            beta = cuCdivf(rz_new, rz_old);

            // dir = prec + beta*dir
            cublasCscal_v2(cub_handle, n_size, &beta, dir, 1);
            cublasCaxpy_v2(cub_handle, n_size, &plus_one, prec, 1, dir, 1);
        }
    }

    // Hand the solution back to the caller, then release the device resources.
    cudaMemcpy(m, sol_dev, bytes, cudaMemcpyDeviceToHost);

    cudaFree(sol_dev);
    cudaFree(rhs_dev);
    cudaFree(res);
    cudaFree(dir);
    cudaFree(prec);
    cudaFree(prod);
    cusparseDestroyDnVec(sol_vec);
    cusparseDestroyDnVec(res_vec);
    cusparseDestroyDnVec(dir_vec);
    cusparseDestroyDnVec(prec_vec);
    cusparseDestroyDnVec(prod_vec);

    return ret;
}
|
109
src/lib/clcg_cudaf.h
Normal file
109
src/lib/clcg_cudaf.h
Normal file
@ -0,0 +1,109 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _CLCG_CUDA_FLOAT_H
|
||||
#define _CLCG_CUDA_FLOAT_H
|
||||
|
||||
#include "util.h"
|
||||
#include "lcg_complex_cuda.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <cusparse_v2.h>
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
 * @param cus_handle Handle of the cusparse object.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param Ax Product of A multiplied by x.
|
||||
* @param n_size Size of x and column/row numbers of A.
|
||||
*/
|
||||
typedef void (*clcg_axfunc_cudaf_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size, cusparseOperation_t oper_t);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||
* if necessary. Note that m is hosted on the GPU device.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
|
||||
* @param n_size The size of the variables
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*clcg_progress_cudaf_ptr)(void* instance, const cuComplex* m, const float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
 * @param cus_handle Handle of the cusparse object.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
 * @param solver_id Solver type used to solve the linear system. The default value is CLCG_BICG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int clcg_solver_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_progress_cudaf_ptr Pfp, cuComplex* m, const cuComplex* B,
|
||||
const int n_size, const int nz_size, const clcg_para* param, void* instance, cublasHandle_t cub_handle,
|
||||
cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_BICG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
 * @param cus_handle Handle of the cusparse object.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
 * @param solver_id Solver type used to solve the linear system. The default value is CLCG_PCG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int clcg_solver_preconditioned_cuda(clcg_axfunc_cudaf_ptr Afp, clcg_axfunc_cudaf_ptr Mfp, clcg_progress_cudaf_ptr Pfp,
|
||||
cuComplex* m, const cuComplex* B, const int n_size, const int nz_size, const clcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, clcg_solver_enum solver_id = CLCG_PCG);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _CLCG_CUDA_FLOAT_H
|
777
src/lib/clcg_eigen.cpp
Normal file
777
src/lib/clcg_eigen.cpp
Normal file
@ -0,0 +1,777 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "clcg_eigen.h"
|
||||
|
||||
#include "config.h"
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
|
||||
typedef int (*eigen_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
|
||||
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
|
||||
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
|
||||
{
|
||||
eigen_solver_ptr cg_solver;
|
||||
switch (solver_id)
|
||||
{
|
||||
case CLCG_BICG:
|
||||
cg_solver = clbicg;
|
||||
break;
|
||||
case CLCG_BICG_SYM:
|
||||
cg_solver = clbicg_symmetric;
|
||||
break;
|
||||
case CLCG_CGS:
|
||||
cg_solver = clcgs;
|
||||
break;
|
||||
case CLCG_TFQMR:
|
||||
cg_solver = cltfqmr;
|
||||
break;
|
||||
default:
|
||||
return CLCG_UNKNOWN_SOLVER;
|
||||
}
|
||||
|
||||
return cg_solver(Afp, Pfp, m, B, param, instance);
|
||||
}
|
||||
|
||||
|
||||
typedef int (*eigen_preconditioned_solver_ptr)(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
|
||||
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance);
|
||||
|
||||
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id)
|
||||
{
|
||||
eigen_preconditioned_solver_ptr cgp_solver;
|
||||
switch (solver_id)
|
||||
{
|
||||
case CLCG_PCG:
|
||||
cgp_solver = clpcg; break;
|
||||
case CLCG_PBICG:
|
||||
cgp_solver = clpbicg; break;
|
||||
default:
|
||||
return CLCG_UNKNOWN_SOLVER;
|
||||
}
|
||||
|
||||
return cgp_solver(Afp, Mfp, Pfp, m, B, param, instance);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Bi-conjugate gradient iteration on Eigen complex vectors.
 *
 * Two coupled sequences are advanced: one with A (MatNormal, NonConjugate) and a shadow
 * sequence with the conjugate transpose of A (MatTranspose, Conjugate). The convergence
 * measure is std::norm() of the inner product res.dot(res).
 *
 * @param[in]     Afp      Callback computing the matrix-vector product.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param[in,out] m        Initial solution on entry, updated in place.
 * @param[in]     B        Right-hand side vector; its size defines the problem size.
 * @param[in]     param    Solver parameters; defparam2 is used when nullptr.
 * @param[in]     instance User data forwarded untouched to the callbacks.
 *
 * @return An argument-error code when validation fails; otherwise LCG_ALREADY_OPTIMIZIED,
 *         CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clbicg(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Argument validation.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd res(n_size), shadow_res(n_size), dir(n_size), shadow_dir(n_size);
    Eigen::VectorXcd prod(n_size);

    Afp(instance, m, prod, MatNormal, NonConjugate);

    // Residual and first search direction; the shadow pair starts from the conjugate.
    res = B - prod;
    dir = res;
    shadow_res = res.conjugate();
    shadow_dir = shadow_res;

    // Eigen's dot() conjugates its left-hand vector.
    std::complex<lcg_float> rho = shadow_res.dot(res);
    std::complex<lcg_float> rho_next, alpha, beta, dAd;

    lcg_float rk_mod = std::norm(res.dot(res));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret;
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        // The starting point already satisfies the absolute criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        }
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        // The starting point already satisfies the relative criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        }
    }
    else
    {
        int iter = 0;
        lcg_float residual;
        for (;;)
        {
            if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
            else residual = rk_mod/r0_mod;

            // The monitor is consulted before the convergence test.
            if (Pfp != nullptr && Pfp(instance, &m, residual, &para, iter))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // A zero max_iterations means "no limit".
            if (para.max_iterations > 0 && iter+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            ++iter;

            // Step length along the primary direction.
            Afp(instance, dir, prod, MatNormal, NonConjugate);
            dAd = shadow_dir.dot(prod);
            alpha = rho/dAd;

            m += alpha*dir;
            res -= alpha*prod;

            rk_mod = std::norm(res.dot(res));

            // Advance the shadow residual with the conjugate transpose of A.
            Afp(instance, shadow_dir, prod, MatTranspose, Conjugate);
            shadow_res -= std::conj(alpha)*prod;

            rho_next = shadow_res.dot(res);
            beta = rho_next/rho;
            rho = rho_next;

            dir = res + beta*dir;
            shadow_dir = shadow_res + std::conj(beta)*shadow_dir;
        }
    }

    // Release the work vectors.
    res.resize(0);
    shadow_res.resize(0);
    dir.resize(0);
    shadow_dir.resize(0);
    prod.resize(0);

    return ret;
}
|
||||
|
||||
/**
 * @brief Bi-conjugate gradient iteration for complex symmetric systems on Eigen vectors.
 *
 * Only products with A itself (MatNormal, NonConjugate) are requested. The step
 * lengths use x.conjugate().dot(y), which cancels the conjugation Eigen's dot()
 * applies to its left-hand vector. The convergence measure is std::norm() of
 * res.dot(res).
 *
 * @param[in]     Afp      Callback computing the matrix-vector product.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param[in,out] m        Initial solution on entry, updated in place.
 * @param[in]     B        Right-hand side vector; its size defines the problem size.
 * @param[in]     param    Solver parameters; defparam2 is used when nullptr.
 * @param[in]     instance User data forwarded untouched to the callbacks.
 *
 * @return An argument-error code when validation fails; otherwise LCG_ALREADY_OPTIMIZIED,
 *         CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clbicg_symmetric(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // Use the caller's parameters when given, the library defaults otherwise.
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    // Argument validation.
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    Eigen::VectorXcd res(n_size), dir(n_size), prod(n_size);

    Afp(instance, m, prod, MatNormal, NonConjugate);

    // Residual and first search direction.
    res = (B - prod);
    dir = res;

    std::complex<lcg_float> rho = res.conjugate().dot(res);
    std::complex<lcg_float> rho_next, alpha, beta, dAd;

    lcg_float rk_mod = std::norm(res.dot(res));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret;
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        // The starting point already satisfies the absolute criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        }
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        // The starting point already satisfies the relative criterion.
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        }
    }
    else
    {
        int iter = 0;
        lcg_float residual;
        for (;;)
        {
            if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
            else residual = rk_mod/r0_mod;

            // The monitor is consulted before the convergence test.
            if (Pfp != nullptr && Pfp(instance, &m, residual, &para, iter))
            {
                ret = CLCG_STOP;
                break;
            }

            if (residual <= para.epsilon)
            {
                ret = CLCG_CONVERGENCE;
                break;
            }

            // A zero max_iterations means "no limit".
            if (para.max_iterations > 0 && iter+1 > para.max_iterations)
            {
                ret = LCG_REACHED_MAX_ITERATIONS;
                break;
            }

            ++iter;

            // Step length along the current direction.
            Afp(instance, dir, prod, MatNormal, NonConjugate);
            dAd = dir.conjugate().dot(prod);
            alpha = rho/dAd;

            m = m + alpha*dir;
            res = res - alpha*prod;

            rk_mod = std::norm(res.dot(res));

            rho_next = res.conjugate().dot(res);
            beta = rho_next/rho;
            rho = rho_next;

            dir = res + beta*dir;
        }
    }

    // Release the work vectors.
    res.resize(0);
    dir.resize(0);
    prod.resize(0);

    return ret;
}
|
||||
|
||||
/**
 * @brief Conjugate gradient squared (CGS) iteration on Eigen complex vectors.
 *
 * A random shadow vector s0 is drawn until its unconjugated product with the initial
 * residual has a magnitude of at least 1e-8. The "already optimized" tests run before
 * that draw: a zero initial residual makes the product zero for every s0, so drawing
 * first would never terminate. The convergence measure is std::norm() of rk.dot(rk).
 *
 * @param[in]     Afp      Callback computing the matrix-vector product.
 * @param[in]     Pfp      Optional progress callback (may be nullptr); a non-zero return stops the iteration.
 * @param[in,out] m        Initial solution on entry, updated in place.
 * @param[in]     B        Right-hand side vector; its size defines the problem size.
 * @param[in]     param    Solver parameters; defparam2 is used when nullptr.
 * @param[in]     instance User data forwarded untouched to the callbacks.
 *
 * @return CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH, CLCG_INVILAD_MAX_ITERATIONS or
 *         CLCG_INVILAD_EPSILON on invalid arguments; otherwise LCG_ALREADY_OPTIMIZIED,
 *         CLCG_CONVERGENCE, CLCG_STOP or LCG_REACHED_MAX_ITERATIONS.
 */
int clcgs(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
    const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
    // set CGS parameters
    clcg_para para = (param != nullptr) ? (*param) : defparam2;

    int n_size = B.size();
    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
    if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    // All objects with constructors are declared before the first goto so that the
    // jumps to func_ends never bypass an initialization.
    std::complex<lcg_float> ak, rhok, rhok2, sigma, betak;
    Eigen::VectorXcd rk(n_size), s0, pk(n_size);
    Eigen::VectorXcd Ax(n_size), uk(n_size), qk(n_size), wk(n_size);

    Afp(instance, m, Ax, MatNormal, NonConjugate);

    // r0 = B - A*m, which also seeds uk and pk.
    pk = uk = rk = (B - Ax);

    lcg_float rk_mod = std::norm(rk.dot(rk));
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
        }
        goto func_ends;
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
        }
        goto func_ends;
    }

    // Draw the shadow vector only now that the residual is known to be non-zero;
    // retry until it is not (numerically) orthogonal to the residual.
    do
    {
        s0 = Eigen::VectorXcd::Random(n_size);
        rhok = s0.conjugate().dot(rk);
    } while (std::sqrt(std::norm(rhok)) < 1e-8);

    lcg_float residual;
    while(1)
    {
        if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        // Report progress first so the callback also sees the converged state.
        if (Pfp != nullptr)
        {
            if (Pfp(instance, &m, residual, &para, t))
            {
                ret = CLCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = CLCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit.
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        // ak = rhok / (s0 . A pk)
        Afp(instance, pk, Ax, MatNormal, NonConjugate);
        sigma = s0.conjugate().dot(Ax);
        ak = rhok/sigma;

        qk = uk - ak*Ax;
        wk = uk + qk;

        Afp(instance, wk, Ax, MatNormal, NonConjugate);

        m += ak*wk;
        rk -= ak*Ax;

        rk_mod = std::norm(rk.dot(rk));

        rhok2 = s0.conjugate().dot(rk);
        betak = rhok2/rhok;
        rhok = rhok2;

        uk = rk + betak*qk;
        pk = uk + betak*(qk + betak*pk);
    }

    func_ends:
    {
        rk.resize(0);
        s0.resize(0);
        pk.resize(0);
        Ax.resize(0);
        uk.resize(0);
        qk.resize(0);
        wk.resize(0);
    }

    return ret;
}
|
||||
|
||||
/**
 * @brief Complex-valued iterative solver 'cltfqmr' working on Eigen vectors
 * (presumably the transpose-free quasi-minimal residual method, judging by its
 * name and update formulas -- confirm against the library documentation).
 *
 * The residual B - A*m is formed with the Afp callback. Every pass of the outer
 * loop then performs two products with A and two half-step updates of 'm'
 * (j = 1 and j = 2). The progress callback and the stopping tests are evaluated
 * before each half step.
 *
 * @param[in] Afp Callback function for calculating the product of 'Ax'. It is always
 * called with MatNormal and NonConjugate by this function.
 * @param[in] Pfp Callback function for monitoring the iteration progress. May be nullptr.
 * A non-zero return value terminates the iteration with CLCG_STOP.
 * @param m Initial solution vector. It is updated in place.
 * @param B Objective vector of the linear system.
 * @param param Parameter setup. 'defparam2' is used if this pointer is null.
 * @param instance The user data handed over to the callbacks.
 *
 * @return Status of the function: CLCG_INVILAD_VARIABLE_SIZE, CLCG_SIZE_NOT_MATCH,
 * CLCG_INVILAD_MAX_ITERATIONS or CLCG_INVILAD_EPSILON for rejected inputs, otherwise
 * LCG_ALREADY_OPTIMIZIED, CLCG_STOP, CLCG_CONVERGENCE or LCG_REACHED_MAX_ITERATIONS.
 *
 * @note NOTE(review): two status codes carry the LCG_ prefix while the others use
 * CLCG_ -- confirm that this mix is intended.
 */
int cltfqmr(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
	const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
	// set TFQMR parameters
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	int n_size = B.size();
	// check parameters
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	int j;
	Eigen::VectorXcd pk(n_size), uk(n_size), vk(n_size), dk(n_size);
	Eigen::VectorXcd r0(n_size), rk(n_size), Ax(n_size), qk(n_size);
	Eigen::VectorXcd uqk(n_size);

	Afp(instance, m, Ax, MatNormal, NonConjugate);

	// all working directions start from the initial residual
	pk = uk = r0 = rk = (B - Ax);
	dk.setZero();

	// Eigen's dot() conjugates its first argument, so rk.dot(rk) is the squared 2-norm of rk
	std::complex<lcg_float> rk_mod = rk.dot(rk);
	lcg_float r0_mod = std::norm(rk_mod);
	// never normalise the relative residual by a value smaller than one
	if (r0_mod < 1.0) r0_mod = 1.0;

	lcg_float theta = 0.0, omega = sqrt(rk_mod.real());
	lcg_float residual, tao = omega;
	std::complex<lcg_float> rk_mod2, sigma, alpha, betak, rho, rho2, sign, eta(0.0, 0.0);

	rho = r0.dot(r0);

	int ret, t = 0;
	// nothing to do if the initial solution already satisfies the tolerance
	if (para.abs_diff && sqrt(std::norm(rk_mod))/n_size <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, &m, sqrt(std::norm(rk_mod))/n_size, &para, 0);
		}
		goto func_ends;
	}
	else if (std::norm(rk_mod)/r0_mod <= para.epsilon)
	{
		ret = LCG_ALREADY_OPTIMIZIED;
		if (Pfp != nullptr)
		{
			Pfp(instance, &m, std::norm(rk_mod)/r0_mod, &para, 0);
		}
		goto func_ends;
	}

	while(1)
	{
		Afp(instance, pk, vk, MatNormal, NonConjugate);

		sigma = r0.dot(vk);
		alpha = rho/sigma;

		qk = uk - alpha*vk;
		uqk = uk + qk;

		Afp(instance, uqk, Ax, MatNormal, NonConjugate);

		rk -= alpha*Ax;
		rk_mod2 = rk.dot(rk);

		// two half steps per outer pass
		for (j = 1; j <= 2; j++)
		{
			if (para.abs_diff) residual = std::sqrt(std::norm(rk_mod))/n_size;
			else residual = std::norm(rk_mod)/r0_mod;

			if (Pfp != nullptr)
			{
				if (Pfp(instance, &m, residual, &para, t))
				{
					ret = CLCG_STOP; goto func_ends;
				}
			}

			if (residual <= para.epsilon)
			{
				ret = CLCG_CONVERGENCE; goto func_ends;
			}

			if (para.max_iterations > 0 && t+1 > para.max_iterations)
			{
				ret = LCG_REACHED_MAX_ITERATIONS;
				// Leave the function here. A plain 'break' would only exit this
				// inner for-loop, and the enclosing while(1) would keep running
				// without ever updating 'm' again.
				goto func_ends;
			}

			t++;

			sign = theta*theta*(eta/alpha);

			if (j == 1)
			{
				omega = sqrt(sqrt(rk_mod.real())*sqrt(rk_mod2.real()));
				dk = uk + sign*dk;
			}
			else
			{
				omega = sqrt(rk_mod2.real());
				dk = qk + sign*dk;
			}

			theta = omega/tao;
			tao = omega/sqrt(1.0+theta*theta);
			eta = (1.0/(1.0+theta*theta))*alpha;

			m += eta*dk;
		}
		rk_mod = rk_mod2;

		rho2 = r0.dot(rk);
		betak = rho2/rho;
		rho = rho2;

		uk = rk + betak*qk;
		pk = uk + betak*(qk + betak*pk);
	}

	func_ends:
	{
		// release the working vectors
		pk.resize(0);
		uk.resize(0);
		vk.resize(0);
		dk.resize(0);
		r0.resize(0);
		rk.resize(0);
		Ax.resize(0);
		qk.resize(0);
		uqk.resize(0);
	}

	return ret;
}
|
||||
|
||||
/**
 * @brief Preconditioned conjugate gradient iteration for complex linear systems
 * working on Eigen vectors.
 *
 * @param[in] Afp Callback function for calculating the product of 'Ax'. It is always
 * called with MatNormal and NonConjugate by this function.
 * @param[in] Mfp Callback function applied to the residual to obtain the preconditioned
 * vector. It is called with MatNormal and NonConjugate.
 * @param[in] Pfp Callback function for monitoring the iteration progress. May be nullptr.
 * A non-zero return value terminates the iteration with CLCG_STOP.
 * @param m Initial solution vector. It is updated in place.
 * @param B Objective vector of the linear system.
 * @param param Parameter setup. 'defparam2' is used if this pointer is null.
 * @param instance The user data handed over to the callbacks.
 *
 * @return Status of the function: one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes
 * for rejected inputs, otherwise LCG_ALREADY_OPTIMIZIED, CLCG_STOP, CLCG_CONVERGENCE or
 * LCG_REACHED_MAX_ITERATIONS.
 *
 * @note NOTE(review): two status codes carry the LCG_ prefix while the others use
 * CLCG_ -- confirm that this mix is intended.
 */
int clpcg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
	Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
	// use the caller's parameters when given, the library defaults otherwise
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// validate the inputs before anything is allocated
	int n_size = B.size();
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	// The working vectors are released by their destructors on every return path.
	Eigen::VectorXcd rk(n_size), dk(n_size), sk(n_size), Ax(n_size);

	// initial residual and its preconditioned counterpart
	Afp(instance, m, Ax, MatNormal, NonConjugate);
	rk = (B - Ax);
	Mfp(instance, rk, dk, MatNormal, NonConjugate);

	// x.conjugate().dot(y) is the unconjugated product sum(x_i*y_i), since dot()
	// itself conjugates its first argument
	std::complex<lcg_float> d_new = rk.conjugate().dot(dk);

	lcg_float rk_mod = std::norm(rk.dot(rk));
	// reference value of the relative residual, clamped from below at one
	lcg_float r0_mod = rk_mod;
	if (r0_mod < 1.0) r0_mod = 1.0;

	// the initial solution may already be good enough
	if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
	{
		if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
		return LCG_ALREADY_OPTIMIZIED;
	}
	else if (rk_mod/r0_mod <= para.epsilon)
	{
		if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
		return LCG_ALREADY_OPTIMIZIED;
	}

	int t = 0;
	lcg_float residual;
	for (;;)
	{
		if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
		else residual = rk_mod/r0_mod;

		// report the progress; the client may ask to stop
		if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
		{
			return CLCG_STOP;
		}

		if (residual <= para.epsilon)
		{
			return CLCG_CONVERGENCE;
		}

		if (para.max_iterations > 0 && t >= para.max_iterations)
		{
			return LCG_REACHED_MAX_ITERATIONS;
		}

		++t;

		// step length along the current search direction
		Afp(instance, dk, Ax, MatNormal, NonConjugate);
		const std::complex<lcg_float> dkAx = dk.conjugate().dot(Ax);
		const std::complex<lcg_float> ak = d_new/dkAx;

		m += ak*dk;
		rk -= ak*Ax;

		rk_mod = std::norm(rk.dot(rk));

		Mfp(instance, rk, sk, MatNormal, NonConjugate);

		// new search direction from the preconditioned residual
		const std::complex<lcg_float> d_old = d_new;
		d_new = rk.conjugate().dot(sk);
		const std::complex<lcg_float> betak = d_new/d_old;

		dk = sk + betak*dk;
	}
}
|
||||
|
||||
/**
 * @brief Preconditioned bi-conjugate gradient iteration for complex linear systems
 * working on Eigen vectors.
 *
 * Besides the residual 'rk' and the direction 'pk', the function carries the shadow
 * vectors 'rsk' and 'psk', which are built from the complex conjugates of 'rk' and 'pk'.
 *
 * @param[in] Afp Callback function for calculating the product of 'Ax'. It is called
 * with MatNormal/NonConjugate for 'pk' and with MatNormal/Conjugate for the shadow direction.
 * @param[in] Mfp Callback function applied to the residual to obtain the preconditioned
 * vector. It is called with MatNormal and NonConjugate.
 * @param[in] Pfp Callback function for monitoring the iteration progress. May be nullptr.
 * A non-zero return value terminates the iteration with CLCG_STOP.
 * @param m Initial solution vector. It is updated in place.
 * @param B Objective vector of the linear system.
 * @param param Parameter setup. 'defparam2' is used if this pointer is null.
 * @param instance The user data handed over to the callbacks.
 *
 * @return Status of the function: one of the CLCG_INVILAD_* / CLCG_SIZE_NOT_MATCH codes
 * for rejected inputs, otherwise LCG_ALREADY_OPTIMIZIED, CLCG_STOP, CLCG_CONVERGENCE or
 * LCG_REACHED_MAX_ITERATIONS.
 *
 * @note NOTE(review): two status codes carry the LCG_ prefix while the others use
 * CLCG_ -- confirm that this mix is intended.
 */
int clpbicg(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
	Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance)
{
	// use the caller's parameters when given, the library defaults otherwise
	clcg_para para = (param != nullptr) ? (*param) : defparam2;

	// validate the inputs before anything is allocated
	int n_size = B.size();
	if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
	if (n_size != m.size()) return CLCG_SIZE_NOT_MATCH;
	if (para.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
	if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

	// The working vectors are released by their destructors on every return path.
	Eigen::VectorXcd rk(n_size), rsk(n_size), zk(n_size), pk(n_size);
	Eigen::VectorXcd psk(n_size), Ax(n_size), Asx(n_size);

	// initial residual and its preconditioned counterpart
	Afp(instance, m, Ax, MatNormal, NonConjugate);
	rk = (B - Ax);
	Mfp(instance, rk, zk, MatNormal, NonConjugate);

	// first search direction and the two shadow vectors
	pk = zk;
	rsk = rk.conjugate();
	psk = pk.conjugate();

	std::complex<lcg_float> rhok = rsk.dot(zk);

	lcg_float rk_mod = std::norm(rk.dot(rk));
	// reference value of the relative residual, clamped from below at one
	lcg_float r0_mod = rk_mod;
	if (r0_mod < 1.0) r0_mod = 1.0;

	// the initial solution may already be good enough
	if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
	{
		if (Pfp != nullptr) Pfp(instance, &m, sqrt(rk_mod)/n_size, &para, 0);
		return LCG_ALREADY_OPTIMIZIED;
	}
	else if (rk_mod/r0_mod <= para.epsilon)
	{
		if (Pfp != nullptr) Pfp(instance, &m, rk_mod/r0_mod, &para, 0);
		return LCG_ALREADY_OPTIMIZIED;
	}

	int t = 0;
	lcg_float residual;
	for (;;)
	{
		if (para.abs_diff) residual = std::sqrt(rk_mod)/n_size;
		else residual = rk_mod/r0_mod;

		// report the progress; the client may ask to stop
		if (Pfp != nullptr && Pfp(instance, &m, residual, &para, t))
		{
			return CLCG_STOP;
		}

		if (residual <= para.epsilon)
		{
			return CLCG_CONVERGENCE;
		}

		if (para.max_iterations > 0 && t >= para.max_iterations)
		{
			return LCG_REACHED_MAX_ITERATIONS;
		}

		++t;

		// products with the matrix for the direction and for its shadow
		Afp(instance, pk, Ax, MatNormal, NonConjugate);
		Afp(instance, psk, Asx, MatNormal, Conjugate);

		const std::complex<lcg_float> pkAx = psk.dot(Ax);
		const std::complex<lcg_float> ak = rhok/pkAx;

		m += ak*pk;
		// the shadow residual must be formed from 'rk' before 'rk' itself is updated
		rsk = rk.conjugate() - std::conj(ak)*Asx;
		rk -= ak*Ax;

		rk_mod = std::norm(rk.dot(rk));

		Mfp(instance, rk, zk, MatNormal, NonConjugate);

		const std::complex<lcg_float> rhok2 = rsk.dot(zk);
		const std::complex<lcg_float> betak = rhok2/rhok;
		rhok = rhok2;

		// new search direction and its shadow
		pk = zk + betak*pk;
		psk = zk.conjugate() + std::conj(betak)*psk;
	}
}
|
94
src/lib/clcg_eigen.h
Normal file
94
src/lib/clcg_eigen.h
Normal file
@ -0,0 +1,94 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _CLCG_EIGEN_H
|
||||
#define _CLCG_EIGEN_H
|
||||
|
||||
#include "util.h"
|
||||
#include "complex"
|
||||
#include "Eigen/Dense"
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||
* by a vertical vector 'x'.
|
||||
*
|
||||
* @param instance The user data sent for the solver functions by the client.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param prod_Ax Product of A multiplied by x.
|
||||
* @param layout layout information of the matrix A passed by the solver functions.
|
||||
* @param conjugate Conjugation information of the matrix A (whether the complex conjugate of A is used) passed by the solver functions.
|
||||
*/
|
||||
typedef void (*clcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminating the iteration
|
||||
* if necessary.
|
||||
*
|
||||
* @param instance The user data sent for the solver functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
|
||||
* @param param The parameter object passed by the solver functions.
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*clcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
|
||||
const clcg_para *param, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the solver function by the client.
|
||||
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is CLCG_CGS.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int clcg_solver_eigen(clcg_axfunc_eigen_ptr Afp, clcg_progress_eigen_ptr Pfp, Eigen::VectorXcd &m,
|
||||
const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_CGS);
|
||||
|
||||
/**
|
||||
* @brief A combined preconditioned conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the solver function by the client.
|
||||
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The value must be CLCG_PBICG (default) or CLCG_PCG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int clcg_solver_preconditioned_eigen(clcg_axfunc_eigen_ptr Afp, clcg_axfunc_eigen_ptr Mfp, clcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXcd &m, const Eigen::VectorXcd &B, const clcg_para* param, void* instance, clcg_solver_enum solver_id = CLCG_PBICG);
|
||||
|
||||
#endif // _CLCG_EIGEN_H
|
1419
src/lib/lcg.cpp
Normal file
1419
src/lib/lcg.cpp
Normal file
File diff suppressed because it is too large
Load Diff
171
src/lib/lcg.h
Normal file
171
src/lib/lcg.h
Normal file
@ -0,0 +1,171 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_H
|
||||
#define _LCG_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||
* by a vertical vector 'x'.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param prod_Ax Product of A multiplied by x.
|
||||
* @param n_size Size of x and column/row numbers of A.
|
||||
*/
|
||||
typedef void (*lcg_axfunc_ptr)(void* instance, const lcg_float* x, lcg_float* prod_Ax,
|
||||
const int n_size);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminating the iteration
|
||||
* if necessary.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
* @param param The parameter object passed by the solver functions.
|
||||
* @param n_size The size of the variables
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*lcg_progress_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para* param, const int n_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CGS.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CGS);
|
||||
|
||||
/**
|
||||
* @brief A combined preconditioned conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_preconditioned(lcg_axfunc_ptr Afp, lcg_axfunc_ptr Mfp, lcg_progress_ptr Pfp, lcg_float* m,
|
||||
const lcg_float* B, const int n_size, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function with inequality constraints.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] low The lower boundary of the acceptable solution.
|
||||
* @param[in] hig The higher boundary of the acceptable solution.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
|
||||
* @param P Precondition vector (optional except for the LCG_PCG method). The default value is NULL. NOTE(review): the declaration that follows has no parameter 'P' -- confirm and drop this entry.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_constrained(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const lcg_float* low, const lcg_float *hig, const int n_size, const lcg_para* param,
|
||||
void* instance, lcg_solver_enum solver_id = LCG_PG);
|
||||
|
||||
/**
|
||||
* @brief Standalone function of the Linear Conjugate Gradient algorithm
|
||||
*
|
||||
* @note To use the lcg() function for massive inversions, it is better to provide
|
||||
* external vectors Gk, Dk and ADk to avoid allocating and destroying temporary vectors.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector of the size n_size
|
||||
* @param[in] B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param[in] param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param Gk Conjugate gradient vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param Dk Directional gradient vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param ADk Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||
const lcg_para* param, void* instance, lcg_float* Gk = nullptr, lcg_float* Dk = nullptr,
|
||||
lcg_float* ADk = nullptr);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Standalone function of the Conjugate Gradient Squared algorithm.
|
||||
*
|
||||
* @note Algorithm 2 in "Generalized conjugate gradient squared" by Fokkema et al. (1996).
|
||||
*
|
||||
* @note To use the lcgs() function for massive inversions, it is better to provide
|
||||
* external vectors RK, R0T, PK, AX, UK, QK, and WK to avoid allocating and destroying temporary vectors.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'nullptr' for global functions.
|
||||
* @param RK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param R0T Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param PK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param AX Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param UK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param QK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
* @param WK Intermediate vector of the size n_size. If this pointer is null, the function will create an internal vector instead.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcgs(lcg_axfunc_ptr Afp, lcg_progress_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
|
||||
const lcg_para* param, void* instance, lcg_float* RK = nullptr, lcg_float* R0T = nullptr,
|
||||
lcg_float* PK = nullptr, lcg_float* AX = nullptr, lcg_float* UK = nullptr, lcg_float* QK = nullptr,
|
||||
lcg_float* WK = nullptr);
|
||||
|
||||
#endif // _LCG_H
|
496
src/lib/lcg_complex.cpp
Normal file
496
src/lib/lcg_complex.cpp
Normal file
@ -0,0 +1,496 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "random"
|
||||
|
||||
#include "lcg_complex.h"
|
||||
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
/**
 * @brief Allocate an array of complex values.
 *
 * @param[in] n Number of elements.
 *
 * @return Pointer to the array created with new[]. Release it with delete[]
 * (for example through clcg_free()).
 */
lcg_complex* clcg_malloc(int n)
{
	return new lcg_complex [n];
}
|
||||
|
||||
/**
 * @brief Allocate a two-dimensional array of complex values as 'm' separately
 * allocated rows of 'n' elements each.
 *
 * @param[in] m Number of rows.
 * @param[in] n Number of elements per row.
 *
 * @return Pointer to the array of row pointers.
 */
lcg_complex** clcg_malloc(int m, int n)
{
	lcg_complex **rows = new lcg_complex* [m];
	int r = 0;
	while (r < m)
	{
		rows[r] = new lcg_complex [n];
		++r;
	}
	return rows;
}
|
||||
|
||||
/**
 * @brief Release an array of complex values created with new[].
 *
 * @param x Pointer to the array. A null pointer is ignored. The pointer is passed
 * by value, so the caller's copy is not reset by this function.
 */
void clcg_free(lcg_complex* x)
{
	if (x == nullptr) return;
	delete[] x;
}
|
||||
|
||||
/**
 * @brief Release a two-dimensional array of complex values: every row first,
 * then the array of row pointers.
 *
 * @param x Pointer to the array of row pointers. A null pointer is ignored. The
 * pointer is passed by value, so the caller's copy is not reset by this function.
 * @param[in] m Number of rows.
 */
void clcg_free(lcg_complex **x, int m)
{
	if (x == nullptr) return;

	int r = 0;
	while (r < m)
	{
		delete[] x[r];
		++r;
	}
	delete[] x;
}
|
||||
|
||||
/**
 * @brief Set every element of a complex array to the same value.
 *
 * @param a Pointer to the array.
 * @param[in] b Value assigned to the elements.
 * @param[in] size Number of elements to set.
 */
void clcg_vecset(lcg_complex *a, lcg_complex b, int size)
{
	int k = 0;
	while (k < size)
	{
		a[k] = b;
		++k;
	}
}
|
||||
|
||||
/**
 * @brief Set every element of a two-dimensional complex array to the same value.
 *
 * @param a Pointer to the array of row pointers.
 * @param[in] b Value assigned to the elements.
 * @param[in] m Number of rows.
 * @param[in] n Number of elements per row.
 */
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n)
{
	for (int r = 0; r < m; ++r)
	{
		lcg_complex *row = a[r];
		for (int c = 0; c < n; ++c)
		{
			row[c] = b;
		}
	}
}
|
||||
|
||||
#ifdef LibLCG_STD_COMPLEX
|
||||
|
||||
/**
 * @brief Set the real and imaginary parts of a complex value.
 *
 * @param a Pointer to the complex value.
 * @param[in] r New real part.
 * @param[in] i New imaginary part.
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
	lcg_complex &target = *a;
	target.real(r);
	target.imag(i);
}
|
||||
|
||||
/**
 * @brief Squared magnitude of a complex value, as returned by std::norm().
 *
 * @param[in] a Pointer to the complex value.
 *
 * @return The squared magnitude.
 */
lcg_float clcg_square(const lcg_complex *a)
{
	const lcg_complex &value = *a;
	return std::norm(value);
}
|
||||
|
||||
/**
 * @brief Magnitude of a complex value, computed as the square root of std::norm().
 *
 * @param[in] a Pointer to the complex value.
 *
 * @return The magnitude.
 */
lcg_float clcg_module(const lcg_complex *a)
{
	const lcg_float squared = std::norm(*a);
	return sqrt(squared);
}
|
||||
|
||||
/**
 * @brief Complex conjugate of a complex value.
 *
 * @param[in] a Pointer to the complex value.
 *
 * @return The conjugate of *a.
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
	return std::conj(*a);
}
|
||||
|
||||
/**
 * @brief Fill a complex array with pseudo-random values.
 *
 * The real and imaginary parts are drawn independently with rand(); each part is
 * scaled into the range spanned by the corresponding parts of 'l' and 'h'.
 *
 * @note The generator is re-seeded with the current time on every call.
 *
 * @param a Pointer to the array.
 * @param[in] l Lower bounds of the real and imaginary parts.
 * @param[in] h Higher bounds of the real and imaginary parts.
 * @param[in] size Number of elements to fill.
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
	srand(time(0));

	int k = 0;
	while (k < size)
	{
		// the real part is drawn first, then the imaginary part
		a[k].real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
		a[k].imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
		++k;
	}
}
|
||||
|
||||
/**
 * @brief Fill a two-dimensional complex array with pseudo-random values.
 *
 * The real and imaginary parts are drawn independently with rand(); each part is
 * scaled into the range spanned by the corresponding parts of 'l' and 'h'. The
 * array is filled row by row.
 *
 * @note The generator is re-seeded with the current time on every call.
 *
 * @param a Pointer to the array of row pointers.
 * @param[in] l Lower bounds of the real and imaginary parts.
 * @param[in] h Higher bounds of the real and imaginary parts.
 * @param[in] m Number of rows.
 * @param[in] n Number of elements per row.
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
	srand(time(0));

	for (int r = 0; r < m; ++r)
	{
		lcg_complex *row = a[r];
		for (int c = 0; c < n; ++c)
		{
			// the real part is drawn first, then the imaginary part
			row[c].real((h.real()-l.real())*rand()*1.0/RAND_MAX + l.real());
			row[c].imag((h.imag()-l.imag())*rand()*1.0/RAND_MAX + l.imag());
		}
	}
}
|
||||
|
||||
/**
 * @brief Unconjugated product of two complex arrays: <a,b> = \sum{a_i \cdot b_i}.
 *
 * @param ret Receives the result.
 * @param[in] a Pointer to the first array.
 * @param[in] b Pointer to the second array.
 * @param[in] size Number of elements in both arrays.
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	lcg_float sum_re = 0.0, sum_im = 0.0;
	int k = 0;
	while (k < size)
	{
		sum_re += (a[k].real()*b[k].real() - a[k].imag()*b[k].imag());
		sum_im += (a[k].real()*b[k].imag() + a[k].imag()*b[k].real());
		++k;
	}
	ret.real(sum_re);
	ret.imag(sum_im);
}
|
||||
|
||||
/**
 * @brief Inner product of two complex arrays with the first one conjugated:
 * <a,b> = \sum{\bar{a_i} \cdot b_i}.
 *
 * @param ret Receives the result.
 * @param[in] a Pointer to the first array (the conjugated one).
 * @param[in] b Pointer to the second array.
 * @param[in] size Number of elements in both arrays.
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
	lcg_float sum_re = 0.0, sum_im = 0.0;
	int k = 0;
	while (k < size)
	{
		sum_re += (a[k].real()*b[k].real() + a[k].imag()*b[k].imag());
		sum_im += (a[k].real()*b[k].imag() - a[k].imag()*b[k].real());
		++k;
	}
	ret.real(sum_re);
	ret.imag(sum_im);
}
|
||||
|
||||
/**
 * @brief Product of a complex matrix and a complex vector.
 *
 * The matrix 'A' is stored as 'm_size' rows of 'n_size' elements. Depending on the
 * arguments, the function evaluates A*x, conj(A)*x, A^T*x or conj(A)^T*x.
 *
 * @param[in] A Pointer to the array of row pointers of the matrix.
 * @param[in] x Multiplier vector. It holds 'n_size' elements if layout is MatNormal
 * and 'm_size' elements otherwise.
 * @param Ax Receives the product. It holds 'm_size' elements if layout is MatNormal
 * and 'n_size' elements otherwise.
 * @param[in] m_size Number of rows of A.
 * @param[in] n_size Number of columns of A.
 * @param[in] layout MatNormal to use A as stored. Any other value uses the transpose of A.
 * @param[in] conjugate Conjugate to use the complex conjugate of the elements of A.
 * Any other value uses the elements as stored.
 *
 * @note The loop counters are signed, like the size arguments, so a non-positive
 * size results in no work instead of an unsigned wrap-around.
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
	int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
	// -1 flips the sign of the imaginary part of every element of A, i.e. uses conj(A)
	const lcg_float im_sign = (conjugate == Conjugate) ? -1.0 : 1.0;

	if (layout == MatNormal)
	{
		// one output element per row of A
#pragma omp parallel for schedule(guided)
		for (int i = 0; i < m_size; i++)
		{
			lcg_float re = 0.0, im = 0.0;
			for (int j = 0; j < n_size; j++)
			{
				const lcg_float a_re = A[i][j].real();
				const lcg_float a_im = im_sign*A[i][j].imag();
				re += (a_re*x[j].real() - a_im*x[j].imag());
				im += (a_re*x[j].imag() + a_im*x[j].real());
			}
			Ax[i].real(re); Ax[i].imag(im);
		}
		return;
	}

	// transposed layout: one output element per column of A
#pragma omp parallel for schedule(guided)
	for (int j = 0; j < n_size; j++)
	{
		lcg_float re = 0.0, im = 0.0;
		for (int i = 0; i < m_size; i++)
		{
			const lcg_float a_re = A[i][j].real();
			const lcg_float a_im = im_sign*A[i][j].imag();
			re += (a_re*x[i].real() - a_im*x[i].imag());
			im += (a_re*x[i].imag() + a_im*x[i].real());
		}
		Ax[j].real(re); Ax[j].imag(im);
	}
	return;
}
|
||||
|
||||
#else
|
||||
|
||||
/**
 * @brief Default constructor: real and imaginary parts both start at zero.
 */
lcg_complex::lcg_complex() : rel(0.0), img(0.0) {}
|
||||
|
||||
/**
 * @brief Construct a complex number from its two components.
 *
 * @param[in]  r  Real part
 * @param[in]  i  Imaginary part
 */
lcg_complex::lcg_complex(lcg_float r, lcg_float i) : rel(r), img(i) {}
|
||||
|
||||
/**
 * @brief Destructor. The type owns no resources, so there is nothing to release.
 */
lcg_complex::~lcg_complex()
{
}
|
||||
|
||||
/**
 * @brief Overwrite the real part.
 *
 * @param[in]  a  New real part
 */
void lcg_complex::real(lcg_float a) { this->rel = a; }
|
||||
|
||||
/**
 * @brief Overwrite the imaginary part.
 *
 * @param[in]  a  New imaginary part
 */
void lcg_complex::imag(lcg_float a) { this->img = a; }
|
||||
|
||||
/**
 * @brief Read the real part.
 *
 * @return The stored real component
 */
lcg_float lcg_complex::real() { return this->rel; }
|
||||
|
||||
/**
 * @brief Read the imaginary part.
 *
 * @return The stored imaginary component
 */
lcg_float lcg_complex::imag() { return this->img; }
|
||||
|
||||
bool operator==(const lcg_complex &a, const lcg_complex &b)
|
||||
{
|
||||
if (a.rel == b.rel && a.img == b.img)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator!=(const lcg_complex &a, const lcg_complex &b)
|
||||
{
|
||||
if (a.rel != b.rel || a.img != b.img)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * @brief Sum of two complex numbers, computed component by component.
 */
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b)
{
    return lcg_complex(a.rel + b.rel, a.img + b.img);
}
|
||||
|
||||
/**
 * @brief Difference a - b of two complex numbers, computed component by component.
 */
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b)
{
    return lcg_complex(a.rel - b.rel, a.img - b.img);
}
|
||||
|
||||
/**
 * @brief Product of two complex numbers:
 * (a.rel + i a.img)(b.rel + i b.img).
 */
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b)
{
    return lcg_complex(a.rel*b.rel - a.img*b.img,
                       a.rel*b.img + a.img*b.rel);
}
|
||||
|
||||
/**
 * @brief Scale a complex number by a real factor.
 *
 * @param[in]  a  Real factor
 * @param[in]  b  Complex number to scale
 */
lcg_complex operator*(const lcg_float &a, const lcg_complex &b)
{
    return lcg_complex(a*b.rel, a*b.img);
}
|
||||
|
||||
/**
 * @brief Quotient a / b of two complex numbers.
 *
 * @return a * conj(b) / |b|^2, or NaN in both parts when b is exactly zero
 */
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b)
{
    // A zero divisor is reported through NaN components rather than an exception
    if (b.rel == 0 && b.img == 0)
        return lcg_complex(NAN, NAN);

    const lcg_float denom = b.rel*b.rel + b.img*b.img; // |b|^2
    lcg_complex quot;
    quot.rel = (a.rel*b.rel + a.img*b.img)/denom;
    quot.img = (a.img*b.rel - a.rel*b.img)/denom;
    return quot;
}
|
||||
|
||||
/**
 * @brief Quotient a / b of a real number and a complex number.
 *
 * @return a * conj(b) / |b|^2, or NaN in both parts when b is exactly zero
 */
lcg_complex operator/(const lcg_float &a, const lcg_complex &b)
{
    // A zero divisor is reported through NaN components rather than an exception
    if (b.rel == 0 && b.img == 0)
        return lcg_complex(NAN, NAN);

    const lcg_float denom = b.rel*b.rel + b.img*b.img; // |b|^2
    lcg_complex quot;
    quot.rel = a*b.rel/denom;
    quot.img = -1.0*a*b.img/denom;
    return quot;
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const lcg_complex &a)
|
||||
{
|
||||
if (a.img >= 0)
|
||||
os << a.rel << "+" << a.img << "i";
|
||||
else
|
||||
os << a.rel << a.img << "i";
|
||||
return os;
|
||||
}
|
||||
|
||||
/**
 * @brief Assign both components of a complex number.
 *
 * @param      a  Complex number to modify
 * @param[in]  r  New real part
 * @param[in]  i  New imaginary part
 */
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i)
{
    a->real(r);
    a->imag(i);
}
|
||||
|
||||
/**
 * @brief Squared module of a complex number.
 *
 * @return rel^2 + img^2
 */
lcg_float clcg_square(const lcg_complex *a)
{
    const lcg_complex &z = *a;
    return z.rel * z.rel + z.img * z.img;
}
|
||||
|
||||
/**
 * @brief Module (absolute value) of a complex number.
 *
 * @return Square root of rel^2 + img^2
 */
lcg_float clcg_module(const lcg_complex *a)
{
    const lcg_float squared = a->rel * a->rel + a->img * a->img;
    return sqrt(squared);
}
|
||||
|
||||
/**
 * @brief Complex conjugate of a number.
 *
 * @return A copy of *a with the sign of the imaginary part flipped
 */
lcg_complex clcg_conjugate(const lcg_complex *a)
{
    return lcg_complex(a->rel, -1.0 * a->img);
}
|
||||
|
||||
/**
 * @brief Fill a complex vector with pseudo-random values.
 *
 * Each component is drawn independently: real parts between l.rel and h.rel,
 * imaginary parts between l.img and h.img.
 *
 * @param      a     Vector to fill
 * @param[in]  l     Lower bounds of the real and imaginary parts
 * @param[in]  h     Upper bounds of the real and imaginary parts
 * @param[in]  size  Number of elements to fill
 */
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size)
{
    // The generator is reseeded from the clock on every call; time() usually
    // has one-second resolution, so calls within the same second restart the
    // same sequence.
    srand(time(nullptr));

    const lcg_float span_re = h.rel-l.rel;
    const lcg_float span_im = h.img-l.img;

    for (int k = 0; k < size; ++k)
    {
        // Real part is drawn first, then the imaginary part
        a[k].rel = span_re*rand()*1.0/RAND_MAX + l.rel;
        a[k].img = span_im*rand()*1.0/RAND_MAX + l.img;
    }
}
|
||||
|
||||
/**
 * @brief Fill a 2D complex array with pseudo-random values.
 *
 * Each component is drawn independently: real parts between l.rel and h.rel,
 * imaginary parts between l.img and h.img. Elements are visited row by row.
 *
 * @param      a  Array accessed as a[i][j], i < m, j < n
 * @param[in]  l  Lower bounds of the real and imaginary parts
 * @param[in]  h  Upper bounds of the real and imaginary parts
 * @param[in]  m  Row count
 * @param[in]  n  Column count
 */
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n)
{
    // The generator is reseeded from the clock on every call; time() usually
    // has one-second resolution, so calls within the same second restart the
    // same sequence.
    srand(time(nullptr));

    const lcg_float span_re = h.rel-l.rel;
    const lcg_float span_im = h.img-l.img;

    for (int row = 0; row < m; ++row)
    {
        lcg_complex *line = a[row];
        for (int col = 0; col < n; ++col)
        {
            // Real part is drawn first, then the imaginary part
            line[col].rel = span_re*rand()*1.0/RAND_MAX + l.rel;
            line[col].img = span_im*rand()*1.0/RAND_MAX + l.img;
        }
    }
}
|
||||
|
||||
/**
 * @brief Unconjugated dot product of two complex vectors,
 * <a,b> = \sum{a_i \cdot b_i}.
 *
 * @param      ret   Reset to zero, then receives the accumulated sum
 * @param[in]  a     First vector
 * @param[in]  b     Second vector
 * @param[in]  size  Number of elements read from a and b
 */
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    ret.rel = 0.0;
    ret.img = 0.0;

    for (int k = 0; k < size; ++k)
    {
        const lcg_complex &u = a[k];
        const lcg_complex &v = b[k];
        // u * v expanded into its real and imaginary parts
        ret.rel += (u.rel*v.rel - u.img*v.img);
        ret.img += (u.rel*v.img + u.img*v.rel);
    }
}
|
||||
|
||||
/**
 * @brief Inner product of two complex vectors,
 * <a,b> = \sum{\bar{a_i} \cdot b_i}.
 *
 * @param      ret   Reset to zero, then receives the accumulated sum
 * @param[in]  a     Vector whose elements enter the sum conjugated
 * @param[in]  b     Vector whose elements enter the sum unchanged
 * @param[in]  size  Number of elements read from a and b
 */
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size)
{
    ret.rel = 0.0;
    ret.img = 0.0;

    for (int k = 0; k < size; ++k)
    {
        const lcg_complex &u = a[k];
        const lcg_complex &v = b[k];
        // conj(u) * v expanded into its real and imaginary parts
        ret.rel += (u.rel*v.rel + u.img*v.img);
        ret.img += (u.rel*v.img - u.img*v.rel);
    }
}
|
||||
|
||||
/**
 * @brief Product of a dense complex matrix (optionally transposed and/or
 * conjugated) with a complex vector.
 *
 * layout == MatNormal : Ax[r] = \sum_c op(A[r][c]) * x[c] for r in [0, m_size)
 * any other layout    : Ax[c] = \sum_r op(A[r][c]) * x[r] for c in [0, n_size)
 * where op() is the complex conjugate when conjugate == Conjugate and the
 * identity otherwise.
 *
 * @param      A          Matrix accessed as A[r][c], r < m_size, c < n_size
 * @param[in]  x          Input vector (n_size elements are read for MatNormal, m_size otherwise)
 * @param      Ax         Output vector (m_size elements are written for MatNormal, n_size otherwise)
 * @param[in]  m_size     Row count of A
 * @param[in]  n_size     Column count of A
 * @param[in]  layout     MatNormal to use A as stored; anything else uses the transpose
 * @param[in]  conjugate  Conjugate to use the complex conjugate of A's entries
 */
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax,
    int m_size, int n_size, lcg_matrix_e layout, clcg_complex_e conjugate)
{
    const bool use_conj = (conjugate == Conjugate);
    const bool by_rows = (layout == MatNormal);

    if (use_conj && by_rows)
    {
        // conj(A) * x
#pragma omp parallel for schedule(guided)
        for (int r = 0; r < m_size; r++)
        {
            lcg_float acc_re = 0.0, acc_im = 0.0;
            for (int c = 0; c < n_size; c++)
            {
                const lcg_complex &e = A[r][c];
                acc_re += (e.rel*x[c].rel + e.img*x[c].img);
                acc_im += (e.rel*x[c].img - e.img*x[c].rel);
            }
            Ax[r].rel = acc_re;
            Ax[r].img = acc_im;
        }
    }
    else if (use_conj)
    {
        // A^H * x: conjugated entries, walked column by column
#pragma omp parallel for schedule(guided)
        for (int c = 0; c < n_size; c++)
        {
            lcg_float acc_re = 0.0, acc_im = 0.0;
            for (int r = 0; r < m_size; r++)
            {
                const lcg_complex &e = A[r][c];
                acc_re += (e.rel*x[r].rel + e.img*x[r].img);
                acc_im += (e.rel*x[r].img - e.img*x[r].rel);
            }
            Ax[c].rel = acc_re;
            Ax[c].img = acc_im;
        }
    }
    else if (by_rows)
    {
        // A * x
#pragma omp parallel for schedule(guided)
        for (int r = 0; r < m_size; r++)
        {
            lcg_float acc_re = 0.0, acc_im = 0.0;
            for (int c = 0; c < n_size; c++)
            {
                const lcg_complex &e = A[r][c];
                acc_re += (e.rel*x[c].rel - e.img*x[c].img);
                acc_im += (e.rel*x[c].img + e.img*x[c].rel);
            }
            Ax[r].rel = acc_re;
            Ax[r].img = acc_im;
        }
    }
    else
    {
        // A^T * x: plain entries, walked column by column
#pragma omp parallel for schedule(guided)
        for (int c = 0; c < n_size; c++)
        {
            lcg_float acc_re = 0.0, acc_im = 0.0;
            for (int r = 0; r < m_size; r++)
            {
                const lcg_complex &e = A[r][c];
                acc_re += (e.rel*x[r].rel - e.img*x[r].img);
                acc_im += (e.rel*x[r].img + e.img*x[r].rel);
            }
            Ax[c].rel = acc_re;
            Ax[c].img = acc_im;
        }
    }
}
|
||||
|
||||
#endif // LibLCG_SYSTEM_COMPLEX
|
329
src/lib/lcg_complex.h
Normal file
329
src/lib/lcg_complex.h
Normal file
@ -0,0 +1,329 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_COMPLEX_H
|
||||
#define _LCG_COMPLEX_H
|
||||
|
||||
#include "iostream"
|
||||
|
||||
#include "algebra.h"
|
||||
#ifdef LibLCG_STD_COMPLEX
|
||||
|
||||
#include "complex"
|
||||
|
||||
typedef std::complex<lcg_float> lcg_complex;
|
||||
|
||||
#else
|
||||
|
||||
/**
|
||||
* @brief A simple definition of the complex number type.
|
||||
* Easy to change in the future. Right now it is just two double variables
|
||||
*/
|
||||
struct lcg_complex
|
||||
{
|
||||
lcg_float rel; ///< The real part
|
||||
lcg_float img; ///< The imaginary part
|
||||
|
||||
/**
|
||||
* @brief Constructs a new instance.
|
||||
*/
|
||||
lcg_complex();
|
||||
/**
|
||||
* @brief Constructs a new instance.
|
||||
*
|
||||
* @param[in] r The real part of the complex number
|
||||
* @param[in] i The imaginary part of the complex number
|
||||
*/
|
||||
lcg_complex(lcg_float r, lcg_float i);
|
||||
/**
|
||||
* @brief Destructor
|
||||
*/
|
||||
virtual ~lcg_complex();
|
||||
|
||||
/**
|
||||
* @brief Set real part of a complex number
|
||||
*
|
||||
* @param a Input value
|
||||
*/
|
||||
void real(lcg_float a);
|
||||
|
||||
/**
|
||||
* @brief Set image part of a complex number
|
||||
*
|
||||
* @param a Input value
|
||||
*/
|
||||
void imag(lcg_float a);
|
||||
|
||||
/**
|
||||
* @brief Get real part of a complex number
|
||||
*
|
||||
* @return lcg_float Real component
|
||||
*/
|
||||
lcg_float real();
|
||||
|
||||
/**
|
||||
* @brief Get image part of a complex number
|
||||
*
|
||||
* @return lcg_float Image component
|
||||
*/
|
||||
lcg_float imag();
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Overloaded equality operator.
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return equal or not
|
||||
*/
|
||||
bool operator==(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded inequality operator.
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return unequal or not
|
||||
*/
|
||||
bool operator!=(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded addition operator.
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return sum
|
||||
*/
|
||||
lcg_complex operator+(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded subtraction operator.
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return subtraction
|
||||
*/
|
||||
lcg_complex operator-(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded multiplication operator (complex times complex).
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return product
|
||||
*/
|
||||
lcg_complex operator*(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded multiplication operator (real times complex).
|
||||
*
|
||||
* @param[in] a real number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return product
|
||||
*/
|
||||
lcg_complex operator*(const lcg_float &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded division operator (complex divided by complex).
|
||||
*
|
||||
* @param[in] a complex number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return quotient
|
||||
*/
|
||||
lcg_complex operator/(const lcg_complex &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded division operator (real divided by complex).
|
||||
*
|
||||
* @param[in] a real number a
|
||||
* @param[in] b complex number b
|
||||
*
|
||||
* @return quotient
|
||||
*/
|
||||
lcg_complex operator/(const lcg_float &a, const lcg_complex &b);
|
||||
|
||||
/**
|
||||
* @brief Overloaded stream insertion operator.
|
||||
*
|
||||
* @param os The ostream
|
||||
* @param[in] a complex number a
|
||||
*
|
||||
* @return The ostream
|
||||
*/
|
||||
std::ostream &operator<<(std::ostream &os, const lcg_complex &a);
|
||||
|
||||
#endif // LibLCG_STD_COMPLEX
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for an array of lcg_complex.
|
||||
*
|
||||
* @param[in] n Size of the lcg_complex array.
|
||||
*
|
||||
* @return Pointer of the array's location.
|
||||
*/
|
||||
lcg_complex* clcg_malloc(int n);
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for a 2D lcg_complex array (lcg_complex**).
|
||||
*
|
||||
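* @param[in] m First dimension of the lcg_complex array (presumably the row count).
* @param[in] n Second dimension of the lcg_complex array (presumably the column count).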
|
||||
*
|
||||
* @return Pointer of the array's location.
|
||||
*/
|
||||
lcg_complex** clcg_malloc(int m, int n);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the lcg_complex type array.
|
||||
*
|
||||
* @param x Pointer of the array.
|
||||
*/
|
||||
void clcg_free(lcg_complex* x);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the 2D lcg_complex type array.
|
||||
*
|
||||
* @param x Pointer of the array.
|
||||
*/
|
||||
void clcg_free(lcg_complex **x, int m);
|
||||
|
||||
/**
|
||||
* @brief set a complex vector's value
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] b initial value
|
||||
* @param[in] size vector size
|
||||
*/
|
||||
void clcg_vecset(lcg_complex *a, lcg_complex b, int size);
|
||||
|
||||
/**
|
||||
* @brief set a 2d complex vector's value
|
||||
*
|
||||
* @param a pointer of the matrix
|
||||
* @param[in] b initial value
|
||||
* @param[in] m row size of the matrix
|
||||
* @param[in] n column size of the matrix
|
||||
*/
|
||||
void clcg_vecset(lcg_complex **a, lcg_complex b, int m, int n);
|
||||
|
||||
/**
|
||||
* @brief setup a complex number
|
||||
*
|
||||
* @param[in] r The real part of the complex number
|
||||
* @param[in] i The imaginary part of the complex number
|
||||
*/
|
||||
void clcg_set(lcg_complex *a, lcg_float r, lcg_float i);
|
||||
|
||||
/**
|
||||
* @brief Calculate the squared module of a complex number
|
||||
*
|
||||
* @return The squared module
|
||||
*/
|
||||
lcg_float clcg_square(const lcg_complex *a);
|
||||
/**
|
||||
* @brief Calculate the module of a complex number
|
||||
*
|
||||
* @return The module
|
||||
*/
|
||||
lcg_float clcg_module(const lcg_complex *a);
|
||||
/**
|
||||
* @brief Calculate the conjugate of a complex number
|
||||
*
|
||||
* @return The complex conjugate.
|
||||
*/
|
||||
lcg_complex clcg_conjugate(const lcg_complex *a);
|
||||
|
||||
/**
|
||||
* @brief set a complex vector using random values
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] l the lower bound of random values
|
||||
* @param[in] h the higher bound of random values
|
||||
* @param[in] size size of the vector
|
||||
*/
|
||||
void clcg_vecrnd(lcg_complex *a, lcg_complex l, lcg_complex h, int size);
|
||||
|
||||
/**
|
||||
* @brief set a 2D complex vector using random values
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] l the lower bound of random values
|
||||
* @param[in] h the higher bound of random values
|
||||
* @param[in] m row size of the vector
|
||||
* @param[in] n column size of the vector
|
||||
*/
|
||||
void clcg_vecrnd(lcg_complex **a, lcg_complex l, lcg_complex h, int m, int n);
|
||||
|
||||
/**
|
||||
* @brief calculate dot product of two complex vectors
|
||||
*
|
||||
* the product of two complex vectors are defined as <a, b> = \sum{a_i \cdot b_i}
|
||||
*
|
||||
* @param[in] a complex vector a
|
||||
* @param[in] b complex vector b
|
||||
* @param[in] size size of the vector
|
||||
*
|
||||
* @return product
|
||||
*/
|
||||
void clcg_dot(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
|
||||
|
||||
/**
|
||||
* @brief calculate inner product of two complex vectors
|
||||
*
|
||||
* the product of two complex vectors are defined as <a, b> = \sum{\bar{a_i} \cdot b_i}
|
||||
*
|
||||
* @param[in] a complex vector a
|
||||
* @param[in] b complex vector b
|
||||
* @param[in] size size of the vector
|
||||
*
|
||||
* @return product
|
||||
*/
|
||||
void clcg_inner(lcg_complex &ret, const lcg_complex *a, const lcg_complex *b, int size);
|
||||
|
||||
/**
|
||||
* @brief calculate product of a complex matrix and a complex vector
|
||||
*
|
||||
* the inner product of two complex vectors is defined as <a, b> = \sum{\bar{a_i} \cdot b_i}.
|
||||
* Different configurations:
|
||||
* layout=Normal,conjugate=false -> A
|
||||
* layout=Transpose,conjugate=false -> A^T
|
||||
* layout=Normal,conjugate=true -> \bar{A}
|
||||
* layout=Transpose,conjugate=true -> A^H
|
||||
*
|
||||
* @param A complex matrix A
|
||||
* @param[in] x complex vector x
|
||||
* @param Ax product of Ax
|
||||
* @param[in] m_size row size of A
|
||||
* @param[in] n_size column size of A
|
||||
* @param[in] layout layout of A used for multiplication. Must be Normal or Transpose
|
||||
* @param[in] conjugate whether to use the complex conjugate of A for calculation
|
||||
*/
|
||||
void clcg_matvec(lcg_complex **A, const lcg_complex *x, lcg_complex *Ax, int m_size, int n_size,
|
||||
lcg_matrix_e layout = MatNormal, clcg_complex_e conjugate = NonConjugate);
|
||||
|
||||
#endif // _LCG_COMPLEX_H
|
356
src/lib/lcg_complex_cuda.cu
Normal file
356
src/lib/lcg_complex_cuda.cu
Normal file
@ -0,0 +1,356 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "lcg_complex_cuda.h"
|
||||
#include "complex"
|
||||
#include "map"
|
||||
|
||||
/**
 * @brief Kernel: copy the diagonal entries of a CSR matrix (single precision
 * complex) into A_diag.
 *
 * Each thread handles the row whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= A_len do
 * nothing. A_diag[row] is written only when the row stores an entry whose
 * column index equals the row index.
 *
 * @param[in]  A_row   Row offsets; entries A_row[row] and A_row[row + 1] are read
 * @param[in]  A_col   Column index of every stored entry
 * @param[in]  A_val   Value of every stored entry
 * @param[in]  A_len   Number of rows to process
 * @param      A_diag  Output array of diagonal values
 */
__global__ void smCcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    const int first = A_row[row];
    const int last = A_row[row + 1];
    for (int p = first; p < last; p++)
    {
        if (A_col[p] == row)
        {
            A_diag[row] = A_val[p];
            return; // the first matching entry is the one that is kept
        }
    }
}
|
||||
|
||||
/**
 * @brief Kernel: copy the diagonal entries of a CSR matrix (double precision
 * complex) into A_diag.
 *
 * Each thread handles the row whose index is
 * blockIdx.x * blockDim.x + threadIdx.x; threads with an index >= A_len do
 * nothing. A_diag[row] is written only when the row stores an entry whose
 * column index equals the row index.
 *
 * @param[in]  A_row   Row offsets; entries A_row[row] and A_row[row + 1] are read
 * @param[in]  A_col   Column index of every stored entry
 * @param[in]  A_val   Value of every stored entry
 * @param[in]  A_len   Number of rows to process
 * @param      A_diag  Output array of diagonal values
 */
__global__ void smZcsr_get_diagonal_device(const int *A_row, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag)
{
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= A_len) return;

    const int first = A_row[row];
    const int last = A_row[row + 1];
    for (int p = first; p < last; p++)
    {
        if (A_col[p] == row)
        {
            A_diag[row] = A_val[p];
            return; // the first matching entry is the one that is kept
        }
    }
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise product c[i] = a[i] * b[i] of two single
 * precision complex vectors.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n.
 */
__global__ void vecMvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = cuCmulf(a[idx], b[idx]);
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise product c[i] = a[i] * b[i] of two double
 * precision complex vectors.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n.
 */
__global__ void vecMvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = cuCmul(a[idx], b[idx]);
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise quotient c[i] = a[i] / b[i] of two single
 * precision complex vectors.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n.
 */
__global__ void vecDvecC_element_wise_device(const cuComplex *a, const cuComplex *b, cuComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = cuCdivf(a[idx], b[idx]);
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise quotient c[i] = a[i] / b[i] of two double
 * precision complex vectors.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n.
 */
__global__ void vecDvecZ_element_wise_device(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    c[idx] = cuCdiv(a[idx], b[idx]);
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise complex conjugate ca[i] = conj(a[i]) of a
 * single precision complex vector.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n. Each thread reads a[i] before writing
 * ca[i], so ca may be the same array as a.
 *
 * @param[in]  a   Input vector
 * @param      ca  Output vector receiving the conjugated values
 * @param[in]  n   Number of elements
 */
__global__ void vecC_conjugate_device(const cuComplex *a, cuComplex *ca, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        // cuConjf negates the imaginary part in single precision; multiplying
        // by the double literal -1.0 would promote the operation to double.
        ca[i] = cuConjf(a[i]);
    }
    return;
}
|
||||
|
||||
/**
 * @brief Kernel: element-wise complex conjugate ca[i] = conj(a[i]) of a
 * double precision complex vector.
 *
 * Each thread handles index blockIdx.x * blockDim.x + threadIdx.x and does
 * nothing when that index is >= n.
 *
 * @param[in]  a   Input vector
 * @param      ca  Output vector receiving the conjugated values
 * @param[in]  n   Number of elements
 */
__global__ void vecZ_conjugate_device(const cuDoubleComplex *a, cuDoubleComplex *ca, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    const cuDoubleComplex value = a[idx];
    ca[idx] = make_cuDoubleComplex(value.x, -value.y);
}
|
||||
|
||||
/**
 * @brief Convert a CUDA double precision complex number to lcg_complex.
 *
 * @param a CUDA complex number (x holds the real part, y the imaginary part)
 * @return lcg_complex with the same real and imaginary parts
 */
lcg_complex cuda2lcg_complex(cuDoubleComplex a)
{
    lcg_complex converted(a.x, a.y);
    return converted;
}
|
||||
|
||||
#ifdef LibLCG_STD_COMPLEX
|
||||
|
||||
/**
 * @brief Convert an lcg_complex (std::complex build) to a CUDA double
 * precision complex number.
 *
 * @param a lcg complex number
 * @return cuDoubleComplex with x = real part and y = imaginary part
 */
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex converted;
    converted.x = a.real();
    converted.y = a.imag();
    return converted;
}
|
||||
|
||||
#else
|
||||
|
||||
/**
 * @brief Convert an lcg_complex (custom struct build) to a CUDA double
 * precision complex number.
 *
 * @param a lcg complex number
 * @return cuDoubleComplex with x = real part and y = imaginary part
 */
cuDoubleComplex lcg2cuda_complex(lcg_complex a)
{
    cuDoubleComplex o;
    // rel and img are data members of the lcg_complex struct, not member
    // functions, so they are read directly.
    o.x = a.rel; o.y = a.img;
    return o;
}
|
||||
|
||||
#endif // LibLCG_STD_COMPLEX
|
||||
|
||||
/**
 * @brief Allocate an array of cuDoubleComplex with operator new[].
 *
 * The array is created with new[], so it must be released with delete[]
 * (which is what clcg_free_cuda does).
 *
 * @param[in]  n  Number of elements
 *
 * @return Pointer to the first element of the new array
 */
cuDoubleComplex* clcg_malloc_cuda(size_t n)
{
    return new cuDoubleComplex [n];
}
|
||||
|
||||
/**
 * @brief Release an array with delete[].
 *
 * The pointer is passed by value, so the caller's copy is left unchanged.
 *
 * @param x Pointer to the array; a null pointer is accepted
 */
void clcg_free_cuda(cuDoubleComplex *x)
{
    // delete[] on a null pointer is a no-op, so no explicit test is needed
    delete[] x;
}
|
||||
|
||||
/**
 * @brief Set every element of a cuDoubleComplex array to the same value.
 *
 * The array is written directly through the pointer from this host function.
 *
 * @param      a     Array to fill
 * @param[in]  b     Value copied into every element
 * @param[in]  size  Number of elements
 */
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size)
{
    for (size_t k = 0; k < size; ++k)
    {
        a[k] = b;
    }
}
|
||||
|
||||
/**
 * @brief Host side function that scales a cuComplex number by a real factor.
 *
 * The factor has type lcg_float so that this definition matches the
 * prototype in lcg_complex_cuda.h; it is narrowed to float once so the
 * products are formed in single precision.
 *
 * @param s Scale factor
 * @param a Complex number
 * @return cuComplex Scaled complex number
 */
cuComplex clcg_Cscale(lcg_float s, cuComplex a)
{
    const float factor = static_cast<float>(s);
    cuComplex o;
    o.x = factor*a.x;
    o.y = factor*a.y;
    return o;
}
|
||||
|
||||
/**
 * @brief Host side sum of two cuComplex numbers.
 *
 * @return cuComplex a + b, computed component by component
 */
cuComplex clcg_Csum(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x + b.x, a.y + b.y);
}
|
||||
|
||||
/**
 * @brief Host side difference of two cuComplex numbers.
 *
 * @return cuComplex a - b, computed component by component
 */
cuComplex clcg_Cdiff(cuComplex a, cuComplex b)
{
    return make_cuFloatComplex(a.x - b.x, a.y - b.y);
}
|
||||
|
||||
/**
 * @brief Square root of a cuComplex number, evaluated on the host with
 * std::sqrt on std::complex<float>.
 *
 * @param a Complex number
 * @return cuComplex Root value
 */
cuComplex clcg_Csqrt(cuComplex a)
{
    const std::complex<float> root = std::sqrt(std::complex<float>(a.x, a.y));

    cuComplex result;
    result.x = root.real();
    result.y = root.imag();
    return result;
}
|
||||
|
||||
/**
 * @brief Host side function that scales a cuDoubleComplex number by a real factor.
 *
 * @param s Scale factor
 * @param a Complex number
 * @return cuDoubleComplex Scaled complex number
 */
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a)
{
    cuDoubleComplex scaled = a;
    scaled.x = s*a.x;
    scaled.y = s*a.y;
    return scaled;
}
|
||||
|
||||
/**
 * @brief Host side sum of two cuDoubleComplex numbers.
 *
 * @return cuDoubleComplex a + b, computed component by component
 */
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x + b.x, a.y + b.y);
}
|
||||
|
||||
/**
 * @brief Host side difference of two cuDoubleComplex numbers.
 *
 * @return cuDoubleComplex a - b, computed component by component
 */
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b)
{
    return make_cuDoubleComplex(a.x - b.x, a.y - b.y);
}
|
||||
|
||||
/**
 * @brief Square root of a cuDoubleComplex number, evaluated on the host with
 * std::sqrt on std::complex<lcg_float>.
 *
 * @param a Complex number
 * @return cuDoubleComplex Root value
 */
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a)
{
    const std::complex<lcg_float> root = std::sqrt(std::complex<lcg_float>(a.x, a.y));

    cuDoubleComplex result;
    result.x = root.real();
    result.y = root.imag();
    return result;
}
|
||||
|
||||
/**
 * @brief Reorder the entries of a COO matrix (single precision complex) so
 * that they are sorted by column first and row second, swapping the row and
 * column indices on output.
 *
 * Every input entry (A_row[k], A_col[k], A[k]) is keyed by
 * N*A_col[k] + A_row[k] in an ordered map; the map is then walked in
 * ascending key order and each entry is written as
 * (Ac_row, Ac_col, Ac_val) = (key / N, key % N, value). Entries that share
 * the same (row, column) pair share a key, so only the last one is kept and
 * fewer than nz outputs are written in that case.
 *
 * Nothing is written when N <= 0 or nz <= 0.
 *
 * @param[in]  A_row   Row index of every input entry
 * @param[in]  A_col   Column index of every input entry
 * @param[in]  A       Value of every input entry
 * @param[in]  N       Multiplier used to build the sort key (must exceed every row index)
 * @param[in]  nz      Number of input entries
 * @param      Ac_row  Output: first index of every reordered entry
 * @param      Ac_col  Output: second index of every reordered entry
 * @param      Ac_val  Output: value of every reordered entry
 */
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val)
{
    // A non-positive N would make the key decoding divide by zero or wrap,
    // and a negative nz would wrap when compared against an unsigned counter.
    if (N <= 0 || nz <= 0) return;

    const size_t dim = static_cast<size_t>(N);
    const size_t count = static_cast<size_t>(nz);

    std::map<size_t, cuComplex> sort_map;
    for (size_t k = 0; k < count; k++)
    {
        // Widen before multiplying: N*A_col[k] evaluated in int overflows as
        // soon as the product exceeds INT_MAX.
        const size_t key = dim*static_cast<size_t>(A_col[k]) + static_cast<size_t>(A_row[k]);
        sort_map[key] = A[k];
    }

    size_t out = 0;
    for (std::map<size_t, cuComplex>::const_iterator it = sort_map.begin(); it != sort_map.end(); ++it, ++out)
    {
        // exchange the row and column indices to rotate the matrix
        Ac_row[out] = static_cast<int>(it->first/dim);
        Ac_col[out] = static_cast<int>(it->first%dim);
        Ac_val[out] = it->second;
    }
    return;
}
|
||||
|
||||
/**
 * @brief Reorder the entries of a COO matrix (double precision complex) so
 * that they are sorted by column first and row second, swapping the row and
 * column indices on output.
 *
 * Every input entry (A_row[k], A_col[k], A[k]) is keyed by
 * N*A_col[k] + A_row[k] in an ordered map; the map is then walked in
 * ascending key order and each entry is written as
 * (Ac_row, Ac_col, Ac_val) = (key / N, key % N, value). Entries that share
 * the same (row, column) pair share a key, so only the last one is kept and
 * fewer than nz outputs are written in that case.
 *
 * Nothing is written when N <= 0 or nz <= 0.
 *
 * @param[in]  A_row   Row index of every input entry
 * @param[in]  A_col   Column index of every input entry
 * @param[in]  A       Value of every input entry
 * @param[in]  N       Multiplier used to build the sort key (must exceed every row index)
 * @param[in]  nz      Number of input entries
 * @param      Ac_row  Output: first index of every reordered entry
 * @param      Ac_col  Output: second index of every reordered entry
 * @param      Ac_val  Output: value of every reordered entry
 */
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val)
{
    // A non-positive N would make the key decoding divide by zero or wrap,
    // and a negative nz would wrap when compared against an unsigned counter.
    if (N <= 0 || nz <= 0) return;

    const size_t dim = static_cast<size_t>(N);
    const size_t count = static_cast<size_t>(nz);

    std::map<size_t, cuDoubleComplex> sort_map;
    for (size_t k = 0; k < count; k++)
    {
        // Widen before multiplying: N*A_col[k] evaluated in int overflows as
        // soon as the product exceeds INT_MAX.
        const size_t key = dim*static_cast<size_t>(A_col[k]) + static_cast<size_t>(A_row[k]);
        sort_map[key] = A[k];
    }

    size_t out = 0;
    for (std::map<size_t, cuDoubleComplex>::const_iterator it = sort_map.begin(); it != sort_map.end(); ++it, ++out)
    {
        // exchange the row and column indices to rotate the matrix
        Ac_row[out] = static_cast<int>(it->first/dim);
        Ac_col[out] = static_cast<int>(it->first%dim);
        Ac_val[out] = it->second;
    }
    return;
}
|
||||
|
||||
/**
 * @brief Launch smCcsr_get_diagonal_device with one thread per matrix row.
 *
 * The grid holds ceil(A_len / bk_size) blocks of bk_size threads. The launch
 * is issued without any error query or synchronization here. Nothing is
 * launched when A_len <= 0 or bk_size <= 0.
 *
 * @param[in]  A_ptr    Row offsets of the CSR matrix
 * @param[in]  A_col    Column index of every stored entry
 * @param[in]  A_val    Value of every stored entry
 * @param[in]  A_len    Number of rows
 * @param      A_diag   Output array of diagonal values
 * @param[in]  bk_size  Threads per block
 */
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (A_len <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (A_len + blockSize - 1) / blockSize;
    smCcsr_get_diagonal_device<<<numBlocks, blockSize>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
|
||||
|
||||
/**
 * @brief Launch smZcsr_get_diagonal_device with one thread per matrix row.
 *
 * The grid holds ceil(A_len / bk_size) blocks of bk_size threads. The launch
 * is issued without any error query or synchronization here. Nothing is
 * launched when A_len <= 0 or bk_size <= 0.
 *
 * @param[in]  A_ptr    Row offsets of the CSR matrix
 * @param[in]  A_col    Column index of every stored entry
 * @param[in]  A_val    Value of every stored entry
 * @param[in]  A_len    Number of rows
 * @param      A_diag   Output array of diagonal values
 * @param[in]  bk_size  Threads per block
 */
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (A_len <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (A_len + blockSize - 1) / blockSize;
    smZcsr_get_diagonal_device<<<numBlocks, blockSize>>>(A_ptr, A_col, A_val, A_len, A_diag);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecMvecC_element_wise_device to compute c[i] = a[i] * b[i].
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        First input vector
 * @param[in]  b        Second input vector
 * @param      c        Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecMvecC_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecMvecZ_element_wise_device to compute c[i] = a[i] * b[i].
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        First input vector
 * @param[in]  b        Second input vector
 * @param      c        Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecMvecZ_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecDvecC_element_wise_device to compute c[i] = a[i] / b[i].
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        Numerator vector
 * @param[in]  b        Denominator vector
 * @param      c        Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecDvecC_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecDvecZ_element_wise_device to compute c[i] = a[i] / b[i].
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        Numerator vector
 * @param[in]  b        Denominator vector
 * @param      c        Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecDvecZ_element_wise_device<<<numBlocks, blockSize>>>(a, b, c, n);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecC_conjugate_device to compute ca[i] = conj(a[i]).
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        Input vector
 * @param      ca       Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecC_conjugate_device<<<numBlocks, blockSize>>>(a, ca, n);
    return;
}
|
||||
|
||||
/**
 * @brief Launch vecZ_conjugate_device to compute ca[i] = conj(a[i]).
 *
 * The grid holds ceil(n / bk_size) blocks of bk_size threads. The launch is
 * issued without any error query or synchronization here. Nothing is
 * launched when n <= 0 or bk_size <= 0.
 *
 * @param[in]  a        Input vector
 * @param      ca       Output vector
 * @param[in]  n        Number of elements
 * @param[in]  bk_size  Threads per block
 */
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size)
{
    // bk_size == 0 would divide by zero below, and a grid of zero blocks is
    // an invalid launch configuration.
    if (n <= 0 || bk_size <= 0) return;

    int blockSize = bk_size;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vecZ_conjugate_device<<<numBlocks, blockSize>>>(a, ca, n);
    return;
}
|
278
src/lib/lcg_complex_cuda.h
Normal file
278
src/lib/lcg_complex_cuda.h
Normal file
@ -0,0 +1,278 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_COMPLEX_CUDA_H
|
||||
#define _LCG_COMPLEX_CUDA_H
|
||||
|
||||
#include "lcg_complex.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuComplex.h>
|
||||
|
||||
/**
|
||||
* @brief Convert cuda complex number to lcg complex number
|
||||
*
|
||||
* @param a CUDA complex number
|
||||
* @return lcg_complex lcg complex number
|
||||
*/
|
||||
lcg_complex cuda2lcg_complex(cuDoubleComplex a);
|
||||
|
||||
/**
|
||||
* @brief Convert lcg complex number to CUDA complex number
|
||||
*
|
||||
* @param a lcg complex number
|
||||
* @return cuDoubleComplex CUDA complex number
|
||||
*/
|
||||
cuDoubleComplex lcg2cuda_complex(lcg_complex a);
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for an array of cuDoubleComplex.
|
||||
*
|
||||
* @param[in] n Size of the cuDoubleComplex array.
|
||||
*
|
||||
* @return Pointer of the array's location.
|
||||
*/
|
||||
cuDoubleComplex* clcg_malloc_cuda(size_t n);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the cuDoubleComplex type array.
|
||||
*
|
||||
* @param x Pointer of the array.
|
||||
*/
|
||||
void clcg_free_cuda(cuDoubleComplex *x);
|
||||
|
||||
/**
|
||||
* @brief set a complex vector's value
|
||||
*
|
||||
* @param a pointer of the vector
|
||||
* @param[in] b initial value
|
||||
* @param[in] size vector size
|
||||
*/
|
||||
void clcg_vecset_cuda(cuDoubleComplex *a, cuDoubleComplex b, size_t size);
|
||||
|
||||
/**
|
||||
* @brief Host side function for scaling a cuComplex object
|
||||
*
|
||||
* @param s scale factor
|
||||
* @param a Complex number
|
||||
* @return cuComplex scaled complex number
|
||||
*/
|
||||
cuComplex clcg_Cscale(lcg_float s, cuComplex a);
|
||||
|
||||
/**
|
||||
* @brief Calculate the sum of two cuda complex number. This is a host side function.
|
||||
*
|
||||
* @param a Complex number
|
||||
* @param b Complex number
|
||||
* @return cuComplex Sum of the input complex number
|
||||
*/
|
||||
cuComplex clcg_Csum(cuComplex a, cuComplex b);
|
||||
|
||||
/**
|
||||
* @brief Calculate the difference of two cuda complex number. This is a host side function.
|
||||
*
|
||||
* @param a Complex number
|
||||
* @param b Complex number
|
||||
* @return cuComplex Difference of the input complex number
|
||||
*/
|
||||
cuComplex clcg_Cdiff(cuComplex a, cuComplex b);
|
||||
|
||||
/**
|
||||
* @brief Calculate the sqrt() of a cuda complex number
|
||||
*
|
||||
* @param a Complex number
|
||||
* @return cuComplex root value
|
||||
*/
|
||||
cuComplex clcg_Csqrt(cuComplex a);
|
||||
|
||||
/**
|
||||
* @brief Host side function for scaling a cuDoubleComplex object
|
||||
*
|
||||
* @param s scale factor
|
||||
* @param a Complex number
|
||||
* @return cuDoubleComplex scaled complex number
|
||||
*/
|
||||
cuDoubleComplex clcg_Zscale(lcg_float s, cuDoubleComplex a);
|
||||
|
||||
/**
|
||||
* @brief Calculate the sum of two cuda complex number. This is a host side function.
|
||||
*
|
||||
* @param a Complex number
|
||||
* @param b Complex number
|
||||
* @return cuDoubleComplex Sum of the input complex number
|
||||
*/
|
||||
cuDoubleComplex clcg_Zsum(cuDoubleComplex a, cuDoubleComplex b);
|
||||
|
||||
/**
|
||||
* @brief Calculate the difference of two cuda complex number. This is a host side function.
|
||||
*
|
||||
* @param a Complex number
|
||||
* @param b Complex number
|
||||
* @return cuDoubleComplex Difference of the input complex number
|
||||
*/
|
||||
cuDoubleComplex clcg_Zdiff(cuDoubleComplex a, cuDoubleComplex b);
|
||||
|
||||
/**
|
||||
* @brief Calculate the sqrt() of a cuda complex number
|
||||
*
|
||||
* @param a Complex number
|
||||
* @return cuDoubleComplex root value
|
||||
*/
|
||||
cuDoubleComplex clcg_Zsqrt(cuDoubleComplex a);
|
||||
|
||||
/**
|
||||
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
|
||||
*
|
||||
* @note The sparse matrix is stored in the COO format. This is a host side function.
|
||||
*
|
||||
* @param A_row Row index
|
||||
* @param A_col Column index
|
||||
* @param A Non-zero values of the matrix
|
||||
* @param N Row/column length of A
|
||||
* @param nz Number of the non-zero values in A
|
||||
* @param Ac_row Output row index
|
||||
* @param Ac_col Output column index
|
||||
* @param Ac_val Non-zero values of the output matrix
|
||||
*/
|
||||
void clcg_smCcoo_row2col(const int *A_row, const int *A_col, const cuComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuComplex *Ac_val);
|
||||
|
||||
/**
|
||||
* @brief Convert the indexing sequence of a sparse matrix from the row-major to col-major format.
|
||||
*
|
||||
* @note The sparse matrix is stored in the COO format. This is a host side function.
|
||||
*
|
||||
* @param A_row Row index
|
||||
* @param A_col Column index
|
||||
* @param A Non-zero values of the matrix
|
||||
* @param N Row/column length of A
|
||||
* @param nz Number of the non-zero values in A
|
||||
* @param Ac_row Output row index
|
||||
* @param Ac_col Output column index
|
||||
* @param Ac_val Non-zero values of the output matrix
|
||||
*/
|
||||
void clcg_smZcoo_row2col(const int *A_row, const int *A_col, const cuDoubleComplex *A, int N, int nz, int *Ac_row, int *Ac_col, cuDoubleComplex *Ac_val);
|
||||
|
||||
/**
|
||||
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] A_ptr Row index pointer
|
||||
* @param[in] A_col Column index
|
||||
* @param[in] A_val Non-zero values of the matrix
|
||||
* @param[in] A_len Dimension of the matrix
|
||||
* @param A_diag Output diagonal elements
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_smCcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuComplex *A_val, const int A_len, cuComplex *A_diag, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Extract diagonal elements from a square CUDA sparse matrix that is formatted in the CSR format
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] A_ptr Row index pointer
|
||||
* @param[in] A_col Column index
|
||||
* @param[in] A_val Non-zero values of the matrix
|
||||
* @param[in] A_len Dimension of the matrix
|
||||
* @param A_diag Output diagonal elements
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_smZcsr_get_diagonal(const int *A_ptr, const int *A_col, const cuDoubleComplex *A_val, const int A_len, cuDoubleComplex *A_diag, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise multiplication between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecMvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise multiplication between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecMvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise division between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecDvecC_element_wise(const cuComplex *a, const cuComplex *b, cuComplex *c, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Element-wise division between two CUDA arrays.
|
||||
*
|
||||
* @note This is a device side function. All memories must be allocated on the GPU device.
|
||||
*
|
||||
* @param[in] a Pointer of the input array
|
||||
* @param[in] b Pointer of the input array
|
||||
* @param c Pointer of the output array
|
||||
* @param[in] n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecDvecZ_element_wise(const cuDoubleComplex *a, const cuDoubleComplex *b, cuDoubleComplex *c, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Return complex conjugates of an input CUDA complex array
|
||||
*
|
||||
* @param a Pointer of the input array
|
||||
* @param ca Pointer of the output array
|
||||
* @param n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecC_conjugate(const cuComplex *a, cuComplex *ca, int n, int bk_size = 1024);
|
||||
|
||||
/**
|
||||
* @brief Return complex conjugates of an input CUDA complex array
|
||||
*
|
||||
* @param a Pointer of the input array
|
||||
* @param ca Pointer of the output array
|
||||
* @param n Length of the arrays
|
||||
* @param[in] bk_size Default CUDA block size.
|
||||
*/
|
||||
void clcg_vecZ_conjugate(const cuDoubleComplex *a, cuDoubleComplex *ca, int n, int bk_size = 1024);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _LCG_COMPLEX_CUDA_H
|
685
src/lib/lcg_cuda.cu
Normal file
685
src/lib/lcg_cuda.cu
Normal file
@ -0,0 +1,685 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "lcg_cuda.h"
|
||||
|
||||
|
||||
typedef int (*lcg_solver_cuda_ptr)(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
|
||||
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
|
||||
const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
|
||||
/**
 * @brief Entry point that forwards a solve request to one of the routines in this file.
 *
 * LCG_CGS selects lcgs(); LCG_CG and every other identifier select lcg().
 * All remaining arguments are passed through unchanged and the selected
 * routine's return code is returned as is.
 */
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size, const int nz_size,
    const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
{
    // Only LCG_CGS deviates from the default routine.
    lcg_solver_cuda_ptr chosen_solver = (solver_id == LCG_CGS) ? lcgs : lcg;

    return chosen_solver(Afp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
}
|
||||
|
||||
|
||||
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
|
||||
lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
|
||||
{
|
||||
return lpcg(Afp, Mfp, Pfp, m, B, n_size, nz_size, param, instance, cub_handle, cus_handle);
|
||||
}
|
||||
|
||||
|
||||
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
|
||||
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);
|
||||
|
||||
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id)
|
||||
{
|
||||
return lpg(Afp, Pfp, m, B, low, hig, n_size, nz_size, param, instance, cub_handle, cus_handle);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Conjugate gradient iteration for A*m = B, with vector updates done by cuBLAS.
 *
 * m and B are copied with cudaMemcpyHostToDevice, so they must be host arrays
 * holding n_size elements. The solution is copied back into m on every exit path
 * taken after the device buffers were allocated.
 *
 * NOTE(review): the CUDA_R_64F descriptors and cublasD* calls presume that
 * lcg_float is double -- confirm against the type's declaration.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, out, n_size, nz_size);
 *                   the solver uses `out` as the product A*x.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device copy of the
 *                   solution; a non-zero return inside the main loop stops the solver with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on return.
 * @param B          Host array: right-hand side.
 * @param n_size     Number of unknowns, must be positive.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   Opaque user pointer forwarded to the callbacks.
 * @param cub_handle cuBLAS handle, must not be null.
 * @param cus_handle cuSPARSE handle, must not be null.
 *
 * @return One of the LCG_* status codes.
 */
int lcg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
    const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device buffers
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *gk = nullptr, *dk = nullptr, *Adk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&gk, n_size * sizeof(lcg_float));
    cudaMalloc(&dk, n_size * sizeof(lcg_float));
    cudaMalloc(&Adk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors handed to the A*x callback
    cusparseDnVecDescr_t dvec_m, dvec_dk, dvec_Adk;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

    // g0 = Ax - B
    cudaMemcpy(gk, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
    cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
    cudaMemset(dk, 0, n_size * sizeof(lcg_float)); // d0 = 0
    cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // d0 = -g0

    lcg_float gk_mod;
    cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = gk^T * gk (squared norm)

    // reference value for the relative residual, clamped from below at 1.0
    lcg_float g0_mod = gk_mod;
    if (g0_mod < 1.0) g0_mod = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(gk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (gk_mod/g0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, gk_mod/g0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // Deliberately left without initializers: the gotos above jump past these declarations.
    lcg_float dTAd, ak, betak, gk1_mod, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
        else residual = gk_mod/g0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd); // dTAd = dk^T * Adk
        ak = gk_mod/dTAd;

        cublasDaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*dk
        cublasDaxpy_v2(cub_handle, n_size, &ak, Adk, 1, gk, 1); // gk += ak*Adk

        cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk1_mod); // gk1_mod = gk^T * gk (squared norm)
        betak = gk1_mod/gk_mod;
        gk_mod = gk1_mod;

        cublasDscal_v2(cub_handle, n_size, &betak, dk, 1); // dk *= betak
        cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, dk, 1); // dk -= gk
    }

    func_ends:
    {
        // copy the solution back to the host
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(dk);
        cudaFree(gk);
        cudaFree(Adk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Adk);
    }

    return ret;
}
|
||||
|
||||
/**
 * @brief Iterative solver for A*m = B selected by LCG_CGS, with vector updates done by cuBLAS.
 *
 * m and B are copied with cudaMemcpyHostToDevice, so they must be host arrays
 * holding n_size elements. The solution is copied back into m on every exit path
 * taken after the device buffers were allocated.
 *
 * NOTE(review): the CUDA_R_64F descriptors and cublasD* calls presume that
 * lcg_float is double -- confirm against the type's declaration.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, out, n_size, nz_size);
 *                   the solver uses `out` as the product A*x.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device copy of the
 *                   solution; a non-zero return inside the main loop stops the solver with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on return.
 * @param B          Host array: right-hand side.
 * @param n_size     Number of unknowns, must be positive.
 * @param nz_size    Forwarded unchanged to Afp and Pfp.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   Opaque user pointer forwarded to the callbacks.
 * @param cub_handle cuBLAS handle, must not be null.
 * @param cus_handle cuSPARSE handle, must not be null.
 *
 * @return One of the LCG_* status codes.
 */
int lcgs(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B, const int n_size,
    const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device buffers
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *rk = nullptr, *r0T = nullptr, *pk = nullptr, *qpk = nullptr;
    lcg_float *Ax = nullptr, *uk = nullptr, *qk = nullptr, *wk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&rk, n_size * sizeof(lcg_float));
    cudaMalloc(&r0T, n_size * sizeof(lcg_float));
    cudaMalloc(&pk, n_size * sizeof(lcg_float));
    cudaMalloc(&qpk, n_size * sizeof(lcg_float));
    cudaMalloc(&Ax, n_size * sizeof(lcg_float));
    cudaMalloc(&uk, n_size * sizeof(lcg_float));
    cudaMalloc(&qk, n_size * sizeof(lcg_float));
    cudaMalloc(&wk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors handed to the A*x callback
    cusparseDnVecDescr_t dvec_m, dvec_wk, dvec_pk, dvec_Ax;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_wk, n_size, wk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_pk, n_size, pk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Ax, n_size, Ax, CUDA_R_64F);

    lcg_float one = 1.0;
    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Ax, n_size, nz_size);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // r0 = B
    cublasDaxpy_v2(cub_handle, n_size, &none, Ax, 1, rk, 1); // r0 -= Ax
    // p0 = u0 = r0T = r0
    cudaMemcpy(pk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
    cudaMemcpy(uk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
    cudaMemcpy(r0T, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);

    lcg_float rkr0T;
    cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rkr0T); // rkr0T = rk^T * r0T

    lcg_float rk_mod;
    cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

    // reference value for the relative residual, clamped from below at 1.0
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(rk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/r0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // Deliberately left without initializers: the gotos above jump past these declarations.
    lcg_float ak, nak, rkr0T1, AprT, betak, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_pk, dvec_Ax, n_size, nz_size); // Ax = A*pk

        AprT = 0.0;
        cublasDdot_v2(cub_handle, n_size, r0T, 1, Ax, 1, &AprT); // AprT = r0T^T * (A*pk)
        ak = rkr0T/AprT;
        nak = -1.0*ak;

        cudaMemcpy(qk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cudaMemcpy(wk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &nak, Ax, 1, qk, 1); // qk = uk - ak*A*pk
        cublasDaxpy_v2(cub_handle, n_size, &one, qk, 1, wk, 1); // wk = uk + qk

        Afp(instance, cub_handle, cus_handle, dvec_wk, dvec_Ax, n_size, nz_size); // Ax = A*wk

        cublasDaxpy_v2(cub_handle, n_size, &ak, wk, 1, d_m, 1); // m += ak*wk
        cublasDaxpy_v2(cub_handle, n_size, &nak, Ax, 1, rk, 1); // rk -= ak*A*wk

        cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

        cublasDdot_v2(cub_handle, n_size, rk, 1, r0T, 1, &rkr0T1);
        betak = rkr0T1/rkr0T;
        rkr0T = rkr0T1;

        cudaMemcpy(uk, rk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, qk, 1, uk, 1); // uk = rk + betak*qk

        cudaMemcpy(qpk, qk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, pk, 1, qpk, 1); // qpk = qk + betak*pk

        cudaMemcpy(pk, uk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
        cublasDaxpy_v2(cub_handle, n_size, &betak, qpk, 1, pk, 1); // pk = uk + betak*qpk
    }

    func_ends:
    {
        // copy the solution back to the host
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(r0T);
        cudaFree(pk);
        cudaFree(qpk);
        cudaFree(Ax);
        cudaFree(uk);
        cudaFree(qk);
        cudaFree(wk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_wk);
        cusparseDestroyDnVec(dvec_pk);
        cusparseDestroyDnVec(dvec_Ax);
    }

    return ret;
}
|
||||
|
||||
/**
 * @brief Preconditioned conjugate gradient iteration for A*m = B, with vector updates done by cuBLAS.
 *
 * m and B are copied with cudaMemcpyHostToDevice, so they must be host arrays
 * holding n_size elements. The solution is copied back into m on every exit path
 * taken after the device buffers were allocated.
 *
 * NOTE(review): the CUDA_R_64F descriptors and cublasD* calls presume that
 * lcg_float is double -- confirm against the type's declaration.
 *
 * @param Afp        Callback invoked as Afp(instance, cub_handle, cus_handle, x, out, n_size, nz_size);
 *                   the solver uses `out` as the product A*x.
 * @param Mfp        Callback with the same call shape; it is applied to the residual rk and
 *                   its output zk is used as the preconditioned residual.
 * @param Pfp        Optional progress callback (may be nullptr). It receives the device copy of the
 *                   solution; a non-zero return inside the main loop stops the solver with LCG_STOP.
 * @param m          Host array: initial guess on entry, solution on return.
 * @param B          Host array: right-hand side.
 * @param n_size     Number of unknowns, must be positive.
 * @param nz_size    Forwarded unchanged to Afp, Mfp and Pfp.
 * @param param      Solver parameters; defparam is used when nullptr.
 * @param instance   Opaque user pointer forwarded to the callbacks.
 * @param cub_handle cuBLAS handle, must not be null.
 * @param cus_handle cuSPARSE handle, must not be null.
 *
 * @return One of the LCG_* status codes.
 */
int lpcg(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
    const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // set CG parameters
    lcg_para para = (param != nullptr) ? (*param) : defparam;

    // check parameters
    if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
    if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
    if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;

    if (m == nullptr) return LCG_INVALID_POINTER;
    if (B == nullptr) return LCG_INVALID_POINTER;
    if (cub_handle == nullptr) return LCG_INVALID_POINTER;
    if (cus_handle == nullptr) return LCG_INVALID_POINTER;

    // allocate device buffers
    lcg_float *d_m = nullptr, *d_B = nullptr;
    lcg_float *rk = nullptr, *zk = nullptr, *dk = nullptr, *Adk = nullptr;
    cudaMalloc(&d_m, n_size * sizeof(lcg_float));
    cudaMalloc(&d_B, n_size * sizeof(lcg_float));
    cudaMalloc(&rk, n_size * sizeof(lcg_float));
    cudaMalloc(&zk, n_size * sizeof(lcg_float));
    cudaMalloc(&dk, n_size * sizeof(lcg_float));
    cudaMalloc(&Adk, n_size * sizeof(lcg_float));

    // copy the initial solution and the right-hand side to the device
    cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);

    // dense-vector descriptors handed to the callbacks
    cusparseDnVecDescr_t dvec_m, dvec_rk, dvec_zk, dvec_dk, dvec_Adk;
    cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_rk, n_size, rk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_zk, n_size, zk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_dk, n_size, dk, CUDA_R_64F);
    cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);

    lcg_float one = 1.0;
    lcg_float none = -1.0;

    Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);

    // r0 = B - Ax
    cudaMemcpy(rk, d_B, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // r0 = B
    cublasDaxpy_v2(cub_handle, n_size, &none, Adk, 1, rk, 1); // r0 -= Ax

    // z0 = output of the preconditioner callback applied to r0
    Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);

    // d0 = z0
    cudaMemcpy(dk, zk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);

    lcg_float rk_mod;
    cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

    // reference value for the relative residual, clamped from below at 1.0
    lcg_float r0_mod = rk_mod;
    if (r0_mod < 1.0) r0_mod = 1.0;

    lcg_float zTr;
    cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &zTr); // zTr = zk^T * rk

    int ret, t = 0;
    if (para.abs_diff && sqrt(rk_mod)/n_size <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, sqrt(rk_mod)/n_size, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }
    else if (rk_mod/r0_mod <= para.epsilon)
    {
        ret = LCG_ALREADY_OPTIMIZIED;
        if (Pfp != nullptr)
        {
            Pfp(instance, d_m, rk_mod/r0_mod, &para, n_size, nz_size, 0);
        }
        goto func_ends;
    }

    // Deliberately left without initializers: the gotos above jump past these declarations.
    lcg_float dTAd, ak, nak, betak, zTr1, residual;
    while (1)
    {
        if (para.abs_diff) residual = sqrt(rk_mod)/n_size;
        else residual = rk_mod/r0_mod;

        if (Pfp != nullptr)
        {
            if (Pfp(instance, d_m, residual, &para, n_size, nz_size, t))
            {
                ret = LCG_STOP; goto func_ends;
            }
        }

        if (residual <= para.epsilon)
        {
            ret = LCG_CONVERGENCE; goto func_ends;
        }

        // max_iterations == 0 disables the iteration limit
        if (para.max_iterations > 0 && t+1 > para.max_iterations)
        {
            ret = LCG_REACHED_MAX_ITERATIONS;
            break;
        }

        t++;

        Afp(instance, cub_handle, cus_handle, dvec_dk, dvec_Adk, n_size, nz_size); // Adk = A*dk

        cublasDdot_v2(cub_handle, n_size, dk, 1, Adk, 1, &dTAd); // dTAd = dk^T * Adk
        ak = zTr/dTAd;
        nak = -1.0*ak;

        cublasDaxpy_v2(cub_handle, n_size, &ak, dk, 1, d_m, 1); // m += ak*dk
        cublasDaxpy_v2(cub_handle, n_size, &nak, Adk, 1, rk, 1); // rk -= ak*Adk

        // refresh zk from the updated residual
        Mfp(instance, cub_handle, cus_handle, dvec_rk, dvec_zk, n_size, nz_size);

        cublasDdot_v2(cub_handle, n_size, rk, 1, rk, 1, &rk_mod); // rk_mod = rk^T * rk (squared norm)

        cublasDdot_v2(cub_handle, n_size, zk, 1, rk, 1, &zTr1);
        betak = zTr1/zTr;
        zTr = zTr1;

        cublasDscal_v2(cub_handle, n_size, &betak, dk, 1); // dk *= betak
        cublasDaxpy_v2(cub_handle, n_size, &one, zk, 1, dk, 1); // dk += zk
    }

    func_ends:
    {
        // copy the solution back to the host
        cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);

        cudaFree(d_m);
        cudaFree(d_B);
        cudaFree(rk);
        cudaFree(zk);
        cudaFree(dk);
        cudaFree(Adk);
        cusparseDestroyDnVec(dvec_m);
        cusparseDestroyDnVec(dvec_rk);
        cusparseDestroyDnVec(dvec_zk);
        cusparseDestroyDnVec(dvec_dk);
        cusparseDestroyDnVec(dvec_Adk);
    }

    return ret;
}
|
||||
|
||||
|
||||
int lpg(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param,
|
||||
void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
|
||||
{
|
||||
// set CG parameters
|
||||
lcg_para para = (param != nullptr) ? (*param) : defparam;
|
||||
|
||||
// check parameters
|
||||
if (n_size <= 0) return LCG_INVILAD_VARIABLE_SIZE;
|
||||
if (para.max_iterations < 0) return LCG_INVILAD_MAX_ITERATIONS;
|
||||
if (para.epsilon <= 0.0 || para.epsilon >= 1.0) return LCG_INVILAD_EPSILON;
|
||||
if (para.step <= 0.0) return LCG_INVALID_LAMBDA;
|
||||
|
||||
if (m == nullptr) return LCG_INVALID_POINTER;
|
||||
if (B == nullptr) return LCG_INVALID_POINTER;
|
||||
if (low == nullptr) return LCG_INVALID_POINTER;
|
||||
if (hig == nullptr) return LCG_INVALID_POINTER;
|
||||
if (cub_handle == nullptr) return LCG_INVALID_POINTER;
|
||||
if (cus_handle == nullptr) return LCG_INVALID_POINTER;
|
||||
|
||||
// locate memory
|
||||
lcg_float *d_m = nullptr, *d_B = nullptr;
|
||||
lcg_float *gk = nullptr, *Adk = nullptr;
|
||||
lcg_float *m_new = nullptr, *gk_new = nullptr;
|
||||
lcg_float *sk = nullptr, *yk = nullptr;
|
||||
cudaMalloc(&d_m, n_size * sizeof(lcg_float));
|
||||
cudaMalloc(&d_B, n_size * sizeof(lcg_float));
|
||||
cudaMalloc(&gk, n_size *sizeof(lcg_float));
|
||||
cudaMalloc(&Adk, n_size *sizeof(lcg_float));
|
||||
cudaMalloc(&m_new, n_size *sizeof(lcg_float));
|
||||
cudaMalloc(&gk_new, n_size *sizeof(lcg_float));
|
||||
cudaMalloc(&sk, n_size *sizeof(lcg_float));
|
||||
cudaMalloc(&yk, n_size *sizeof(lcg_float));
|
||||
|
||||
// Copy initial solutions
|
||||
cudaMemcpy(d_m, m, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_B, B, n_size * sizeof(lcg_float), cudaMemcpyHostToDevice);
|
||||
|
||||
cusparseDnVecDescr_t dvec_m, dvec_mnew, dvec_Adk;
|
||||
cusparseCreateDnVec(&dvec_m, n_size, d_m, CUDA_R_64F);
|
||||
cusparseCreateDnVec(&dvec_mnew, n_size, m_new, CUDA_R_64F);
|
||||
cusparseCreateDnVec(&dvec_Adk, n_size, Adk, CUDA_R_64F);
|
||||
|
||||
lcg_float none = -1.0;
|
||||
lcg_float nalpha_k, alpha_k = para.step;
|
||||
|
||||
lcg_set2box_cuda(low, hig, m, n_size);
|
||||
Afp(instance, cub_handle, cus_handle, dvec_m, dvec_Adk, n_size, nz_size);
|
||||
|
||||
// g0 = Ax - B
|
||||
cudaMemcpy(gk, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
|
||||
cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
|
||||
|
||||
lcg_float gk_mod;
|
||||
cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = ||gk||
|
||||
|
||||
lcg_float g0_mod = gk_mod;
|
||||
if (g0_mod < 1.0) g0_mod = 1.0;
|
||||
|
||||
int ret, t = 0;
|
||||
if (para.abs_diff && sqrt(gk_mod)/n_size <= para.epsilon)
|
||||
{
|
||||
ret = LCG_ALREADY_OPTIMIZIED;
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
Pfp(instance, d_m, sqrt(gk_mod)/n_size, ¶, n_size, nz_size, 0);
|
||||
}
|
||||
goto func_ends;
|
||||
}
|
||||
else if (gk_mod/g0_mod <= para.epsilon)
|
||||
{
|
||||
ret = LCG_ALREADY_OPTIMIZIED;
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
Pfp(instance, d_m, gk_mod/g0_mod, ¶, n_size, nz_size, 0);
|
||||
}
|
||||
goto func_ends;
|
||||
}
|
||||
|
||||
|
||||
lcg_float sk_mod, syk_mod, residual;
|
||||
while(1)
|
||||
{
|
||||
if (para.abs_diff) residual = sqrt(gk_mod)/n_size;
|
||||
else residual = gk_mod/g0_mod;
|
||||
|
||||
if (Pfp != nullptr)
|
||||
{
|
||||
if (Pfp(instance, d_m, residual, ¶, n_size, nz_size, t))
|
||||
{
|
||||
ret = LCG_STOP; goto func_ends;
|
||||
}
|
||||
}
|
||||
|
||||
if (residual <= para.epsilon)
|
||||
{
|
||||
ret = LCG_CONVERGENCE; goto func_ends;
|
||||
}
|
||||
|
||||
if (para.max_iterations > 0 && t+1 > para.max_iterations)
|
||||
{
|
||||
ret = LCG_REACHED_MAX_ITERATIONS;
|
||||
break;
|
||||
}
|
||||
|
||||
t++;
|
||||
|
||||
nalpha_k = -1.0*alpha_k;
|
||||
cudaMemcpy(m_new, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||
cublasDaxpy_v2(cub_handle, n_size, &nalpha_k, gk, 1, m_new, 1);
|
||||
|
||||
lcg_set2box_cuda(low, hig, m_new, n_size);
|
||||
Afp(instance, cub_handle, cus_handle, dvec_mnew, dvec_Adk, n_size, nz_size);
|
||||
|
||||
cudaMemcpy(gk_new, Adk, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice); // g0 = A*x
|
||||
cublasDaxpy_v2(cub_handle, n_size, &none, d_B, 1, gk, 1); // g0 -= B
|
||||
|
||||
cudaMemcpy(sk, m_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||
cublasDaxpy_v2(cub_handle, n_size, &none, d_m, 1, sk, 1);
|
||||
|
||||
cudaMemcpy(yk, gk_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||
cublasDaxpy_v2(cub_handle, n_size, &none, gk, 1, sk, 1);
|
||||
|
||||
cublasDdot_v2(cub_handle, n_size, sk, 1, sk, 1, &sk_mod);
|
||||
cublasDdot_v2(cub_handle, n_size, sk, 1, yk, 1, &syk_mod);
|
||||
alpha_k = sk_mod/syk_mod;
|
||||
|
||||
cudaMemcpy(d_m, m_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||
cudaMemcpy(gk, gk_new, n_size * sizeof(lcg_float), cudaMemcpyDeviceToDevice);
|
||||
|
||||
lcg_float gk_mod;
|
||||
cublasDdot_v2(cub_handle, n_size, gk, 1, gk, 1, &gk_mod); // gk_mod = ||gk||
|
||||
}
|
||||
|
||||
func_ends:
|
||||
{
|
||||
// Copy to host memories
|
||||
cudaMemcpy(m, d_m, n_size * sizeof(lcg_float), cudaMemcpyDeviceToHost);
|
||||
|
||||
cudaFree(d_m);
|
||||
cudaFree(d_B);
|
||||
cudaFree(gk);
|
||||
cudaFree(gk_new);
|
||||
cudaFree(m_new);
|
||||
cudaFree(sk);
|
||||
cudaFree(yk);
|
||||
cudaFree(Adk);
|
||||
cusparseDestroyDnVec(dvec_m);
|
||||
cusparseDestroyDnVec(dvec_mnew);
|
||||
cusparseDestroyDnVec(dvec_Adk);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
135
src/lib/lcg_cuda.h
Normal file
135
src/lib/lcg_cuda.h
Normal file
@ -0,0 +1,135 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_CUDA_H
|
||||
#define _LCG_CUDA_H
|
||||
|
||||
#include "util.h"
|
||||
#include "algebra_cuda.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <cusparse_v2.h>
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||
* by a vertical vector 'x'. Note that both A and x are hosted on the GPU device.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver_cuda() functions by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
* @param cus_handle Handle of the cusparse object.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param prod_Ax Product of A multiplied by x.
|
||||
* @param n_size Size of x and column/row numbers of A.
* @param nz_size Size of the non-zero elements of the cusparse object, as passed to the solver.
|
||||
*/
|
||||
typedef void (*lcg_axfunc_cuda_ptr)(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||
* if necessary. Note that m is hosted on the GPU device.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
|
||||
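* @param param The parameter setup that the solver is running with.
* @param n_size The size of the variables.
* @param nz_size Size of the non-zero elements of the cusparse object, as passed to the solver.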
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*lcg_progress_cuda_ptr)(void* instance, const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
* @param cus_handle Handle of the cusparse object.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const int n_size, const int nz_size, const lcg_para* param, void* instance, cublasHandle_t cub_handle,
|
||||
cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_CG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
* @param cus_handle Handle of the cusparse object.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_preconditioned_cuda(lcg_axfunc_cuda_ptr Afp, lcg_axfunc_cuda_ptr Mfp, lcg_progress_cuda_ptr Pfp,
|
||||
lcg_float* m, const lcg_float* B, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PCG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function. Note that both m and B are hosted on the GPU device.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'Mx' for preconditioning.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param low Lower bound of the acceptable solution.
|
||||
* @param hig Higher bound of the acceptable solution.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] n_size Size of the solution vector and objective vector.
|
||||
* @param[in] nz_size Size of the non-zero element of a cusparse object.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* @param cub_handle Handler of the cublas object.
|
||||
* @param cus_handle Handle of the cusparse object.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_constrained_cuda(lcg_axfunc_cuda_ptr Afp, lcg_progress_cuda_ptr Pfp, lcg_float* m, const lcg_float* B,
|
||||
const lcg_float* low, const lcg_float* hig, const int n_size, const int nz_size, const lcg_para* param, void* instance,
|
||||
cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_solver_enum solver_id = LCG_PG);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _LCG_CUDA_H
|
1128
src/lib/lcg_eigen.cpp
Normal file
1128
src/lib/lcg_eigen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
110
src/lib/lcg_eigen.h
Normal file
110
src/lib/lcg_eigen.h
Normal file
@ -0,0 +1,110 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_EIGEN_H
|
||||
#define _LCG_EIGEN_H
|
||||
|
||||
#include "util.h"
|
||||
#include "algebra_eigen.h"
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the product of a N*N matrix 'A' multiplied
|
||||
* by a vertical vector 'x'.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param x Multiplier of the Ax product.
|
||||
* @param Ax Product of A multiplied by x.
|
||||
*/
|
||||
typedef void (*lcg_axfunc_eigen_ptr)(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||
* if necessary.
|
||||
*
|
||||
* @param instance The user data sent for the lcg_solver() functions by the client.
|
||||
* @param m The current solutions.
|
||||
* @param converge The current value evaluating the iteration progress.
|
||||
* @param k The iteration count.
|
||||
*
|
||||
* @retval int Zero to continue the optimization process. Returning a
|
||||
* non-zero value will terminate the optimization process.
|
||||
*/
|
||||
typedef int (*lcg_progress_eigen_ptr)(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||
const lcg_para *param, const int k);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_CG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
|
||||
const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_CG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Mfp Callback function for calculating the product of 'M^{-1}x', in which M is the preconditioning matrix.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PCG.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_preconditioned_eigen(lcg_axfunc_eigen_ptr Afp, lcg_axfunc_eigen_ptr Mfp, lcg_progress_eigen_ptr Pfp,
|
||||
Eigen::VectorXd &m, const Eigen::VectorXd &B, const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PCG);
|
||||
|
||||
/**
|
||||
* @brief A combined conjugate gradient solver function with inequality constraints.
|
||||
*
|
||||
* @param[in] Afp Callback function for calculating the product of 'Ax'.
|
||||
* @param[in] Pfp Callback function for monitoring the iteration progress.
|
||||
* @param m Initial solution vector.
|
||||
* @param B Objective vector of the linear system.
|
||||
* @param[in] low The lower boundary of the acceptable solution.
|
||||
* @param[in] hig The higher boundary of the acceptable solution.
|
||||
* @param param Parameter setup for the conjugate gradient methods.
|
||||
* @param instance The user data sent for the lcg_solver() function by the client.
|
||||
* This variable is either 'this' for class member functions or 'NULL' for global functions.
|
||||
* @param solver_id Solver type used to solve the linear system. The default value is LCG_PG.
|
||||
* @param P Precondition vector (optional expect for the LCG_PCG method). The default value is NULL.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int lcg_solver_constrained_eigen(lcg_axfunc_eigen_ptr Afp, lcg_progress_eigen_ptr Pfp, Eigen::VectorXd &m,
|
||||
const Eigen::VectorXd &B, const Eigen::VectorXd &low, const Eigen::VectorXd &hig,
|
||||
const lcg_para* param, void* instance, lcg_solver_enum solver_id = LCG_PG);
|
||||
|
||||
#endif //_LCG_EIGEN_H
|
381
src/lib/preconditioner.cpp
Normal file
381
src/lib/preconditioner.cpp
Normal file
@ -0,0 +1,381 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "preconditioner.h"
|
||||
|
||||
#include "cmath"
|
||||
#include "map"
|
||||
|
||||
/**
 * Count the COO entries that lie on or below the main diagonal (row >= col).
 *
 * row       Row indices of the stored entries.
 * col       Column indices of the stored entries.
 * nz_size   Number of stored entries.
 * lnz_size  Receives the number of entries with row >= col.
 */
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size)
{
	const size_t total = nz_size;
	size_t lower_count = 0;

	size_t k = 0;
	while (k < total)
	{
		// An entry belongs to the lower triangle when it is not above the diagonal.
		lower_count += (row[k] >= col[k]) ? 1 : 0;
		++k;
	}

	*lnz_size = lower_count;
}
|
||||
|
||||
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size,
|
||||
int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
|
||||
{
|
||||
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||
lcg_float *diagonal = new lcg_float [N];
|
||||
// A temporary row
|
||||
lcg_float *tmp_row = new lcg_float [N];
|
||||
// index of non-zero elements in tmp_row
|
||||
int *filled_idx = new int [N];
|
||||
// Begining index of each row in the input matrix
|
||||
int *row_st_idx = new int [N];
|
||||
|
||||
size_t i, j, f;
|
||||
|
||||
// Set initial values
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
diagonal[i] = 0.0;
|
||||
tmp_row[i] = 0.0;
|
||||
filled_idx[i] = -1;
|
||||
row_st_idx[i] = -1;
|
||||
}
|
||||
|
||||
// copy elements in the lower triangle to the output matrix
|
||||
j = 0;
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i])
|
||||
{
|
||||
IC_row[j] = row[i];
|
||||
IC_col[j] = col[i];
|
||||
IC_val[j] = val[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the begining index of each row in the matrix
|
||||
j = 1;
|
||||
row_st_idx[0] = IC_row[0];
|
||||
size_t old_row = IC_row[0];
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
if (IC_row[i] > old_row)
|
||||
{
|
||||
row_st_idx[j] = i;
|
||||
old_row = IC_row[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the first element
|
||||
IC_val[0] = sqrt(IC_val[0]);
|
||||
diagonal[0] = IC_val[0];
|
||||
|
||||
lcg_float dia_sum;
|
||||
dia_sum = 0.0;
|
||||
// The first one is already calculated
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
// Calculate the first column if there is one
|
||||
if (IC_col[i] == 0)
|
||||
{
|
||||
IC_val[i] = IC_val[i]/IC_val[0];
|
||||
dia_sum = dia_sum + IC_val[i]*IC_val[i];
|
||||
continue; // Case 1 break
|
||||
}
|
||||
|
||||
// Calculate elements in the middle of a row
|
||||
if (IC_row[i] > IC_col[i])
|
||||
{
|
||||
// Find needed values from previous elements
|
||||
f = 0;
|
||||
j = row_st_idx[IC_col[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
tmp_row[IC_col[j]] = IC_val[j];
|
||||
filled_idx[f] = IC_col[j];
|
||||
f++;
|
||||
j++;
|
||||
}
|
||||
|
||||
j = row_st_idx[IC_row[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
IC_val[i] = IC_val[i] - IC_val[j]*tmp_row[IC_col[j]];
|
||||
j++;
|
||||
}
|
||||
|
||||
IC_val[i] = IC_val[i]/diagonal[IC_col[i]];
|
||||
dia_sum = dia_sum + IC_val[i]*IC_val[i];
|
||||
|
||||
// reset tmp variables
|
||||
for (j = 0; j < f; j++)
|
||||
{
|
||||
tmp_row[filled_idx[j]] = 0.0;
|
||||
}
|
||||
|
||||
continue; // Case 2 break
|
||||
}
|
||||
|
||||
// We have rearched the diagonal position
|
||||
if (IC_row[i] == IC_col[i])
|
||||
{
|
||||
IC_val[i] = sqrt(IC_val[i] - dia_sum);
|
||||
diagonal[IC_col[i]] = IC_val[i];
|
||||
dia_sum = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] diagonal;
|
||||
delete[] tmp_row;
|
||||
delete[] row_st_idx;
|
||||
delete[] filled_idx;
|
||||
return;
|
||||
}
|
||||
|
||||
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val)
|
||||
{
|
||||
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||
lcg_float *diagonal = new lcg_float [N];
|
||||
// A temporary row
|
||||
lcg_float *tmp_row = new lcg_float [N];
|
||||
// index of non-zero elements in tmp_row
|
||||
int *filled_idx = new int [N];
|
||||
// Begining index of each row in the input matrix
|
||||
int *row_st_idx = new int [N];
|
||||
|
||||
size_t i, j, f, l;
|
||||
|
||||
// Set initial values
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
diagonal[i] = 0.0;
|
||||
tmp_row[i] = 0.0;
|
||||
filled_idx[i] = -1;
|
||||
row_st_idx[i] = -1;
|
||||
}
|
||||
|
||||
// copy elements to the output matrix
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
IC_row[i] = row[i];
|
||||
IC_col[i] = col[i];
|
||||
IC_val[i] = val[i];
|
||||
}
|
||||
|
||||
// count element number in the lower triangular part (including the diagonal) and the upper triangular part (excluding the diagonal)
|
||||
// build map from elements' cooridnate to their index in the array
|
||||
size_t order, L_nz = 0;
|
||||
std::map<size_t, size_t> index_map;
|
||||
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i]) // Count number for thr lower triangular part
|
||||
{
|
||||
L_nz++;
|
||||
}
|
||||
else // Only need to build the map for the upper triangular part
|
||||
{
|
||||
order = N*row[i] + col[i];
|
||||
index_map[order] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// We use to store element index in the lower triangle
|
||||
j = 0;
|
||||
size_t *low_idx = new size_t [L_nz];
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i])
|
||||
{
|
||||
low_idx[j] = i;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the begining index of each row in the matrix
|
||||
j = 1;
|
||||
row_st_idx[0] = IC_row[0];
|
||||
size_t old_row = IC_row[0];
|
||||
for (i = 1; i < nz_size; i++)
|
||||
{
|
||||
if (IC_row[i] > old_row)
|
||||
{
|
||||
row_st_idx[j] = i;
|
||||
old_row = IC_row[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the first element
|
||||
IC_val[0] = sqrt(IC_val[0]);
|
||||
diagonal[0] = IC_val[0];
|
||||
|
||||
lcg_float dia_sum;
|
||||
dia_sum = 0.0;
|
||||
// The first one is already calculated
|
||||
for (i = 1; i < L_nz; i++)
|
||||
{
|
||||
l = low_idx[i];
|
||||
|
||||
// Calculate the first column if there is one
|
||||
if (IC_col[l] == 0)
|
||||
{
|
||||
IC_val[l] = IC_val[l]/IC_val[0];
|
||||
dia_sum = dia_sum + IC_val[l]*IC_val[l];
|
||||
// Set value at the upper triangle
|
||||
order = IC_row[l];
|
||||
IC_val[index_map[order]] = IC_val[l];
|
||||
continue; // Case 1 break
|
||||
}
|
||||
|
||||
// Calculate elements in the middle of a row
|
||||
if (IC_row[l] > IC_col[l])
|
||||
{
|
||||
// Find needed values from previous elements
|
||||
f = 0;
|
||||
j = row_st_idx[IC_col[l]];
|
||||
while (IC_col[j] < IC_col[l])
|
||||
{
|
||||
tmp_row[IC_col[j]] = IC_val[j];
|
||||
filled_idx[f] = IC_col[j];
|
||||
f++;
|
||||
j++;
|
||||
}
|
||||
|
||||
j = row_st_idx[IC_row[l]];
|
||||
while (IC_col[j] < IC_col[l])
|
||||
{
|
||||
IC_val[l] = IC_val[l] - IC_val[j]*tmp_row[IC_col[j]];
|
||||
j++;
|
||||
}
|
||||
|
||||
IC_val[l] = IC_val[l]/diagonal[IC_col[l]];
|
||||
dia_sum = dia_sum + IC_val[l]*IC_val[l];
|
||||
|
||||
// Set value at the upper triangle
|
||||
order = N*IC_col[l] + IC_row[l];
|
||||
IC_val[index_map[order]] = IC_val[l];
|
||||
|
||||
// reset tmp variables
|
||||
for (j = 0; j < f; j++)
|
||||
{
|
||||
tmp_row[filled_idx[j]] = 0.0;
|
||||
}
|
||||
|
||||
continue; // Case 2 break
|
||||
}
|
||||
|
||||
// We have rearched the diagonal position
|
||||
if (IC_row[l] == IC_col[l])
|
||||
{
|
||||
IC_val[l] = sqrt(IC_val[l] - dia_sum);
|
||||
diagonal[IC_col[l]] = IC_val[l];
|
||||
dia_sum = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] diagonal;
|
||||
delete[] tmp_row;
|
||||
delete[] row_st_idx;
|
||||
delete[] filled_idx;
|
||||
delete[] low_idx;
|
||||
index_map.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Solve U*x = B by backward substitution, with U stored in the COO format.
 *
 * Rows are processed from N-1 down to 0 while the entry list is scanned from
 * its end towards its beginning. For each row, entries with col > row are
 * accumulated and the diagonal entry closes the row. Entries of other rows met
 * during the scan, and entries with col < row, are ignored.
 * The scan therefore expects the entries ordered by row and, inside a row, by
 * ascending column. x[i] stays 0 for any row whose diagonal entry is not found.
 *
 * row, col, U  COO triplets of the matrix.
 * B            Right-hand side, N values.
 * x            Receives the solution, N values.
 * N            Row/column size of the matrix.
 * nz_size      Number of entries in row/col/U.
 */
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
	if (N <= 0) return;

	for (int i = 0; i < N; i++)
	{
		x[i] = 0.0;
	}

	if (nz_size <= 0) return;

	// Signed counters: the loops run down to zero, and an unsigned counter
	// tested with ">= 0" never terminates and wraps past the array start.
	int iter = nz_size - 1;
	double sum;
	for (int i = N-1; i >= 0; i--)
	{
		sum = 0.0;
		for (int j = iter; j >= 0; j--)
		{
			if (row[j] != i) continue;

			if (col[j] > i)
			{
				sum += U[j] * x[col[j]];
			}
			else if (col[j] == i)
			{
				x[i] = (B[i] - sum)/U[j];
				// Resume the next row right before this diagonal entry.
				// When j is 0 this leaves iter at -1, so no further row is scanned.
				iter = j-1;
				break;
			}
		}
	}
	return;
}
|
||||
|
||||
/**
 * Solve L*x = B by forward substitution, with L stored in the COO format.
 *
 * Rows are processed from 0 to N-1 while the entry list is scanned forwards.
 * For each row, entries with col < row are accumulated and the diagonal entry
 * closes the row; the scan of the next row resumes right after that entry.
 *
 * row, col, L  COO triplets of the matrix.
 * B            Right-hand side, N values.
 * x            Receives the solution, N values.
 * N            Row/column size of the matrix.
 * nz_size      Number of entries in row/col/L.
 */
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size)
{
	const size_t dim = N;
	const size_t entries = nz_size;

	size_t r = 0;
	while (r < dim)
	{
		x[r] = 0.0;
		++r;
	}

	// Position in the entry list where the scan of the next row starts
	size_t cursor = 0;
	for (r = 0; r < dim; ++r)
	{
		double acc = 0.0;
		size_t k = cursor;
		while (k < entries)
		{
			if (row[k] == r)
			{
				if (col[k] < r)
				{
					acc += L[k] * x[col[k]];
				}
				else if (col[k] == r)
				{
					x[r] = (B[r] - acc)/L[k];
					cursor = k + 1;
					break;
				}
			}
			++k;
		}
	}
}
|
||||
|
||||
/**
 * Report whether the COO matrix stores exactly N non-zero diagonal entries.
 *
 * Every stored entry with row == col and a value different from 0.0 is counted,
 * and the count is compared with N.
 *
 * row, col, M  COO triplets of the matrix.
 * N            Row/column size of the matrix.
 * nz_size      Number of entries in row/col/M.
 * Returns true when the count equals N, false otherwise.
 */
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size)
{
	const size_t entries = nz_size;
	size_t nonzero_diag = 0;

	for (size_t k = 0; k < entries; ++k)
	{
		const bool on_diagonal = (row[k] == col[k]);
		if (on_diagonal && M[k] != 0.0)
		{
			++nonzero_diag;
		}
	}

	return nonzero_diag == N;
}
|
110
src/lib/preconditioner.h
Normal file
110
src/lib/preconditioner.h
Normal file
@ -0,0 +1,110 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _PRECONDITIONER_H
|
||||
#define _PRECONDITIONER_H
|
||||
|
||||
#include "algebra.h"
|
||||
|
||||
/**
|
||||
* @brief Return the number of non-zero elements in the lower triangular part of the input matrix
|
||||
*
|
||||
* @param row[in] Row index of the input sparse matrix.
|
||||
* @param col[in] Column index of the input sparse matrix.
|
||||
* @param nz_size[in] Length of the non-zero elements.
|
||||
* @param lnz_size[out] Length of the non-zero elements in the lower triangle
|
||||
*/
|
||||
void lcg_incomplete_Cholesky_half_buffsize_coo(const int *row, const int *col, int nz_size, int *lnz_size);
|
||||
|
||||
/**
|
||||
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||
*
|
||||
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param val Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||
*/
|
||||
void lcg_incomplete_Cholesky_half_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
|
||||
|
||||
/**
|
||||
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||
*
|
||||
* @note The factorized lower and upper triangular matrixes are stored in the lower and upper triangular parts of the output matrix accordingly.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param val Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||
*/
|
||||
void lcg_incomplete_Cholesky_full_coo(const int *row, const int *col, const lcg_float *val, int N, int nz_size, int *IC_row, int *IC_col, lcg_float *IC_val);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Ux = B, in which U is an upper triangle matrix.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param U Non-zero values of the input sparse matrix.
|
||||
* @param B Object array.
|
||||
* @param x The returned solution.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
*/
|
||||
void lcg_solve_upper_triangle_coo(const int *row, const int *col, const lcg_float *U, const lcg_float *B, lcg_float *x, int N, int nz_size);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Lx = B, in which L is a lower triangle matrix.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param L Non-zero values of the input sparse matrix.
|
||||
* @param B Object array.
|
||||
* @param x The returned solution.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
*/
|
||||
void lcg_solve_lower_triangle_coo(const int *row, const int *col, const lcg_float *L, const lcg_float *B, lcg_float *x, int N, int nz_size);
|
||||
|
||||
/**
|
||||
* @brief Check to see if a square matrix is full ranked or not. The sparse matrix is stored in the COO format.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param M Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @return true The matrix is full ranked.
|
||||
* @return false The matrix is not full ranked.
|
||||
*/
|
||||
bool lcg_full_rank_coo(const int *row, const int *col, const lcg_float *M, int N, int nz_size);
|
||||
|
||||
#endif // _PRECONDITIONER_H
|
421
src/lib/preconditioner_cuda.cu
Normal file
421
src/lib/preconditioner_cuda.cu
Normal file
@ -0,0 +1,421 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "preconditioner_cuda.h"
|
||||
#include "map"
|
||||
|
||||
/**
 * @brief Count the entries of a COO matrix that lie in the lower triangle (diagonal included).
 *
 * An entry is counted when row[i] >= col[i]. A zero or negative nz_size is treated as an
 * empty matrix and yields a count of 0 (the index arrays are not read in that case).
 *
 * @param row      Row index of the input sparse matrix.
 * @param col      Column index of the input sparse matrix.
 * @param nz_size  Number of entries in row/col.
 * @param lnz_size Output: number of entries with row >= col.
 */
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size)
{
    // A signed loop index is used on purpose: comparing an unsigned index against a
    // negative nz_size would wrap the bound around and read far past the arrays.
    int lower_count = 0;
    for (int i = 0; i < nz_size; i++)
    {
        if (row[i] >= col[i])
        {
            lower_count++;
        }
    }

    *lnz_size = lower_count;
    return;
}
|
||||
|
||||
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size,
|
||||
int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val)
|
||||
{
|
||||
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||
cuComplex *diagonal = new cuComplex [N];
|
||||
// A temporary row
|
||||
cuComplex *tmp_row = new cuComplex [N];
|
||||
// index of non-zero elements in tmp_row
|
||||
int *filled_idx = new int [N];
|
||||
// Begining index of each row in the input matrix
|
||||
int *row_st_idx = new int [N];
|
||||
|
||||
size_t i, j, f;
|
||||
|
||||
// Set initial values
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||
filled_idx[i] = -1;
|
||||
row_st_idx[i] = -1;
|
||||
}
|
||||
|
||||
// copy elements in the lower triangle to the output matrix
|
||||
j = 0;
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i])
|
||||
{
|
||||
IC_row[j] = row[i];
|
||||
IC_col[j] = col[i];
|
||||
IC_val[j] = val[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the begining index of each row in the matrix
|
||||
j = 1;
|
||||
row_st_idx[0] = IC_row[0];
|
||||
size_t old_row = IC_row[0];
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
if (IC_row[i] > old_row)
|
||||
{
|
||||
row_st_idx[j] = i;
|
||||
old_row = IC_row[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the first element
|
||||
IC_val[0] = clcg_Csqrt(IC_val[0]);
|
||||
diagonal[0] = IC_val[0];
|
||||
|
||||
cuComplex dia_sum;
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
// The first one is already calculated
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
// Calculate the first column if there is one
|
||||
if (IC_col[i] == 0)
|
||||
{
|
||||
IC_val[i] = cuCdivf(IC_val[i], IC_val[0]);
|
||||
dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));
|
||||
continue; // Case 1 break
|
||||
}
|
||||
|
||||
// Calculate elements in the middle of a row
|
||||
if (IC_row[i] > IC_col[i])
|
||||
{
|
||||
// Find needed values from previous elements
|
||||
f = 0;
|
||||
j = row_st_idx[IC_col[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
tmp_row[IC_col[j]] = IC_val[j];
|
||||
filled_idx[f] = IC_col[j];
|
||||
f++;
|
||||
j++;
|
||||
}
|
||||
|
||||
j = row_st_idx[IC_row[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
IC_val[i] = clcg_Cdiff(IC_val[i], cuCmulf(IC_val[j], tmp_row[IC_col[j]]));
|
||||
j++;
|
||||
}
|
||||
|
||||
IC_val[i] = cuCdivf(IC_val[i], diagonal[IC_col[i]]);
|
||||
dia_sum = clcg_Csum(dia_sum, cuCmulf(IC_val[i], IC_val[i]));
|
||||
|
||||
// reset tmp variables
|
||||
for (j = 0; j < f; j++)
|
||||
{
|
||||
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||
}
|
||||
|
||||
continue; // Case 2 break
|
||||
}
|
||||
|
||||
// We have rearched the diagonal position
|
||||
if (IC_row[i] == IC_col[i])
|
||||
{
|
||||
IC_val[i] = clcg_Csqrt(clcg_Cdiff(IC_val[i], dia_sum));
|
||||
diagonal[IC_col[i]] = IC_val[i];
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] diagonal;
|
||||
delete[] tmp_row;
|
||||
delete[] row_st_idx;
|
||||
delete[] filled_idx;
|
||||
return;
|
||||
}
|
||||
|
||||
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size,
|
||||
int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
|
||||
{
|
||||
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||
cuDoubleComplex *diagonal = new cuDoubleComplex [N];
|
||||
// A temporary row
|
||||
cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
|
||||
// index of non-zero elements in tmp_row
|
||||
int *filled_idx = new int [N];
|
||||
// Begining index of each row in the input matrix
|
||||
int *row_st_idx = new int [N];
|
||||
|
||||
size_t i, j, f;
|
||||
|
||||
// Set initial values
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||
filled_idx[i] = -1;
|
||||
row_st_idx[i] = -1;
|
||||
}
|
||||
|
||||
// copy elements in the lower triangle to the output matrix
|
||||
j = 0;
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i])
|
||||
{
|
||||
IC_row[j] = row[i];
|
||||
IC_col[j] = col[i];
|
||||
IC_val[j] = val[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the begining index of each row in the matrix
|
||||
j = 1;
|
||||
row_st_idx[0] = IC_row[0];
|
||||
size_t old_row = IC_row[0];
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
if (IC_row[i] > old_row)
|
||||
{
|
||||
row_st_idx[j] = i;
|
||||
old_row = IC_row[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the first element
|
||||
IC_val[0] = clcg_Zsqrt(IC_val[0]);
|
||||
diagonal[0] = IC_val[0];
|
||||
|
||||
cuDoubleComplex dia_sum;
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
// The first one is already calculated
|
||||
for (i = 1; i < lnz_size; i++)
|
||||
{
|
||||
// Calculate the first column if there is one
|
||||
if (IC_col[i] == 0)
|
||||
{
|
||||
IC_val[i] = cuCdiv(IC_val[i], IC_val[0]);
|
||||
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));
|
||||
continue; // Case 1 break
|
||||
}
|
||||
|
||||
// Calculate elements in the middle of a row
|
||||
if (IC_row[i] > IC_col[i])
|
||||
{
|
||||
// Find needed values from previous elements
|
||||
f = 0;
|
||||
j = row_st_idx[IC_col[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
tmp_row[IC_col[j]] = IC_val[j];
|
||||
filled_idx[f] = IC_col[j];
|
||||
f++;
|
||||
j++;
|
||||
}
|
||||
|
||||
j = row_st_idx[IC_row[i]];
|
||||
while (IC_col[j] < IC_col[i])
|
||||
{
|
||||
IC_val[i] = clcg_Zdiff(IC_val[i], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
|
||||
j++;
|
||||
}
|
||||
|
||||
IC_val[i] = cuCdiv(IC_val[i], diagonal[IC_col[i]]);
|
||||
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[i], IC_val[i]));
|
||||
|
||||
// reset tmp variables
|
||||
for (j = 0; j < f; j++)
|
||||
{
|
||||
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||
}
|
||||
|
||||
continue; // Case 2 break
|
||||
}
|
||||
|
||||
// We have rearched the diagonal position
|
||||
if (IC_row[i] == IC_col[i])
|
||||
{
|
||||
IC_val[i] = clcg_Zsqrt(clcg_Zdiff(IC_val[i], dia_sum));
|
||||
diagonal[IC_col[i]] = IC_val[i];
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] diagonal;
|
||||
delete[] tmp_row;
|
||||
delete[] row_st_idx;
|
||||
delete[] filled_idx;
|
||||
return;
|
||||
}
|
||||
|
||||
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val)
|
||||
{
|
||||
// We use this to store diagonal elements of the factorizated lower triangular matrix
|
||||
cuDoubleComplex *diagonal = new cuDoubleComplex [N];
|
||||
// A temporary row
|
||||
cuDoubleComplex *tmp_row = new cuDoubleComplex [N];
|
||||
// index of non-zero elements in tmp_row
|
||||
int *filled_idx = new int [N];
|
||||
// Begining index of each row in the input matrix
|
||||
int *row_st_idx = new int [N];
|
||||
|
||||
size_t i, j, f, l;
|
||||
|
||||
// Set initial values
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
diagonal[i].x = 0.0; diagonal[i].y = 0.0;
|
||||
tmp_row[i].x = 0.0; tmp_row[i].y = 0.0;
|
||||
filled_idx[i] = -1;
|
||||
row_st_idx[i] = -1;
|
||||
}
|
||||
|
||||
// copy elements to the output matrix
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
IC_row[i] = row[i];
|
||||
IC_col[i] = col[i];
|
||||
IC_val[i] = val[i];
|
||||
}
|
||||
|
||||
// count element number in the lower triangular part (including the diagonal) and the upper triangular part (excluding the diagonal)
|
||||
// build map from elements' cooridnate to their index in the array
|
||||
size_t order, L_nz = 0;
|
||||
std::map<size_t, size_t> index_map;
|
||||
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i]) // Count number for thr lower triangular part
|
||||
{
|
||||
L_nz++;
|
||||
}
|
||||
else // Only need to build the map for the upper triangular part
|
||||
{
|
||||
order = N*row[i] + col[i];
|
||||
index_map[order] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// We use to store element index in the lower triangle
|
||||
j = 0;
|
||||
size_t *low_idx = new size_t [L_nz];
|
||||
for (i = 0; i < nz_size; i++)
|
||||
{
|
||||
if (row[i] >= col[i])
|
||||
{
|
||||
low_idx[j] = i;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the begining index of each row in the matrix
|
||||
j = 1;
|
||||
row_st_idx[0] = IC_row[0];
|
||||
size_t old_row = IC_row[0];
|
||||
for (i = 1; i < nz_size; i++)
|
||||
{
|
||||
if (IC_row[i] > old_row)
|
||||
{
|
||||
row_st_idx[j] = i;
|
||||
old_row = IC_row[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the first element
|
||||
IC_val[0] = clcg_Zsqrt(IC_val[0]);
|
||||
diagonal[0] = IC_val[0];
|
||||
|
||||
cuDoubleComplex dia_sum;
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
// The first one is already calculated
|
||||
for (i = 1; i < L_nz; i++)
|
||||
{
|
||||
l = low_idx[i];
|
||||
|
||||
// Calculate the first column if there is one
|
||||
if (IC_col[l] == 0)
|
||||
{
|
||||
IC_val[l] = cuCdiv(IC_val[l], IC_val[0]);
|
||||
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));
|
||||
// Set value at the upper triangle
|
||||
order = IC_row[l];
|
||||
IC_val[index_map[order]] = IC_val[l];
|
||||
continue; // Case 1 break
|
||||
}
|
||||
|
||||
// Calculate elements in the middle of a row
|
||||
if (IC_row[l] > IC_col[l])
|
||||
{
|
||||
// Find needed values from previous elements
|
||||
f = 0;
|
||||
j = row_st_idx[IC_col[l]];
|
||||
while (IC_col[j] < IC_col[l])
|
||||
{
|
||||
tmp_row[IC_col[j]] = IC_val[j];
|
||||
filled_idx[f] = IC_col[j];
|
||||
f++;
|
||||
j++;
|
||||
}
|
||||
|
||||
j = row_st_idx[IC_row[l]];
|
||||
while (IC_col[j] < IC_col[l])
|
||||
{
|
||||
IC_val[l] = clcg_Zdiff(IC_val[l], cuCmul(IC_val[j], tmp_row[IC_col[j]]));
|
||||
j++;
|
||||
}
|
||||
|
||||
IC_val[l] = cuCdiv(IC_val[l], diagonal[IC_col[l]]);
|
||||
dia_sum = clcg_Zsum(dia_sum, cuCmul(IC_val[l], IC_val[l]));
|
||||
|
||||
// Set value at the upper triangle
|
||||
order = N*IC_col[l] + IC_row[l];
|
||||
IC_val[index_map[order]] = IC_val[l];
|
||||
|
||||
// reset tmp variables
|
||||
for (j = 0; j < f; j++)
|
||||
{
|
||||
tmp_row[filled_idx[j]].x = 0.0; tmp_row[filled_idx[j]].y = 0.0;
|
||||
}
|
||||
|
||||
continue; // Case 2 break
|
||||
}
|
||||
|
||||
// We have rearched the diagonal position
|
||||
if (IC_row[l] == IC_col[l])
|
||||
{
|
||||
IC_val[l] = clcg_Zsqrt(clcg_Zdiff(IC_val[l], dia_sum));
|
||||
diagonal[IC_col[l]] = IC_val[l];
|
||||
dia_sum.x = 0.0; dia_sum.y = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] diagonal;
|
||||
delete[] tmp_row;
|
||||
delete[] row_st_idx;
|
||||
delete[] filled_idx;
|
||||
delete[] low_idx;
|
||||
index_map.clear();
|
||||
return;
|
||||
}
|
92
src/lib/preconditioner_cuda.h
Normal file
92
src/lib/preconditioner_cuda.h
Normal file
@ -0,0 +1,92 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _PRECONDITIONER_CUDA_H
|
||||
#define _PRECONDITIONER_CUDA_H
|
||||
|
||||
#include "lcg_complex_cuda.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
/**
|
||||
* @brief Return the number of non-zero elements in the lower triangular part of the input matrix
|
||||
*
|
||||
* @param row[in] Row index of the input sparse matrix.
|
||||
* @param col[in] Column index of the input sparse matrix.
|
||||
* @param nz_size[in] Length of the non-zero elements.
|
||||
* @param lnz_size[out] Length of the non-zero elements in the lower triangle
|
||||
*/
|
||||
void clcg_incomplete_Cholesky_cuda_half_buffsize(const int *row, const int *col, int nz_size, int *lnz_size);
|
||||
|
||||
/**
|
||||
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||
*
|
||||
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param val Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||
*/
|
||||
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuComplex *IC_val);
|
||||
|
||||
/**
|
||||
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||
*
|
||||
* @note Only the factorized lower triangular matrix is stored in the lower part of the output matrix accordingly.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param val Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @param lnz_size Length of the non-zero elements in the lower triangle
|
||||
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||
*/
|
||||
void clcg_incomplete_Cholesky_cuda_half(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int lnz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
|
||||
|
||||
/**
|
||||
* @brief Perform the incomplete Cholesky factorization for a sparse matrix that is saved in the COO format.
|
||||
*
|
||||
* @note The factorized lower and upper triangular matrices are stored in the lower and upper triangular parts of the output matrix accordingly.
|
||||
*
|
||||
* @param row Row index of the input sparse matrix.
|
||||
* @param col Column index of the input sparse matrix.
|
||||
* @param val Non-zero values of the input sparse matrix.
|
||||
* @param N Row/Column size of the sparse matrix.
|
||||
* @param nz_size Length of the non-zero elements.
|
||||
* @param IC_row Row index of the factorized triangular sparse matrix.
|
||||
* @param IC_col Column index of the factorized triangular sparse matrix.
|
||||
* @param IC_val Non-zero values of the factorized triangular sparse matrix.
|
||||
*/
|
||||
void clcg_incomplete_Cholesky_cuda_full(const int *row, const int *col, const cuDoubleComplex *val, int N, int nz_size, int *IC_row, int *IC_col, cuDoubleComplex *IC_val);
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _PRECONDITIONER_CUDA_H
|
1047
src/lib/preconditioner_eigen.cpp
Normal file
1047
src/lib/preconditioner_eigen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
159
src/lib/preconditioner_eigen.h
Normal file
159
src/lib/preconditioner_eigen.h
Normal file
@ -0,0 +1,159 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _PRECONDITIONER_EIGEN_H
|
||||
#define _PRECONDITIONER_EIGEN_H
|
||||
|
||||
#include "complex"
|
||||
#include "Eigen/Dense"
|
||||
#include "Eigen/SparseCore"
|
||||
|
||||
|
||||
/**
|
||||
* @brief Perform the Cholesky decomposition and return the lower triangular matrix.
|
||||
*
|
||||
* @note This could serve as a direct solver.
|
||||
*
|
||||
* @param A The input matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||
* @param L The output low triangular matrix
|
||||
*/
|
||||
void lcg_Cholesky(const Eigen::MatrixXd &A, Eigen::MatrixXd &L);
|
||||
|
||||
/**
|
||||
* @brief Perform the Cholesky decomposition and return the lower triangular matrix
|
||||
*
|
||||
* @note This could serve as a direct solver.
|
||||
*
|
||||
* @param[in] A The input matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||
* @param L The output low triangular matrix
|
||||
*/
|
||||
void clcg_Cholesky(const Eigen::MatrixXcd &A, Eigen::MatrixXcd &L);
|
||||
|
||||
/**
|
||||
* @brief Calculate the inverse of a lower triangle matrix (Full rank only).
|
||||
*
|
||||
* @param L The operating lower triangle matrix
|
||||
* @param Linv The inverted lower triangle matrix
|
||||
*/
|
||||
void lcg_invert_lower_triangle(const Eigen::MatrixXd &L, Eigen::MatrixXd &Linv);
|
||||
|
||||
/**
|
||||
* @brief Calculate the inverse of an upper triangle matrix (Full rank only).
|
||||
*
|
||||
* @param U The operating upper triangle matrix
|
||||
* @param Uinv The inverted upper triangle matrix
|
||||
*/
|
||||
void lcg_invert_upper_triangle(const Eigen::MatrixXd &U, Eigen::MatrixXd &Uinv);
|
||||
|
||||
/**
|
||||
* @brief Calculate the inverse of a lower triangle matrix (Full rank only).
|
||||
*
|
||||
* @param L The operating lower triangle matrix
|
||||
* @param Linv The inverted lower triangle matrix
|
||||
*/
|
||||
void clcg_invert_lower_triangle(const Eigen::MatrixXcd &L, Eigen::MatrixXcd &Linv);
|
||||
|
||||
/**
|
||||
* @brief Calculate the inverse of an upper triangle matrix (Full rank only).
|
||||
*
|
||||
* @param U The operating upper triangle matrix
|
||||
* @param Uinv The inverted upper triangle matrix
|
||||
*/
|
||||
void clcg_invert_upper_triangle(const Eigen::MatrixXcd &U, Eigen::MatrixXcd &Uinv);
|
||||
|
||||
/**
|
||||
* @brief Calculate the incomplete Cholesky decomposition and return the lower triangular matrix
|
||||
*
|
||||
* @param[in] A The input sparse matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||
* @param L The output lower triangular matrix
|
||||
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||
*/
|
||||
void lcg_incomplete_Cholesky(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, size_t fill = 0);
|
||||
|
||||
/**
|
||||
* @brief Calculate the incomplete Cholesky decomposition and return the lower triangular matrix
|
||||
*
|
||||
* @param[in] A The input sparse matrix. Must be full rank and symmetric (aka. A = A^T)
|
||||
* @param L The output lower triangular matrix
|
||||
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||
*/
|
||||
void clcg_incomplete_Cholesky(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, size_t fill = 0);
|
||||
|
||||
/**
|
||||
* @brief Calculate the incomplete LU factorizations
|
||||
*
|
||||
* @param A The input sparse matrix. Must be full rank.
|
||||
* @param L The output lower triangular matrix.
|
||||
* @param U The output upper triangular matrix.
|
||||
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||
*/
|
||||
void lcg_incomplete_LU(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A, Eigen::SparseMatrix<double, Eigen::RowMajor> &L, Eigen::SparseMatrix<double, Eigen::RowMajor> &U, size_t fill = 0);
|
||||
|
||||
/**
|
||||
* @brief Calculate the incomplete LU factorizations
|
||||
*
|
||||
* @param A The input sparse matrix. Must be full rank.
|
||||
* @param L The output lower triangular matrix.
|
||||
* @param U The output upper triangular matrix.
|
||||
* @param fill The fill-in number of the output sparse matrix. No fill-in reduction will be processed if this variable is set to zero.
|
||||
*/
|
||||
void clcg_incomplete_LU(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &A, Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L,
|
||||
Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, size_t fill = 0);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Lx = B, in which L is a lower triangle matrix.
|
||||
*
|
||||
* @param L The input lower triangle matrix
|
||||
* @param B The object vector
|
||||
* @param X The solution vector
|
||||
*/
|
||||
void lcg_solve_lower_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &L, const Eigen::VectorXd &B, Eigen::VectorXd &X);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Ux = B, in which U is an upper triangle matrix.
|
||||
*
|
||||
* @param U The input upper triangle matrix
|
||||
* @param B The object vector
|
||||
* @param X The solution vector
|
||||
*/
|
||||
void lcg_solve_upper_triangle(const Eigen::SparseMatrix<double, Eigen::RowMajor> &U, const Eigen::VectorXd &B, Eigen::VectorXd &X);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Lx = B, in which L is a lower triangle matrix.
|
||||
*
|
||||
* @param L The input lower triangle matrix
|
||||
* @param B The object vector
|
||||
* @param X The solution vector
|
||||
*/
|
||||
void clcg_solve_lower_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &L, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
|
||||
|
||||
/**
|
||||
* @brief Solve the linear system Ux = B, in which U is an upper triangle matrix.
|
||||
*
|
||||
* @param U The input upper triangle matrix
|
||||
* @param B The object vector
|
||||
* @param X The solution vector
|
||||
*/
|
||||
void clcg_solve_upper_triangle(const Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> &U, const Eigen::VectorXcd &B, Eigen::VectorXcd &X);
|
||||
|
||||
|
||||
#endif // _PRECONDITIONER_EIGEN_H
|
311
src/lib/solver.cpp
Normal file
311
src/lib/solver.cpp
Normal file
@ -0,0 +1,311 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "solver.h"
|
||||
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "config.h"
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
/**
 * @brief Construct a solver that reports progress at every iteration, is not silenced,
 * and uses the parameters returned by lcg_default_parameters().
 */
LCG_Solver::LCG_Solver()
{
    silent_ = false;                    // progress and summary output enabled
    inter_ = 1;                         // report interval, in iterations
    param_ = lcg_default_parameters();  // library default parameter set
}
|
||||
|
||||
int LCG_Solver::Progress(const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para *param, const int n_size, const int k)
|
||||
{
|
||||
if (inter_ > 0 && k%inter_ == 0)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (converge <= param->epsilon)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void LCG_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run lcg_solver() with this object's _AxProduct callback and stored parameters.
 *
 * In silent mode no progress callback is installed and nothing is printed; a negative
 * return code is passed to lcg_error_str(ret, true). Otherwise the solve is timed
 * (omp_get_wtime() when LibLCG_OPENMP is defined, clock() otherwise), a one-line summary
 * is written to std::clog when er_throw is false, and the return code is passed to
 * lcg_error_str() when verbose is set or the code is negative.
 *
 * @param m         Solution array, passed through to lcg_solver().
 * @param b         Right-hand-side array, passed through to lcg_solver().
 * @param x_size    Length of the arrays.
 * @param solver_id Solver to use.
 * @param verbose   Pass the return code to lcg_error_str() even when it is not negative.
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing summary when true.
 */
void LCG_Solver::Minimize(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is invoked through a function pointer.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = lcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Name shown in the summary line; anything not listed is reported as "Unknown"
        const char *solver_name;
        switch (solver_id)
        {
            case LCG_CG:
                solver_name = "CG";
                break;
            case LCG_CGS:
                solver_name = "CGS";
                break;
            case LCG_BICGSTAB:
                solver_name = "BICGSTAB";
                break;
            case LCG_BICGSTAB2:
                solver_name = "BICGSTAB2";
                break;
            default:
                solver_name = "Unknown";
                break;
        }

        // End the "\r"-updated progress line before printing the summary
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_preconditioned() with this object's callbacks and report the result.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_Solver::MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_preconditioned(_AxProduct, _MxProduct, _Progress, m, b, x_size, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - start);
#else
    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *solver_name = (solver_id == LCG_PCG) ? "PCG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_constrained() with this object's callbacks and report the result.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param low       Lower bound of the solution vector
 * @param hig       Higher bound of the solution vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_Solver::MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
    const lcg_float *hig, int x_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = lcg_solver_constrained(_AxProduct, nullptr, m, b, low, hig, x_size, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = lcg_solver_constrained(_AxProduct, _Progress, m, b, low, hig, x_size, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - start);
#else
    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case LCG_PG:  solver_name = "PG-CG"; break;
            case LCG_SPG: solver_name = "SPG-CG"; break;
            default: break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
|
||||
// Build a complex solver with the library's default parameters, a report
// interval of one iteration and progress reporting enabled.
CLCG_Solver::CLCG_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
|
||||
|
||||
int CLCG_Solver::Progress(const lcg_complex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int k)
|
||||
{
|
||||
if (inter_ > 0 && k%inter_ == 0)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (converge <= param->epsilon)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CLCG_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run clcg_solver() with this object's callbacks and report the result.
 *
 * @param m         Pointer of the solution vector
 * @param b         Pointer of the targeting vector
 * @param x_size    Size of the solution vector
 * @param solver_id Solver type
 * @param verbose   Report the return status even when it is not an error
 * @param er_throw  Forwarded to clcg_error_str(); also suppresses the timing summary
 */
void CLCG_Solver::Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to clcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = clcg_solver(_AxProduct, nullptr, m, b, x_size, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
#ifdef LibLCG_OPENMP
    const double start = omp_get_wtime();
#else
    const clock_t start = clock();
#endif

    const int ret = clcg_solver(_AxProduct, _Progress, m, b, x_size, &param_, this, solver_id);

#ifdef LibLCG_OPENMP
    const lcg_float costime = 1000*(omp_get_wtime() - start);
#else
    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(clock() - start)/CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:     solver_name = "Bi-CG"; break;
            case CLCG_BICG_SYM: solver_name = "Bi-CG (symmetrically accelerated)"; break;
            case CLCG_CGS:      solver_name = "CGS"; break;
            case CLCG_TFQMR:    solver_name = "TFQMR"; break;
            default: break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Times cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
|
285
src/lib/solver.h
Normal file
285
src/lib/solver.h
Normal file
@ -0,0 +1,285 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _SOLVER_H
|
||||
#define _SOLVER_H
|
||||
|
||||
#include "lcg.h"
|
||||
#include "clcg.h"
|
||||
|
||||
/**
|
||||
* @brief Linear conjugate gradient solver class
|
||||
*/
|
||||
class LCG_Solver
|
||||
{
|
||||
protected:
|
||||
lcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
LCG_Solver();
|
||||
virtual ~LCG_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param a[in] Pointer of the multiplier
|
||||
* @param b[out] Pointer of the product
|
||||
* @param num Size of the array
|
||||
*/
|
||||
static void _AxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
|
||||
{
|
||||
return reinterpret_cast<LCG_Solver*>(instance)->AxProduct(a, b, num);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of A*x
|
||||
*
|
||||
* @param a[in] Pointer of the multiplier
|
||||
* @param b[out] Pointer of the product
|
||||
* @param num Size of the array
|
||||
*/
|
||||
virtual void AxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param a[in] Pointer of the multiplier
|
||||
* @param b[out] Pointer of the product
|
||||
* @param num Size of the array
|
||||
*/
|
||||
static void _MxProduct(void* instance, const lcg_float* a, lcg_float* b, const int num)
|
||||
{
|
||||
return reinterpret_cast<LCG_Solver*>(instance)->MxProduct(a, b, num);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param a[in] Pointer of the multiplier
|
||||
* @param b[out] Pointer of the product
|
||||
* @param num Size of the array
|
||||
*/
|
||||
virtual void MxProduct(const lcg_float* a, lcg_float* b, const int num) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para *param, const int n_size, const int k)
|
||||
{
|
||||
return reinterpret_cast<LCG_Solver*>(instance)->Progress(m, converge, param, n_size, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para *param, const int n_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_lcg_parameter(const lcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param x_size Size of the solution vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(lcg_float *m, const lcg_float *b, int x_size,
|
||||
lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconitioned minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param x_size Size of the solution vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(lcg_float *m, const lcg_float *b, int x_size,
|
||||
lcg_solver_enum solver_id = LCG_PCG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the constrained minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param low Lower bound of the solution vector
|
||||
* @param hig Higher bound of the solution vector
|
||||
* @param x_size Size of the solution vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizeConstrained(lcg_float *m, const lcg_float *b, const lcg_float* low,
|
||||
const lcg_float *hig, int x_size, lcg_solver_enum solver_id = LCG_PG,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Complex linear conjugate gradient solver class
|
||||
*/
|
||||
class CLCG_Solver
|
||||
{
|
||||
protected:
|
||||
clcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
CLCG_Solver();
|
||||
virtual ~CLCG_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param x_size Size of the array
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
static void _AxProduct(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
return reinterpret_cast<CLCG_Solver*>(instance)->AxProduct(x, prod_Ax, x_size, layout, conjugate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param x_size Size of the array
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
virtual void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax,
|
||||
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const lcg_complex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int k)
|
||||
{
|
||||
return reinterpret_cast<CLCG_Solver*>(instance)->Progress(m, converge, param, n_size, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const lcg_complex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_clcg_parameter(const clcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param x_size Size of the solution vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(lcg_complex *m, const lcg_complex *b, int x_size,
|
||||
clcg_solver_enum solver_id = CLCG_CGS, bool verbose = true,
|
||||
bool er_throw = false);
|
||||
};
|
||||
|
||||
#endif // _SOLVER_H
|
414
src/lib/solver_cuda.cu
Normal file
414
src/lib/solver_cuda.cu
Normal file
@ -0,0 +1,414 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "solver_cuda.h"
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
// Start with progress reporting enabled, a report interval of one iteration
// and the library's default algorithm parameters.
LCG_CUDA_Solver::LCG_CUDA_Solver()
{
    silent_ = false;
    inter_ = 1;
    param_ = lcg_default_parameters();
}
|
||||
|
||||
int LCG_CUDA_Solver::Progress(const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
if (inter_ > 0 && k%inter_ == 0)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (converge <= param->epsilon)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void LCG_CUDA_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_CUDA_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_CUDA_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = lcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = lcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case LCG_CG:  solver_name = "CG"; break;
            case LCG_CGS: solver_name = "CGS"; break;
            default: break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_preconditioned_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
    const int n_size, const int nz_size, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = lcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = (solver_id == LCG_PCG) ? "PCG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_constrained_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param low        Lower bound forwarded to the solver
 * @param hig        Higher bound forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void LCG_CUDA_Solver::MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
    const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id,
    bool verbose, bool er_throw)
{
    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = lcg_solver_constrained_cuda(_AxProduct, nullptr, x, b, low, hig, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = lcg_solver_constrained_cuda(_AxProduct, _Progress, x, b, low, hig, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = (solver_id == LCG_PG) ? "PG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
|
||||
// Start with progress reporting enabled, a report interval of one iteration
// and the library's default algorithm parameters.
CLCG_CUDAF_Solver::CLCG_CUDAF_Solver()
{
    silent_ = false;
    inter_ = 1;
    param_ = clcg_default_parameters();
}
|
||||
|
||||
int CLCG_CUDAF_Solver::Progress(const cuComplex* m, const float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
if (inter_ > 0 && k%inter_ == 0)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (converge <= param->epsilon)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CLCG_CUDAF_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_CUDAF_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_CUDAF_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDAF_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): the return code of a clcg_* solver is reported through
    // lcg_error_str() here, whereas CLCG_Solver uses clcg_error_str() - confirm
    // which one is intended.

    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const float costime = static_cast<float>(1000.0*(end - start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:     solver_name = "BI-CG"; break;
            case CLCG_BICG_SYM: solver_name = "BI-CG (symmetrically accelerated)"; break;
            default: break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_preconditioned_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDAF_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): the return code of a clcg_* solver is reported through
    // lcg_error_str() here, whereas CLCG_Solver uses clcg_error_str() - confirm
    // which one is intended.

    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const float costime = static_cast<float>(1000.0*(end - start)/CLOCKS_PER_SEC);

    if (!er_throw)
    {
        const char *solver_name = (solver_id == CLCG_PCG) ? "PCG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
|
||||
// Start with progress reporting enabled, a report interval of one iteration
// and the library's default algorithm parameters.
CLCG_CUDA_Solver::CLCG_CUDA_Solver()
{
    silent_ = false;
    inter_ = 1;
    param_ = clcg_default_parameters();
}
|
||||
|
||||
int CLCG_CUDA_Solver::Progress(const cuDoubleComplex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
if (inter_ > 0 && k%inter_ == 0)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (converge <= param->epsilon)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CLCG_CUDA_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_CUDA_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_CUDA_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDA_Solver::Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): the return code of a clcg_* solver is reported through
    // lcg_error_str() here, whereas CLCG_Solver uses clcg_error_str() - confirm
    // which one is intended.

    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = clcg_solver_cuda(_AxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = clcg_solver_cuda(_AxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:     solver_name = "BI-CG"; break;
            case CLCG_BICG_SYM: solver_name = "BI-CG (symmetrically accelerated)"; break;
            default: break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_preconditioned_cuda() with this object's callbacks and report the result.
 *
 * @param cub_handle cuBLAS handle forwarded to the solver
 * @param cus_handle cuSPARSE handle forwarded to the solver
 * @param x          Solution vector forwarded to the solver
 * @param b          Targeting vector forwarded to the solver
 * @param n_size     Forwarded to the solver
 * @param nz_size    Forwarded to the solver
 * @param solver_id  Solver type
 * @param verbose    Report the return status even when it is not an error
 * @param er_throw   Forwarded to lcg_error_str(); also suppresses the timing summary
 */
void CLCG_CUDA_Solver::MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
    const int n_size, const int nz_size, clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    // NOTE(review): the return code of a clcg_* solver is reported through
    // lcg_error_str() here, whereas CLCG_Solver uses clcg_error_str() - confirm
    // which one is intended.

    // Silent mode: no progress callback, no timing output. Negative return
    // codes are passed to lcg_error_str() with true in place of er_throw.
    if (silent_)
    {
        int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, nullptr, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note that default arguments cannot be omitted when the
    // solver function is called through a function pointer.
    // clock() reports processor time used by the program, not wall-clock time.
    const clock_t start = clock();
    const int ret = clcg_solver_preconditioned_cuda(_AxProduct, _MxProduct, _Progress, x, b, n_size, nz_size, &param_, this, cub_handle, cus_handle, solver_id);
    const clock_t end = clock();

    // Scale in floating point so the tick difference cannot overflow clock_t.
    const lcg_float costime = 1000.0*(end - start)/CLOCKS_PER_SEC;

    if (!er_throw)
    {
        const char *solver_name = (solver_id == CLCG_PCG) ? "PCG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Verbose runs always report the status; otherwise only errors are reported.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
545
src/lib/solver_cuda.h
Normal file
545
src/lib/solver_cuda.h
Normal file
@ -0,0 +1,545 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _SOLVER_CUDA_H
|
||||
#define _SOLVER_CUDA_H
|
||||
|
||||
#include "lcg_cuda.h"
|
||||
#include "clcg_cuda.h"
|
||||
#include "clcg_cudaf.h"
|
||||
|
||||
#ifdef LibLCG_CUDA
|
||||
|
||||
/**
|
||||
* @brief Linear conjugate gradient solver class
|
||||
*/
|
||||
class LCG_CUDA_Solver
|
||||
{
|
||||
protected:
|
||||
lcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
LCG_CUDA_Solver();
|
||||
virtual ~LCG_CUDA_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
|
||||
{
|
||||
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of A*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size)
|
||||
{
|
||||
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx, const int n_size, const int nz_size) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
return reinterpret_cast<LCG_CUDA_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const lcg_float* m, const lcg_float converge,
|
||||
const lcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_lcg_parameter(const lcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
|
||||
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconditioned minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, lcg_float *b,
|
||||
const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_CG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the constrained minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param low Lower bound of the solution vector
|
||||
* @param hig Higher bound of the solution vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizeConstrained(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, lcg_float *x, const lcg_float *b,
|
||||
const lcg_float* low, const lcg_float *hig, const int n_size, const int nz_size, lcg_solver_enum solver_id = LCG_PG,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Complex linear conjugate gradient solver class
|
||||
*/
|
||||
class CLCG_CUDAF_Solver
|
||||
{
|
||||
protected:
|
||||
clcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
CLCG_CUDAF_Solver();
|
||||
virtual ~CLCG_CUDAF_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of A*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const cuComplex* m, const float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDAF_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const cuComplex* m, const float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_clcg_parameter(const clcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
|
||||
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconditioned minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuComplex *x, cuComplex *b,
|
||||
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Complex linear conjugate gradient solver class
|
||||
*/
|
||||
class CLCG_CUDA_Solver
|
||||
{
|
||||
protected:
|
||||
clcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
CLCG_CUDA_Solver();
|
||||
virtual ~CLCG_CUDA_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _AxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->AxProduct(cub_handle, cus_handle, x, prod_Ax, n_size, nz_size, oper_t);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of A*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
static void _MxProduct(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->MxProduct(cub_handle, cus_handle, x, prod_Mx, n_size, nz_size, oper_t);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param oper_t Cusparse operator. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
*/
|
||||
virtual void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
|
||||
cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Mx,
|
||||
const int n_size, const int nz_size, cusparseOperation_t oper_t) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k)
|
||||
{
|
||||
return reinterpret_cast<CLCG_CUDA_Solver*>(instance)->Progress(m, converge, param, n_size, nz_size, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param n_size Size of the solution
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const cuDoubleComplex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int nz_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_clcg_parameter(const clcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
|
||||
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_BICG, bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconditioned minimizing process
|
||||
*
|
||||
* @param cub_handle Handler of the CuBLAS library
|
||||
* @param cus_handle Handler of the CuSparse library
|
||||
* @param x Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param n_size Size of the solution vector
|
||||
* @param nz_size Non-zero size of the sparse kernel matrix. This parameter is not need by the algorithm. It is passed for CUDA usages
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cuDoubleComplex *x, cuDoubleComplex *b,
|
||||
const int n_size, const int nz_size, clcg_solver_enum solver_id = CLCG_PCG, bool verbose = true, bool er_throw = false);
|
||||
};
|
||||
|
||||
#endif // LibLCG_CUDA
|
||||
|
||||
#endif // _SOLVER_CUDA_H
|
365
src/lib/solver_eigen.cpp
Normal file
365
src/lib/solver_eigen.cpp
Normal file
@ -0,0 +1,365 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "solver_eigen.h"
|
||||
|
||||
#include "cmath"
|
||||
#include "ctime"
|
||||
#include "iostream"
|
||||
|
||||
#include "config.h"
|
||||
#ifdef LibLCG_OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
/*
 * Construct a solver with the library's default parameters, a report
 * interval of one iteration, and progress reporting enabled.
 */
LCG_EIGEN_Solver::LCG_EIGEN_Solver()
{
	silent_ = false;
	inter_ = 1;
	param_ = lcg_default_parameters();
}
|
||||
|
||||
/*
 * Default progress monitor.
 *
 * Writes one "\r"-prefixed status line to std::clog when the iteration
 * counter k is a multiple of the report interval (and the interval is
 * non-zero), or otherwise when converge has reached param->epsilon.
 * The solution vector m is not inspected. Always returns 0.
 */
int LCG_EIGEN_Solver::Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
	const int k)
{
	const bool on_interval = (inter_ > 0) && (k%inter_ == 0);

	// Short-circuit keeps param untouched whenever the interval test already fired.
	if (on_interval || converge <= param->epsilon)
	{
		std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
	}
	return 0;
}
|
||||
|
||||
void LCG_EIGEN_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_EIGEN_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
void LCG_EIGEN_Solver::set_lcg_parameter(const lcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Run the un-preconditioned solver selected by solver_id.
 *
 * m          solution vector, forwarded to lcg_solver_eigen
 * b          targeting vector, forwarded to lcg_solver_eigen
 * solver_id  solver type; also selects the name shown in the timing report
 * verbose    when true, the return status is reported even if it is not an error
 * er_throw   forwarded to lcg_error_str; when true the timing report is suppressed
 *
 * In silent mode no progress callback is installed, nothing is timed, and a
 * negative return code is passed to lcg_error_str with er_throw forced to true.
 */
void LCG_EIGEN_Solver::Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b,
	lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
	if (silent_)
	{
		int ret = lcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
		if (ret < 0) lcg_error_str(ret, true);
		return;
	}

	// Solve with the library routine. Default arguments cannot be omitted
	// when the solver is invoked through function pointers.
	// Only the timer differs between builds; the solver call is written once
	// so the two configurations cannot drift apart.
#ifdef LibLCG_OPENMP
	double start = omp_get_wtime();
#else
	clock_t start = clock();
#endif

	int ret = lcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);

	// Elapsed time in milliseconds.
#ifdef LibLCG_OPENMP
	double end = omp_get_wtime();
	lcg_float costime = 1000*(end-start);
#else
	clock_t end = clock();
	lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

	if (!er_throw)
	{
		const char *solver_name = "Unknown";
		switch (solver_id)
		{
			case LCG_CG:        solver_name = "CG"; break;
			case LCG_CGS:       solver_name = "CGS"; break;
			case LCG_BICGSTAB:  solver_name = "BICGSTAB"; break;
			case LCG_BICGSTAB2: solver_name = "BICGSTAB2"; break;
			default: break;
		}

		// The leading newline terminates the "\r"-style progress line.
		std::clog << std::endl;
		std::clog << "Solver: " << solver_name << ". Time cost: " << costime << " ms" << std::endl;
	}

	// Report always when verbose, otherwise only on failure.
	if (verbose || ret < 0) lcg_error_str(ret, er_throw);
	return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_preconditioned_eigen() with this object's A*x and M^-1*x products.
 *
 * In silent mode the solver runs without a progress callback, nothing is
 * timed or printed, and a negative return code is always passed to
 * lcg_error_str() with throwing enabled.
 *
 * @param m         Solution vector (updated in place by the solver)
 * @param b         Target vector
 * @param solver_id Solver type; only used here to label the timing report
 * @param verbose   If true the status message is reported even when the return code is non-negative
 * @param er_throw  Forwarded to lcg_error_str(); when false a timing line is also written to std::clog
 */
void LCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b,
    lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note: default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    // Both build configurations must call the preconditioned solver.
    int ret = lcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *label = (solver_id == LCG_PCG) ? "PCG" : "Unknown";

        std::clog << std::endl;
        std::clog << "Solver: " << label << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Failures are always reported; successes only on request.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run lcg_solver_constrained_eigen() with this object's A*x product and parameters.
 *
 * In silent mode the solver runs without a progress callback, nothing is
 * timed or printed, and a negative return code is always passed to
 * lcg_error_str() with throwing enabled.
 *
 * @param m         Solution vector (updated in place by the solver)
 * @param B         Target vector
 * @param low       Lower bound passed to the solver
 * @param hig       Upper bound passed to the solver
 * @param solver_id Solver type; only used here to label the timing report
 * @param verbose   If true the status message is reported even when the return code is non-negative
 * @param er_throw  Forwarded to lcg_error_str(); when false a timing line is also written to std::clog
 */
void LCG_EIGEN_Solver::MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
    const Eigen::VectorXd &hig, lcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = lcg_solver_constrained_eigen(_AxProduct, nullptr, m, B, low, hig, &param_, this, solver_id);
        if (ret < 0) lcg_error_str(ret, true);
        return;
    }

    // Solve with lcg. Note: default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = lcg_solver_constrained_eigen(_AxProduct, _Progress, m, B, low, hig, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = lcg_solver_constrained_eigen(_AxProduct, _Progress, m, B, low, hig, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *label;
        switch (solver_id)
        {
            case LCG_PG:  label = "PG-CG"; break;
            case LCG_SPG: label = "SPG-CG"; break;
            default:      label = "Unknown"; break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << label << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Failures are always reported; successes only on request.
    if (verbose || ret < 0) lcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
|
||||
/// Start from the library defaults, report every iteration, and keep output enabled.
CLCG_EIGEN_Solver::CLCG_EIGEN_Solver()
    : param_(clcg_default_parameters()), inter_(1), silent_(false)
{
}
|
||||
|
||||
/**
 * @brief Default progress monitor: overwrite one std::clog line with the iteration status.
 *
 * A line is written when k is a multiple of the report interval (if the
 * interval is non-zero), or otherwise when converge has dropped to
 * param->epsilon or below. The solution vector m is not inspected.
 *
 * @return Always 0.
 */
int CLCG_EIGEN_Solver::Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
    const int k)
{
    const bool on_interval = (inter_ > 0 && (k%inter_) == 0);

    // Short-circuit keeps param untouched whenever the interval already matched.
    if (on_interval || converge <= param->epsilon)
    {
        std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
    }
    return 0;
}
|
||||
|
||||
void CLCG_EIGEN_Solver::silent()
|
||||
{
|
||||
silent_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_EIGEN_Solver::set_clcg_parameter(const clcg_para &in_param)
|
||||
{
|
||||
param_ = in_param;
|
||||
return;
|
||||
}
|
||||
|
||||
void CLCG_EIGEN_Solver::set_report_interval(unsigned int inter)
|
||||
{
|
||||
inter_ = inter;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_eigen() with this object's A*x product and parameters.
 *
 * In silent mode the solver runs without a progress callback, nothing is
 * timed or printed, and a negative return code is always passed to
 * clcg_error_str() with throwing enabled.
 *
 * @param m         Solution vector (updated in place by the solver)
 * @param b         Target vector
 * @param solver_id Solver type; only used here to label the timing report
 * @param verbose   If true the status message is reported even when the return code is non-negative
 * @param er_throw  Forwarded to clcg_error_str(); when false a timing line is also written to std::clog
 */
void CLCG_EIGEN_Solver::Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_eigen(_AxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note: default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = clcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = clcg_solver_eigen(_AxProduct, _Progress, m, b, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        // Pick the label first so every solver shares one report format.
        const char *label;
        switch (solver_id)
        {
            case CLCG_BICG:     label = "BI-CG"; break;
            case CLCG_BICG_SYM: label = "BI-CG (symmetrically accelerated)"; break;
            case CLCG_CGS:      label = "CGS"; break;
            case CLCG_TFQMR:    label = "TFQMR"; break;
            case CLCG_PCG:      label = "PCG"; break;
            case CLCG_PBICG:    label = "PBICG"; break;
            default:            label = "Unknown"; break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << label << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Failures are always reported; successes only on request.
    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
|
||||
|
||||
/**
 * @brief Run clcg_solver_preconditioned_eigen() with this object's A*x and M^-1*x products.
 *
 * In silent mode the solver runs without a progress callback, nothing is
 * timed or printed, and a negative return code is always passed to
 * clcg_error_str() with throwing enabled.
 *
 * @param m         Solution vector (updated in place by the solver)
 * @param b         Target vector
 * @param solver_id Solver type; only used here to label the timing report
 * @param verbose   If true the status message is reported even when the return code is non-negative
 * @param er_throw  Forwarded to clcg_error_str(); when false a timing line is also written to std::clog
 */
void CLCG_EIGEN_Solver::MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b,
    clcg_solver_enum solver_id, bool verbose, bool er_throw)
{
    if (silent_)
    {
        int ret = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, nullptr, m, b, &param_, this, solver_id);
        if (ret < 0) clcg_error_str(ret, true);
        return;
    }

    // Solve with clcg. Note: default arguments cannot be omitted when the
    // solver function is called through function pointers.
#ifdef LibLCG_OPENMP
    double start = omp_get_wtime();
    int ret = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
    double end = omp_get_wtime();

    lcg_float costime = 1000*(end-start);
#else
    clock_t start = clock();
    int ret = clcg_solver_preconditioned_eigen(_AxProduct, _MxProduct, _Progress, m, b, &param_, this, solver_id);
    clock_t end = clock();

    lcg_float costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *label;
        switch (solver_id)
        {
            case CLCG_PCG:   label = "PCG"; break;
            case CLCG_PBICG: label = "PBICG"; break;
            default:         label = "Unknown"; break;
        }

        std::clog << std::endl;
        std::clog << "Solver: " << label << ". Time cost: " << costime << " ms" << std::endl;
    }

    // Failures are always reported; successes only on request.
    if (verbose || ret < 0) clcg_error_str(ret, er_throw);
    return;
}
|
308
src/lib/solver_eigen.h
Normal file
308
src/lib/solver_eigen.h
Normal file
@ -0,0 +1,308 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _SOLVER_EIGEN_H
|
||||
#define _SOLVER_EIGEN_H
|
||||
|
||||
#include "lcg_eigen.h"
|
||||
#include "clcg_eigen.h"
|
||||
|
||||
/**
|
||||
* @brief Linear conjugate gradient solver class
|
||||
*/
|
||||
class LCG_EIGEN_Solver
|
||||
{
|
||||
protected:
|
||||
lcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
LCG_EIGEN_Solver();
|
||||
virtual ~LCG_EIGEN_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
*/
|
||||
static void _AxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
|
||||
{
|
||||
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->AxProduct(x, prod_Ax);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of A*x
|
||||
*
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
*/
|
||||
virtual void AxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
*/
|
||||
static void _MxProduct(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
|
||||
{
|
||||
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->MxProduct(x, prod_Mx);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
*/
|
||||
virtual void MxProduct(const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||
const lcg_para *param, const int k)
|
||||
{
|
||||
return reinterpret_cast<LCG_EIGEN_Solver*>(instance)->Progress(m, converge, param, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const Eigen::VectorXd *m, const lcg_float converge, const lcg_para *param,
|
||||
const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the parameters of the algorithms
|
||||
*
|
||||
* @param in_param the input parameters
|
||||
*/
|
||||
void set_lcg_parameter(const lcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_CG,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconitioned minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(Eigen::VectorXd &m, const Eigen::VectorXd &b, lcg_solver_enum solver_id = LCG_PCG,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the constrained minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param low Lower bound of the solution vector
|
||||
* @param hig Higher bound of the solution vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizeConstrained(Eigen::VectorXd &m, const Eigen::VectorXd &B, const Eigen::VectorXd &low,
|
||||
const Eigen::VectorXd &hig, lcg_solver_enum solver_id = LCG_PG, bool verbose = true,
|
||||
bool er_throw = false);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Complex linear conjugate gradient solver class
|
||||
*/
|
||||
class CLCG_EIGEN_Solver
|
||||
{
|
||||
protected:
|
||||
clcg_para param_;
|
||||
unsigned int inter_;
|
||||
bool silent_;
|
||||
|
||||
public:
|
||||
CLCG_EIGEN_Solver();
|
||||
virtual ~CLCG_EIGEN_Solver(){}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
static void _AxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
return reinterpret_cast<CLCG_EIGEN_Solver*>(instance)->AxProduct(x, prod_Ax, layout, conjugate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of A*x
|
||||
*
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Ax[out] Pointer of the product
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
virtual void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
static void _MxProduct(void* instance, const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
return reinterpret_cast<CLCG_EIGEN_Solver*>(instance)->MxProduct(x, prod_Mx, layout, conjugate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the product of M^-1*x
|
||||
*
|
||||
* @param x[in] Pointer of the multiplier
|
||||
* @param prod_Mx[out] Pointer of the product
|
||||
* @param layout Layout of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
* @param conjugate Welther to use conjugate of the kernel matrix. This is passed for the clcg_matvec() function
|
||||
*/
|
||||
virtual void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate) = 0;
|
||||
|
||||
/**
|
||||
* @brief Interface of the virtual function of the process monitoring
|
||||
*
|
||||
* @param instance User data sent to identify the function address
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
static int _Progress(void* instance, const Eigen::VectorXcd *m, const lcg_float converge,
|
||||
const clcg_para *param, const int k)
|
||||
{
|
||||
return reinterpret_cast<CLCG_EIGEN_Solver*>(instance)->Progress(m, converge, param, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Virtual function of the process monitoring
|
||||
*
|
||||
* @param m Pointer of the current solution
|
||||
* @param converge Current value of the convergence
|
||||
* @param param Pointer of the parameters used in the algorithms
|
||||
* @param k Current iteration times
|
||||
* @return int Status of the process
|
||||
*/
|
||||
virtual int Progress(const Eigen::VectorXcd *m, const lcg_float converge, const clcg_para *param,
|
||||
const int k);
|
||||
|
||||
/**
|
||||
* @brief Do not report any processes
|
||||
*/
|
||||
void silent();
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_report_interval(unsigned int inter);
|
||||
|
||||
/**
|
||||
* @brief Set the interval to run the process monitoring function
|
||||
*
|
||||
* @param inter the interval
|
||||
*/
|
||||
void set_clcg_parameter(const clcg_para &in_param);
|
||||
|
||||
/**
|
||||
* @brief Run the minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void Minimize(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_CGS,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
|
||||
/**
|
||||
* @brief Run the preconitioned minimizing process
|
||||
*
|
||||
* @param m Pointer of the solution vector
|
||||
* @param b Pointer of the targeting vector
|
||||
* @param solver_id Solver type
|
||||
* @param verbose Report more information of the full process
|
||||
* @param er_throw Instead of showing error messages on screen, throw them out using std::exception
|
||||
*/
|
||||
void MinimizePreconditioned(Eigen::VectorXcd &m, const Eigen::VectorXcd &b, clcg_solver_enum solver_id = CLCG_PBICG,
|
||||
bool verbose = true, bool er_throw = false);
|
||||
};
|
||||
|
||||
#endif // _SOLVER_EIGEN_H
|
253
src/lib/util.cpp
Normal file
253
src/lib/util.cpp
Normal file
@ -0,0 +1,253 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "iostream"
|
||||
#include "exception"
|
||||
#include "stdexcept"
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#if defined _WINDOWS || __WIN32__
|
||||
#include "windows.h"
|
||||
#endif
|
||||
|
||||
/// Hand out a copy of the library's default parameter set (defparam).
lcg_para lcg_default_parameters()
{
    return defparam;
}
|
||||
|
||||
/**
 * @brief Translate a solver name into its lcg_solver_enum value.
 *
 * @param slr_char Exact (case-sensitive) enumerator name, e.g. "LCG_CG"
 * @return The matching enumerator
 * @throws std::invalid_argument if the name matches no known solver
 */
lcg_solver_enum lcg_select_solver(std::string slr_char)
{
    if (slr_char == "LCG_CG") return LCG_CG;
    if (slr_char == "LCG_PCG") return LCG_PCG;
    if (slr_char == "LCG_CGS") return LCG_CGS;
    if (slr_char == "LCG_BICGSTAB") return LCG_BICGSTAB;
    if (slr_char == "LCG_BICGSTAB2") return LCG_BICGSTAB2;
    if (slr_char == "LCG_PG") return LCG_PG;
    if (slr_char == "LCG_SPG") return LCG_SPG;

    throw std::invalid_argument("Invalid solver type.");
}
|
||||
|
||||
void lcg_error_str(int er_index, bool er_throw)
|
||||
{
|
||||
#if defined _WINDOWS || __WIN32__
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
|
||||
std::cerr << "Success! ";
|
||||
}
|
||||
else
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
|
||||
std::cerr << "Fail! ";
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
std::cerr << "\033[1m\033[32mSuccess! ";
|
||||
else
|
||||
std::cerr << "\033[1m\033[31mFail! ";
|
||||
}
|
||||
#endif
|
||||
|
||||
std::string err_str;
|
||||
switch (er_index)
|
||||
{
|
||||
case LCG_SUCCESS:
|
||||
err_str = "Iteration reached convergence."; break;
|
||||
case LCG_STOP:
|
||||
err_str = "Iteration is stopped by the progress evaluation function."; break;
|
||||
case LCG_ALREADY_OPTIMIZIED:
|
||||
err_str = "The variables are already optimized."; break;
|
||||
case LCG_UNKNOWN_ERROR:
|
||||
err_str = "Unknown error."; break;
|
||||
case LCG_INVILAD_VARIABLE_SIZE:
|
||||
err_str = "The size of the variables is negative."; break;
|
||||
case LCG_INVILAD_MAX_ITERATIONS:
|
||||
err_str = "The maximal iteration times can't be negative."; break;
|
||||
case LCG_INVILAD_EPSILON:
|
||||
err_str = "The epsilon is not in the range (0, 1)."; break;
|
||||
case LCG_INVILAD_RESTART_EPSILON:
|
||||
err_str = "The restart threshold can't be negative."; break;
|
||||
case LCG_REACHED_MAX_ITERATIONS:
|
||||
err_str = "The maximal iteration has been reached."; break;
|
||||
case LCG_NULL_PRECONDITION_MATRIX:
|
||||
err_str = "The precondition matrix can't be null."; break;
|
||||
case LCG_NAN_VALUE:
|
||||
err_str = "The model values are NaN."; break;
|
||||
case LCG_INVALID_POINTER:
|
||||
err_str = "Invalid pointer."; break;
|
||||
case LCG_INVALID_LAMBDA:
|
||||
err_str = "Invalid value for lambda."; break;
|
||||
case LCG_INVALID_SIGMA:
|
||||
err_str = "Invalid value for sigma."; break;
|
||||
case LCG_INVALID_BETA:
|
||||
err_str = "Invalid value for beta."; break;
|
||||
case LCG_INVALID_MAXIM:
|
||||
err_str = "Invalid value for maxi_m."; break;
|
||||
case LCG_SIZE_NOT_MATCH:
|
||||
err_str = "The sizes of solution and target do not match."; break;
|
||||
default:
|
||||
err_str = "Unknown error."; break;
|
||||
}
|
||||
|
||||
if (er_throw && er_index < 0) throw std::runtime_error(err_str.c_str());
|
||||
else std::cerr << err_str;
|
||||
|
||||
#if defined _WINDOWS || __WIN32__
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
std::cerr << "\033[0m" << std::endl;
|
||||
else
|
||||
std::cerr << "\033[0m" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/// Hand out a copy of the library's default complex-solver parameter set (defparam2).
clcg_para clcg_default_parameters()
{
    return defparam2;
}
|
||||
|
||||
/**
 * @brief Translate a solver name into its clcg_solver_enum value.
 *
 * The preconditioned solvers CLCG_PCG and CLCG_PBICG are accepted as well,
 * so every solver the CLCG_EIGEN_Solver class reports on can be selected by
 * name.
 *
 * @param slr_char Exact (case-sensitive) enumerator name, e.g. "CLCG_CGS"
 * @return The matching enumerator
 * @throws std::invalid_argument if the name matches no known solver
 */
clcg_solver_enum clcg_select_solver(std::string slr_char)
{
    clcg_solver_enum slr_id;
    if (slr_char == "CLCG_BICG") slr_id = CLCG_BICG;
    else if (slr_char == "CLCG_BICG_SYM") slr_id = CLCG_BICG_SYM;
    else if (slr_char == "CLCG_CGS") slr_id = CLCG_CGS;
    else if (slr_char == "CLCG_TFQMR") slr_id = CLCG_TFQMR;
    else if (slr_char == "CLCG_PCG") slr_id = CLCG_PCG;
    else if (slr_char == "CLCG_PBICG") slr_id = CLCG_PBICG;
    else throw std::invalid_argument("Invalid solver type.");
    return slr_id;
}
|
||||
|
||||
void clcg_error_str(int er_index, bool er_throw)
|
||||
{
|
||||
#if defined _WINDOWS || __WIN32__
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
|
||||
std::cerr << "Success! ";
|
||||
}
|
||||
else
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
|
||||
std::cerr << "Fail! ";
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
std::cerr << "\033[1m\033[32mSuccess! ";
|
||||
else
|
||||
std::cerr << "\033[1m\033[31mFail! ";
|
||||
}
|
||||
#endif
|
||||
|
||||
std::string err_str;
|
||||
switch (er_index)
|
||||
{
|
||||
case CLCG_SUCCESS:
|
||||
err_str = "Iteration reached convergence."; break;
|
||||
case CLCG_STOP:
|
||||
err_str = "Iteration is stopped by the progress evaluation function."; break;
|
||||
case CLCG_ALREADY_OPTIMIZIED:
|
||||
err_str = "The variables are already optimized."; break;
|
||||
case CLCG_UNKNOWN_ERROR:
|
||||
err_str = "Unknown error."; break;
|
||||
case CLCG_INVILAD_VARIABLE_SIZE:
|
||||
err_str = "The size of the variables is negative."; break;
|
||||
case CLCG_INVILAD_MAX_ITERATIONS:
|
||||
err_str = "The maximal iteration times is negative."; break;
|
||||
case CLCG_INVILAD_EPSILON:
|
||||
err_str = "The epsilon is not in the range (0, 1)."; break;
|
||||
case CLCG_REACHED_MAX_ITERATIONS:
|
||||
err_str = "The maximal iteration has been reached."; break;
|
||||
case CLCG_NAN_VALUE:
|
||||
err_str = "The model values are NaN."; break;
|
||||
case CLCG_INVALID_POINTER:
|
||||
err_str = "Invalid pointer."; break;
|
||||
case CLCG_SIZE_NOT_MATCH:
|
||||
err_str = "The sizes of the solution and target do not match."; break;
|
||||
case CLCG_UNKNOWN_SOLVER:
|
||||
err_str = "Unknown solver."; break;
|
||||
default:
|
||||
err_str = "Unknown error."; break;
|
||||
}
|
||||
|
||||
if (er_throw && er_index < 0) throw std::runtime_error(err_str.c_str());
|
||||
else std::cerr << err_str;
|
||||
|
||||
#if defined _WINDOWS || __WIN32__
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (!er_throw)
|
||||
{
|
||||
if (er_index >= 0)
|
||||
std::cerr << "\033[0m" << std::endl;
|
||||
else
|
||||
std::cerr << "\033[0m" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
308
src/lib/util.h
Normal file
308
src/lib/util.h
Normal file
@ -0,0 +1,308 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#ifndef _LCG_UTIL_H
|
||||
#define _LCG_UTIL_H
|
||||
|
||||
#include "string"
|
||||
#include "algebra.h"
|
||||
|
||||
/**
 * @brief Types of method that could be recognized by the lcg_solver() function.
 */
enum lcg_solver_enum
{
    LCG_CG,        ///< Conjugate gradient method.
    LCG_PCG,       ///< Preconditioned conjugate gradient method.
    LCG_CGS,       ///< Conjugate gradient squared method.
    LCG_BICGSTAB,  ///< Biconjugate gradient method.
    LCG_BICGSTAB2, ///< Biconjugate gradient method with restart.
    /// Conjugate gradient method with projected gradient for inequality constraints.
    /// This algorithm comes without non-monotonic linear search for the step length.
    LCG_PG,
    /// Conjugate gradient method with spectral projected gradient for inequality constraints.
    /// This algorithm comes with non-monotonic linear search for the step length.
    LCG_SPG,
};
|
||||
|
||||
/**
 * @brief Return value of the lcg_solver() function.
 *
 * Non-negative values report how the solver finished; negative values are errors.
 */
enum lcg_return_enum
{
    /** The solver function terminated successfully. */
    LCG_SUCCESS = 0,
    /** The iteration reached convergence. */
    LCG_CONVERGENCE = 0,
    /** The iteration is stopped by the monitoring function. */
    LCG_STOP,
    /** The initial solution is already optimized. */
    LCG_ALREADY_OPTIMIZIED,

    // A negative number means an error
    /** Unknown error. */
    LCG_UNKNOWN_ERROR = -1024,
    /** The variable size is negative */
    LCG_INVILAD_VARIABLE_SIZE,
    /** The maximal iteration times is negative. */
    LCG_INVILAD_MAX_ITERATIONS,
    /** The epsilon is negative. */
    LCG_INVILAD_EPSILON,
    /** The restart epsilon is negative. */
    LCG_INVILAD_RESTART_EPSILON,
    /** Iteration reached maximal limit. */
    LCG_REACHED_MAX_ITERATIONS,
    /** Null precondition matrix. */
    LCG_NULL_PRECONDITION_MATRIX,
    /** Nan value. */
    LCG_NAN_VALUE,
    /** Invalid pointer. */
    LCG_INVALID_POINTER,
    /** Invalid range for lambda. */
    LCG_INVALID_LAMBDA,
    /** Invalid range for sigma. */
    LCG_INVALID_SIGMA,
    /** Invalid range for beta. */
    LCG_INVALID_BETA,
    /** Invalid range for maxi_m. */
    LCG_INVALID_MAXIM,
    /** Sizes of m and B do not match */
    LCG_SIZE_NOT_MATCH,
};
|
||||
|
||||
/**
|
||||
* @brief Parameters of the conjugate gradient methods.
|
||||
*/
|
||||
struct lcg_para
|
||||
{
|
||||
/**
|
||||
* Maximal iteration times. The process will continue till the convergence is met
|
||||
* if this option is set to zero (default).
|
||||
*/
|
||||
int max_iterations;
|
||||
|
||||
/**
|
||||
* Epsilon for convergence test.
|
||||
* This parameter determines the accuracy with which the solution is to be
|
||||
* found. A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or
|
||||
* sqrt(||g||)/N <= epsilon for the lcg_solver() function, where ||.|| denotes
|
||||
* the Euclidean (L2) norm. The default value of epsilon is 1e-8.
|
||||
*/
|
||||
lcg_float epsilon;
|
||||
|
||||
/**
|
||||
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
|
||||
* The default value is false which means the gradient based evaluating method is used.
|
||||
* The AMD based method will be used if this variable is set to true. This parameter is only
|
||||
* applied to the non-constrained methods.
|
||||
*/
|
||||
int abs_diff;
|
||||
|
||||
/**
|
||||
* Restart epsilon for the LCG_BICGSTAB2 algorithm. The default value is 1e-6
|
||||
*/
|
||||
lcg_float restart_epsilon;
|
||||
|
||||
/**
|
||||
* Initial step length for the project gradient method. The default is 1.0
|
||||
*/
|
||||
lcg_float step;
|
||||
|
||||
/**
|
||||
* multiplier for updating solutions with the spectral projected gradient method. The range of
|
||||
* this variable is (0, 1). The default is given as 0.95
|
||||
*/
|
||||
lcg_float sigma;
|
||||
|
||||
/**
|
||||
* descending ratio for conducting the non-monotonic linear search. The range of
|
||||
* this variable is (0, 1). The default is given as 0.9
|
||||
*/
|
||||
lcg_float beta;
|
||||
|
||||
/**
|
||||
* The maximal record times of the objective values for the SPG method. The method use the
|
||||
* objective values from the most recent maxi_m times to preform the non-monotonic linear search.
|
||||
* The default value is 10.
|
||||
*/
|
||||
int maxi_m;
|
||||
};
|
||||
|
||||
/**
|
||||
* Default parameter for conjugate gradient methods
|
||||
*/
|
||||
static const lcg_para defparam = {0, 1e-8, 0, 1e-6, 1.0, 0.95, 0.9, 10};
|
||||
|
||||
/**
|
||||
* @brief Return a lcg_para type instance with default values.
|
||||
*
|
||||
* Users can use this function to get default parameters' value for the conjugate gradient methods.
|
||||
*
|
||||
* @return A lcg_para type instance.
|
||||
*/
|
||||
lcg_para lcg_default_parameters();
|
||||
|
||||
/**
|
||||
* @brief Select a type of solver according to the name
|
||||
*
|
||||
* @param[in] slr_char Name of the solver
|
||||
*
|
||||
* @return The lcg solver enum.
|
||||
*/
|
||||
lcg_solver_enum lcg_select_solver(std::string slr_char);
|
||||
|
||||
/**
 * @brief Display or throw out a string explanation of a lcg_solver() return value.
 *
 * The function itself returns nothing; the explanation is either displayed or thrown.
 *
 * @param[in] er_index  The status code returned by the lcg_solver() function.
 * @param[in] er_throw  When true, throw out a char string of the explanation
 *                      instead of displaying it. Defaults to false.
 */
void lcg_error_str(int er_index, bool er_throw = false);
|
||||
|
||||
|
||||
/**
 * @brief Solver types that the clcg_solver() function family can be asked to run.
 */
enum clcg_solver_enum
{
    CLCG_BICG,     ///< Jacob's Bi-Conjugate Gradient Method.
    CLCG_BICG_SYM, ///< Bi-Conjugate Gradient Method accelerated for complex symmetric A.
    CLCG_CGS,      ///< Conjugate Gradient Squared Method with real coefficients.
    CLCG_BICGSTAB, ///< Biconjugate gradient stabilized method.
    // Quasi-Minimal Residual Method (currently disabled).
    //CLCG_QMR,
    CLCG_TFQMR,    ///< Transpose Free Quasi-Minimal Residual Method.
    CLCG_PCG,      ///< Preconditioned conjugate gradient.
    CLCG_PBICG,    ///< Preconditioned Bi-Conjugate Gradient Method.
};
|
||||
|
||||
/**
 * @brief Status codes returned by the clcg_solver() function family.
 *
 * Non-negative values report a normal termination; negative values report an error.
 */
enum clcg_return_enum
{
    /// The solver function terminated successfully.
    CLCG_SUCCESS = 0,
    /// The iteration reached convergence (same value as CLCG_SUCCESS).
    CLCG_CONVERGENCE = 0,
    /// The iteration was stopped by the monitoring function.
    CLCG_STOP,
    /// The initial solution is already optimized.
    CLCG_ALREADY_OPTIMIZIED,

    // Error codes: negative values counting upwards from -1024.

    /// Unknown error.
    CLCG_UNKNOWN_ERROR = -1024,
    /// The variable size is negative.
    CLCG_INVILAD_VARIABLE_SIZE,
    /// The maximal iteration times is negative.
    CLCG_INVILAD_MAX_ITERATIONS,
    /// The epsilon is negative.
    CLCG_INVILAD_EPSILON,
    /// The iteration reached the maximal limit.
    CLCG_REACHED_MAX_ITERATIONS,
    /// NaN value.
    CLCG_NAN_VALUE,
    /// Invalid pointer.
    CLCG_INVALID_POINTER,
    /// Sizes of m and B do not match.
    CLCG_SIZE_NOT_MATCH,
    /// Unknown solver.
    CLCG_UNKNOWN_SOLVER,
};
|
||||
|
||||
/**
|
||||
* @brief Parameters of the conjugate gradient methods.
|
||||
*/
|
||||
struct clcg_para
|
||||
{
|
||||
/**
|
||||
* Maximal iteration times. The process will continue till the convergence is met
|
||||
* if this option is set to zero (default).
|
||||
*/
|
||||
int max_iterations;
|
||||
|
||||
/**
|
||||
* Epsilon for convergence test.
|
||||
* This parameter determines the accuracy with which the solution is to be found.
|
||||
* A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or sqrt(||g||)/N
|
||||
* <= epsilon for the lcg_solver() function, where ||.|| denotes the Euclidean (L2) norm.
|
||||
* The default value of epsilon is 1e-8. For box-constrained methods,the convergence test
|
||||
* is implemented using ||P(m-g) - m|| <= epsilon, in which P is the projector that
|
||||
* transfers m into the constrained domain.
|
||||
*/
|
||||
lcg_float epsilon;
|
||||
|
||||
/**
|
||||
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
|
||||
* The default value is false which means the gradient based evaluating method is used.
|
||||
* The AMD based method will be used if this variable is set to true. This parameter is only
|
||||
* applied to the non-constrained methods.
|
||||
*/
|
||||
int abs_diff;
|
||||
};
|
||||
|
||||
/**
|
||||
* Default parameter for conjugate gradient methods
|
||||
*/
|
||||
static const clcg_para defparam2 = {0, 1e-8, 0};
|
||||
|
||||
/**
|
||||
* @brief Return a clcg_para type instance with default values.
|
||||
*
|
||||
* Users can use this function to get default parameters' value for the complex conjugate gradient methods.
|
||||
*
|
||||
* @return A clcg_para type instance.
|
||||
*/
|
||||
clcg_para clcg_default_parameters();
|
||||
|
||||
/**
|
||||
* @brief Select a type of solver according to the name
|
||||
*
|
||||
* @param[in] slr_char Name of the solver
|
||||
*
|
||||
* @return The clcg solver enum.
|
||||
*/
|
||||
clcg_solver_enum clcg_select_solver(std::string slr_char);
|
||||
|
||||
/**
 * @brief Display or throw out a string explanation of a clcg_solver() return value.
 *
 * The function itself returns nothing; the explanation is either displayed or thrown.
 *
 * @param[in] er_index  The status code returned by the clcg_solver() function.
 * @param[in] er_throw  When true, throw out a char string of the explanation
 *                      instead of displaying it. Defaults to false.
 */
void clcg_error_str(int er_index, bool er_throw = false);
|
||||
|
||||
#endif // _LCG_UTIL_H
|
167
src/sample/sample1.cpp
Normal file
167
src/sample/sample1.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "cmath"
|
||||
#include "iostream"
|
||||
#include "../lib/lcg.h"
|
||||
|
||||
#define M 100
|
||||
#define N 80
|
||||
|
||||
/**
 * @brief Largest absolute element-wise difference between two arrays.
 *
 * @param[in] a     First array, at least size elements long.
 * @param[in] b     Second array, at least size elements long.
 * @param[in] size  Number of elements to compare.
 *
 * @return The maximum of |a[i] - b[i]| over the compared elements, or -1 when
 *         no element is compared (size <= 0).
 */
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float max = -1;
    for (int i = 0; i < size; i++)
    {
        // Take the absolute value directly. Squaring the difference and taking
        // the square root underflows to zero for tiny differences and
        // overflows to infinity for huge ones.
        max = lcg_max(std::fabs(a[i] - b[i]), max);
    }
    return max;
}
|
||||
|
||||
// 普通二维数组做核矩阵
|
||||
lcg_float **kernel;
|
||||
// 中间结果数组
|
||||
lcg_float *tmp_arr;
|
||||
// 预优矩阵
|
||||
lcg_float *p;
|
||||
|
||||
// 计算核矩阵乘向量的乘积
|
||||
void CalAx(void* instance, const lcg_float* x, lcg_float* prod_Ax, const int n_s)
|
||||
{
|
||||
lcg_matvec(kernel, x, tmp_arr, M, n_s, MatNormal);
|
||||
lcg_matvec(kernel, tmp_arr, prod_Ax, M, n_s, MatTranspose);
|
||||
return;
|
||||
}
|
||||
|
||||
void CalMx(void* instance, const lcg_float* x, lcg_float* prod_Mx, const int n_s)
|
||||
{
|
||||
for (size_t i = 0; i < n_s; i++)
|
||||
{
|
||||
prod_Mx[i] = p[i]*x[i];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//定义共轭梯度监控函数
|
||||
int Prog(void* instance, const lcg_float* m, const lcg_float converge, const lcg_para* param, const int n_s, const int k)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
kernel = lcg_malloc(M, N);
|
||||
tmp_arr = lcg_malloc(M);
|
||||
p = lcg_malloc(N);
|
||||
|
||||
lcg_vecrnd(kernel, -1.0, 1.0, M, N);
|
||||
|
||||
// 生成一组正演解
|
||||
lcg_float *fm = lcg_malloc(N);
|
||||
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||
|
||||
// 计算共轭梯度B项
|
||||
lcg_float *B = lcg_malloc(N);
|
||||
lcg_matvec(kernel, fm, tmp_arr, M, N, MatNormal);
|
||||
lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
|
||||
|
||||
/********************准备工作完成************************/
|
||||
lcg_para self_para = lcg_default_parameters();
|
||||
self_para.epsilon = 1e-7;
|
||||
self_para.abs_diff = 0;
|
||||
|
||||
// 声明一组解
|
||||
lcg_float *m = lcg_malloc(N);
|
||||
lcg_vecset(m, 0.0, N);
|
||||
|
||||
// 声明一组预优因子
|
||||
lcg_float diag;
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
diag = 0.0;
|
||||
for (size_t j = 0; j < M; j++)
|
||||
{
|
||||
diag += kernel[j][i]*kernel[j][i];
|
||||
}
|
||||
p[i] = 1.0/diag;
|
||||
}
|
||||
|
||||
// 约束解的范围
|
||||
lcg_float *low = lcg_malloc(N);
|
||||
lcg_float *hig = lcg_malloc(N);
|
||||
lcg_vecset(low, 1.0, N);
|
||||
lcg_vecset(hig, 2.0, N);
|
||||
|
||||
int ret;
|
||||
|
||||
std::clog << "solver: cg" << std::endl;
|
||||
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CG);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: pcg" << std::endl;
|
||||
ret = lcg_solver_preconditioned(CalAx, CalMx, Prog, m, B, N, &self_para, NULL, LCG_PCG);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: cgs" << std::endl;
|
||||
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_CGS);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: bicgstab" << std::endl;
|
||||
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: bicgstab2" << std::endl;
|
||||
ret = lcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, LCG_BICGSTAB2);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: pg" << std::endl;
|
||||
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_PG);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
std::clog << "solver: spg" << std::endl;
|
||||
ret = lcg_solver_constrained(CalAx, Prog, m, B, low, hig, N, &self_para, NULL, LCG_SPG);
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_free(kernel, M);
|
||||
lcg_free(tmp_arr);
|
||||
lcg_free(fm);
|
||||
lcg_free(B);
|
||||
lcg_free(m);
|
||||
lcg_free(p);
|
||||
lcg_free(low);
|
||||
lcg_free(hig);
|
||||
return 0;
|
||||
}
|
318
src/sample/sample10.cu
Normal file
318
src/sample/sample10.cu
Normal file
@ -0,0 +1,318 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/solver_cuda.h"
|
||||
|
||||
// Complex constants 1 + 0i and 0 + 0i. They live at file scope because the
// cuSPARSE calls below take their scalar arguments by address.
cuDoubleComplex one = {1.0, 0.0};
cuDoubleComplex zero = {0.0, 0.0};
|
||||
|
||||
/**
 * @brief Load a complex sparse matrix in COO form and a vector b from a binary file.
 *
 * Data are read in this order: N (int), nz (int), nz records of
 * (row index int, column index int, value cuDoubleComplex), then N values of b.
 * The four output arrays are allocated with new[]; the caller must release
 * them with delete[].
 *
 * @param[in]  filePath   Path of the binary input file.
 * @param[out] pN         Receives the vector length N.
 * @param[out] pnz        Receives the number of stored matrix entries.
 * @param[out] cooVal     Receives the array of nz matrix values.
 * @param[out] cooRowIdx  Receives the array of nz row indices.
 * @param[out] cooColIdx  Receives the array of nz column indices.
 * @param[out] b          Receives the array of N vector values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, its header is
 *         invalid, or it ends before all data are read.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read(): cannot open file " + filePath);
    }

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));

    // Validate the sizes before they are used to allocate memory. Without
    // this check a missing or damaged file leads to allocations of
    // indeterminate size.
    if (!in || *pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read(): invalid or truncated header in file " + filePath);
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));

    if (!in)
    {
        // Do not hand partially filled arrays back to the caller.
        delete[] *cooVal;    *cooVal = NULL;
        delete[] *cooRowIdx; *cooRowIdx = NULL;
        delete[] *cooColIdx; *cooColIdx = NULL;
        delete[] *b;         *b = NULL;
        throw std::ios_base::failure("read(): unexpected end of data in file " + filePath);
    }
    return;
}
|
||||
|
||||
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * Data are read in this order: N (int), then N cuDoubleComplex values. The
 * output array is allocated with new[]; the caller must release it with delete[].
 *
 * @param[in]  filePath  Path of the binary answer file.
 * @param[out] pN        Receives the vector length stored in the file.
 * @param[out] x         Receives the array of N values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, its header is
 *         invalid, or it ends before all data are read.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer(): cannot open file " + filePath);
    }

    in.read((char*)pN, sizeof(int));

    // Validate the length before it is used to allocate memory.
    if (!in || *pN < 0)
    {
        throw std::ios_base::failure("readAnswer(): invalid or truncated header in file " + filePath);
    }

    *x = new cuDoubleComplex[*pN]{};

    in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));

    if (!in)
    {
        // Do not hand a partially filled array back to the caller.
        delete[] *x; *x = NULL;
        throw std::ios_base::failure("readAnswer(): unexpected end of data in file " + filePath);
    }
    return;
}
|
||||
|
||||
/**
 * @brief Error measure between two complex arrays.
 *
 * Accumulates the squared modulus of clcg_Zdiff(a[i], b[i]) over the first n
 * elements and returns the square root of that sum divided by n.
 *
 * @param[in] a  First array, at least n elements long.
 * @param[in] b  Second array, at least n elements long.
 * @param[in] n  Number of elements to compare.
 *
 * @return sqrt(sum)/n, or 0.0 when n is not positive.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // An empty comparison has no error. Returning early also avoids the 0/0
    // division below.
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float avg = 0.0;
    cuDoubleComplex tmp;
    // Signed index to match n and avoid a signed/unsigned comparison.
    for (int i = 0; i < n; i++)
    {
        tmp = clcg_Zdiff(a[i], b[i]);
        avg += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(avg)/n;
}
|
||||
|
||||
/**
 * Example solver that loads a complex sparse system from disk and solves it
 * on the GPU with the preconditioned conjugate gradient method, once with a
 * diagonal preconditioner and once with an incomplete-Cholesky preconditioner.
 *
 * All members start out zero/null. The original left them indeterminate, so
 * calling AxProduct() or MxProduct() before solve() read garbage handles.
 */
class sample10 : public CLCG_CUDA_Solver
{
public:
    sample10(){}
    virtual ~sample10(){}

    // Load the system and its reference answer, run both preconditioned
    // solves and release every resource again.
    void solve(std::string inputPath, std::string answerPath);

    // Compute prod_Ax = op(A) * x with the sparse matrix smat_A, where op is
    // selected by oper_t. d_buf must already hold the SpMV work buffer.
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
        cusparseOperation_t oper_t)
    {
        // Calculate the product of A*x
        // CUSPARSE_SPMV_ALG_DEFAULT replaces the deprecated
        // CUSPARSE_MV_ALG_DEFAULT; sample11 already uses the newer name.
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
        return;
    }

    // Apply the preconditioner to x and store the result in prod_Ax. The
    // flag use_incomplete_cholesky selects which preconditioner is applied.
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
        cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
        cusparseOperation_t oper_t)
    {
        // Fetch the raw device pointers behind the dense vector descriptors.
        void *d_x, *d_Ax;
        cusparseDnVecGetValues(x, &d_x);
        cusparseDnVecGetValues(prod_Ax, &d_Ax);

        if (use_incomplete_cholesky)
        {
            // Two triangular solves with the factor stored in d_ic. d_pd is
            // used as scratch for the intermediate vector, so whatever it held
            // before (the diagonal of A) is overwritten here.
            cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (cuDoubleComplex*) d_x, (cuDoubleComplex*) d_pd,
                CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

            cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (cuDoubleComplex*) d_pd, (cuDoubleComplex*) d_Ax,
                CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
        }
        else
        {
            // Diagonal preconditioner: element-wise operation of x with the
            // diagonal in d_pd (presumably a division -- confirm against
            // clcg_vecDvecZ_element_wise).
            clcg_vecDvecZ_element_wise((cuDoubleComplex*) d_x, d_pd, (cuDoubleComplex*) d_Ax, n_size);
        }
        return;
    }

private:
    // Selects the preconditioner applied by MxProduct().
    bool use_incomplete_cholesky = false;

    // Host copy of the system: sizes, COO indices, values, right-hand side
    // and the reference answer.
    int N = 0, nz = 0;
    int *rowIdxA = nullptr, *colIdxA = nullptr;
    cuDoubleComplex *A = nullptr, *b = nullptr;
    cuDoubleComplex *ans_x = nullptr;

    // Work buffer shared by the cuSPARSE routines, and the sparse matrix handle.
    void *d_buf = nullptr;
    cusparseSpMatDescr_t smat_A = 0;

    int *d_rowIdxA = nullptr; // COO
    int *d_rowPtrA = nullptr; // CSR
    int *d_colIdxA = nullptr;
    cuDoubleComplex *d_A = nullptr;
    cuDoubleComplex *d_pd = nullptr; // diagonal of A; scratch in the IC branch
    cuDoubleComplex *d_ic = nullptr; // copy of A that is factorized in place

    // Descriptors and analysis info for the incomplete-Cholesky preconditioner.
    cusparseMatDescr_t descr_A = 0;
    cusparseMatDescr_t descr_L = 0;
    csric02Info_t icinfo_A = 0;
    csrsv2Info_t info_L = 0;
    csrsv2Info_t info_LT = 0;

    // Host solution vector and a temporary dense vector descriptor.
    cuDoubleComplex *host_m = nullptr;
    cusparseDnVecDescr_t dvec_tmp = 0;
};
|
||||
|
||||
/**
 * Load the linear system and its reference answer, solve it twice with the
 * preconditioned conjugate gradient method (diagonal preconditioner first,
 * incomplete-Cholesky second) and print the error of each run against the
 * reference answer. All host and device resources are released before returning.
 *
 * The order matters: the diagonal run must come first, because the
 * incomplete-Cholesky branch of MxProduct() uses d_pd as scratch and
 * overwrites the diagonal stored there.
 *
 * @param inputPath   Binary file holding the matrix (COO) and right-hand side.
 * @param answerPath  Binary file holding the reference solution.
 */
void sample10::solve(std::string inputPath, std::string answerPath)
{
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Create handles
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    cublasCreate(&cubHandle);
    cusparseCreate(&cusHandle);

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));
    cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));

    cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // This is just used to get bufferSize. The class member dvec_tmp is used
    // directly; the original declared a local of the same name that shadowed it.
    cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);

    // CUSPARSE_SPMV_ALG_DEFAULT replaces the deprecated CUSPARSE_MV_ALG_DEFAULT.
    size_t bufferSize_B = 0;
    cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

    // --- Start of the preconditioning part ---
    // Get the diagonal elements
    clcg_smZcsr_get_diagonal(d_rowPtrA, d_colIdxA, d_A, N, d_pd);

    // Copy A
    cudaMalloc(&d_ic, nz * sizeof(cuDoubleComplex));
    cudaMemcpy(d_ic, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);

    // create descriptor for matrix A
    cusparseCreateMatDescr(&descr_A);

    // initialize properties of matrix A
    cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_SYMMETRIC);
    cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);

    // create descriptor for matrix L
    cusparseCreateMatDescr(&descr_L);

    // initialize properties of matrix L
    cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);

    // Create empty info objects for incomplete-cholesky factorization
    cusparseCreateCsric02Info(&icinfo_A);
    cusparseCreateCsrsv2Info(&info_L);
    cusparseCreateCsrsv2Info(&info_LT);

    // Compute buffer size in computing ic factorization
    int bufferSize_A = 0, bufferSize_L = 0, bufferSize_LT = 0;
    cusparseZcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
        d_colIdxA, icinfo_A, &bufferSize_A);
    cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
    cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT);

    // One buffer serves every routine, so take the largest request. The
    // maximum is kept in size_t so the SpMV request is not narrowed to int.
    size_t bufferSize = bufferSize_B;
    if (bufferSize_A > 0 && (size_t) bufferSize_A > bufferSize) bufferSize = (size_t) bufferSize_A;
    if (bufferSize_L > 0 && (size_t) bufferSize_L > bufferSize) bufferSize = (size_t) bufferSize_L;
    if (bufferSize_LT > 0 && (size_t) bufferSize_LT > bufferSize) bufferSize = (size_t) bufferSize_LT;
    cudaMalloc(&d_buf, bufferSize);

    // Perform incomplete-cholesky factorization: analysis phase
    cusparseZcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
        d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
        N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

    // Perform incomplete-cholesky factorization: solve phase
    cusparseZcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
        icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    // --- End of the preconditioning part ---

    // Declare an initial solution
    host_m = new cuDoubleComplex[N];

    // NOTE(review): self_para is configured here but never handed to the
    // solver, so the epsilon of 1e-6 is not applied by the runs below.
    // Presumably the base class offers a setter for it -- confirm and call it.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;

    // Preconditioning with Diagonal elements
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    use_incomplete_cholesky = false;
    MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Preconditioning with incomplete-Cholesky factorization
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    use_incomplete_cholesky = true;
    MinimizePreconditioned(cubHandle, cusHandle, host_m, b, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Free Device memory
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_pd);
    cudaFree(d_ic);

    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroySpMat(smat_A);
    cudaFree(d_buf);

    cusparseDestroyMatDescr(descr_A);
    cusparseDestroyMatDescr(descr_L);
    cusparseDestroyCsric02Info(icinfo_A);
    cusparseDestroyCsrsv2Info(info_L);
    cusparseDestroyCsrsv2Info(info_LT);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return;
}
|
||||
|
||||
// Entry point: solve the 10K test case and compare the result with its stored answer.
int main(int argc, char **argv)
{
    const std::string inputPath("data/case_10K_cA");
    const std::string answerPath("data/case_10K_cB");

    sample10 sp;
    sp.set_report_interval(0);
    sp.solve(inputPath, answerPath);

    return 0;
}
|
299
src/sample/sample11.cu
Normal file
299
src/sample/sample11.cu
Normal file
@ -0,0 +1,299 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/clcg_cuda.h"
|
||||
|
||||
/**
 * @brief Load a complex sparse matrix in COO form and a vector b from a binary file.
 *
 * Data are read in this order: N (int), nz (int), nz records of
 * (row index int, column index int, value cuDoubleComplex), then N values of b.
 * The four output arrays are allocated with new[]; the caller must release
 * them with delete[].
 *
 * @param[in]  filePath   Path of the binary input file.
 * @param[out] pN         Receives the vector length N.
 * @param[out] pnz        Receives the number of stored matrix entries.
 * @param[out] cooVal     Receives the array of nz matrix values.
 * @param[out] cooRowIdx  Receives the array of nz row indices.
 * @param[out] cooColIdx  Receives the array of nz column indices.
 * @param[out] b          Receives the array of N vector values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, its header is
 *         invalid, or it ends before all data are read.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read(): cannot open file " + filePath);
    }

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));

    // Validate the sizes before they are used to allocate memory. Without
    // this check a missing or damaged file leads to allocations of
    // indeterminate size.
    if (!in || *pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read(): invalid or truncated header in file " + filePath);
    }

    *cooVal = new cuDoubleComplex[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new cuDoubleComplex[*pN]{};

    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
    }

    in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));

    if (!in)
    {
        // Do not hand partially filled arrays back to the caller.
        delete[] *cooVal;    *cooVal = NULL;
        delete[] *cooRowIdx; *cooRowIdx = NULL;
        delete[] *cooColIdx; *cooColIdx = NULL;
        delete[] *b;         *b = NULL;
        throw std::ios_base::failure("read(): unexpected end of data in file " + filePath);
    }
    return;
}
|
||||
|
||||
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * Data are read in this order: N (int), then N cuDoubleComplex values. The
 * output array is allocated with new[]; the caller must release it with delete[].
 *
 * @param[in]  filePath  Path of the binary answer file.
 * @param[out] pN        Receives the vector length stored in the file.
 * @param[out] x         Receives the array of N values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, its header is
 *         invalid, or it ends before all data are read.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer(): cannot open file " + filePath);
    }

    in.read((char*)pN, sizeof(int));

    // Validate the length before it is used to allocate memory.
    if (!in || *pN < 0)
    {
        throw std::ios_base::failure("readAnswer(): invalid or truncated header in file " + filePath);
    }

    *x = new cuDoubleComplex[*pN]{};

    in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));

    if (!in)
    {
        // Do not hand a partially filled array back to the caller.
        delete[] *x; *x = NULL;
        throw std::ios_base::failure("readAnswer(): unexpected end of data in file " + filePath);
    }
    return;
}
|
||||
|
||||
/**
 * @brief Error measure between two complex arrays.
 *
 * Accumulates the squared modulus of clcg_Zdiff(a[i], b[i]) over the first n
 * elements and returns the square root of that sum divided by n.
 *
 * @param[in] a  First array, at least n elements long.
 * @param[in] b  Second array, at least n elements long.
 * @param[in] n  Number of elements to compare.
 *
 * @return sqrt(sum)/n, or 0.0 when n is not positive.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // An empty comparison has no error. Returning early also avoids the 0/0
    // division below.
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float avg = 0.0;
    cuDoubleComplex tmp;
    // Signed index to match n and avoid a signed/unsigned comparison.
    for (int i = 0; i < n; i++)
    {
        tmp = clcg_Zdiff(a[i], b[i]);
        avg += (tmp.x*tmp.x + tmp.y*tmp.y);
    }
    return sqrt(avg)/n;
}
|
||||
|
||||
// File-scope state shared between main() and the solver callbacks.

// Complex scalars handed to the cuSPARSE calls by address. The callbacks
// assign their values before each use.
cuDoubleComplex one, zero;

// Work buffer for the cuSPARSE routines and the sparse matrix handle.
void *d_buf;
cusparseSpMatDescr_t smat_A;

// Device arrays describing the matrix.
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
cuDoubleComplex *d_A;
cuDoubleComplex *d_pd; // intermediate vector between the two triangular solves
cuDoubleComplex *d_iu; // factor values used by the triangular solves

// Matrix descriptors and analysis info objects, initially null.
cusparseMatDescr_t descr_A = 0;
cusparseMatDescr_t descr_L = 0;
cusparseMatDescr_t descr_U = 0;
csrilu02Info_t info_ILU = 0;
csrsv2Info_t info_L = 0;
csrsv2Info_t info_U = 0;
|
||||
|
||||
// Callback computing prod_Ax = op(A) * x with the global sparse matrix smat_A,
// where op is selected by oper_t. The global d_buf must already hold the SpMV
// work buffer.
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
    cusparseOperation_t oper_t)
{
    // Scalars of the product: result = one * op(A) * x + zero * result.
    one = make_cuDoubleComplex(1.0, 0.0);
    zero = make_cuDoubleComplex(0.0, 0.0);

    // Calculate the product of A*x. CUSPARSE_SPMV_ALG_DEFAULT is used in place
    // of the older CUSPARSE_MV_ALG_DEFAULT name.
    cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax,
        CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
}
|
||||
|
||||
// Preconditioner callback: applies two successive triangular solves with the
// factors stored in d_iu (descriptors descr_L, then descr_U). The intermediate
// vector is kept in the global d_pd and the final result is written to prod_Ax.
void cudaMx_ILU(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
    cusparseOperation_t oper_t)
{
    // Raw device pointers behind the dense vector descriptors.
    void *d_in, *d_out;
    cusparseDnVecGetValues(x, &d_in);
    cusparseDnVecGetValues(prod_Ax, &d_out);

    cuDoubleComplex *rhs = (cuDoubleComplex*) d_in;
    cuDoubleComplex *result = (cuDoubleComplex*) d_out;

    one = make_cuDoubleComplex(1.0, 0.0);

    // First solve (descr_L): rhs -> d_pd
    cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one,
        descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, rhs, d_pd,
        CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

    // Second solve (descr_U): d_pd -> result
    cusparseZcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n_size, nz_size, &one,
        descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, d_pd, result,
        CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
}
|
||||
|
||||
/**
 * @brief Progress callback: logs the iteration count once the convergence
 *        value has dropped to param->epsilon or below.
 *
 * @param converge Current convergence value.
 * @param param    Solver parameters; only epsilon is read.
 * @param k        Current iteration number.
 * @return Always 0.
 */
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
    const clcg_para* param, const int n_size, const int nz_size, const int k)
{
    const bool reached = (converge <= param->epsilon);
    if (reached)
    {
        std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    }
    return 0;
}
|
||||
|
||||
/**
 * @brief Sample driver: solves a complex sparse system with
 *        clcg_solver_preconditioned_cuda (CLCG_PCG), using an incomplete-LU
 *        factorization (cusparseZcsrilu02) as preconditioner, and reports
 *        the error against a reference solution read from file.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_1M_cA";
    std::string answerPath = "data/case_1M_cB";

    int N;
    int nz;
    cuDoubleComplex *A;
    int *rowIdxA;
    int *colIdxA;
    cuDoubleComplex *b;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    cuDoubleComplex *ans_x;
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Create handles
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    cublasCreate(&cubHandle);
    cusparseCreate(&cusHandle);

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));
    cudaMalloc(&d_pd, N * sizeof(cuDoubleComplex));

    cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // This vector descriptor is only used to query the SpMV buffer size
    cusparseDnVecDescr_t dvec_tmp;
    cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_C_64F);

    // Give the scalars their values before handing their addresses to cuSPARSE
    one.x = 1.0; one.y = 0.0;
    zero.x = 0.0; zero.y = 0.0;

    // Query with the same algorithm that cudaAx() passes to cusparseSpMV, so
    // the buffer is sized for the call that is actually made.
    size_t bufferSize_B = 0;
    cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

    // --- Start of the preconditioning part ---
    // Copy A; the factorization below overwrites this copy in place
    cudaMalloc(&d_iu, nz * sizeof(cuDoubleComplex));
    cudaMemcpy(d_iu, d_A, nz * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice);

    // The legacy csrilu02/csrsv2 queries report their sizes as int
    int bufferSize_A = 0, bufferSize_L = 0, bufferSize_U = 0;

    // create descriptor for matrix A
    cusparseCreateMatDescr(&descr_A);

    cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);

    // create descriptor for matrix L
    cusparseCreateMatDescr(&descr_L);

    // initialize properties of matrix L
    cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
    cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);

    // create descriptor for matrix U
    cusparseCreateMatDescr(&descr_U);

    cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
    cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);

    // Create empty info objects for the incomplete-LU factorization and the
    // two triangular solves
    cusparseCreateCsrilu02Info(&info_ILU);
    cusparseCreateCsrsv2Info(&info_L);
    cusparseCreateCsrsv2Info(&info_U);

    // Compute the buffer sizes needed by the factorization and the solves
    cusparseZcsrilu02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
        d_colIdxA, info_ILU, &bufferSize_A);
    cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
    cusparseZcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, &bufferSize_U);

    // One buffer serves every routine, so take the largest request. The
    // running maximum stays in size_t so the SpMV size is never truncated.
    size_t bufferSize = bufferSize_B;
    if (bufferSize_A > 0 && (size_t) bufferSize_A > bufferSize) bufferSize = (size_t) bufferSize_A;
    if (bufferSize_L > 0 && (size_t) bufferSize_L > bufferSize) bufferSize = (size_t) bufferSize_L;
    if (bufferSize_U > 0 && (size_t) bufferSize_U > bufferSize) bufferSize = (size_t) bufferSize_U;
    cudaMalloc(&d_buf, bufferSize);

    // Incomplete-LU factorization: analysis phase
    cusparseZcsrilu02_analysis(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA,
        d_colIdxA, info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_L, d_iu, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    cusparseZcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        N, nz, descr_U, d_iu, d_rowPtrA, d_colIdxA, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

    // Incomplete-LU factorization: numeric phase (overwrites d_iu)
    cusparseZcsrilu02(cusHandle, N, nz, descr_A, d_iu, d_rowPtrA, d_colIdxA,
        info_ILU, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    // --- End of the preconditioning part ---

    // Solver parameters
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    int ret;
    cuDoubleComplex *host_m = new cuDoubleComplex[N];

    // Initial solution: all zeros
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    // Preconditioning with incomplete-LU factorization
    ret = clcg_solver_preconditioned_cuda(cudaAx, cudaMx_ILU, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_PCG);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Free Device memory
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_pd);
    cudaFree(d_iu);

    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroySpMat(smat_A);
    cudaFree(d_buf);

    cusparseDestroyMatDescr(descr_A);
    cusparseDestroyMatDescr(descr_L);
    cusparseDestroyMatDescr(descr_U);
    cusparseDestroyCsrilu02Info(info_ILU);
    cusparseDestroyCsrsv2Info(info_L);
    cusparseDestroyCsrsv2Info(info_U);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);

    return 0;
}
|
306
src/sample/sample12.cu
Normal file
306
src/sample/sample12.cu
Normal file
@ -0,0 +1,306 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/solver_cuda.h"
|
||||
#include "../lib/preconditioner_cuda.h"
|
||||
|
||||
// File-scope complex constants 1+0i and 0+0i; their addresses are passed to
// the cuSPARSE calls in this file as scalar coefficients.
cuDoubleComplex one = {1.0, 0.0}, zero = {0.0, 0.0};
|
||||
|
||||
/**
 * @brief Load a sparse system (COO matrix and right-hand side) from a binary file.
 *
 * Layout read from the file: `int` N, `int` nz, then nz records of
 * (`int` row, `int` column, `cuDoubleComplex` value), then N
 * `cuDoubleComplex` values for the right-hand side.
 *
 * All output arrays are allocated with new[]; the caller owns them and must
 * release them with delete[].
 *
 * @param filePath  Path of the binary file.
 * @param pN        Receives N.
 * @param pnz       Receives nz.
 * @param cooVal    Receives the nz matrix values.
 * @param cooRowIdx Receives the nz row indices.
 * @param cooColIdx Receives the nz column indices.
 * @param b         Receives the N right-hand-side values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, a stored size
 *         is negative, or the file ends early. No output parameter is
 *         modified in that case.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file '" + filePath + "'");
    }

    // Read the sizes into locals so a failed read never feeds garbage to new[].
    int n = 0, nnz = 0;
    in.read(reinterpret_cast<char*>(&n), sizeof(int));
    in.read(reinterpret_cast<char*>(&nnz), sizeof(int));
    if (!in || n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: invalid header in '" + filePath + "'");
    }

    cuDoubleComplex *val = new cuDoubleComplex[nnz]{};
    int *row = new int[nnz]{};
    int *col = new int[nnz]{};
    cuDoubleComplex *rhs = new cuDoubleComplex[n]{};

    // Entries are stored interleaved as (row, column, value) records.
    for (int i = 0; i < nnz; ++i)
    {
        in.read(reinterpret_cast<char*>(&row[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&col[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&val[i]), sizeof(cuDoubleComplex));
    }

    in.read(reinterpret_cast<char*>(rhs), sizeof(cuDoubleComplex) * n);

    // Any short read above leaves the stream in a failed state.
    if (!in)
    {
        delete[] val;
        delete[] row;
        delete[] col;
        delete[] rhs;
        throw std::ios_base::failure("read: truncated data in '" + filePath + "'");
    }

    *pN = n;
    *pnz = nnz;
    *cooVal = val;
    *cooRowIdx = row;
    *cooColIdx = col;
    *b = rhs;
    return;
}
|
||||
|
||||
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * Layout read from the file: one `int` holding the element count, followed
 * by that many `cuDoubleComplex` values.
 *
 * @param filePath Path of the binary file.
 * @param pN       Receives the element count stored in the file.
 * @param x        Receives an array allocated with new[] holding the values;
 *                 the caller owns it and must release it with delete[].
 *
 * @throws std::ios_base::failure if the file cannot be opened, the stored
 *         count is negative, or the file ends before all values were read.
 *         *pN and *x are left untouched in that case.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file '" + filePath + "'");
    }

    // Read into a local first so that a failed read never hands a garbage
    // count to new[] or to the caller.
    int count = 0;
    in.read(reinterpret_cast<char*>(&count), sizeof(int));
    if (!in || count < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid element count in '" + filePath + "'");
    }

    cuDoubleComplex *values = new cuDoubleComplex[count]{};
    in.read(reinterpret_cast<char*>(values), sizeof(cuDoubleComplex) * count);
    if (!in)
    {
        delete[] values;
        throw std::ios_base::failure("readAnswer: truncated data in '" + filePath + "'");
    }

    *pN = count;
    *x = values;
    return;
}
|
||||
|
||||
/**
 * @brief Error measure between two complex vectors of length n.
 *
 * Accumulates |d|^2 for d = clcg_Zdiff(a[i], b[i]) (presumably a[i] - b[i];
 * confirm against the library) and returns sqrt(sum) / n.
 *
 * @param a First vector (n elements).
 * @param b Second vector (n elements).
 * @param n Number of elements to compare.
 * @return sqrt(sum of squared magnitudes) / n, or 0 when n <= 0.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare: avoids 0/0 for n == 0 and, for a negative n, the
    // wrap-around an unsigned loop bound would cause.
    if (n <= 0) return 0.0;

    lcg_float sum_sq = 0.0;
    for (int i = 0; i < n; i++)
    {
        cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||
|
||||
/**
 * @brief Sample solver built on CLCG_CUDA_Solver.
 *
 * solve() loads a system from file, builds a factor with
 * clcg_incomplete_Cholesky_cuda_full, and runs MinimizePreconditioned.
 * AxProduct and MxProduct supply the matrix product and the preconditioner.
 *
 * Every member has a default initializer so that an object on which solve()
 * has not run yet holds null pointers/handles instead of indeterminate values.
 */
class sample12 : public CLCG_CUDA_Solver
{
public:
    sample12(){}
    virtual ~sample12(){}

    /// Load the system and reference answer, solve it, report the error and
    /// release every resource acquired along the way.
    void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

    /// prod_Ax = op(A) * x via cusparseSpMV on smat_A, using d_buf as work
    /// buffer. cub_handle, n_size and nz_size are not used.
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        // Calculate the product of A*x
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
        return;
    }

    /// Preconditioner: two SpSV solves on smat_IC, first non-transposed from
    /// x into dvec_p, then transposed from dvec_p into prod_Ax. oper_t,
    /// cub_handle, n_size and nz_size are not used.
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, x, dvec_p,
            CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);

        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, prod_Ax,
            CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
        return;
    }

private:
    // Host copy of the system read from file
    int N = 0, nz = 0;
    int *rowIdxA = nullptr, *colIdxA = nullptr;
    cuDoubleComplex *A = nullptr, *b = nullptr;
    cuDoubleComplex *ans_x = nullptr;

    // Host copy of the factor (COO triplets, nz entries)
    int *IC_row = nullptr, *IC_col = nullptr;
    cuDoubleComplex *IC_val = nullptr;

    // Device work buffers and cuSPARSE descriptors
    void *d_buf = nullptr, *d_buf2 = nullptr;
    cusparseSpMatDescr_t smat_A = nullptr;
    cusparseSpMatDescr_t smat_IC = nullptr;
    cusparseSpSVDescr_t descr_L = nullptr, descr_LT = nullptr;

    // Device arrays of A
    int *d_rowIdxA = nullptr; // COO
    int *d_rowPtrA = nullptr; // CSR
    int *d_colIdxA = nullptr;
    cuDoubleComplex *d_A = nullptr;
    cuDoubleComplex *d_p = nullptr; // intermediate vector between the two SpSV solves
    cusparseDnVecDescr_t dvec_p = nullptr;

    // Device arrays of the factor
    int *d_rowIdxIC = nullptr; // COO
    int *d_rowPtrIC = nullptr; // CSR
    int *d_colIdxIC = nullptr;
    cuDoubleComplex *d_IC = nullptr;

    // Solution vector and scratch vector used for buffer-size queries/analysis
    cuDoubleComplex *host_m = nullptr;
    cuDoubleComplex *d_t = nullptr;
    cusparseDnVecDescr_t dvec_tmp = nullptr;
};
|
||||
|
||||
/**
 * @brief Run the whole sample: load data, build the preconditioner, solve,
 *        report the error and release all resources.
 *
 * @param inputPath  Binary file with the system (read by read()).
 * @param answerPath Binary file with the reference solution (read by readAnswer()).
 * @param cub_handle cuBLAS handle forwarded to the solver.
 * @param cus_handle cuSPARSE handle used for every sparse operation.
 */
void sample12::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // ---- Load the system and the reference solution ----
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // ---- Build the factor on the host (nz COO entries) ----
    IC_row = new int [nz];
    IC_col = new int [nz];
    IC_val = new cuDoubleComplex [nz];
    clcg_incomplete_Cholesky_cuda_full(rowIdxA, colIdxA, A, N, nz, IC_row, IC_col, IC_val);

    /* Debug aid: print the entries with row >= column
    for (size_t i = 0; i < nz; i++)
    {
        if (IC_row[i] >= IC_col[i])
        {
            std::cout << IC_row[i] << " " << IC_col[i] << " (" << IC_val[i].x << "," << IC_val[i].y << ")\n";
        }
    }
    */

    // Byte counts shared by the allocations and copies below
    const size_t val_bytes = nz * sizeof(cuDoubleComplex);
    const size_t idx_bytes = nz * sizeof(int);
    const size_t ptr_bytes = (N + 1) * sizeof(int);
    const size_t vec_bytes = N * sizeof(cuDoubleComplex);

    // ---- Device storage for A and the intermediate vector ----
    cudaMalloc(&d_A, val_bytes);
    cudaMalloc(&d_rowIdxA, idx_bytes);
    cudaMalloc(&d_rowPtrA, ptr_bytes);
    cudaMalloc(&d_colIdxA, idx_bytes);
    cudaMalloc(&d_p, vec_bytes);
    cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);

    cudaMemcpy(d_A, A, val_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, idx_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, idx_bytes, cudaMemcpyHostToDevice);

    // ---- Device storage for the factor ----
    cudaMalloc(&d_IC, val_bytes);
    cudaMalloc(&d_rowIdxIC, idx_bytes);
    cudaMalloc(&d_rowPtrIC, ptr_bytes);
    cudaMalloc(&d_colIdxIC, idx_bytes);

    cudaMemcpy(d_IC, IC_val, val_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxIC, IC_row, idx_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxIC, IC_col, idx_bytes, cudaMemcpyHostToDevice);

    // ---- A: COO row indices -> CSR row pointers, then CSR descriptor ----
    cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // ---- Factor: same conversion and descriptor creation ----
    cusparseXcoo2csr(cus_handle, d_rowIdxIC, nz, N, d_rowPtrIC, CUSPARSE_INDEX_BASE_ZERO);
    cusparseCreateCsr(&smat_IC, N, N, nz, d_rowPtrIC, d_colIdxIC, d_IC,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // The diagonal-type attribute of smat_IC is intentionally left unset here:
    //cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
    //cusparseSpMatSetAttribute(smat_IC, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));

    // ---- Scratch vector used for the buffer-size queries and analysis ----
    cudaMalloc(&d_t, vec_bytes);
    cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);

    size_t spmv_bytes;
    cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &spmv_bytes);

    // --- Start of the preconditioning part ---
    cusparseSpSV_createDescr(&descr_L);
    cusparseSpSV_createDescr(&descr_LT);

    size_t lower_bytes, upper_bytes;
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &lower_bytes);
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &upper_bytes);

    // Both buffers get the largest of the three requested sizes
    size_t work_bytes = spmv_bytes;
    if (work_bytes < lower_bytes) work_bytes = lower_bytes;
    if (work_bytes < upper_bytes) work_bytes = upper_bytes;
    cudaMalloc(&d_buf, work_bytes);
    cudaMalloc(&d_buf2, work_bytes);

    // Each SpSV descriptor is analysed with its own buffer
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_IC, dvec_tmp, dvec_p,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf);
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_IC, dvec_p, dvec_tmp,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_buf2);
    // --- End of the preconditioning part ---

    // NOTE(review): self_para is configured but not referenced again in this
    // function; confirm whether it was meant to be handed to the solver.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    // Initial solution: all zeros
    host_m = new cuDoubleComplex[N];
    for (cuDoubleComplex *it = host_m, *last = host_m + N; it != last; ++it)
    {
        it->x = 0.0;
        it->y = 0.0;
    }

    // Preconditioned solve using the incomplete-Cholesky factor
    MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // ---- Release host memory (delete[] on a null pointer is a no-op) ----
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] A;
    delete[] b;
    delete[] ans_x;

    delete[] IC_row;
    delete[] IC_col;
    delete[] IC_val;

    delete[] host_m;

    // ---- Release device resources ----
    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroyDnVec(dvec_p);

    cudaFree(d_buf);
    cudaFree(d_buf2);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_A);
    cudaFree(d_p);
    cudaFree(d_t);

    cudaFree(d_rowIdxIC);
    cudaFree(d_rowPtrIC);
    cudaFree(d_colIdxIC);
    cudaFree(d_IC);

    cusparseDestroySpMat(smat_A);
    cusparseDestroySpMat(smat_IC);
    cusparseSpSV_destroyDescr(descr_L);
    cusparseSpSV_destroyDescr(descr_LT);
}
|
||||
|
||||
/**
 * @brief Entry point: creates the cuBLAS/cuSPARSE handles, runs the sample
 *        solver on the bundled test case and destroys the handles again.
 */
int main(int argc, char **argv)
{
    const std::string systemFile = "data/case_1M_cA";
    const std::string answerFile = "data/case_1M_cB";

    // Library handles shared by the whole run
    cublasHandle_t cubHandle;
    cublasCreate(&cubHandle);

    cusparseHandle_t cusHandle;
    cusparseCreate(&cusHandle);

    sample12 sp;
    sp.set_report_interval(0);
    sp.solve(systemFile, answerFile, cubHandle, cusHandle);

    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}
|
305
src/sample/sample13.cu
Normal file
305
src/sample/sample13.cu
Normal file
@ -0,0 +1,305 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/solver_cuda.h"
|
||||
#include "../lib/preconditioner_cuda.h"
|
||||
|
||||
// File-scope complex constants 1+0i and 0+0i; their addresses are passed to
// the cuSPARSE calls in this file as scalar coefficients.
cuDoubleComplex one = {1.0, 0.0}, zero = {0.0, 0.0};
|
||||
|
||||
/**
 * @brief Load a sparse system (COO matrix and right-hand side) from a binary file.
 *
 * Layout read from the file: `int` N, `int` nz, then nz records of
 * (`int` row, `int` column, `cuDoubleComplex` value), then N
 * `cuDoubleComplex` values for the right-hand side.
 *
 * All output arrays are allocated with new[]; the caller owns them and must
 * release them with delete[].
 *
 * @param filePath  Path of the binary file.
 * @param pN        Receives N.
 * @param pnz       Receives nz.
 * @param cooVal    Receives the nz matrix values.
 * @param cooRowIdx Receives the nz row indices.
 * @param cooColIdx Receives the nz column indices.
 * @param b         Receives the N right-hand-side values.
 *
 * @throws std::ios_base::failure if the file cannot be opened, a stored size
 *         is negative, or the file ends early. No output parameter is
 *         modified in that case.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file '" + filePath + "'");
    }

    // Read the sizes into locals so a failed read never feeds garbage to new[].
    int n = 0, nnz = 0;
    in.read(reinterpret_cast<char*>(&n), sizeof(int));
    in.read(reinterpret_cast<char*>(&nnz), sizeof(int));
    if (!in || n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: invalid header in '" + filePath + "'");
    }

    cuDoubleComplex *val = new cuDoubleComplex[nnz]{};
    int *row = new int[nnz]{};
    int *col = new int[nnz]{};
    cuDoubleComplex *rhs = new cuDoubleComplex[n]{};

    // Entries are stored interleaved as (row, column, value) records.
    for (int i = 0; i < nnz; ++i)
    {
        in.read(reinterpret_cast<char*>(&row[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&col[i]), sizeof(int));
        in.read(reinterpret_cast<char*>(&val[i]), sizeof(cuDoubleComplex));
    }

    in.read(reinterpret_cast<char*>(rhs), sizeof(cuDoubleComplex) * n);

    // Any short read above leaves the stream in a failed state.
    if (!in)
    {
        delete[] val;
        delete[] row;
        delete[] col;
        delete[] rhs;
        throw std::ios_base::failure("read: truncated data in '" + filePath + "'");
    }

    *pN = n;
    *pnz = nnz;
    *cooVal = val;
    *cooRowIdx = row;
    *cooColIdx = col;
    *b = rhs;
    return;
}
|
||||
|
||||
/**
 * @brief Load a reference solution vector from a binary file.
 *
 * Layout read from the file: one `int` holding the element count, followed
 * by that many `cuDoubleComplex` values.
 *
 * @param filePath Path of the binary file.
 * @param pN       Receives the element count stored in the file.
 * @param x        Receives an array allocated with new[] holding the values;
 *                 the caller owns it and must release it with delete[].
 *
 * @throws std::ios_base::failure if the file cannot be opened, the stored
 *         count is negative, or the file ends before all values were read.
 *         *pN and *x are left untouched in that case.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file '" + filePath + "'");
    }

    // Read into a local first so that a failed read never hands a garbage
    // count to new[] or to the caller.
    int count = 0;
    in.read(reinterpret_cast<char*>(&count), sizeof(int));
    if (!in || count < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid element count in '" + filePath + "'");
    }

    cuDoubleComplex *values = new cuDoubleComplex[count]{};
    in.read(reinterpret_cast<char*>(values), sizeof(cuDoubleComplex) * count);
    if (!in)
    {
        delete[] values;
        throw std::ios_base::failure("readAnswer: truncated data in '" + filePath + "'");
    }

    *pN = count;
    *x = values;
    return;
}
|
||||
|
||||
/**
 * @brief Error measure between two complex vectors of length n.
 *
 * Accumulates |d|^2 for d = clcg_Zdiff(a[i], b[i]) (presumably a[i] - b[i];
 * confirm against the library) and returns sqrt(sum) / n.
 *
 * @param a First vector (n elements).
 * @param b Second vector (n elements).
 * @param n Number of elements to compare.
 * @return sqrt(sum of squared magnitudes) / n, or 0 when n <= 0.
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
    // Nothing to compare: avoids 0/0 for n == 0 and, for a negative n, the
    // wrap-around an unsigned loop bound would cause.
    if (n <= 0) return 0.0;

    lcg_float sum_sq = 0.0;
    for (int i = 0; i < n; i++)
    {
        cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||
|
||||
/**
 * @brief Sample solver built on CLCG_CUDA_Solver.
 *
 * solve() loads a system from file, builds a factor of lnz entries with
 * clcg_incomplete_Cholesky_cuda_half, and runs MinimizePreconditioned.
 * AxProduct and MxProduct supply the matrix product and the preconditioner.
 *
 * Every member has a default initializer so that an object on which solve()
 * has not run yet holds null pointers/handles instead of indeterminate values.
 */
class sample13 : public CLCG_CUDA_Solver
{
public:
    sample13(){}
    virtual ~sample13(){}

    /// Load the system and reference answer, solve it, report the error and
    /// release every resource acquired along the way.
    void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

    /// prod_Ax = op(A) * x via cusparseSpMV on smat_A, using d_tuf as work
    /// buffer. cub_handle, n_size and nz_size are not used.
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        // Calculate the product of A*x
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_tuf);
        return;
    }

    /// Preconditioner: two SpSV solves on smat_L, first non-transposed from
    /// x into dvec_p, then transposed from dvec_p into prod_Ax. oper_t,
    /// cub_handle, n_size and nz_size are not used.
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
            CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);

        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
            CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
        return;
    }

private:
    // Host copy of the system read from file; lnz is the entry count of the factor
    int N = 0, nz = 0, lnz = 0;
    int *rowIdxA = nullptr, *colIdxA = nullptr;
    cuDoubleComplex *A = nullptr, *b = nullptr;
    cuDoubleComplex *ans_x = nullptr;

    // Host copy of the factor (COO triplets, lnz entries)
    int *L_row = nullptr, *L_col = nullptr;
    cuDoubleComplex *L_val = nullptr;

    // Device work buffers and cuSPARSE descriptors
    void *d_tuf = nullptr, *d_tuf2 = nullptr;
    cusparseSpMatDescr_t smat_A = nullptr;
    cusparseSpMatDescr_t smat_L = nullptr;
    cusparseSpSVDescr_t descr_L = nullptr, descr_LT = nullptr;

    // Device arrays of A
    int *d_rowIdxA = nullptr; // COO
    int *d_rowPtrA = nullptr; // CSR
    int *d_colIdxA = nullptr;
    cuDoubleComplex *d_A = nullptr;
    cuDoubleComplex *d_t = nullptr; // scratch vector behind dvec_tmp
    cuDoubleComplex *d_p = nullptr; // intermediate vector between the two SpSV solves
    cusparseDnVecDescr_t dvec_p = nullptr;

    // Device arrays of the factor
    int *d_rowIdxL = nullptr; // COO
    int *d_rowPtrL = nullptr; // CSR
    int *d_colIdxL = nullptr;
    cuDoubleComplex *d_L = nullptr;

    // Solution vector and descriptor of the scratch vector
    cuDoubleComplex *host_m = nullptr;
    cusparseDnVecDescr_t dvec_tmp = nullptr;
};
|
||||
|
||||
/**
 * @brief Run the whole sample: load data, build the preconditioner, solve,
 *        report the error and release all resources.
 *
 * @param inputPath  Binary file with the system (read by read()).
 * @param answerPath Binary file with the reference solution (read by readAnswer()).
 * @param cub_handle cuBLAS handle forwarded to the solver.
 * @param cus_handle cuSPARSE handle used for every sparse operation.
 */
void sample13::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    // ---- Load the system and the reference solution ----
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    readAnswer(answerPath, &N, &ans_x);

    // Ask the library how many entries the factor will hold
    clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;
    std::clog << "lnz = " << lnz << std::endl;

    // ---- Build the factor on the host (lnz COO entries) ----
    L_row = new int [lnz];
    L_col = new int [lnz];
    L_val = new cuDoubleComplex [lnz];
    clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, A, N, nz, lnz, L_row, L_col, L_val);

    /* Debug aid: print every entry of the factor
    for (size_t i = 0; i < lnz; i++)
    {
        std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
    }
    */

    // Byte counts shared by the allocations and copies below
    const size_t a_val_bytes = nz * sizeof(cuDoubleComplex);
    const size_t a_idx_bytes = nz * sizeof(int);
    const size_t l_val_bytes = lnz * sizeof(cuDoubleComplex);
    const size_t l_idx_bytes = lnz * sizeof(int);
    const size_t ptr_bytes = (N + 1) * sizeof(int);
    const size_t vec_bytes = N * sizeof(cuDoubleComplex);

    // ---- Device storage for A and the two work vectors ----
    cudaMalloc(&d_A, a_val_bytes);
    cudaMalloc(&d_rowIdxA, a_idx_bytes);
    cudaMalloc(&d_rowPtrA, ptr_bytes);
    cudaMalloc(&d_colIdxA, a_idx_bytes);
    cudaMalloc(&d_t, vec_bytes);
    cudaMalloc(&d_p, vec_bytes);
    cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_64F);

    cudaMemcpy(d_A, A, a_val_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, a_idx_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, a_idx_bytes, cudaMemcpyHostToDevice);

    // ---- Device storage for the factor ----
    cudaMalloc(&d_L, l_val_bytes);
    cudaMalloc(&d_rowIdxL, l_idx_bytes);
    cudaMalloc(&d_rowPtrL, ptr_bytes);
    cudaMalloc(&d_colIdxL, l_idx_bytes);

    cudaMemcpy(d_L, L_val, l_val_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxL, L_row, l_idx_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxL, L_col, l_idx_bytes, cudaMemcpyHostToDevice);

    // ---- A: COO row indices -> CSR row pointers, then CSR descriptor ----
    cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // ---- Factor: same conversion and descriptor creation ----
    cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);
    cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

    // Mark the factor as lower triangular with a non-unit diagonal
    cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));

    cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));

    // ---- Scratch vector used for the buffer-size queries and analysis ----
    cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_64F);

    size_t spmv_bytes;
    cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &spmv_bytes);

    // --- Start of the preconditioning part ---
    cusparseSpSV_createDescr(&descr_L);
    cusparseSpSV_createDescr(&descr_LT);

    size_t lower_bytes, upper_bytes;
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &lower_bytes);
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &upper_bytes);

    // Both buffers get the largest of the three requested sizes
    size_t work_bytes = spmv_bytes;
    if (work_bytes < lower_bytes) work_bytes = lower_bytes;
    if (work_bytes < upper_bytes) work_bytes = upper_bytes;
    cudaMalloc(&d_tuf, work_bytes);
    cudaMalloc(&d_tuf2, work_bytes);

    // Each SpSV descriptor is analysed with its own buffer
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_tuf);
    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
        CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_tuf2);
    // --- End of the preconditioning part ---

    // NOTE(review): self_para is configured but not referenced again in this
    // function; confirm whether it was meant to be handed to the solver.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    // Initial solution, filled with `zero`
    host_m = clcg_malloc_cuda(N);
    clcg_vecset_cuda(host_m, zero, N);

    // Preconditioned solve using the incomplete-Cholesky factor
    MinimizePreconditioned(cub_handle, cus_handle, host_m, b, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // ---- Release host memory (delete[] on a null pointer is a no-op) ----
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] A;
    delete[] b;
    delete[] ans_x;

    delete[] L_row;
    delete[] L_col;
    delete[] L_val;

    clcg_free_cuda(host_m);

    // ---- Release device resources ----
    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroyDnVec(dvec_p);

    cudaFree(d_tuf);
    cudaFree(d_tuf2);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_A);
    cudaFree(d_t);
    cudaFree(d_p);

    cudaFree(d_rowIdxL);
    cudaFree(d_rowPtrL);
    cudaFree(d_colIdxL);
    cudaFree(d_L);

    cusparseDestroySpMat(smat_A);
    cusparseDestroySpMat(smat_L);
    cusparseSpSV_destroyDescr(descr_L);
    cusparseSpSV_destroyDescr(descr_LT);
}
|
||||
|
||||
/**
 * @brief Entry point: creates the cuBLAS/cuSPARSE handles, runs the sample
 *        solver on the bundled test case and destroys the handles again.
 */
int main(int argc, char **argv)
{
    const std::string systemFile = "data/case_10K_cA";
    const std::string answerFile = "data/case_10K_cB";

    // Library handles shared by the whole run
    cublasHandle_t cubHandle;
    cublasCreate(&cubHandle);

    cusparseHandle_t cusHandle;
    cusparseCreate(&cusHandle);

    sample13 sp;
    sp.set_report_interval(0);
    sp.solve(systemFile, answerFile, cubHandle, cusHandle);

    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}
|
327
src/sample/sample14.cu
Normal file
327
src/sample/sample14.cu
Normal file
@ -0,0 +1,327 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/solver_cuda.h"
|
||||
#include "../lib/preconditioner_cuda.h"
|
||||
|
||||
// File-scope scalar constants (1 and 0). Their addresses are passed as the
// alpha/beta arguments of the cuSPARSE calls in this file.
cuComplex one = {1.0f, 0.0f};
cuComplex zero = {0.0f, 0.0f};
|
||||
|
||||
/**
 * Reads a sparse complex linear system from a binary file.
 *
 * The file is consumed in this order: int N, int nz, then nz records of
 * (int row, int col, cuDoubleComplex value), then N cuDoubleComplex values
 * of the right-hand side.
 *
 * @param filePath   path of the input file
 * @param pN         receives N
 * @param pnz        receives nz
 * @param cooVal     receives a new[]-allocated array of nz values
 * @param cooRowIdx  receives a new[]-allocated array of nz row indices
 * @param cooColIdx  receives a new[]-allocated array of nz column indices
 * @param b          receives a new[]-allocated array of N values
 *
 * The caller releases the four arrays with delete[]. The outputs are only
 * written when the whole file has been read successfully.
 *
 * @throws std::ios_base::failure when the file cannot be opened, the header
 *         holds negative sizes, or the data is shorter than announced.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
    int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file " + filePath);
    }

    int n = 0, nnz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nnz, sizeof(int));
    // Validate before allocating: the sizes would otherwise be garbage.
    if (!in || n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: invalid header in file " + filePath);
    }

    cuDoubleComplex *val = new cuDoubleComplex[nnz]{};
    int *row = new int[nnz]{};
    int *col = new int[nnz]{};
    cuDoubleComplex *rhs = new cuDoubleComplex[n]{};

    for (int i = 0; i < nnz; ++i)
    {
        in.read((char*)&row[i], sizeof(int));
        in.read((char*)&col[i], sizeof(int));
        in.read((char*)&val[i], sizeof(cuDoubleComplex));
    }

    in.read((char*)rhs, sizeof(cuDoubleComplex)*n);

    // A failed read makes every later read a no-op, so one check is enough.
    if (!in)
    {
        delete[] val;
        delete[] row;
        delete[] col;
        delete[] rhs;
        throw std::ios_base::failure("read: truncated data in file " + filePath);
    }

    *pN = n;
    *pnz = nnz;
    *cooVal = val;
    *cooRowIdx = row;
    *cooColIdx = col;
    *b = rhs;
    return;
}
|
||||
|
||||
/**
 * Reads a reference solution vector from a binary file.
 *
 * The file is consumed as: int N followed by N cuDoubleComplex values.
 *
 * @param filePath  path of the answer file
 * @param pN        receives N
 * @param x         receives a new[]-allocated array of N values; the caller
 *                  releases it with delete[]
 *
 * The outputs are only written when the whole file has been read.
 *
 * @throws std::ios_base::failure when the file cannot be opened, N is
 *         negative, or the data is shorter than announced.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file " + filePath);
    }

    int n = 0;
    in.read((char*)&n, sizeof(int));
    // Validate before allocating: the size would otherwise be garbage.
    if (!in || n < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid header in file " + filePath);
    }

    cuDoubleComplex *ans = new cuDoubleComplex[n]{};
    in.read((char*)ans, sizeof(cuDoubleComplex)*n);
    if (!in)
    {
        delete[] ans;
        throw std::ios_base::failure("readAnswer: truncated data in file " + filePath);
    }

    *pN = n;
    *x = ans;
    return;
}
|
||||
|
||||
/**
 * Error measure between two single-precision complex vectors.
 *
 * Accumulates x*x + y*y of clcg_Cdiff(a[i], b[i]) over all entries and
 * returns the square root of that sum divided by n.
 *
 * @param a  first vector (n entries)
 * @param b  second vector (n entries)
 * @param n  number of entries
 * @return   the error measure, or 0 when n is not positive
 */
float avg_error(cuComplex *a, cuComplex *b, int n)
{
    // Nothing to compare; this also avoids dividing by zero below.
    if (n <= 0) return 0.0f;

    float sum_sq = 0.0f;
    // Signed index: matches the type of n, no sign-compare wrap-around.
    for (int i = 0; i < n; i++)
    {
        const cuComplex diff = clcg_Cdiff(a[i], b[i]);
        sum_sq += (diff.x*diff.x + diff.y*diff.y);
    }
    return sqrt(sum_sq)/n;
}
|
||||
|
||||
/**
 * Sample solver built on CLCG_CUDAF_Solver.
 *
 * solve() loads a system from disk, uploads the matrix A and a lower
 * triangular factor L to the device and runs the preconditioned solver.
 * AxProduct() and MxProduct() supply the matrix-vector product and the
 * preconditioning step through cuSPARSE.
 *
 * All members carry default initializers so the object starts in a defined
 * state; solve() tests the host pointers against nullptr before freeing.
 */
class sample14 : public CLCG_CUDAF_Solver
{
public:
    sample14(){}
    virtual ~sample14(){}

    void solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle);

    /**
     * Computes prod_Ax = op(A) * x with cusparseSpMV, where op is oper_t.
     * cub_handle, n_size and nz_size are not used here.
     */
    void AxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        // Calculate the product of A*x
        cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero, prod_Ax, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
        return;
    }

    /**
     * Applies the preconditioner with two triangular solves: first with L
     * (result stored in dvec_p), then with the transpose of L (result stored
     * in prod_Ax). cub_handle, n_size, nz_size and oper_t are not used here.
     */
    void MxProduct(cublasHandle_t cub_handle, cusparseHandle_t cus_handle, cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax,
        const int n_size, const int nz_size, cusparseOperation_t oper_t)
    {
        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, x, dvec_p,
            CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L);

        cusparseSpSV_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, prod_Ax,
            CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT);
        return;
    }

private:
    // Sizes: unknowns, non-zeros of A, non-zeros of L.
    int N = 0, nz = 0, lnz = 0;

    // Host data in double precision as read from disk.
    int *rowIdxA = nullptr, *colIdxA = nullptr;
    cuDoubleComplex *A = nullptr, *b = nullptr;
    cuDoubleComplex *ans_x = nullptr;

    // Host data converted to single precision.
    cuComplex *Af = nullptr, *bf = nullptr;
    cuComplex *ans_xf = nullptr;

    // Host copy of the factor L in COO form.
    int *L_row = nullptr, *L_col = nullptr;
    cuComplex *L_val = nullptr;

    // Device work buffers and cuSPARSE descriptors.
    void *d_buf = nullptr, *d_buf2 = nullptr;
    cusparseSpMatDescr_t smat_A = nullptr;
    cusparseSpMatDescr_t smat_L = nullptr;
    cusparseSpSVDescr_t descr_L = nullptr, descr_LT = nullptr;

    // Device storage of A.
    int *d_rowIdxA = nullptr; // COO
    int *d_rowPtrA = nullptr; // CSR
    int *d_colIdxA = nullptr;
    cuComplex *d_A = nullptr;
    cuComplex *d_t = nullptr;
    cuComplex *d_p = nullptr;
    cusparseDnVecDescr_t dvec_p = nullptr;

    // Device storage of L.
    int *d_rowIdxL = nullptr; // COO
    int *d_rowPtrL = nullptr; // CSR
    int *d_colIdxL = nullptr;
    cuComplex *d_L = nullptr;

    // Host solution vector and the descriptor wrapping d_t.
    cuComplex *host_m = nullptr;
    cusparseDnVecDescr_t dvec_tmp = nullptr;
};
|
||||
|
||||
/**
 * Runs the complete sample: loads the system and the reference answer,
 * computes the factor L on the host, uploads A and L to the device, prepares
 * the cuSPARSE descriptors and buffers, runs the preconditioned solver and
 * prints the error against the reference answer. All host and device
 * resources allocated here are released before returning.
 *
 * @param inputPath   binary file holding A (COO) and b
 * @param answerPath  binary file holding the reference solution
 * @param cub_handle  cuBLAS handle forwarded to the solver
 * @param cus_handle  cuSPARSE handle used for all sparse operations
 */
void sample14::solve(std::string inputPath, std::string answerPath, cublasHandle_t cub_handle, cusparseHandle_t cus_handle)
{
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
    readAnswer(answerPath, &N, &ans_x);

    clcg_incomplete_Cholesky_cuda_half_buffsize(rowIdxA, colIdxA, nz, &lnz);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;
    std::clog << "lnz = " << lnz << std::endl;

    // Single-precision copies of the input. A holds nz entries whereas b and
    // ans_x hold only N entries, so the vectors get their own length and
    // their own loop; converting them in the nz loop reads past their end.
    Af = new cuComplex [nz];
    bf = new cuComplex [N];
    ans_xf = new cuComplex [N];

    for (int i = 0; i < nz; i++)
    {
        Af[i].x = A[i].x; Af[i].y = A[i].y;
    }

    for (int i = 0; i < N; i++)
    {
        bf[i].x = b[i].x; bf[i].y = b[i].y;
        ans_xf[i].x = ans_x[i].x; ans_xf[i].y = ans_x[i].y;
    }

    L_row = new int [lnz];
    L_col = new int [lnz];
    L_val = new cuComplex [lnz];

    clcg_incomplete_Cholesky_cuda_half(rowIdxA, colIdxA, Af, N, nz, lnz, L_row, L_col, L_val);
    /*
    for (size_t i = 0; i < lnz; i++)
    {
        std::cout << L_row[i] << " " << L_col[i] << " (" << L_val[i].x << "," << L_val[i].y << ")\n";
    }
    */

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(cuComplex));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));
    cudaMalloc(&d_t, N * sizeof(cuComplex));
    cudaMalloc(&d_p, N * sizeof(cuComplex));
    cusparseCreateDnVec(&dvec_p, N, d_p, CUDA_C_32F);

    cudaMemcpy(d_A, Af, nz * sizeof(cuComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_t, bf, N * sizeof(cuComplex), cudaMemcpyHostToDevice);

    cudaMalloc(&d_L, lnz * sizeof(cuComplex));
    cudaMalloc(&d_rowIdxL, lnz * sizeof(int));
    cudaMalloc(&d_rowPtrL, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxL, lnz * sizeof(int));

    cudaMemcpy(d_L, L_val, lnz * sizeof(cuComplex), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxL, L_row, lnz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxL, L_col, lnz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);

    // Convert matrix L from COO format to CSR format
    cusparseXcoo2csr(cus_handle, d_rowIdxL, lnz, N, d_rowPtrL, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_L, N, N, lnz, d_rowPtrL, d_colIdxL, d_L, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);

    // Specify Lower fill mode.
    cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));

    // Specify Non-Unit diagonal type.
    cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
    cusparseSpMatSetAttribute(smat_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));

    // This descriptor is only used to query buffer sizes and run the analysis.
    cusparseCreateDnVec(&dvec_tmp, N, d_t, CUDA_C_32F);

    size_t bufferSize_B;
    cusparseSpMV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_C_32F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

    // --- Start of the preconditioning part ---
    cusparseSpSV_createDescr(&descr_L);
    cusparseSpSV_createDescr(&descr_LT);

    size_t bufferSize, bufferSize_L, bufferSize_LT;
    bufferSize = bufferSize_B;

    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, &bufferSize_L);
    cusparseSpSV_bufferSize(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p,
        dvec_tmp, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, &bufferSize_LT);

    // Both buffers get the largest of the three requested sizes.
    if (bufferSize_L > bufferSize) bufferSize = bufferSize_L;
    if (bufferSize_LT > bufferSize) bufferSize = bufferSize_LT;
    cudaMalloc(&d_buf, bufferSize);
    cudaMalloc(&d_buf2, bufferSize);

    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_L, dvec_tmp, dvec_p,
        CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_L, d_buf);

    cusparseSpSV_analysis(cus_handle, CUSPARSE_OPERATION_TRANSPOSE, &one, smat_L, dvec_p, dvec_tmp,
        CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, descr_LT, d_buf2);
    // --- End of the preconditioning part ---

    // NOTE(review): self_para is configured here but never handed to the
    // solver in this function - confirm whether a set-parameter call is missing.
    clcg_para self_para = clcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    // Declare an initial solution (all zeros)
    host_m = new cuComplex[N];
    for (int i = 0; i < N; i++)
    {
        host_m[i].x = 0.0; host_m[i].y = 0.0;
    }

    // Preconditioning with incomplete-Cholesky factorization
    MinimizePreconditioned(cub_handle, cus_handle, host_m, bf, N, nz, CLCG_PCG);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_xf, N) << std::endl;

    // Free Host memory
    if (rowIdxA != nullptr) delete[] rowIdxA;
    if (colIdxA != nullptr) delete[] colIdxA;
    if (A != nullptr) delete[] A;
    if (b != nullptr) delete[] b;
    if (ans_x != nullptr) delete[] ans_x;
    if (Af != nullptr) delete[] Af;
    if (bf != nullptr) delete[] bf;
    if (ans_xf != nullptr) delete[] ans_xf;

    if (L_row != nullptr) delete[] L_row;
    if (L_col != nullptr) delete[] L_col;
    if (L_val != nullptr) delete[] L_val;

    if (host_m != nullptr) delete[] host_m;

    cusparseDestroyDnVec(dvec_tmp);
    cusparseDestroyDnVec(dvec_p);

    // Free Device memory
    cudaFree(d_buf);
    cudaFree(d_buf2);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);
    cudaFree(d_A);
    cudaFree(d_t);
    cudaFree(d_p);

    cudaFree(d_rowIdxL);
    cudaFree(d_rowPtrL);
    cudaFree(d_colIdxL);
    cudaFree(d_L);

    cusparseDestroySpMat(smat_A);
    cusparseDestroySpMat(smat_L);
    cusparseSpSV_destroyDescr(descr_L);
    cusparseSpSV_destroyDescr(descr_LT);
    return;
}
|
||||
|
||||
/**
 * Entry point of the sample.
 *
 * Creates the cuBLAS and cuSPARSE handles, runs the sample14 solver on the
 * hard-coded test case and releases the handles again.
 *
 * @return 0 on success, 1 when one of the library handles cannot be created.
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_1K_cA";
    std::string answerPath = "data/case_1K_cB";

    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    // Stop early instead of handing an invalid handle to the solver.
    if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuBLAS handle." << std::endl;
        return 1;
    }

    if (cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "Failed to create the cuSPARSE handle." << std::endl;
        cublasDestroy(cubHandle);
        return 1;
    }

    sample14 sp;
    sp.set_report_interval(100);
    sp.solve(inputPath, answerPath, cubHandle, cusHandle);

    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);
    return 0;
}
|
223
src/sample/sample15.cu
Normal file
223
src/sample/sample15.cu
Normal file
@ -0,0 +1,223 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/lcg_cuda.h"
|
||||
|
||||
/**
 * Reads a sparse real linear system from a binary file.
 *
 * The file is consumed in this order: int N, int nz, then nz records of
 * (int row, int col, double value), then N doubles of the right-hand side.
 *
 * @param filePath   path of the input file
 * @param pN         receives N
 * @param pnz        receives nz
 * @param cooVal     receives a new[]-allocated array of nz values
 * @param cooRowIdx  receives a new[]-allocated array of nz row indices
 * @param cooColIdx  receives a new[]-allocated array of nz column indices
 * @param b          receives a new[]-allocated array of N values
 *
 * The caller releases the four arrays with delete[]. The outputs are only
 * written when the whole file has been read successfully.
 *
 * @throws std::ios_base::failure when the file cannot be opened, the header
 *         holds negative sizes, or the data is shorter than announced.
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
    int **cooRowIdx, int **cooColIdx, double **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open file " + filePath);
    }

    int n = 0, nnz = 0;
    in.read((char*)&n, sizeof(int));
    in.read((char*)&nnz, sizeof(int));
    // Validate before allocating: the sizes would otherwise be garbage.
    if (!in || n < 0 || nnz < 0)
    {
        throw std::ios_base::failure("read: invalid header in file " + filePath);
    }

    double *val = new double[nnz]{};
    int *row = new int[nnz]{};
    int *col = new int[nnz]{};
    double *rhs = new double[n]{};

    for (int i = 0; i < nnz; ++i)
    {
        in.read((char*)&row[i], sizeof(int));
        in.read((char*)&col[i], sizeof(int));
        in.read((char*)&val[i], sizeof(double));
    }

    in.read((char*)rhs, sizeof(double)*n);

    // A failed read makes every later read a no-op, so one check is enough.
    if (!in)
    {
        delete[] val;
        delete[] row;
        delete[] col;
        delete[] rhs;
        throw std::ios_base::failure("read: truncated data in file " + filePath);
    }

    *pN = n;
    *pnz = nnz;
    *cooVal = val;
    *cooRowIdx = row;
    *cooColIdx = col;
    *b = rhs;
    return;
}
|
||||
|
||||
/**
 * Reads a reference solution vector from a binary file.
 *
 * The file is consumed as: int N followed by N doubles.
 *
 * @param filePath  path of the answer file
 * @param pN        receives N
 * @param x         receives a new[]-allocated array of N values; the caller
 *                  releases it with delete[]
 *
 * The outputs are only written when the whole file has been read.
 *
 * @throws std::ios_base::failure when the file cannot be opened, N is
 *         negative, or the data is shorter than announced.
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open file " + filePath);
    }

    int n = 0;
    in.read((char*)&n, sizeof(int));
    // Validate before allocating: the size would otherwise be garbage.
    if (!in || n < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid header in file " + filePath);
    }

    double *ans = new double[n]{};
    in.read((char*)ans, sizeof(double)*n);
    if (!in)
    {
        delete[] ans;
        throw std::ios_base::failure("readAnswer: truncated data in file " + filePath);
    }

    *pN = n;
    *x = ans;
    return;
}
|
||||
|
||||
/**
 * Error measure between two real vectors: the square root of the summed
 * squared differences, divided by n.
 *
 * @param a  first vector (n entries)
 * @param b  second vector (n entries)
 * @param n  number of entries
 * @return   the error measure, or 0 when n is not positive
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    // Nothing to compare; this also avoids dividing by zero below.
    if (n <= 0) return 0.0;

    lcg_float sum_sq = 0.0;
    // Signed index: matches the type of n, no sign-compare wrap-around.
    for (int i = 0; i < n; i++)
    {
        const lcg_float diff = a[i] - b[i];
        sum_sq += diff*diff;
    }
    return sqrt(sum_sq)/n;
}
|
||||
|
||||
// File-scope state shared by main() and the cudaAx() callback.

// Scalars whose addresses are passed as alpha/beta to cusparseSpMV.
lcg_float one = 1.0;
lcg_float zero = 0.0;

// External work buffer passed to cusparseSpMV, and the descriptor of A.
void *d_buf;
cusparseSpMatDescr_t smat_A;

// Device arrays holding A.
int *d_rowIdxA; // COO
int *d_rowPtrA; // CSR
int *d_colIdxA;
double *d_A;

// Legacy matrix descriptor and csric02 info object, created in main().
cusparseMatDescr_t descr_A = nullptr;
csric02Info_t icinfo_A = nullptr;
|
||||
|
||||
/**
 * Callback that computes prod_Ax = A * x with cusparseSpMV, using the
 * file-scope matrix descriptor smat_A and work buffer d_buf.
 * instance, cub_handle, n_size and nz_size are not used.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    const cusparseOperation_t no_transpose = CUSPARSE_OPERATION_NON_TRANSPOSE;

    cusparseSpMV(cus_handle, no_transpose,
        &one, smat_A, x,
        &zero, prod_Ax,
        CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);
}
|
||||
|
||||
/**
 * Progress callback: prints the iteration count and the convergence value
 * once the convergence value is at or below param->epsilon.
 * Always returns 0. instance, m, n_size and nz_size are not used.
 */
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    // Written as a negation so a NaN convergence value stays silent.
    if (!(converge <= param->epsilon))
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k;
    std::clog << "\tconvergence: " << converge << std::endl;
    return 0;
}
|
||||
|
||||
/**
 * Entry point of the sample: loads a real sparse system, uploads it to the
 * device, solves it with the CG and the CGS solvers of lcg_solver_cuda and
 * prints the error of each result against the reference answer.
 *
 * @return 0
 */
int main(int argc, char **argv)
{
    std::string inputPath = "data/case_1M_A";
    std::string answerPath = "data/case_1M_B";

    int N = 0;
    int nz = 0;
    double *A = nullptr;
    int *rowIdxA = nullptr;
    int *colIdxA = nullptr;
    double *b = nullptr;
    read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

    double *ans_x = nullptr;
    readAnswer(answerPath, &N, &ans_x);

    std::clog << "N = " << N << std::endl;
    std::clog << "nz = " << nz << std::endl;

    // Create handles
    cublasHandle_t cubHandle;
    cusparseHandle_t cusHandle;

    cublasCreate(&cubHandle);
    cusparseCreate(&cusHandle);

    // Allocate GPU memory & copy matrix/vector to device
    cudaMalloc(&d_A, nz * sizeof(double));
    cudaMalloc(&d_rowIdxA, nz * sizeof(int));
    cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int));
    cudaMalloc(&d_colIdxA, nz * sizeof(int));

    cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice);

    // Convert matrix A from COO format to CSR format
    cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

    // Create sparse matrix
    cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);

    // create descriptor for matrix A
    cusparseCreateMatDescr(&descr_A);

    // initialize properties of matrix A
    cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);

    int ic02BufferSize = 0;
    cusparseCreateCsric02Info(&icinfo_A);
    cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
        d_colIdxA, icinfo_A, &ic02BufferSize);

    // d_buf is the external buffer of cusparseSpMV in cudaAx(), so its size
    // has to come from cusparseSpMV_bufferSize. A temporary dense vector is
    // only needed to describe the operands of that query.
    double *d_tmp = nullptr;
    cusparseDnVecDescr_t dvec_tmp;
    size_t spmvBufferSize = 0;
    cudaMalloc(&d_tmp, N * sizeof(double));
    cusparseCreateDnVec(&dvec_tmp, N, d_tmp, CUDA_R_64F);
    cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        dvec_tmp, &zero, dvec_tmp, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, &spmvBufferSize);
    cusparseDestroyDnVec(dvec_tmp);
    cudaFree(d_tmp);

    // Never allocate less than the previously used csric02 size.
    size_t bufferSize = spmvBufferSize;
    if (ic02BufferSize > 0 && (size_t) ic02BufferSize > bufferSize)
    {
        bufferSize = (size_t) ic02BufferSize;
    }

    cudaMalloc(&d_buf, bufferSize);

    // Solver parameters
    lcg_para self_para = lcg_default_parameters();
    self_para.epsilon = 1e-6;
    self_para.abs_diff = 0;

    int ret;
    double *host_m = new double[N];

    // Solve with CG, starting from a zero initial solution
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Solve with CGS, starting from a zero initial solution
    for (int i = 0; i < N; i++)
    {
        host_m[i] = 0.0;
    }

    ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
    lcg_error_str(ret);

    std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

    // Free Host memory
    delete[] A;
    delete[] rowIdxA;
    delete[] colIdxA;
    delete[] b;
    delete[] ans_x;
    delete[] host_m;

    // Free Device memory
    cudaFree(d_A);
    cudaFree(d_rowIdxA);
    cudaFree(d_rowPtrA);
    cudaFree(d_colIdxA);

    cusparseDestroySpMat(smat_A);
    cudaFree(d_buf);

    cusparseDestroyMatDescr(descr_A);
    cusparseDestroyCsric02Info(icinfo_A);

    // Free handles
    cublasDestroy(cubHandle);
    cusparseDestroy(cusHandle);

    return 0;
}
|
170
src/sample/sample2.cpp
Normal file
170
src/sample/sample2.cpp
Normal file
@ -0,0 +1,170 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "iostream"
|
||||
#include "random"
|
||||
#include "../lib/solver.h"
|
||||
|
||||
#define M 1000
|
||||
#define N 800
|
||||
|
||||
/**
 * Largest absolute element-wise difference between two real vectors.
 *
 * @param a     first vector
 * @param b     second vector
 * @param size  number of entries compared
 * @return      the largest difference, or -1 when size is not positive
 */
lcg_float max_diff(const lcg_float *a, const lcg_float *b, int size)
{
    lcg_float largest = -1;
    int idx = 0;
    while (idx < size)
    {
        const lcg_float delta = a[idx] - b[idx];
        largest = lcg_max(sqrt(delta*delta), largest);
        ++idx;
    }
    return largest;
}
|
||||
|
||||
class TESTFUNC : public LCG_Solver
|
||||
{
|
||||
public:
|
||||
TESTFUNC();
|
||||
~TESTFUNC();
|
||||
|
||||
// 计算共轭梯度的B项
|
||||
void cal_partb(lcg_float *B, const lcg_float *x);
|
||||
|
||||
//定义共轭梯度中Ax的算法
|
||||
void AxProduct(const lcg_float* a, lcg_float* b, const int num)
|
||||
{
|
||||
lcg_matvec(kernel, a, tmp_arr, M, num, MatNormal);
|
||||
lcg_matvec(kernel, tmp_arr, b, M, num, MatTranspose);
|
||||
return;
|
||||
}
|
||||
|
||||
void MxProduct(const lcg_float* a, lcg_float* b, const int num)
|
||||
{
|
||||
for (size_t i = 0; i < num; i++)
|
||||
{
|
||||
b[i] = p[i]*a[i];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
// 普通二维数组做核矩阵
|
||||
lcg_float **kernel;
|
||||
// 中间结果数组
|
||||
lcg_float *tmp_arr;
|
||||
// 预优矩阵
|
||||
lcg_float *p;
|
||||
};
|
||||
|
||||
/**
 * Allocates the kernel matrix (M x N), the intermediate array (M) and the
 * preconditioner (N), fills the kernel with random values in [-1, 1] and
 * sets p[i] to the reciprocal of the sum of squares of kernel column i.
 */
TESTFUNC::TESTFUNC()
{
    kernel = lcg_malloc(M, N);
    tmp_arr = lcg_malloc(M);
    p = lcg_malloc(N);

    lcg_vecrnd(kernel, -1.0, 1.0, M, N);
    lcg_vecset(p, 1.0, N);

    for (int col = 0; col < N; ++col)
    {
        lcg_float sum_sq = 0.0;
        for (int row = 0; row < M; ++row)
        {
            sum_sq += kernel[row][col]*kernel[row][col];
        }
        p[col] = 1.0/sum_sq;
    }
}
|
||||
|
||||
/**
 * Releases the kernel matrix (M rows), the intermediate array and the
 * preconditioner allocated by the constructor.
 */
TESTFUNC::~TESTFUNC()
{
    // 2-D storage first, then the two 1-D arrays
    lcg_free(kernel, M);

    lcg_free(tmp_arr);
    lcg_free(p);
}
|
||||
|
||||
/**
 * Computes B = kernel^T * (kernel * x), using tmp_arr for the intermediate
 * product kernel * x.
 *
 * @param B  output vector
 * @param x  input vector
 */
void TESTFUNC::cal_partb(lcg_float *B, const lcg_float *x)
{
    // tmp_arr = kernel * x
    lcg_matvec(kernel, x, tmp_arr, M, N, MatNormal);

    // B = kernel^T * tmp_arr
    lcg_matvec(kernel, tmp_arr, B, M, N, MatTranspose);
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
// 生成一组正演解
|
||||
double *fm = lcg_malloc(N);
|
||||
lcg_vecrnd(fm, 1.0, 2.0, N);
|
||||
|
||||
TESTFUNC test;
|
||||
|
||||
// 计算共轭梯度B项
|
||||
double *B = lcg_malloc(N);
|
||||
test.cal_partb(B, fm);
|
||||
|
||||
/********************准备工作完成************************/
|
||||
lcg_para self_para = lcg_default_parameters();
|
||||
self_para.epsilon = 1e-6;
|
||||
self_para.abs_diff = 0;
|
||||
test.set_lcg_parameter(self_para);
|
||||
|
||||
// 声明一组解
|
||||
lcg_float *m = lcg_malloc(N);
|
||||
lcg_vecset(m, 0.0, N);
|
||||
|
||||
// 约束解的范围
|
||||
lcg_float *low = lcg_malloc(N);
|
||||
lcg_float *hig = lcg_malloc(N);
|
||||
lcg_vecset(low, 1.0, N);
|
||||
lcg_vecset(hig, 2.0, N);
|
||||
|
||||
test.Minimize(m, B, N, LCG_CG);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.MinimizePreconditioned(m, B, N);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.Minimize(m, B, N, LCG_CGS);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.Minimize(m, B, N, LCG_BICGSTAB);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.Minimize(m, B, N, LCG_BICGSTAB2);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.MinimizeConstrained(m, B, low, hig, N, LCG_PG);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_vecset(m, 0.0, N);
|
||||
test.MinimizeConstrained(m, B, low, hig, N, LCG_SPG);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
lcg_free(fm);
|
||||
lcg_free(B);
|
||||
lcg_free(m);
|
||||
lcg_free(low);
|
||||
lcg_free(hig);
|
||||
return 0;
|
||||
}
|
129
src/sample/sample3.cpp
Normal file
129
src/sample/sample3.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "iostream"
|
||||
#include "../lib/clcg.h"
|
||||
|
||||
#define N 100
|
||||
|
||||
/**
 * Largest value of clcg_module over the element-wise differences a[i] - b[i].
 *
 * @param a     first vector
 * @param b     second vector
 * @param size  number of entries compared
 * @return      the largest value found, or -1 when size is not positive
 */
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    for (int k = 0; k < size; ++k)
    {
        lcg_complex delta = a[k] - b[k];
        largest = lcg_max(clcg_module(&delta), largest);
    }
    return largest;
}
|
||||
|
||||
// Kernel matrix stored as a plain 2-D array; allocated in main() and read by CalAx()
|
||||
lcg_complex **kernel;
|
||||
|
||||
// 计算核矩阵乘向量的乘积
|
||||
void CalAx(void *instance, const lcg_complex *x, lcg_complex *prod_Ax,
|
||||
const int x_size, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
clcg_matvec(kernel, x, prod_Ax, N, x_size, layout, conjugate);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
//定义共轭梯度监控函数
|
||||
int Prog(void* instance, const lcg_complex* m, const lcg_float converge,
|
||||
const clcg_para* param, const int n_size, const int k)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
srand(time(0));
|
||||
|
||||
kernel = clcg_malloc(N, N);
|
||||
clcg_vecrnd(kernel, lcg_complex(-1.0, -1.0), lcg_complex(1.0, 1.0), N, N);
|
||||
|
||||
// 设置核矩阵为一个对称阵
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
for (int j = i; j < N; j++)
|
||||
{
|
||||
kernel[j][i] = kernel[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
// 生成一组正演解
|
||||
lcg_complex *fm = clcg_malloc(N);
|
||||
clcg_vecrnd(fm, lcg_complex(1.0, 1.0), lcg_complex(2.0, 2.0), N);
|
||||
|
||||
// 计算共轭梯度B项
|
||||
lcg_complex *B = clcg_malloc(N);
|
||||
clcg_matvec(kernel, fm, B, N, N, MatNormal, NonConjugate);
|
||||
|
||||
/********************准备工作完成************************/
|
||||
clcg_para self_para = clcg_default_parameters();
|
||||
self_para.abs_diff = 0;
|
||||
self_para.epsilon = 1e-8;
|
||||
|
||||
// 声明一组解
|
||||
lcg_complex *m = clcg_malloc(N);
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
|
||||
int ret;
|
||||
|
||||
std::clog << "solver: bicg" << std::endl;
|
||||
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG);
|
||||
std::clog << std::endl; clcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
std::clog << "solver: bicg-symmetric" << std::endl;
|
||||
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICG_SYM);
|
||||
std::clog << std::endl; clcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
std::clog << "solver: cgs" << std::endl;
|
||||
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_CGS);
|
||||
std::clog << std::endl; clcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
std::clog << "solver: bicgstab" << std::endl;
|
||||
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_BICGSTAB);
|
||||
std::clog << std::endl; clcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
std::clog << "solver: tfqmr" << std::endl;
|
||||
ret = clcg_solver(CalAx, Prog, m, B, N, &self_para, NULL, CLCG_TFQMR);
|
||||
std::clog << std::endl; clcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_free(kernel, N);
|
||||
clcg_free(fm);
|
||||
clcg_free(B);
|
||||
clcg_free(m);
|
||||
return 0;
|
||||
}
|
199
src/sample/sample4.cpp
Normal file
199
src/sample/sample4.cpp
Normal file
@ -0,0 +1,199 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "../lib/solver.h"
|
||||
#include "ctime"
|
||||
#include "random"
|
||||
#include "iostream"
|
||||
#include "fstream"
|
||||
#include "iomanip"
|
||||
#include "complex"
|
||||
|
||||
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
|
||||
int **cooRowIdx, int **cooColIdx, lcg_complex **b)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
in.read((char*)pnz, sizeof(int));
|
||||
|
||||
*cooVal = new lcg_complex[*pnz]{};
|
||||
*cooRowIdx = new int[*pnz]{};
|
||||
*cooColIdx = new int[*pnz]{};
|
||||
*b = new lcg_complex[*pN]{};
|
||||
|
||||
std::complex<double> std_c;
|
||||
for (int i = 0; i < *pnz; ++i)
|
||||
{
|
||||
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*cooVal)[i].real(std_c.real());
|
||||
(*cooVal)[i].imag(std_c.imag());
|
||||
}
|
||||
|
||||
for (int i = 0; i < *pN; i++)
|
||||
{
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*b)[i].real(std_c.real());
|
||||
(*b)[i].imag(std_c.imag());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
|
||||
*x = new lcg_complex[*pN]{};
|
||||
|
||||
std::complex<double> std_c;
|
||||
for (size_t i = 0; i < *pN; i++)
|
||||
{
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*x)[i].real(std_c.real());
|
||||
(*x)[i].imag(std_c.imag());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Largest modulus of the element-wise difference between two complex arrays.
 *
 * @param a     first array (at least `size` elements)
 * @param b     second array (at least `size` elements)
 * @param size  number of elements to compare
 * @return the maximum of clcg_module(a[i] - b[i]); -1 when size <= 0
 */
lcg_float max_diff(const lcg_complex *a, const lcg_complex *b, int size)
{
    lcg_float largest = -1;
    int idx = 0;
    while (idx < size)
    {
        lcg_complex delta = a[idx] - b[idx];
        largest = lcg_max(clcg_module(&delta), largest);
        ++idx;
    }
    return largest;
}
|
||||
|
||||
class TESTFUNC : public CLCG_Solver
|
||||
{
|
||||
public:
|
||||
TESTFUNC(int n);
|
||||
~TESTFUNC();
|
||||
|
||||
void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);
|
||||
|
||||
//定义共轭梯度中Ax的算法
|
||||
void AxProduct(const lcg_complex *x, lcg_complex *prod_Ax, const int x_size,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
clcg_matvec(kernel, x, prod_Ax, x_size, x_size, layout, conjugate);
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
// 普通二维数组做核矩阵
|
||||
lcg_complex **kernel;
|
||||
int n_size;
|
||||
};
|
||||
|
||||
// Allocate an n x n dense kernel; its entries are filled later by set_kernel().
TESTFUNC::TESTFUNC(int n)
    : kernel(clcg_malloc(n, n)), n_size(n)
{
}
|
||||
|
||||
// Release the dense kernel allocated by the constructor.
TESTFUNC::~TESTFUNC()
{
    clcg_free(kernel, n_size);
}
|
||||
|
||||
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
|
||||
{
|
||||
for (size_t i = 0; i < n_size; i++)
|
||||
{
|
||||
for (size_t j = 0; j < n_size; j++)
|
||||
{
|
||||
kernel[i][j] = lcg_complex(0.0, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nz_size; i++)
|
||||
{
|
||||
kernel[row_id[i]][col_id[i]] = val[i];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
std::string inputPath = "data/case_1K_cA";
|
||||
std::string answerPath = "data/case_1K_cB";
|
||||
|
||||
int N;
|
||||
int nz;
|
||||
lcg_complex *A;
|
||||
int *rowIdxA;
|
||||
int *colIdxA;
|
||||
lcg_complex *b;
|
||||
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||
|
||||
lcg_complex *ans_x;
|
||||
readAnswer(answerPath, &N, &ans_x);
|
||||
|
||||
std::clog << "N = " << N << std::endl;
|
||||
std::clog << "nz = " << nz << std::endl;
|
||||
|
||||
TESTFUNC test(N);
|
||||
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||
|
||||
/********************准备工作完成************************/
|
||||
clcg_para self_para = clcg_default_parameters();
|
||||
self_para.epsilon = 1e-8;
|
||||
self_para.abs_diff = 0;
|
||||
|
||||
test.set_clcg_parameter(self_para);
|
||||
|
||||
// 声明一组解
|
||||
lcg_complex *m = clcg_malloc(N);
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
|
||||
test.Minimize(m, b, N, CLCG_BICG);
|
||||
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
test.Minimize(m, b, N, CLCG_BICG_SYM);
|
||||
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
test.Minimize(m, b, N, CLCG_CGS);
|
||||
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_vecset(m, lcg_complex(0.0, 0.0), N);
|
||||
test.Minimize(m, b, N, CLCG_TFQMR);
|
||||
std::clog << "maximal difference: " << max_diff(ans_x, m, N) << std::endl << std::endl;
|
||||
|
||||
clcg_free(m);
|
||||
|
||||
delete[] A;
|
||||
delete[] rowIdxA;
|
||||
delete[] colIdxA;
|
||||
delete[] b;
|
||||
delete[] ans_x;
|
||||
return 0;
|
||||
}
|
155
src/sample/sample5.cpp
Normal file
155
src/sample/sample5.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "../lib/lcg_eigen.h"
|
||||
#include "iostream"
|
||||
#include "Eigen/Dense"
|
||||
|
||||
#define M 1000
|
||||
#define N 800
|
||||
|
||||
/**
 * Largest absolute element-wise difference between two real vectors.
 *
 * Only a.size() elements are compared, so b must be at least as long as a.
 * Returns -1 when a is empty.
 */
lcg_float max_diff(const Eigen::VectorXd &a, const Eigen::VectorXd &b)
{
    lcg_float largest = -1;
    for (int i = 0; i < a.size(); ++i)
    {
        const double delta = a[i] - b[i];
        largest = lcg_max(sqrt(delta*delta), largest);
    }
    return largest;
}
|
||||
|
||||
// Kernel matrix: dense M x N Eigen matrix with random entries
|
||||
Eigen::MatrixXd kernel = Eigen::MatrixXd::Random(M, N);
|
||||
// Scratch vector holding the intermediate product kernel * x
|
||||
Eigen::VectorXd tmp_arr(M);
|
||||
Eigen::VectorXd p = Eigen::VectorXd::Constant(N, 1.0);
|
||||
|
||||
// 计算核矩阵乘向量的乘积
|
||||
void CalAx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Ax)
|
||||
{
|
||||
tmp_arr = kernel * x;
|
||||
prod_Ax = kernel.transpose() * tmp_arr;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Preconditioner callback: prod_Mx is the coefficient-wise product of the
 * global vector p and x. `instance` is not used.
 */
void CalMx(void* instance, const Eigen::VectorXd &x, Eigen::VectorXd &prod_Mx)
{
    prod_Mx = p.cwiseProduct(x);
}
|
||||
|
||||
//定义共轭梯度监控函数
|
||||
int Prog(void* instance, const Eigen::VectorXd *m, const lcg_float converge,
|
||||
const lcg_para *param, const int k)
|
||||
{
|
||||
std::clog << "\rIteration-times: " << k << "\tconvergence: " << converge;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
// 生成一组正演解
|
||||
lcg_float LO = 1.0, HI = 2.0, Range = HI - LO;
|
||||
Eigen::VectorXd fm = Eigen::VectorXd::Random(N);
|
||||
fm = (fm + Eigen::VectorXd::Constant(N, 1.0))*0.5*Range;
|
||||
fm = (fm + Eigen::VectorXd::Constant(N, LO));
|
||||
|
||||
// 计算共轭梯度B项
|
||||
Eigen::VectorXd B(N);
|
||||
tmp_arr = kernel * fm;
|
||||
B = kernel.transpose() * tmp_arr;
|
||||
|
||||
/********************准备工作完成************************/
|
||||
lcg_para self_para = lcg_default_parameters();
|
||||
self_para.epsilon = 1e-5;
|
||||
self_para.abs_diff = 0;
|
||||
|
||||
// 声明一组解
|
||||
Eigen::VectorXd m = Eigen::VectorXd::Zero(N);
|
||||
//Eigen::VectorXd p = Eigen::VectorXd::Constant(N, 1.0);
|
||||
Eigen::VectorXd low = Eigen::VectorXd::Constant(N, LO);
|
||||
Eigen::VectorXd hig = Eigen::VectorXd::Constant(N, HI);
|
||||
|
||||
std::clog << "solver: cg" << std::endl;
|
||||
clock_t start = clock();
|
||||
int ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CG);
|
||||
clock_t end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: pcg" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_preconditioned_eigen(CalAx, CalMx, Prog, m, B, &self_para, NULL, LCG_PCG);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: cgs" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_CGS);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: bicgstab" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: bicgstab2" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_eigen(CalAx, Prog, m, B, &self_para, NULL, LCG_BICGSTAB2);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: pg" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_PG);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
m.setZero();
|
||||
std::clog << "solver: spg" << std::endl;
|
||||
start = clock();
|
||||
ret = lcg_solver_constrained_eigen(CalAx, Prog, m, B, low, hig, &self_para, NULL, LCG_SPG);
|
||||
end = clock();
|
||||
std::clog << std::endl; lcg_error_str(ret);
|
||||
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||
std::clog << "time use: "<<1000*(end-start)/(double)CLOCKS_PER_SEC<<" ms" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
235
src/sample/sample6.cpp
Normal file
235
src/sample/sample6.cpp
Normal file
@ -0,0 +1,235 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "iostream"
|
||||
#include "fstream"
|
||||
#include "complex"
|
||||
#include "../lib/lcg_complex.h"
|
||||
#include "../lib/solver_eigen.h"
|
||||
#include "Eigen/Sparse"
|
||||
|
||||
typedef Eigen::SparseMatrix<lcg_complex, Eigen::RowMajor> spmat_cd; // Note: Eigen sparse matrices are column-major by default; row-major storage is requested explicitly here
|
||||
typedef Eigen::Triplet<lcg_complex> triplt_cd;
|
||||
|
||||
void read(std::string filePath, int *pN, int *pnz, lcg_complex **cooVal,
|
||||
int **cooRowIdx, int **cooColIdx, lcg_complex **b)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
in.read((char*)pnz, sizeof(int));
|
||||
|
||||
*cooVal = new lcg_complex[*pnz]{};
|
||||
*cooRowIdx = new int[*pnz]{};
|
||||
*cooColIdx = new int[*pnz]{};
|
||||
*b = new lcg_complex[*pN]{};
|
||||
|
||||
std::complex<double> std_c;
|
||||
for (int i = 0; i < *pnz; ++i)
|
||||
{
|
||||
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*cooVal)[i].real(std_c.real());
|
||||
(*cooVal)[i].imag(std_c.imag());
|
||||
}
|
||||
|
||||
for (int i = 0; i < *pN; i++)
|
||||
{
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*b)[i].real(std_c.real());
|
||||
(*b)[i].imag(std_c.imag());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void readAnswer(std::string filePath, int *pN, lcg_complex **x)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
|
||||
*x = new lcg_complex[*pN]{};
|
||||
|
||||
std::complex<double> std_c;
|
||||
for (size_t i = 0; i < *pN; i++)
|
||||
{
|
||||
in.read((char*)&std_c, sizeof(std_c));
|
||||
(*x)[i].real(std_c.real());
|
||||
(*x)[i].imag(std_c.imag());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Largest modulus of the element-wise difference between two complex vectors.
 *
 * Only a.size() elements are compared, so b must be at least as long as a.
 *
 * @return max_i |a[i] - b[i]|, or -1 when a is empty
 */
lcg_float max_diff(const Eigen::VectorXcd &a, const Eigen::VectorXcd &b)
{
    lcg_float max = -1;
    std::complex<lcg_float> t;
    for (int i = 0; i < a.size(); i++)
    {
        t = a[i] - b[i];
        // std::abs yields the modulus |t|; re^2 + im^2 alone would be its square.
        max = lcg_max(std::abs(t), max);
    }
    return max;
}
|
||||
|
||||
class TESTFUNC : public CLCG_EIGEN_Solver
|
||||
{
|
||||
public:
|
||||
TESTFUNC(int n);
|
||||
~TESTFUNC();
|
||||
|
||||
void set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size);
|
||||
void set_p();
|
||||
|
||||
//定义共轭梯度中Ax的算法
|
||||
void AxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Ax,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
if (conjugate == Conjugate) prod_Ax = kernel.conjugate() * x;
|
||||
else prod_Ax = kernel * x;
|
||||
return;
|
||||
}
|
||||
|
||||
void MxProduct(const Eigen::VectorXcd &x, Eigen::VectorXcd &prod_Mx,
|
||||
lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
prod_Mx = P.cwiseProduct(x);
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
spmat_cd kernel;
|
||||
Eigen::VectorXcd P;
|
||||
int n_size;
|
||||
};
|
||||
|
||||
// Create an empty n x n sparse kernel and a length-n preconditioner vector
// (P's entries are set later by set_p()).
TESTFUNC::TESTFUNC(int n)
    : kernel(n, n), P(n), n_size(n)
{
}
|
||||
|
||||
// Shrink the sparse kernel to 0 x 0 before the members are destroyed.
TESTFUNC::~TESTFUNC()
{
    kernel.resize(0, 0);
}
|
||||
|
||||
void TESTFUNC::set_kernel(int *row_id, int *col_id, lcg_complex *val, int nz_size)
|
||||
{
|
||||
std::vector<triplt_cd> val_triplt;
|
||||
for (size_t i = 0; i < nz_size; i++)
|
||||
{
|
||||
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
|
||||
}
|
||||
|
||||
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||
return;
|
||||
}
|
||||
|
||||
void TESTFUNC::set_p()
|
||||
{
|
||||
for (size_t i = 0; i < n_size; i++)
|
||||
{
|
||||
P[i] = 1.0/kernel.coeff(i, i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
std::string inputPath = "data/case_10K_cA";
|
||||
std::string answerPath = "data/case_10K_cB";
|
||||
|
||||
int N;
|
||||
int nz;
|
||||
lcg_complex *A;
|
||||
int *rowIdxA;
|
||||
int *colIdxA;
|
||||
lcg_complex *b;
|
||||
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||
|
||||
lcg_complex *ans_x;
|
||||
readAnswer(answerPath, &N, &ans_x);
|
||||
|
||||
std::clog << "N = " << N << std::endl;
|
||||
std::clog << "nz = " << nz << std::endl;
|
||||
|
||||
TESTFUNC test(N);
|
||||
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||
test.set_p();
|
||||
|
||||
Eigen::VectorXcd B, ANS;
|
||||
B.resize(N);
|
||||
ANS.resize(N);
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
B[i] = b[i];
|
||||
ANS[i] = ans_x[i];
|
||||
}
|
||||
|
||||
/********************准备工作完成************************/
|
||||
clcg_para self_para = clcg_default_parameters();
|
||||
self_para.epsilon = 1e-16;
|
||||
self_para.abs_diff = 0;
|
||||
test.set_clcg_parameter(self_para);
|
||||
test.set_report_interval(10);
|
||||
|
||||
// 声明一组解
|
||||
Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));
|
||||
|
||||
test.Minimize(m, B, CLCG_BICG);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.Minimize(m, B, CLCG_BICG_SYM);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.Minimize(m, B, CLCG_CGS);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.Minimize(m, B, CLCG_TFQMR);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.MinimizePreconditioned(m, B, CLCG_PCG);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.MinimizePreconditioned(m, B, CLCG_PBICG);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
B.resize(0);
|
||||
ANS.resize(0);
|
||||
m.resize(0);
|
||||
|
||||
delete[] A;
|
||||
delete[] rowIdxA;
|
||||
delete[] colIdxA;
|
||||
delete[] b;
|
||||
delete[] ans_x;
|
||||
return 0;
|
||||
}
|
233
src/sample/sample7.cpp
Normal file
233
src/sample/sample7.cpp
Normal file
@ -0,0 +1,233 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include "iostream"
|
||||
#include "fstream"
|
||||
#include "../lib/solver_eigen.h"
|
||||
#include "../lib/preconditioner_eigen.h"
|
||||
|
||||
typedef std::complex<double> complex_d;
|
||||
typedef Eigen::SparseMatrix<std::complex<double>, Eigen::RowMajor> spmat_cd;
|
||||
typedef Eigen::Triplet<complex_d> triplt_cd;
|
||||
typedef Eigen::VectorXcd vector_cd;
|
||||
|
||||
void read(std::string filePath, int *pN, int *pnz, complex_d **cooVal,
|
||||
int **cooRowIdx, int **cooColIdx, complex_d **b)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
in.read((char*)pnz, sizeof(int));
|
||||
|
||||
*cooVal = new complex_d[*pnz]{};
|
||||
*cooRowIdx = new int[*pnz]{};
|
||||
*cooColIdx = new int[*pnz]{};
|
||||
*b = new complex_d[*pN]{};
|
||||
|
||||
for (int i = 0; i < *pnz; ++i)
|
||||
{
|
||||
in.read((char*)&(*cooRowIdx)[i], sizeof(int));
|
||||
in.read((char*)&(*cooColIdx)[i], sizeof(int));
|
||||
in.read((char*)&(*cooVal)[i], sizeof(complex_d));
|
||||
}
|
||||
|
||||
in.read((char*)(*b), sizeof(complex_d)*(*pN));
|
||||
return;
|
||||
}
|
||||
|
||||
void readAnswer(std::string filePath, int *pN, complex_d **x)
|
||||
{
|
||||
std::ifstream in(filePath, std::ios::binary);
|
||||
|
||||
in.read((char*)pN, sizeof(int));
|
||||
|
||||
*x = new complex_d[*pN]{};
|
||||
|
||||
in.read((char*)(*x), sizeof(complex_d)*(*pN));
|
||||
return;
|
||||
}
|
||||
|
||||
double max_diff(const vector_cd &a, const vector_cd &b)
|
||||
{
|
||||
double max = -1;
|
||||
complex_d t;
|
||||
for (int i = 0; i < a.size(); i++)
|
||||
{
|
||||
t = a[i] - b[i];
|
||||
max = lcg_max(std::sqrt(std::norm(t)), max);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
class TESTFUNC : public CLCG_EIGEN_Solver
|
||||
{
|
||||
public:
|
||||
TESTFUNC(int n);
|
||||
~TESTFUNC();
|
||||
|
||||
void set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size);
|
||||
void set_preconditioner();
|
||||
|
||||
//定义共轭梯度中Ax的算法
|
||||
void AxProduct(const vector_cd &x, vector_cd &prod_Ax, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
if (conjugate == Conjugate) prod_Ax = kernel.conjugate() * x;
|
||||
else prod_Ax = kernel * x;
|
||||
return;
|
||||
}
|
||||
|
||||
void MxProduct(const vector_cd &x, vector_cd &prod_Mx, lcg_matrix_e layout, clcg_complex_e conjugate)
|
||||
{
|
||||
// No preconditioning
|
||||
//prod_Mx = x;
|
||||
|
||||
// Preconditioning using the diagonal kernel
|
||||
//prod_Mx = p.cwiseProduct(x);
|
||||
|
||||
// Preconditioning using the ILUT/IC
|
||||
clcg_solve_lower_triangle(l_tri, x, p);
|
||||
clcg_solve_upper_triangle(u_tri, p, prod_Mx);
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
// 普通二维数组做核矩阵
|
||||
spmat_cd kernel, l_tri, u_tri;
|
||||
vector_cd p;
|
||||
int n_size;
|
||||
};
|
||||
|
||||
// Create an empty n x n sparse kernel and a length-n work vector; the
// triangular factors stay empty until set_preconditioner() runs.
TESTFUNC::TESTFUNC(int n)
    : kernel(n, n), p(n), n_size(n)
{
}
|
||||
|
||||
// Shrink every matrix and the work vector to size zero before the members
// are destroyed.
TESTFUNC::~TESTFUNC()
{
    kernel.resize(0, 0);
    l_tri.resize(0, 0);
    u_tri.resize(0, 0);
    p.resize(0);
}
|
||||
|
||||
void TESTFUNC::set_kernel(int *row_id, int *col_id, complex_d *val, int nz_size)
|
||||
{
|
||||
std::vector<triplt_cd> val_triplt;
|
||||
for (size_t i = 0; i < nz_size; i++)
|
||||
{
|
||||
val_triplt.push_back(triplt_cd(row_id[i], col_id[i], val[i]));
|
||||
}
|
||||
|
||||
kernel.setFromTriplets(val_triplt.begin(), val_triplt.end());
|
||||
return;
|
||||
}
|
||||
|
||||
void TESTFUNC::set_preconditioner()
|
||||
{
|
||||
// 1 Preconditioning using the incomplete LU decomposition
|
||||
/*
|
||||
for (size_t i = 0; i < n_size; i++)
|
||||
{
|
||||
p[i] = 1.0/kernel.coeff(i, i);
|
||||
}
|
||||
*/
|
||||
|
||||
// 2. Preconditioning using the incomplete LU decomposition
|
||||
//incomplete_LU(kernel, l_tri, u_tri);
|
||||
|
||||
// 3. Preconditioning using the incomplete Cholesky decomposition
|
||||
clcg_incomplete_Cholesky(kernel, l_tri);
|
||||
u_tri = l_tri.transpose();
|
||||
|
||||
// 4. Preconditioning using compressed incomplete decompositions
|
||||
/*
|
||||
vector_cd one = Eigen::VectorXcd::Ones(n_size);
|
||||
vector_cd x = Eigen::VectorXcd::Zero(n_size);
|
||||
|
||||
solve_lower_triangle(l_tri, one, x);
|
||||
solve_upper_triangle(u_tri, x, p);
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
int main(int argc, char const *argv[]) try
|
||||
{
|
||||
std::string inputPath = "data/case_1K_cA";
|
||||
std::string answerPath = "data/case_1K_cB";
|
||||
|
||||
int N;
|
||||
int nz;
|
||||
complex_d *A;
|
||||
int *rowIdxA;
|
||||
int *colIdxA;
|
||||
complex_d *b;
|
||||
read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);
|
||||
|
||||
complex_d *ans_x;
|
||||
readAnswer(answerPath, &N, &ans_x);
|
||||
|
||||
std::clog << "N = " << N << std::endl;
|
||||
std::clog << "nz = " << nz << std::endl;
|
||||
|
||||
TESTFUNC test(N);
|
||||
test.set_kernel(rowIdxA, colIdxA, A, nz);
|
||||
test.set_preconditioner();
|
||||
|
||||
vector_cd B, ANS;
|
||||
B.resize(N);
|
||||
ANS.resize(N);
|
||||
for (size_t i = 0; i < N; i++)
|
||||
{
|
||||
B[i] = b[i];
|
||||
ANS[i] = ans_x[i];
|
||||
}
|
||||
|
||||
/********************准备工作完成************************/
|
||||
|
||||
clcg_para self_para = clcg_default_parameters();
|
||||
self_para.epsilon = 1e-12;
|
||||
self_para.abs_diff = 0;
|
||||
test.set_clcg_parameter(self_para);
|
||||
test.set_report_interval(10);
|
||||
|
||||
Eigen::VectorXcd m = Eigen::VectorXcd::Constant(N, std::complex<double>(0.0, 0.0));
|
||||
|
||||
test.MinimizePreconditioned(m, B, CLCG_PCG);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
m.setZero();
|
||||
test.MinimizePreconditioned(m, B, CLCG_PBICG);
|
||||
std::clog << "maximal difference: " << max_diff(ANS, m) << std::endl << std::endl;
|
||||
|
||||
ANS.resize(0);
|
||||
B.resize(0);
|
||||
m.resize(0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
std::cerr << e.what() << std::endl;
|
||||
}
|
312
src/sample/sample8.cu
Normal file
312
src/sample/sample8.cu
Normal file
@ -0,0 +1,312 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/lcg_cuda.h"
|
||||
|
||||
/**
 * Load a real sparse linear system from a binary COO dump.
 *
 * File layout, as consumed below: int N, int nz, then nz records of
 * (int row, int col, double value), then N doubles of the right-hand side,
 * read in one block.
 *
 * The four output arrays are allocated with new[]; the caller releases them
 * with delete[].
 *
 * @param filePath   path of the binary input file
 * @param pN         receives the system size N
 * @param pnz        receives the number of stored entries
 * @param cooVal     receives the nz matrix values
 * @param cooRowIdx  receives the nz row indices
 * @param cooColIdx  receives the nz column indices
 * @param b          receives the N right-hand-side values
 *
 * @throws std::ios_base::failure if the file cannot be opened, the header is
 *         invalid, or the file ends early (arrays are freed and the output
 *         pointers set to nullptr before throwing).
 */
void read(std::string filePath, int *pN, int *pnz, double **cooVal,
          int **cooRowIdx, int **cooColIdx, double **b)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("read: cannot open '" + filePath + "'");
    }

    in.read((char*)pN, sizeof(int));
    in.read((char*)pnz, sizeof(int));
    // Validate before the sizes are handed to new[].
    if (!in || *pN < 0 || *pnz < 0)
    {
        throw std::ios_base::failure("read: invalid header in '" + filePath + "'");
    }

    *cooVal = new double[*pnz]{};
    *cooRowIdx = new int[*pnz]{};
    *cooColIdx = new int[*pnz]{};
    *b = new double[*pN]{};

    for (int i = 0; i < *pnz; ++i)
    {
        in.read((char*)&(*cooRowIdx)[i], sizeof(int));
        in.read((char*)&(*cooColIdx)[i], sizeof(int));
        in.read((char*)&(*cooVal)[i], sizeof(double));
    }

    in.read((char*)(*b), sizeof(double)*(*pN));

    // A short file puts the stream in a failed state; reads after that are no-ops,
    // so one check here covers the whole payload.
    if (!in)
    {
        delete[] *cooVal;    *cooVal = nullptr;
        delete[] *cooRowIdx; *cooRowIdx = nullptr;
        delete[] *cooColIdx; *cooColIdx = nullptr;
        delete[] *b;         *b = nullptr;
        throw std::ios_base::failure("read: unexpected end of data in '" + filePath + "'");
    }
}
|
||||
|
||||
/**
 * Load a reference solution vector from a binary file.
 *
 * File layout, as consumed below: int N followed by N doubles read in one
 * block. The output array is allocated with new[]; the caller releases it
 * with delete[].
 *
 * @param filePath  path of the binary answer file
 * @param pN        receives the vector length stored in the file
 * @param x         receives the N solution values
 *
 * @throws std::ios_base::failure if the file cannot be opened, the header is
 *         invalid, or the file ends early (the array is freed and *x set to
 *         nullptr before throwing).
 */
void readAnswer(std::string filePath, int *pN, double **x)
{
    std::ifstream in(filePath, std::ios::binary);
    if (!in)
    {
        throw std::ios_base::failure("readAnswer: cannot open '" + filePath + "'");
    }

    in.read((char*)pN, sizeof(int));
    if (!in || *pN < 0)
    {
        throw std::ios_base::failure("readAnswer: invalid header in '" + filePath + "'");
    }

    *x = new double[*pN]{};

    in.read((char*)(*x), sizeof(double)*(*pN));
    if (!in)
    {
        delete[] *x;
        *x = nullptr;
        throw std::ios_base::failure("readAnswer: unexpected end of data in '" + filePath + "'");
    }
}
|
||||
|
||||
/**
 * Error measure between two arrays: the Euclidean norm of (a - b) divided by n.
 *
 * @param a  first array (at least n elements)
 * @param b  second array (at least n elements)
 * @param n  number of elements to compare
 * @return sqrt(sum_i (a[i]-b[i])^2) / n, or 0 when n <= 0
 */
lcg_float avg_error(lcg_float *a, lcg_float *b, int n)
{
    // Nothing to compare; also avoids 0/0 and an unsigned wrap-around of a
    // negative count in the loop condition.
    if (n <= 0)
    {
        return 0.0;
    }

    lcg_float avg = 0.0;
    for (int i = 0; i < n; i++)
    {
        avg += (a[i] - b[i])*(a[i] - b[i]);
    }
    return sqrt(avg)/n;
}
|
||||
|
||||
// Global state shared between main() and the solver callbacks (cudaAx / cudaMx)
|
||||
lcg_float one = 1.0; // scalar 1 passed by address to cusparseSpMV and the triangular solves
|
||||
lcg_float zero = 0.0; // scalar 0 passed by address to cusparseSpMV
|
||||
|
||||
void *d_buf; // workspace pointer handed to cusparseSpMV and cusparseDcsrsv2_solve
|
||||
cusparseSpMatDescr_t smat_A; // sparse-matrix descriptor of A used by cusparseSpMV
|
||||
|
||||
int *d_rowIdxA; // COO
|
||||
int *d_rowPtrA; // CSR
|
||||
int *d_colIdxA; // column indices (device copy of colIdxA)
|
||||
double *d_A; // matrix values (device copy of A)
|
||||
double *d_pd; // intermediate vector between the two triangular solves in cudaMx
|
||||
double *d_ic; // copy of d_A made for the preconditioner; used as factor values in cudaMx
|
||||
|
||||
cusparseMatDescr_t descr_A = 0;
|
||||
cusparseMatDescr_t descr_L = 0; // matrix descriptor passed to both triangular solves
|
||||
csric02Info_t icinfo_A = 0;
|
||||
csrsv2Info_t info_L = 0; // info for the non-transposed triangular solve
|
||||
csrsv2Info_t info_LT = 0; // info for the transposed triangular solve
|
||||
|
||||
/**
 * Solver callback: computes prod_Ax = A * x with cuSPARSE, using the global
 * sparse-matrix descriptor smat_A and the global workspace d_buf.
 *
 * instance, cub_handle, n_size and nz_size are not used.
 * A cuSPARSE failure is reported on std::cerr; the callback returns void, so
 * the status cannot be passed back to the caller.
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    // prod_Ax = one * A * x + zero * prod_Ax
    const cusparseStatus_t status = cusparseSpMV(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
        x, &zero, prod_Ax, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, d_buf);
    if (status != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "cudaAx: cusparseSpMV failed with status "
                  << static_cast<int>(status) << std::endl;
    }
    return;
}
|
||||
|
||||
/**
 * Preconditioner callback: applies two sparse triangular solves to x and
 * writes the result to prod_Ax.
 *
 * Step 1 solves with the factor values in d_ic (non-transposed, info_L) from x
 * into the global scratch vector d_pd. Step 2 solves with the transposed
 * factor (info_LT) from d_pd into prod_Ax.
 * NOTE(review): d_ic presumably holds the incomplete Cholesky factor computed
 * in main() - confirm against the setup code.
 *
 * instance and cub_handle are not used. On the first cuSPARSE failure the
 * remaining steps are skipped and the status is reported on std::cerr; the
 * callback returns void, so the status cannot be passed back to the caller.
 */
void cudaMx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
    cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size)
{
    void *d_x = nullptr, *d_Ax = nullptr;

    // Fetch the raw device pointers behind the dense-vector descriptors.
    cusparseStatus_t status = cusparseDnVecGetValues(x, &d_x);
    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDnVecGetValues(prod_Ax, &d_Ax);
    }

    // Step 1: non-transposed triangular solve, x -> d_pd.
    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
            n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, (double*) d_x, (double*) d_pd,
            CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    }

    // Step 2: transposed triangular solve, d_pd -> prod_Ax.
    if (status == CUSPARSE_STATUS_SUCCESS)
    {
        status = cusparseDcsrsv2_solve(cus_handle, CUSPARSE_OPERATION_TRANSPOSE,
            n_size, nz_size, &one, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, (double*) d_pd, (double*) d_Ax,
            CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
    }

    if (status != CUSPARSE_STATUS_SUCCESS)
    {
        std::cerr << "cudaMx: cuSPARSE call failed with status "
                  << static_cast<int>(status) << std::endl;
    }
    return;
}
|
||||
|
||||
/**
 * Progress callback: prints the iteration count and the convergence value
 * once the convergence value is at or below param->epsilon. Always returns 0.
 */
int cudaProgress(void* instance, const lcg_float* m, const lcg_float converge,
    const lcg_para* param, const int n_size, const int nz_size, const int k)
{
    // Stay silent until the tolerance is reached.
    if (!(converge <= param->epsilon))
    {
        return 0;
    }

    std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
    return 0;
}
|
||||
|
||||
/**
 * @brief Sample driver: loads a sparse system, builds an incomplete-Cholesky
 * preconditioner with cuSPARSE and solves the system with the CG, CGS and
 * PCG solvers, printing the averaged error against a reference solution.
 *
 * @return 0 on completion, 1 when the inputs are inconsistent or the device
 *         setup fails (in that case resources are left to process teardown).
 */
int main(int argc, char **argv)
{
	std::string inputPath = "data/case_10K_A";
	std::string answerPath = "data/case_10K_B";

	int N = 0;
	int nz = 0;
	double *A = nullptr;
	int *rowIdxA = nullptr;
	int *colIdxA = nullptr;
	double *b = nullptr;
	read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

	// Read the reference length separately so that it cannot silently
	// overwrite the system size.
	int N_ans = 0;
	double *ans_x = nullptr;
	readAnswer(answerPath, &N_ans, &ans_x);
	if (N_ans != N) {
		std::cerr << "Size mismatch: system has " << N << " unknowns but the answer file has " << N_ans << std::endl;
		return 1;
	}

	std::clog << "N = " << N << std::endl;
	std::clog << "nz = " << nz << std::endl;

	// Create handles
	cublasHandle_t cubHandle;
	cusparseHandle_t cusHandle;

	if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS ||
		cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS) {
		std::cerr << "Failed to create the cuBLAS/cuSPARSE handles" << std::endl;
		return 1;
	}

	// Records (and reports) any failing CUDA runtime call.
	bool device_ok = true;
	auto track = [&device_ok](cudaError_t status, const char *what) {
		if (status != cudaSuccess) {
			std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
			device_ok = false;
		}
	};

	// Allocate GPU memory & copy matrix/vector to device
	track(cudaMalloc(&d_A, nz * sizeof(double)), "cudaMalloc(d_A)");
	track(cudaMalloc(&d_rowIdxA, nz * sizeof(int)), "cudaMalloc(d_rowIdxA)");
	track(cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int)), "cudaMalloc(d_rowPtrA)");
	track(cudaMalloc(&d_colIdxA, nz * sizeof(int)), "cudaMalloc(d_colIdxA)");
	track(cudaMalloc(&d_pd, N * sizeof(double)), "cudaMalloc(d_pd)");
	track(cudaMalloc(&d_ic, nz * sizeof(double)), "cudaMalloc(d_ic)");

	track(cudaMemcpy(d_A, A, nz * sizeof(double), cudaMemcpyHostToDevice), "cudaMemcpy(d_A)");
	track(cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(d_rowIdxA)");
	track(cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(d_colIdxA)");
	// d_ic starts as a copy of A and is overwritten in place by the factorization.
	track(cudaMemcpy(d_ic, d_A, nz * sizeof(double), cudaMemcpyDeviceToDevice), "cudaMemcpy(d_ic)");
	if (!device_ok) return 1;

	// Convert matrix A from COO format to CSR format
	cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

	// Create sparse matrix
	cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
		CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);

	// This is just used to get bufferSize;
	cusparseDnVecDescr_t dvec_tmp;
	cusparseCreateDnVec(&dvec_tmp, N, d_pd, CUDA_R_64F);

	// Largest work buffer needed by any of the cuSPARSE routines below.
	size_t bufferSize = 0;
	cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
		dvec_tmp, &zero, dvec_tmp, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize);

	// --- Start of the preconditioning part ---

	// create descriptor for matrix A
	cusparseCreateMatDescr(&descr_A);

	// initialize properties of matrix A
	cusparseSetMatType(descr_A, CUSPARSE_MATRIX_TYPE_GENERAL);
	cusparseSetMatFillMode(descr_A, CUSPARSE_FILL_MODE_LOWER);
	cusparseSetMatDiagType(descr_A, CUSPARSE_DIAG_TYPE_NON_UNIT);
	cusparseSetMatIndexBase(descr_A, CUSPARSE_INDEX_BASE_ZERO);

	// create descriptor for matrix L
	cusparseCreateMatDescr(&descr_L);

	// initialize properties of matrix L
	cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
	cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
	cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT);
	cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);

	// Create empty info objects for incomplete-cholesky factorization
	cusparseCreateCsric02Info(&icinfo_A);
	cusparseCreateCsrsv2Info(&info_L);
	cusparseCreateCsrsv2Info(&info_LT);

	// Compute buffer size in computing ic factorization
	int bufferSize_A = 0, bufferSize_L = 0, bufferSize_LT = 0;
	cusparseDcsric02_bufferSize(cusHandle, N, nz, descr_A, d_A, d_rowPtrA,
		d_colIdxA, icinfo_A, &bufferSize_A);
	cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
		N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, &bufferSize_L);
	cusparseDcsrsv2_bufferSize(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
		N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, &bufferSize_LT);

	// Keep the maximum in size_t so the SpMV size is never narrowed to int.
	auto grow_to = [&bufferSize](int candidate) {
		if (candidate > 0 && static_cast<size_t>(candidate) > bufferSize) {
			bufferSize = static_cast<size_t>(candidate);
		}
	};
	grow_to(bufferSize_A);
	grow_to(bufferSize_L);
	grow_to(bufferSize_LT);

	track(cudaMalloc(&d_buf, bufferSize), "cudaMalloc(d_buf)");
	if (!device_ok) return 1;

	// Perform incomplete-cholesky factorization: analysis phase
	cusparseDcsric02_analysis(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA,
		d_colIdxA, icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
	cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
		N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_L, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);
	cusparseDcsrsv2_analysis(cusHandle, CUSPARSE_OPERATION_TRANSPOSE,
		N, nz, descr_L, d_ic, d_rowPtrA, d_colIdxA, info_LT, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

	// Perform incomplete-cholesky factorization: solve phase
	cusparseDcsric02(cusHandle, N, nz, descr_A, d_ic, d_rowPtrA, d_colIdxA,
		icinfo_A, CUSPARSE_SOLVE_POLICY_USE_LEVEL, d_buf);

	// A zero pivot makes the factor unusable as a preconditioner; report it.
	int pivot_position = -1;
	if (cusparseXcsric02_zeroPivot(cusHandle, icinfo_A, &pivot_position) == CUSPARSE_STATUS_ZERO_PIVOT) {
		std::cerr << "Incomplete-Cholesky factorization hit a zero pivot at row " << pivot_position << std::endl;
	}

	// --- End of the preconditioning part ---

	// Declare an initial solution
	lcg_para self_para = lcg_default_parameters();
	self_para.epsilon = 1e-6;
	self_para.abs_diff = 0;

	int ret;
	double *host_m = new double[N];

	// Every solver run starts from the zero vector.
	auto reset_solution = [host_m, N]() {
		for (int i = 0; i < N; i++)
		{
			host_m[i] = 0.0;
		}
	};

	// Solve with CG
	reset_solution();
	ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CG);
	lcg_error_str(ret);

	std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

	// Solve with CGS
	reset_solution();
	ret = lcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_CGS);
	lcg_error_str(ret);

	std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

	// Solve with PCG
	reset_solution();
	ret = lcg_solver_preconditioned_cuda(cudaAx, cudaMx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, LCG_PCG);
	lcg_error_str(ret);

	std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

	// Free Host memory
	delete[] A;
	delete[] rowIdxA;
	delete[] colIdxA;
	delete[] b;
	delete[] ans_x;
	delete[] host_m;

	// Free Device memory
	cudaFree(d_A);
	cudaFree(d_rowIdxA);
	cudaFree(d_rowPtrA);
	cudaFree(d_colIdxA);
	cudaFree(d_pd);
	cudaFree(d_ic);

	cusparseDestroyDnVec(dvec_tmp);
	cusparseDestroySpMat(smat_A);
	cudaFree(d_buf);

	cusparseDestroyMatDescr(descr_A);
	cusparseDestroyMatDescr(descr_L);
	cusparseDestroyCsric02Info(icinfo_A);
	cusparseDestroyCsrsv2Info(info_L);
	cusparseDestroyCsrsv2Info(info_LT);

	// Free handles
	cublasDestroy(cubHandle);
	cusparseDestroy(cusHandle);

	return 0;
}
|
221
src/sample/sample9.cu
Normal file
221
src/sample/sample9.cu
Normal file
@ -0,0 +1,221 @@
|
||||
/******************************************************
|
||||
* C++ Library of the Linear Conjugate Gradient Methods (LibLCG)
|
||||
*
|
||||
* Copyright (C) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||
*
|
||||
* LibLCG is distributed under a dual licensing scheme. You can
|
||||
* redistribute it and/or modify it under the terms of the GNU Lesser
|
||||
* General Public License (LGPL) as published by the Free Software Foundation,
|
||||
* either version 2 of the License, or (at your option) any later version.
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* If the terms and conditions of the LGPL v.2. would prevent you from
|
||||
* using the LibLCG, please consider the option to obtain a commercial
|
||||
* license for a fee. These licenses are offered by the LibLCG developing
|
||||
* team. As a rule, licenses are provided "as-is", unlimited in time for
|
||||
* a one time fee. Please send corresponding requests to: yizhang-geo@zju.edu.cn.
|
||||
* Please do not forget to include some description of your company and the
|
||||
* realm of its activities. Also add information on how to contact you by
|
||||
* electronic and paper mail.
|
||||
******************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../lib/clcg_cuda.h"
|
||||
|
||||
/**
 * @brief Loads a complex sparse system from a binary file.
 *
 * File layout as read here: int N, int nz, then nz records of
 * (int row, int column, cuDoubleComplex value), then N cuDoubleComplex
 * entries of the right-hand side. All output arrays are allocated with
 * new[] and must be released by the caller with delete[].
 *
 * @param filePath  Path of the binary input file.
 * @param pN        Receives the number of unknowns.
 * @param pnz       Receives the number of stored matrix entries.
 * @param cooVal    Receives the matrix values (length nz).
 * @param cooRowIdx Receives the row index of every entry (length nz).
 * @param cooColIdx Receives the column index of every entry (length nz).
 * @param b         Receives the right-hand side (length N).
 * @throws std::ios_base::failure if the file cannot be opened, the header is
 *         invalid, or the file ends early.
 */
void read(std::string filePath, int *pN, int *pnz, cuDoubleComplex **cooVal,
	int **cooRowIdx, int **cooColIdx, cuDoubleComplex **b)
{
	std::ifstream in(filePath, std::ios::binary);
	if (!in) {
		throw std::ios_base::failure("read: cannot open file " + filePath);
	}

	in.read((char*)pN, sizeof(int));
	in.read((char*)pnz, sizeof(int));

	// The sizes drive the allocations below, so reject a bad header first.
	if (!in || *pN < 0 || *pnz < 0) {
		throw std::ios_base::failure("read: invalid header in file " + filePath);
	}

	*cooVal = new cuDoubleComplex[*pnz]{};
	*cooRowIdx = new int[*pnz]{};
	*cooColIdx = new int[*pnz]{};
	*b = new cuDoubleComplex[*pN]{};

	for (int i = 0; i < *pnz; ++i)
	{
		in.read((char*)&(*cooRowIdx)[i], sizeof(int));
		in.read((char*)&(*cooColIdx)[i], sizeof(int));
		in.read((char*)&(*cooVal)[i], sizeof(cuDoubleComplex));
	}

	in.read((char*)(*b), sizeof(cuDoubleComplex)*(*pN));

	// A truncated file would otherwise leave silently zero-filled data.
	if (!in) {
		delete[] *cooVal;    *cooVal = nullptr;
		delete[] *cooRowIdx; *cooRowIdx = nullptr;
		delete[] *cooColIdx; *cooColIdx = nullptr;
		delete[] *b;         *b = nullptr;
		throw std::ios_base::failure("read: unexpected end of file " + filePath);
	}
	return;
}
|
||||
|
||||
/**
 * @brief Loads a reference solution vector from a binary file.
 *
 * File layout as read here: int N followed by N cuDoubleComplex entries.
 * The output array is allocated with new[] and must be released by the
 * caller with delete[].
 *
 * @param filePath Path of the binary answer file.
 * @param pN       Receives the vector length.
 * @param x        Receives the vector (length N).
 * @throws std::ios_base::failure if the file cannot be opened, the header is
 *         invalid, or the file ends early.
 */
void readAnswer(std::string filePath, int *pN, cuDoubleComplex **x)
{
	std::ifstream in(filePath, std::ios::binary);
	if (!in) {
		throw std::ios_base::failure("readAnswer: cannot open file " + filePath);
	}

	in.read((char*)pN, sizeof(int));

	// The length drives the allocation below, so reject a bad header first.
	if (!in || *pN < 0) {
		throw std::ios_base::failure("readAnswer: invalid header in file " + filePath);
	}

	*x = new cuDoubleComplex[*pN]{};

	in.read((char*)(*x), sizeof(cuDoubleComplex)*(*pN));

	// A truncated file would otherwise leave a silently zero-filled answer.
	if (!in) {
		delete[] *x;
		*x = nullptr;
		throw std::ios_base::failure("readAnswer: unexpected end of file " + filePath);
	}
	return;
}
|
||||
|
||||
/**
 * @brief Returns sqrt(sum_i |a[i] - b[i]|^2) / n for two complex vectors.
 *
 * @param a First vector (length n).
 * @param b Second vector (length n).
 * @param n Number of entries to compare.
 * @return The error measure, or 0 when n is not positive (nothing to compare).
 */
lcg_float avg_error(cuDoubleComplex *a, cuDoubleComplex *b, int n)
{
	// Avoid dividing by zero and never loop on a non-positive length.
	if (n <= 0) return 0.0;

	lcg_float sq_sum = 0.0;
	for (int i = 0; i < n; i++)
	{
		const cuDoubleComplex diff = clcg_Zdiff(a[i], b[i]);
		// Squared modulus of the element-wise difference.
		sq_sum += (diff.x*diff.x + diff.y*diff.y);
	}
	return sqrt(sq_sum)/n;
}
|
||||
|
||||
// File-scope state shared between main() and the solver callbacks.

// Complex scalars handed to cuSPARSE as alpha and beta. They carry their
// values from the start so that they are valid wherever they are first used.
cuDoubleComplex one = {1.0, 0.0}, zero = {0.0, 0.0};

void *d_buf = nullptr; // cuSPARSE work buffer (device)
cusparseSpMatDescr_t smat_A; // sparse matrix descriptor of A

int *d_rowIdxA = nullptr; // COO
int *d_rowPtrA = nullptr; // CSR
int *d_colIdxA = nullptr;
cuDoubleComplex *d_A = nullptr;
cuDoubleComplex *d_B = nullptr;
|
||||
|
||||
/**
 * @brief Solver callback that evaluates prod_Ax = op(A) * x for the complex
 * matrix behind the file-scope descriptor smat_A.
 *
 * @param instance   User data pointer (unused here).
 * @param cub_handle cuBLAS handle (unused here).
 * @param cus_handle cuSPARSE handle used for the SpMV call.
 * @param x          Dense input vector descriptor.
 * @param prod_Ax    Dense output vector descriptor receiving the product.
 * @param n_size     Vector length (unused here).
 * @param nz_size    Number of non-zero entries (unused here).
 * @param oper_t     Operation applied to A (passed straight to cusparseSpMV).
 */
void cudaAx(void* instance, cublasHandle_t cub_handle, cusparseHandle_t cus_handle,
	cusparseDnVecDescr_t x, cusparseDnVecDescr_t prod_Ax, const int n_size, const int nz_size,
	cusparseOperation_t oper_t)
{
	// alpha = 1 + 0i, beta = 0 + 0i
	one.x = 1.0; one.y = 0.0;
	zero.x = 0.0; zero.y = 0.0;

	// Calculate the product of A*x
	const cusparseStatus_t status = cusparseSpMV(cus_handle, oper_t, &one, smat_A, x, &zero,
		prod_Ax, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, d_buf);

	// The callback cannot return an error, so at least make a failure visible.
	if (status != CUSPARSE_STATUS_SUCCESS) {
		std::cerr << "cudaAx: cusparseSpMV failed with status " << static_cast<int>(status) << std::endl;
	}
	return;
}
|
||||
|
||||
/**
 * @brief Progress callback: reports the iteration count once the convergence
 * value has dropped to the requested tolerance.
 *
 * @param instance User data pointer (unused here).
 * @param m        Current solution (unused here).
 * @param converge Current convergence value.
 * @param param    Solver parameters; only epsilon is read.
 * @param n_size   Vector length (unused here).
 * @param nz_size  Number of non-zero entries (unused here).
 * @param k        Current iteration number.
 * @return Always 0.
 */
int cudaProgress(void* instance, const cuDoubleComplex* m, const lcg_float converge,
	const clcg_para* param, const int n_size, const int nz_size, const int k)
{
	// Stay silent while the tolerance has not been reached.
	if (!(converge <= param->epsilon)) {
		return 0;
	}

	std::clog << "Iteration-times: " << k << "\tconvergence: " << converge << std::endl;
	return 0;
}
|
||||
|
||||
/**
 * @brief Sample driver: loads a complex sparse system, uploads it to the
 * device and solves it with the BICG and BICG_SYM solvers, printing the
 * averaged error against a reference solution.
 *
 * @return 0 on completion, 1 when the inputs are inconsistent or the device
 *         setup fails (in that case resources are left to process teardown).
 */
int main(int argc, char **argv)
{
	std::string inputPath = "data/case_1K_cA";
	std::string answerPath = "data/case_1K_cB";

	int N = 0, nz = 0;
	int *rowIdxA = nullptr, *colIdxA = nullptr;
	cuDoubleComplex *A = nullptr, *b = nullptr;

	read(inputPath, &N, &nz, &A, &rowIdxA, &colIdxA, &b);

	// Read the reference length separately so that it cannot silently
	// overwrite the system size.
	int N_ans = 0;
	cuDoubleComplex *ans_x = nullptr;
	readAnswer(answerPath, &N_ans, &ans_x);
	if (N_ans != N) {
		std::cerr << "Size mismatch: system has " << N << " unknowns but the answer file has " << N_ans << std::endl;
		return 1;
	}

	std::clog << "N = " << N << std::endl;
	std::clog << "nz = " << nz << std::endl;

	// Create handles
	cublasHandle_t cubHandle;
	cusparseHandle_t cusHandle;

	if (cublasCreate(&cubHandle) != CUBLAS_STATUS_SUCCESS ||
		cusparseCreate(&cusHandle) != CUSPARSE_STATUS_SUCCESS) {
		std::cerr << "Failed to create the cuBLAS/cuSPARSE handles" << std::endl;
		return 1;
	}

	// Records (and reports) any failing CUDA runtime call.
	bool device_ok = true;
	auto track = [&device_ok](cudaError_t status, const char *what) {
		if (status != cudaSuccess) {
			std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
			device_ok = false;
		}
	};

	// Allocate GPU memory & copy matrix/vector to device
	track(cudaMalloc(&d_A, nz * sizeof(cuDoubleComplex)), "cudaMalloc(d_A)");
	track(cudaMalloc(&d_rowIdxA, nz * sizeof(int)), "cudaMalloc(d_rowIdxA)");
	track(cudaMalloc(&d_rowPtrA, (N + 1) * sizeof(int)), "cudaMalloc(d_rowPtrA)");
	track(cudaMalloc(&d_colIdxA, nz * sizeof(int)), "cudaMalloc(d_colIdxA)");
	track(cudaMalloc(&d_B, N * sizeof(cuDoubleComplex)), "cudaMalloc(d_B)");

	track(cudaMemcpy(d_A, A, nz * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice), "cudaMemcpy(d_A)");
	track(cudaMemcpy(d_rowIdxA, rowIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(d_rowIdxA)");
	track(cudaMemcpy(d_colIdxA, colIdxA, nz * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(d_colIdxA)");
	if (!device_ok) return 1;

	// Convert matrix A from COO format to CSR format
	cusparseXcoo2csr(cusHandle, d_rowIdxA, nz, N, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);

	// Create sparse matrix
	cusparseCreateCsr(&smat_A, N, N, nz, d_rowPtrA, d_colIdxA, d_A, CUSPARSE_INDEX_32I,
		CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

	// This is just used to get bufferSize;
	cusparseDnVecDescr_t dvec_tmp;
	cusparseCreateDnVec(&dvec_tmp, N, d_B, CUDA_C_64F);

	// The scalars must hold their real values before they are handed to
	// cuSPARSE: alpha = 1 + 0i, beta = 0 + 0i.
	one.x = 1.0; one.y = 0.0;
	zero.x = 0.0; zero.y = 0.0;

	// Query the work-buffer size for both operations the solvers may request,
	// using the same algorithm selector as the SpMV call in cudaAx.
	size_t bufferSize_B = 0, bufferSize_B2 = 0;

	cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, smat_A,
		dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B);

	cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE, &one, smat_A,
		dvec_tmp, &zero, dvec_tmp, CUDA_C_64F, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize_B2);

	if (bufferSize_B2 > bufferSize_B) bufferSize_B = bufferSize_B2;
	track(cudaMalloc(&d_buf, bufferSize_B), "cudaMalloc(d_buf)");
	if (!device_ok) return 1;

	// Declare an initial solution
	clcg_para self_para = clcg_default_parameters();
	self_para.epsilon = 1e-6;
	self_para.abs_diff = 0;

	int ret;
	cuDoubleComplex *host_m = new cuDoubleComplex[N];

	// Every solver run starts from the zero vector.
	auto reset_solution = [host_m, N]() {
		for (int i = 0; i < N; i++)
		{
			host_m[i].x = 0.0; host_m[i].y = 0.0;
		}
	};

	// Solve with BICG
	reset_solution();
	ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG);
	lcg_error_str(ret);

	std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

	// Solve with BICG_SYM
	reset_solution();
	ret = clcg_solver_cuda(cudaAx, cudaProgress, host_m, b, N, nz, &self_para, nullptr, cubHandle, cusHandle, CLCG_BICG_SYM);
	lcg_error_str(ret);

	std::clog << "Averaged error (compared with ans_x): " << avg_error(host_m, ans_x, N) << std::endl;

	// Free Host memory
	delete[] A;
	delete[] rowIdxA;
	delete[] colIdxA;
	delete[] b;
	delete[] ans_x;
	delete[] host_m;

	// Free Device memory
	cudaFree(d_A);
	cudaFree(d_rowIdxA);
	cudaFree(d_rowPtrA);
	cudaFree(d_colIdxA);
	cudaFree(d_B);

	cusparseDestroyDnVec(dvec_tmp);
	cusparseDestroySpMat(smat_A);
	cudaFree(d_buf);

	// Free handles
	cublasDestroy(cubHandle);
	cusparseDestroy(cusHandle);

	return 0;
}
|
Loading…
Reference in New Issue
Block a user