elemental: remove deprecated package (#49291)

This package has not been maintained since 2016.

We maintain an active fork in the hydrogen
package, so remove this one.

Signed-off-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
Massimiliano Culpo 2025-03-05 08:36:05 +01:00 committed by GitHub
parent 7d62045c30
commit aa9e610fa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 0 additions and 861 deletions

View File

@ -1,22 +0,0 @@
diff --git a/cmake/configure_files/ElementalConfig.cmake.in b/cmake/configure_files/ElementalConfig.cmake.in
index d37649f..8511d81 100644
--- a/cmake/configure_files/ElementalConfig.cmake.in
+++ b/cmake/configure_files/ElementalConfig.cmake.in
@@ -1,6 +1,8 @@
set(Elemental_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include")
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPI_CXX_INCLUDE_PATH@")
-set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
+IF(@QD_FOUND@)
+ set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
+ENDIF()
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPC_INCLUDES@")
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPFR_INCLUDES@")
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@GMP_INCLUDES@")
@@ -13,6 +15,6 @@ set(Elemental_LINK_FLAGS "@EL_LINK_FLAGS@")
set(Elemental_DEFINITIONS "@Qt5Widgets_DEFINITIONS@")
# Our library dependencies (contains definitions for IMPORTED targets)
-include("@CMAKE_INSTALL_PREFIX@/CMake/ElementalTargets.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/ElementalTargets.cmake")
set(Elemental_LIBRARIES El)

View File

@ -1,668 +0,0 @@
diff -Naur a/include/El/blas_like/level3.hpp b/include/El/blas_like/level3.hpp
--- a/include/El/blas_like/level3.hpp 2017-06-08 07:30:43.180249917 -0700
+++ b/include/El/blas_like/level3.hpp 2017-06-08 07:35:27.325434602 -0700
@@ -31,6 +31,10 @@
}
using namespace GemmAlgorithmNS;
+void GemmUseGPU(int min_M, int min_N, int min_K);
+
+void GemmUseCPU();
+
template<typename T>
void Gemm
( Orientation orientA, Orientation orientB,
diff -Naur a/include/El/core/imports/blas.hpp b/include/El/core/imports/blas.hpp
--- a/include/El/core/imports/blas.hpp 2017-06-08 07:30:43.522016908 -0700
+++ b/include/El/core/imports/blas.hpp 2017-06-08 07:35:06.834030908 -0700
@@ -916,4 +916,63 @@
} // namespace blas
} // namespace El
+
+#if defined(EL_USE_CUBLAS)
+
+namespace El {
+
+#ifdef EL_USE_64BIT_BLAS_INTS
+typedef long long int BlasInt;
+#else
+typedef int BlasInt;
+#endif
+
+namespace cublas {
+
+// NOTE: templated routines are custom and not wrappers
+
+// Level 3 BLAS
+// ============
+template<typename T>
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const T& alpha,
+ const T* A, BlasInt ALDim,
+ const T* B, BlasInt BLDim,
+ const T& beta,
+ T* C, BlasInt CLDim );
+
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const float& alpha,
+ const float* A, BlasInt ALDim,
+ const float* B, BlasInt BLDim,
+ const float& beta,
+ float* C, BlasInt CLDim );
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const double& alpha,
+ const double* A, BlasInt ALDim,
+ const double* B, BlasInt BLDim,
+ const double& beta,
+ double* C, BlasInt CLDim );
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const scomplex& alpha,
+ const scomplex* A, BlasInt ALDim,
+ const scomplex* B, BlasInt BLDim,
+ const scomplex& beta,
+ scomplex* C, BlasInt CLDim );
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const dcomplex& alpha,
+ const dcomplex* A, BlasInt ALDim,
+ const dcomplex* B, BlasInt BLDim,
+ const dcomplex& beta,
+ dcomplex* C, BlasInt CLDim );
+
+} // namespace cublas
+} // namespace El
+#endif
+
#endif // ifndef EL_IMPORTS_BLAS_DECL_HPP
diff -Naur a/src/blas_like/level3/Gemm.cpp b/src/blas_like/level3/Gemm.cpp
--- a/src/blas_like/level3/Gemm.cpp 2017-06-08 07:30:44.307096427 -0700
+++ b/src/blas_like/level3/Gemm.cpp 2017-06-08 07:34:23.062863489 -0700
@@ -16,6 +16,20 @@
namespace El {
+char gemm_cpu_gpu_switch = 'c';
+int min_M = 0, min_N = 0, min_K = 0;
+
+void GemmUseGPU(int _min_M, int _min_N, int _min_K) {
+ gemm_cpu_gpu_switch = 'g';
+ min_M = _min_M;
+ min_N = _min_N;
+ min_K = _min_K;
+}
+
+void GemmUseCPU() {
+ gemm_cpu_gpu_switch = 'c';
+}
+
template<typename T>
void Gemm
( Orientation orientA, Orientation orientB,
@@ -59,11 +73,30 @@
const Int k = ( orientA == NORMAL ? A.Width() : A.Height() );
if( k != 0 )
{
+#if defined(EL_USE_CUBLAS)
+ if (gemm_cpu_gpu_switch == 'g' &&
+ m >= min_M &&
+ n >= min_N &&
+ k >= min_K) {
+ cublas::Gemm
+ ( transA, transB, m, n, k,
+ alpha, A.LockedBuffer(), A.LDim(),
+ B.LockedBuffer(), B.LDim(),
+ beta, C.Buffer(), C.LDim() );
+ } else {
+ blas::Gemm
+ ( transA, transB, m, n, k,
+ alpha, A.LockedBuffer(), A.LDim(),
+ B.LockedBuffer(), B.LDim(),
+ beta, C.Buffer(), C.LDim() );
+ }
+#else
blas::Gemm
( transA, transB, m, n, k,
alpha, A.LockedBuffer(), A.LDim(),
B.LockedBuffer(), B.LDim(),
beta, C.Buffer(), C.LDim() );
+#endif
}
else
{
diff -Naur a/src/core/imports/blas/Gemm.hpp b/src/core/imports/blas/Gemm.hpp
--- a/src/core/imports/blas/Gemm.hpp 2017-06-08 07:30:45.090529967 -0700
+++ b/src/core/imports/blas/Gemm.hpp 2017-06-08 07:34:46.503009958 -0700
@@ -41,6 +41,12 @@
} // extern "C"
+
+#if defined(EL_USE_CUBLAS)
+#include <cublas.h>
+#include <cub/util_allocator.cuh>
+#endif
+
namespace El {
namespace blas {
@@ -515,3 +521,515 @@
} // namespace blas
} // namespace El
+
+
+#if EL_USE_CUBLAS
+
+#define USE_CUB 1
+
+namespace El {
+namespace cublas {
+
+#if USE_CUB
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
+#endif
+
+template<typename T>
+void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const T& alpha,
+ const T* A, BlasInt ALDim,
+ const T* B, BlasInt BLDim,
+ const T& beta,
+ T* C, BlasInt CLDim )
+{
+ // put something here
+ printf("integer version \n");
+}
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Int& alpha,
+ const Int* A, BlasInt ALDim,
+ const Int* B, BlasInt BLDim,
+ const Int& beta,
+ Int* C, BlasInt CLDim );
+#ifdef EL_HAVE_QD
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const DoubleDouble& alpha,
+ const DoubleDouble* A, BlasInt ALDim,
+ const DoubleDouble* B, BlasInt BLDim,
+ const DoubleDouble& beta,
+ DoubleDouble* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const QuadDouble& alpha,
+ const QuadDouble* A, BlasInt ALDim,
+ const QuadDouble* B, BlasInt BLDim,
+ const QuadDouble& beta,
+ QuadDouble* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Complex<DoubleDouble>& alpha,
+ const Complex<DoubleDouble>* A, BlasInt ALDim,
+ const Complex<DoubleDouble>* B, BlasInt BLDim,
+ const Complex<DoubleDouble>& beta,
+ Complex<DoubleDouble>* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Complex<QuadDouble>& alpha,
+ const Complex<QuadDouble>* A, BlasInt ALDim,
+ const Complex<QuadDouble>* B, BlasInt BLDim,
+ const Complex<QuadDouble>& beta,
+ Complex<QuadDouble>* C, BlasInt CLDim );
+#endif
+#ifdef EL_HAVE_QUAD
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Quad& alpha,
+ const Quad* A, BlasInt ALDim,
+ const Quad* B, BlasInt BLDim,
+ const Quad& beta,
+ Quad* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Complex<Quad>& alpha,
+ const Complex<Quad>* A, BlasInt ALDim,
+ const Complex<Quad>* B, BlasInt BLDim,
+ const Complex<Quad>& beta,
+ Complex<Quad>* C, BlasInt CLDim );
+#endif
+#ifdef EL_HAVE_MPC
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const BigInt& alpha,
+ const BigInt* A, BlasInt ALDim,
+ const BigInt* B, BlasInt BLDim,
+ const BigInt& beta,
+ BigInt* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const BigFloat& alpha,
+ const BigFloat* A, BlasInt ALDim,
+ const BigFloat* B, BlasInt BLDim,
+ const BigFloat& beta,
+ BigFloat* C, BlasInt CLDim );
+template void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const Complex<BigFloat>& alpha,
+ const Complex<BigFloat>* A, BlasInt ALDim,
+ const Complex<BigFloat>* B, BlasInt BLDim,
+ const Complex<BigFloat>& beta,
+ Complex<BigFloat>* C, BlasInt CLDim );
+#endif
+
+void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const float& alpha,
+ const float* A, BlasInt ALDim,
+ const float* B, BlasInt BLDim,
+ const float& beta,
+ float* C, BlasInt CLDim )
+{
+ EL_DEBUG_CSE
+ EL_DEBUG_ONLY(
+ if( std::toupper(transA) == 'N' )
+ {
+ if( ALDim < Max(m,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
+ }
+ else
+ {
+ if( ALDim < Max(k,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
+ }
+
+ if( std::toupper(transB) == 'N' )
+ {
+ if( BLDim < Max(k,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
+ }
+ else
+ {
+ if( BLDim < Max(n,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
+ }
+
+ if( CLDim < Max(m,1) )
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
+ )
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
+
+ const mpi::Comm comm;
+ const Int commRank = mpi::Rank( comm );
+ if (commRank == 0) {
+ //printf("calling cublas Sgemm: m %d n %d k %d\n", m, n, k);
+ }
+
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
+ // device memory size for A, B and C
+ BlasInt sizeA, sizeB, sizeC;
+ float *devA=NULL, *devB=NULL, *devC=NULL;
+
+ rowA = fixedTransA == 'T' ? k : m;
+ colA = fixedTransA == 'T' ? m : k;
+ rowB = fixedTransB == 'T' ? n : k;
+ colB = fixedTransB == 'T' ? k : n;
+ rowC = m;
+ colC = n;
+ sizeA = rowA * colA;
+ sizeB = rowB * colB;
+ sizeC = rowC * colC;
+
+ cublasStatus stat;
+
+#if USE_CUB
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
+ sizeof(float) * (sizeA+sizeB+sizeC) ));
+#else
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(float), (void **) &devA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
+#endif
+
+ devB = devA + sizeA;
+ devC = devB + sizeB;
+
+ // copy matrix A, B and C to device
+ stat = cublasSetMatrix(rowA, colA, sizeof(float), A, ALDim, devA, rowA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
+
+ stat = cublasSetMatrix(rowB, colB, sizeof(float), B, BLDim, devB, rowB);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
+
+ if (beta != 0.0)
+ {
+ stat = cublasSetMatrix(rowC, colC, sizeof(float), C, CLDim, devC, rowC);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
+ }
+
+ // cublas<t>gemm
+ cublasSgemm
+ ( fixedTransA, fixedTransB, m, n, k,
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
+
+ // copy matrix C to host
+ stat = cublasGetMatrix(rowC, colC, sizeof(float), devC, rowC, C, CLDim);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
+
+ // free
+#if USE_CUB
+ CubDebugExit(g_allocator.DeviceFree(devA));
+#else
+ cublasFree(devA);
+#endif
+ //printf("CUBLAS float done ...\n");
+}
+
+void Gemm
+( char transA, char transB,
+ BlasInt m, BlasInt n, BlasInt k,
+ const double& alpha,
+ const double* A, BlasInt ALDim,
+ const double* B, BlasInt BLDim,
+ const double& beta,
+ double* C, BlasInt CLDim )
+{
+ EL_DEBUG_CSE
+ EL_DEBUG_ONLY(
+ if( std::toupper(transA) == 'N' )
+ {
+ if( ALDim < Max(m,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
+ }
+ else
+ {
+ if( ALDim < Max(k,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
+ }
+
+ if( std::toupper(transB) == 'N' )
+ {
+ if( BLDim < Max(k,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
+ }
+ else
+ {
+ if( BLDim < Max(n,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
+ }
+
+ if( CLDim < Max(m,1) )
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
+ )
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
+
+ const mpi::Comm comm;
+ const Int commRank = mpi::Rank( comm );
+ if (commRank == 0) {
+ //printf("calling cublas Dgemm: m %d n %d k %d\n", m, n, k);
+ }
+
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
+ // device memory size for A, B and C
+ BlasInt sizeA, sizeB, sizeC;
+ double *devA=NULL, *devB=NULL, *devC=NULL;
+
+ rowA = fixedTransA == 'T' ? k : m;
+ colA = fixedTransA == 'T' ? m : k;
+ rowB = fixedTransB == 'T' ? n : k;
+ colB = fixedTransB == 'T' ? k : n;
+ rowC = m;
+ colC = n;
+ sizeA = rowA * colA;
+ sizeB = rowB * colB;
+ sizeC = rowC * colC;
+
+ cublasStatus stat;
+
+#if USE_CUB
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
+ sizeof(double) * (sizeA+sizeB+sizeC) ));
+#else
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(double), (void **) &devA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
+#endif
+
+ devB = devA + sizeA;
+ devC = devB + sizeB;
+
+ // copy matrix A, B and C to device
+ stat = cublasSetMatrix(rowA, colA, sizeof(double), A, ALDim, devA, rowA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
+
+ stat = cublasSetMatrix(rowB, colB, sizeof(double), B, BLDim, devB, rowB);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
+
+ if (beta != 0.0)
+ {
+ stat = cublasSetMatrix(rowC, colC, sizeof(double), C, CLDim, devC, rowC);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
+ }
+
+ // cublas<t>gemm
+ cublasDgemm
+ ( fixedTransA, fixedTransB, m, n, k,
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
+
+ // copy matrix C to host
+ stat = cublasGetMatrix(rowC, colC, sizeof(double), devC, rowC, C, CLDim);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
+
+ // free
+#if USE_CUB
+ CubDebugExit(g_allocator.DeviceFree(devA));
+#else
+ cublasFree(devA);
+#endif
+ //printf("CUBLAS double done ...\n");
+}
+
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const scomplex& alpha,
+ const scomplex* A, BlasInt ALDim,
+ const scomplex* B, BlasInt BLDim,
+ const scomplex& beta,
+ scomplex* C, BlasInt CLDim )
+{
+ EL_DEBUG_CSE
+ EL_DEBUG_ONLY(
+ if( std::toupper(transA) == 'N' )
+ {
+ if( ALDim < Max(m,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
+ }
+ else
+ {
+ if( ALDim < Max(k,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
+ }
+
+ if( std::toupper(transB) == 'N' )
+ {
+ if( BLDim < Max(k,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
+ }
+ else
+ {
+ if( BLDim < Max(n,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
+ }
+
+ if( CLDim < Max(m,1) )
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
+ )
+
+ const char fixedTransA = transA;
+ const char fixedTransB = transB;
+
+ const mpi::Comm comm;
+ const Int commRank = mpi::Rank( comm );
+ if (commRank == 0) {
+ //printf("calling cublas Cgemm: m %d n %d k %d\n", m, n, k);
+ }
+
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
+ // device memory size for A, B and C
+ BlasInt sizeA, sizeB, sizeC;
+ cuComplex *devA=NULL, *devB=NULL, *devC=NULL;
+
+ rowA = fixedTransA == 'T' ? k : m;
+ colA = fixedTransA == 'T' ? m : k;
+ rowB = fixedTransB == 'T' ? n : k;
+ colB = fixedTransB == 'T' ? k : n;
+ rowC = m;
+ colC = n;
+ sizeA = rowA * colA;
+ sizeB = rowB * colB;
+ sizeC = rowC * colC;
+
+ cublasStatus stat;
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuComplex), (void **) &devA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
+
+ devB = devA + sizeA;
+ devC = devB + sizeB;
+
+ // copy matrix A, B and C to device
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuComplex), A, ALDim, devA, rowA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
+
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuComplex), B, BLDim, devB, rowB);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
+
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
+ {
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuComplex), C, CLDim, devC, rowC);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
+ }
+
+ // cublas<t>gemm
+ cublasCgemm
+ ( fixedTransA, fixedTransB, m, n, k,
+ *((cuComplex*) &alpha), devA, rowA, devB, rowB, *((cuComplex*) &beta), devC, rowC );
+
+ // copy matrix C to host
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuComplex), devC, rowC, C, CLDim);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
+
+ // free
+ cublasFree(devA);
+}
+
+void Gemm
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
+ const dcomplex& alpha,
+ const dcomplex* A, BlasInt ALDim,
+ const dcomplex* B, BlasInt BLDim,
+ const dcomplex& beta,
+ dcomplex* C, BlasInt CLDim )
+{
+ EL_DEBUG_CSE
+ EL_DEBUG_ONLY(
+ if( std::toupper(transA) == 'N' )
+ {
+ if( ALDim < Max(m,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
+ }
+ else
+ {
+ if( ALDim < Max(k,1) )
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
+ }
+
+ if( std::toupper(transB) == 'N' )
+ {
+ if( BLDim < Max(k,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
+ }
+ else
+ {
+ if( BLDim < Max(n,1) )
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
+ }
+
+ if( CLDim < Max(m,1) )
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
+ )
+
+ const char fixedTransA = transA;
+ const char fixedTransB = transB;
+
+ const mpi::Comm comm;
+ const Int commRank = mpi::Rank( comm );
+ if (commRank == 0) {
+ //printf("calling cublas Zgemm: m %d n %d k %d\n", m, n, k);
+ }
+
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
+ // device memory size for A, B and C
+ BlasInt sizeA, sizeB, sizeC;
+ cuDoubleComplex *devA=NULL, *devB=NULL, *devC=NULL;
+
+ rowA = fixedTransA == 'T' ? k : m;
+ colA = fixedTransA == 'T' ? m : k;
+ rowB = fixedTransB == 'T' ? n : k;
+ colB = fixedTransB == 'T' ? k : n;
+ rowC = m;
+ colC = n;
+ sizeA = rowA * colA;
+ sizeB = rowB * colB;
+ sizeC = rowC * colC;
+
+ cublasStatus stat;
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuDoubleComplex), (void **) &devA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
+
+ devB = devA + sizeA;
+ devC = devB + sizeB;
+
+ // copy matrix A, B and C to device
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuDoubleComplex), A, ALDim, devA, rowA);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
+
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuDoubleComplex), B, BLDim, devB, rowB);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
+
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
+ {
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuDoubleComplex), C, CLDim, devC, rowC);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
+ }
+
+ cublasZgemm
+ ( fixedTransA, fixedTransB, m, n, k,
+ *((cuDoubleComplex*) &alpha), devA, rowA, devB, rowB, *((cuDoubleComplex*) &beta),
+ devC, rowC );
+
+ // copy matrix C to host
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuDoubleComplex), devC, rowC, C, CLDim);
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
+
+ // free
+ cublasFree(devA);
+}
+
+} // namespace cublas
+} // namespace El
+
+#endif
+

View File

@ -1,171 +0,0 @@
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
from spack.package import *
class Elemental(CMakePackage):
"""Elemental: Distributed-memory dense and sparse-direct linear algebra
and optimization library."""
homepage = "https://libelemental.org"
url = "https://github.com/elemental/Elemental/archive/v0.87.7.tar.gz"
git = "https://github.com/elemental/Elemental.git"
license("Apache-2.0")
version("develop", branch="master")
version("0.87.7", sha256="7becfdbc223e9c72e65ae876d842c48d2037d13f83e9f41cea285e21b840d7d9")
version("0.87.6", sha256="b597987c99ddd3462e0619524c5b7f711177ae8ae541b1b961e11d96e15afc64")
depends_on("c", type="build") # generated
depends_on("cxx", type="build") # generated
variant("shared", default=True, description="Enables the build of shared libraries")
variant("hybrid", default=True, description="Make use of OpenMP within MPI packing/unpacking")
variant(
"openmp_blas", default=False, description="Use OpenMP for threading in the BLAS library"
)
variant("c", default=False, description="Build C interface")
variant("parmetis", default=False, description="Enable ParMETIS")
variant("quad", default=False, description="Enable quad precision")
variant("int64", default=False, description="Use 64bit integers")
variant("cublas", default=False, description="Enable cuBLAS for local BLAS operations")
# When this variant is set remove the normal dependencies since
# Elemental has to build BLAS and ScaLAPACK internally
variant(
"int64_blas",
default=False,
description="Use 64bit integers for BLAS." " Requires local build of BLAS library.",
)
variant("scalapack", default=False, description="Build with ScaLAPACK library")
variant(
"build_type",
default="Release",
description="The build type to build",
values=("Debug", "Release"),
)
variant(
"blas",
default="openblas",
values=("openblas", "mkl", "accelerate", "essl"),
description="Enable the use of OpenBlas/MKL/Accelerate/ESSL",
)
variant(
"mpfr",
default=False,
description="Support GNU MPFR's" "arbitrary-precision floating-point arithmetic",
)
# Note that #1712 forces us to enumerate the different blas variants
depends_on("blas", when="~openmp_blas ~int64_blas")
# Hack to forward variant to openblas package
depends_on("openblas", when="blas=openblas ~openmp_blas ~int64_blas")
# Allow Elemental to build internally when using 8-byte ints
depends_on("openblas threads=openmp", when="blas=openblas +openmp_blas ~int64_blas")
depends_on("intel-mkl", when="blas=mkl")
depends_on("intel-mkl threads=openmp", when="blas=mkl +openmp_blas")
depends_on("intel-mkl@2017.1 +ilp64", when="blas=mkl +int64_blas")
depends_on("veclibfort", when="blas=accelerate")
depends_on("essl", when="blas=essl")
depends_on("essl threads=openmp", when="blas=essl +openmp_blas")
# Note that this forces us to use OpenBLAS until #1712 is fixed
depends_on("lapack", when="blas=openblas ~openmp_blas")
depends_on("netlib-lapack +external-blas", when="blas=essl")
depends_on("metis")
depends_on("metis +int64", when="+int64")
depends_on("mpi")
# Allow Elemental to build internally when using 8-byte ints
depends_on("scalapack", when="+scalapack ~int64_blas")
depends_on("gmp", when="+mpfr")
depends_on("mpc", when="+mpfr")
depends_on("mpfr", when="+mpfr")
patch("elemental_cublas.patch", when="+cublas")
patch("cmake_0.87.7.patch", when="@0.87.7")
conflicts("%intel@:17.0.2", when="@:0.87.7")
@property
def libs(self):
shared = True if "+shared" in self.spec else False
return find_libraries("libEl", root=self.prefix, shared=shared, recursive=True)
def cmake_args(self):
spec = self.spec
args = [
"-DCMAKE_INSTALL_MESSAGE:STRING=LAZY",
"-DCMAKE_C_COMPILER=%s" % spec["mpi"].mpicc,
"-DCMAKE_CXX_COMPILER=%s" % spec["mpi"].mpicxx,
"-DCMAKE_Fortran_COMPILER=%s" % spec["mpi"].mpifc,
"-DEL_PREFER_OPENBLAS:BOOL=TRUE",
"-DEL_DISABLE_SCALAPACK:BOOL=%s" % ("~scalapack" in spec),
"-DBUILD_SHARED_LIBS:BOOL=%s" % ("+shared" in spec),
"-DEL_HYBRID:BOOL=%s" % ("+hybrid" in spec),
"-DEL_C_INTERFACE:BOOL=%s" % ("+c" in spec),
"-DEL_DISABLE_PARMETIS:BOOL=%s" % ("~parmetis" in spec),
"-DEL_DISABLE_QUAD:BOOL=%s" % ("~quad" in spec),
"-DEL_USE_64BIT_INTS:BOOL=%s" % ("+int64" in spec),
"-DEL_USE_64BIT_BLAS_INTS:BOOL=%s" % ("+int64_blas" in spec),
"-DEL_DISABLE_MPFR:BOOL=%s" % ("~mpfr" in spec),
]
if self.spec.satisfies("%intel"):
ifort = env["SPACK_F77"]
intel_bin = os.path.dirname(ifort)
intel_root = os.path.dirname(intel_bin)
libfortran = find_libraries("libifcoremt", root=intel_root, recursive=True)
elif self.spec.satisfies("%gcc"):
# see <stage_folder>/debian/rules as an example:
mpif77 = Executable(spec["mpi"].mpif77)
libfortran = LibraryList(
mpif77("--print-file-name", "libgfortran.%s" % dso_suffix, output=str).strip()
)
elif self.spec.satisfies("%xl") or self.spec.satisfies("%xl_r"):
xl_fort = env["SPACK_F77"]
xl_bin = os.path.dirname(xl_fort)
xl_root = os.path.dirname(xl_bin)
libfortran = find_libraries("libxlf90_r", root=xl_root, recursive=True)
else:
libfortran = None
if libfortran:
args.append("-DGFORTRAN_LIB=%s" % libfortran.libraries[0])
# If using 64bit int BLAS libraries, elemental has to build
# them internally
if spec.satisfies("+int64_blas"):
args.extend(
[
"-DEL_BLAS_SUFFIX:STRING={0}".format(
("_64_" if "+int64_blas" in spec else "_")
),
"-DCUSTOM_BLAS_SUFFIX:BOOL=TRUE",
]
)
if spec.satisfies("+scalapack"):
args.extend(
[
"-DEL_LAPACK_SUFFIX:STRING={0}".format(
("_64_" if "+int64_blas" in spec else "_")
),
"-DCUSTOM_LAPACK_SUFFIX:BOOL=TRUE",
]
)
else:
math_libs = spec["lapack"].libs + spec["blas"].libs
if spec.satisfies("+scalapack"):
math_libs = spec["scalapack"].libs + math_libs
args.extend(["-DMATH_LIBS:STRING={0}".format(math_libs.ld_flags)])
return args