elemental: remove deprecated package (#49291)
This package has not been maintained since 2016. We maintain an active fork in the hydrogen package, so remove this one.

Signed-off-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
parent
7d62045c30
commit
aa9e610fa6
@ -1,22 +0,0 @@
|
||||
diff --git a/cmake/configure_files/ElementalConfig.cmake.in b/cmake/configure_files/ElementalConfig.cmake.in
|
||||
index d37649f..8511d81 100644
|
||||
--- a/cmake/configure_files/ElementalConfig.cmake.in
|
||||
+++ b/cmake/configure_files/ElementalConfig.cmake.in
|
||||
@@ -1,6 +1,8 @@
|
||||
set(Elemental_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include")
|
||||
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPI_CXX_INCLUDE_PATH@")
|
||||
-set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
|
||||
+IF(@QD_FOUND@)
|
||||
+ set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
|
||||
+ENDIF()
|
||||
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPC_INCLUDES@")
|
||||
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPFR_INCLUDES@")
|
||||
set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@GMP_INCLUDES@")
|
||||
@@ -13,6 +15,6 @@ set(Elemental_LINK_FLAGS "@EL_LINK_FLAGS@")
|
||||
set(Elemental_DEFINITIONS "@Qt5Widgets_DEFINITIONS@")
|
||||
|
||||
# Our library dependencies (contains definitions for IMPORTED targets)
|
||||
-include("@CMAKE_INSTALL_PREFIX@/CMake/ElementalTargets.cmake")
|
||||
+include("${CMAKE_CURRENT_LIST_DIR}/ElementalTargets.cmake")
|
||||
|
||||
set(Elemental_LIBRARIES El)
|
@ -1,668 +0,0 @@
|
||||
diff -Naur a/include/El/blas_like/level3.hpp b/include/El/blas_like/level3.hpp
|
||||
--- a/include/El/blas_like/level3.hpp 2017-06-08 07:30:43.180249917 -0700
|
||||
+++ b/include/El/blas_like/level3.hpp 2017-06-08 07:35:27.325434602 -0700
|
||||
@@ -31,6 +31,10 @@
|
||||
}
|
||||
using namespace GemmAlgorithmNS;
|
||||
|
||||
+void GemmUseGPU(int min_M, int min_N, int min_K);
|
||||
+
|
||||
+void GemmUseCPU();
|
||||
+
|
||||
template<typename T>
|
||||
void Gemm
|
||||
( Orientation orientA, Orientation orientB,
|
||||
diff -Naur a/include/El/core/imports/blas.hpp b/include/El/core/imports/blas.hpp
|
||||
--- a/include/El/core/imports/blas.hpp 2017-06-08 07:30:43.522016908 -0700
|
||||
+++ b/include/El/core/imports/blas.hpp 2017-06-08 07:35:06.834030908 -0700
|
||||
@@ -916,4 +916,63 @@
|
||||
} // namespace blas
|
||||
} // namespace El
|
||||
|
||||
+
|
||||
+#if defined(EL_USE_CUBLAS)
|
||||
+
|
||||
+namespace El {
|
||||
+
|
||||
+#ifdef EL_USE_64BIT_BLAS_INTS
|
||||
+typedef long long int BlasInt;
|
||||
+#else
|
||||
+typedef int BlasInt;
|
||||
+#endif
|
||||
+
|
||||
+namespace cublas {
|
||||
+
|
||||
+// NOTE: templated routines are custom and not wrappers
|
||||
+
|
||||
+// Level 3 BLAS
|
||||
+// ============
|
||||
+template<typename T>
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const T& alpha,
|
||||
+ const T* A, BlasInt ALDim,
|
||||
+ const T* B, BlasInt BLDim,
|
||||
+ const T& beta,
|
||||
+ T* C, BlasInt CLDim );
|
||||
+
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const float& alpha,
|
||||
+ const float* A, BlasInt ALDim,
|
||||
+ const float* B, BlasInt BLDim,
|
||||
+ const float& beta,
|
||||
+ float* C, BlasInt CLDim );
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const double& alpha,
|
||||
+ const double* A, BlasInt ALDim,
|
||||
+ const double* B, BlasInt BLDim,
|
||||
+ const double& beta,
|
||||
+ double* C, BlasInt CLDim );
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const scomplex& alpha,
|
||||
+ const scomplex* A, BlasInt ALDim,
|
||||
+ const scomplex* B, BlasInt BLDim,
|
||||
+ const scomplex& beta,
|
||||
+ scomplex* C, BlasInt CLDim );
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const dcomplex& alpha,
|
||||
+ const dcomplex* A, BlasInt ALDim,
|
||||
+ const dcomplex* B, BlasInt BLDim,
|
||||
+ const dcomplex& beta,
|
||||
+ dcomplex* C, BlasInt CLDim );
|
||||
+
|
||||
+} // namespace cublas
|
||||
+} // namespace El
|
||||
+#endif
|
||||
+
|
||||
#endif // ifndef EL_IMPORTS_BLAS_DECL_HPP
|
||||
diff -Naur a/src/blas_like/level3/Gemm.cpp b/src/blas_like/level3/Gemm.cpp
|
||||
--- a/src/blas_like/level3/Gemm.cpp 2017-06-08 07:30:44.307096427 -0700
|
||||
+++ b/src/blas_like/level3/Gemm.cpp 2017-06-08 07:34:23.062863489 -0700
|
||||
@@ -16,6 +16,20 @@
|
||||
|
||||
namespace El {
|
||||
|
||||
+char gemm_cpu_gpu_switch = 'c';
|
||||
+int min_M = 0, min_N = 0, min_K = 0;
|
||||
+
|
||||
+void GemmUseGPU(int _min_M, int _min_N, int _min_K) {
|
||||
+ gemm_cpu_gpu_switch = 'g';
|
||||
+ min_M = _min_M;
|
||||
+ min_N = _min_N;
|
||||
+ min_K = _min_K;
|
||||
+}
|
||||
+
|
||||
+void GemmUseCPU() {
|
||||
+ gemm_cpu_gpu_switch = 'c';
|
||||
+}
|
||||
+
|
||||
template<typename T>
|
||||
void Gemm
|
||||
( Orientation orientA, Orientation orientB,
|
||||
@@ -59,11 +73,30 @@
|
||||
const Int k = ( orientA == NORMAL ? A.Width() : A.Height() );
|
||||
if( k != 0 )
|
||||
{
|
||||
+#if defined(EL_USE_CUBLAS)
|
||||
+ if (gemm_cpu_gpu_switch == 'g' &&
|
||||
+ m >= min_M &&
|
||||
+ n >= min_N &&
|
||||
+ k >= min_K) {
|
||||
+ cublas::Gemm
|
||||
+ ( transA, transB, m, n, k,
|
||||
+ alpha, A.LockedBuffer(), A.LDim(),
|
||||
+ B.LockedBuffer(), B.LDim(),
|
||||
+ beta, C.Buffer(), C.LDim() );
|
||||
+ } else {
|
||||
+ blas::Gemm
|
||||
+ ( transA, transB, m, n, k,
|
||||
+ alpha, A.LockedBuffer(), A.LDim(),
|
||||
+ B.LockedBuffer(), B.LDim(),
|
||||
+ beta, C.Buffer(), C.LDim() );
|
||||
+ }
|
||||
+#else
|
||||
blas::Gemm
|
||||
( transA, transB, m, n, k,
|
||||
alpha, A.LockedBuffer(), A.LDim(),
|
||||
B.LockedBuffer(), B.LDim(),
|
||||
beta, C.Buffer(), C.LDim() );
|
||||
+#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
diff -Naur a/src/core/imports/blas/Gemm.hpp b/src/core/imports/blas/Gemm.hpp
|
||||
--- a/src/core/imports/blas/Gemm.hpp 2017-06-08 07:30:45.090529967 -0700
|
||||
+++ b/src/core/imports/blas/Gemm.hpp 2017-06-08 07:34:46.503009958 -0700
|
||||
@@ -41,6 +41,12 @@
|
||||
|
||||
} // extern "C"
|
||||
|
||||
+
|
||||
+#if defined(EL_USE_CUBLAS)
|
||||
+#include <cublas.h>
|
||||
+#include <cub/util_allocator.cuh>
|
||||
+#endif
|
||||
+
|
||||
namespace El {
|
||||
namespace blas {
|
||||
|
||||
@@ -515,3 +521,515 @@
|
||||
|
||||
} // namespace blas
|
||||
} // namespace El
|
||||
+
|
||||
+
|
||||
+#if EL_USE_CUBLAS
|
||||
+
|
||||
+#define USE_CUB 1
|
||||
+
|
||||
+namespace El {
|
||||
+namespace cublas {
|
||||
+
|
||||
+#if USE_CUB
|
||||
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
|
||||
+#endif
|
||||
+
|
||||
+template<typename T>
|
||||
+void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const T& alpha,
|
||||
+ const T* A, BlasInt ALDim,
|
||||
+ const T* B, BlasInt BLDim,
|
||||
+ const T& beta,
|
||||
+ T* C, BlasInt CLDim )
|
||||
+{
|
||||
+ // put something here
|
||||
+ printf("integer version \n");
|
||||
+}
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Int& alpha,
|
||||
+ const Int* A, BlasInt ALDim,
|
||||
+ const Int* B, BlasInt BLDim,
|
||||
+ const Int& beta,
|
||||
+ Int* C, BlasInt CLDim );
|
||||
+#ifdef EL_HAVE_QD
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const DoubleDouble& alpha,
|
||||
+ const DoubleDouble* A, BlasInt ALDim,
|
||||
+ const DoubleDouble* B, BlasInt BLDim,
|
||||
+ const DoubleDouble& beta,
|
||||
+ DoubleDouble* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const QuadDouble& alpha,
|
||||
+ const QuadDouble* A, BlasInt ALDim,
|
||||
+ const QuadDouble* B, BlasInt BLDim,
|
||||
+ const QuadDouble& beta,
|
||||
+ QuadDouble* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Complex<DoubleDouble>& alpha,
|
||||
+ const Complex<DoubleDouble>* A, BlasInt ALDim,
|
||||
+ const Complex<DoubleDouble>* B, BlasInt BLDim,
|
||||
+ const Complex<DoubleDouble>& beta,
|
||||
+ Complex<DoubleDouble>* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Complex<QuadDouble>& alpha,
|
||||
+ const Complex<QuadDouble>* A, BlasInt ALDim,
|
||||
+ const Complex<QuadDouble>* B, BlasInt BLDim,
|
||||
+ const Complex<QuadDouble>& beta,
|
||||
+ Complex<QuadDouble>* C, BlasInt CLDim );
|
||||
+#endif
|
||||
+#ifdef EL_HAVE_QUAD
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Quad& alpha,
|
||||
+ const Quad* A, BlasInt ALDim,
|
||||
+ const Quad* B, BlasInt BLDim,
|
||||
+ const Quad& beta,
|
||||
+ Quad* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Complex<Quad>& alpha,
|
||||
+ const Complex<Quad>* A, BlasInt ALDim,
|
||||
+ const Complex<Quad>* B, BlasInt BLDim,
|
||||
+ const Complex<Quad>& beta,
|
||||
+ Complex<Quad>* C, BlasInt CLDim );
|
||||
+#endif
|
||||
+#ifdef EL_HAVE_MPC
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const BigInt& alpha,
|
||||
+ const BigInt* A, BlasInt ALDim,
|
||||
+ const BigInt* B, BlasInt BLDim,
|
||||
+ const BigInt& beta,
|
||||
+ BigInt* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const BigFloat& alpha,
|
||||
+ const BigFloat* A, BlasInt ALDim,
|
||||
+ const BigFloat* B, BlasInt BLDim,
|
||||
+ const BigFloat& beta,
|
||||
+ BigFloat* C, BlasInt CLDim );
|
||||
+template void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const Complex<BigFloat>& alpha,
|
||||
+ const Complex<BigFloat>* A, BlasInt ALDim,
|
||||
+ const Complex<BigFloat>* B, BlasInt BLDim,
|
||||
+ const Complex<BigFloat>& beta,
|
||||
+ Complex<BigFloat>* C, BlasInt CLDim );
|
||||
+#endif
|
||||
+
|
||||
+void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const float& alpha,
|
||||
+ const float* A, BlasInt ALDim,
|
||||
+ const float* B, BlasInt BLDim,
|
||||
+ const float& beta,
|
||||
+ float* C, BlasInt CLDim )
|
||||
+{
|
||||
+ EL_DEBUG_CSE
|
||||
+ EL_DEBUG_ONLY(
|
||||
+ if( std::toupper(transA) == 'N' )
|
||||
+ {
|
||||
+ if( ALDim < Max(m,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( ALDim < Max(k,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||
+ }
|
||||
+
|
||||
+ if( std::toupper(transB) == 'N' )
|
||||
+ {
|
||||
+ if( BLDim < Max(k,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( BLDim < Max(n,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||
+ }
|
||||
+
|
||||
+ if( CLDim < Max(m,1) )
|
||||
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||
+ )
|
||||
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
|
||||
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
|
||||
+
|
||||
+ const mpi::Comm comm;
|
||||
+ const Int commRank = mpi::Rank( comm );
|
||||
+ if (commRank == 0) {
|
||||
+ //printf("calling cublas Sgemm: m %d n %d k %d\n", m, n, k);
|
||||
+ }
|
||||
+
|
||||
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||
+ // device memory size for A, B and C
|
||||
+ BlasInt sizeA, sizeB, sizeC;
|
||||
+ float *devA=NULL, *devB=NULL, *devC=NULL;
|
||||
+
|
||||
+ rowA = fixedTransA == 'T' ? k : m;
|
||||
+ colA = fixedTransA == 'T' ? m : k;
|
||||
+ rowB = fixedTransB == 'T' ? n : k;
|
||||
+ colB = fixedTransB == 'T' ? k : n;
|
||||
+ rowC = m;
|
||||
+ colC = n;
|
||||
+ sizeA = rowA * colA;
|
||||
+ sizeB = rowB * colB;
|
||||
+ sizeC = rowC * colC;
|
||||
+
|
||||
+ cublasStatus stat;
|
||||
+
|
||||
+#if USE_CUB
|
||||
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
|
||||
+ sizeof(float) * (sizeA+sizeB+sizeC) ));
|
||||
+#else
|
||||
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(float), (void **) &devA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||
+#endif
|
||||
+
|
||||
+ devB = devA + sizeA;
|
||||
+ devC = devB + sizeB;
|
||||
+
|
||||
+ // copy matrix A, B and C to device
|
||||
+ stat = cublasSetMatrix(rowA, colA, sizeof(float), A, ALDim, devA, rowA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||
+
|
||||
+ stat = cublasSetMatrix(rowB, colB, sizeof(float), B, BLDim, devB, rowB);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||
+
|
||||
+ if (beta != 0.0)
|
||||
+ {
|
||||
+ stat = cublasSetMatrix(rowC, colC, sizeof(float), C, CLDim, devC, rowC);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||
+ }
|
||||
+
|
||||
+ // cublas<t>gemm
|
||||
+ cublasSgemm
|
||||
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
|
||||
+
|
||||
+ // copy matrix C to host
|
||||
+ stat = cublasGetMatrix(rowC, colC, sizeof(float), devC, rowC, C, CLDim);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||
+
|
||||
+ // free
|
||||
+#if USE_CUB
|
||||
+ CubDebugExit(g_allocator.DeviceFree(devA));
|
||||
+#else
|
||||
+ cublasFree(devA);
|
||||
+#endif
|
||||
+ //printf("CUBLAS float done ...\n");
|
||||
+}
|
||||
+
|
||||
+void Gemm
|
||||
+( char transA, char transB,
|
||||
+ BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const double& alpha,
|
||||
+ const double* A, BlasInt ALDim,
|
||||
+ const double* B, BlasInt BLDim,
|
||||
+ const double& beta,
|
||||
+ double* C, BlasInt CLDim )
|
||||
+{
|
||||
+ EL_DEBUG_CSE
|
||||
+ EL_DEBUG_ONLY(
|
||||
+ if( std::toupper(transA) == 'N' )
|
||||
+ {
|
||||
+ if( ALDim < Max(m,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( ALDim < Max(k,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||
+ }
|
||||
+
|
||||
+ if( std::toupper(transB) == 'N' )
|
||||
+ {
|
||||
+ if( BLDim < Max(k,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( BLDim < Max(n,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||
+ }
|
||||
+
|
||||
+ if( CLDim < Max(m,1) )
|
||||
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||
+ )
|
||||
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
|
||||
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
|
||||
+
|
||||
+ const mpi::Comm comm;
|
||||
+ const Int commRank = mpi::Rank( comm );
|
||||
+ if (commRank == 0) {
|
||||
+ //printf("calling cublas Dgemm: m %d n %d k %d\n", m, n, k);
|
||||
+ }
|
||||
+
|
||||
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||
+ // device memory size for A, B and C
|
||||
+ BlasInt sizeA, sizeB, sizeC;
|
||||
+ double *devA=NULL, *devB=NULL, *devC=NULL;
|
||||
+
|
||||
+ rowA = fixedTransA == 'T' ? k : m;
|
||||
+ colA = fixedTransA == 'T' ? m : k;
|
||||
+ rowB = fixedTransB == 'T' ? n : k;
|
||||
+ colB = fixedTransB == 'T' ? k : n;
|
||||
+ rowC = m;
|
||||
+ colC = n;
|
||||
+ sizeA = rowA * colA;
|
||||
+ sizeB = rowB * colB;
|
||||
+ sizeC = rowC * colC;
|
||||
+
|
||||
+ cublasStatus stat;
|
||||
+
|
||||
+#if USE_CUB
|
||||
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
|
||||
+ sizeof(double) * (sizeA+sizeB+sizeC) ));
|
||||
+#else
|
||||
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(double), (void **) &devA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||
+#endif
|
||||
+
|
||||
+ devB = devA + sizeA;
|
||||
+ devC = devB + sizeB;
|
||||
+
|
||||
+ // copy matrix A, B and C to device
|
||||
+ stat = cublasSetMatrix(rowA, colA, sizeof(double), A, ALDim, devA, rowA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||
+
|
||||
+ stat = cublasSetMatrix(rowB, colB, sizeof(double), B, BLDim, devB, rowB);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||
+
|
||||
+ if (beta != 0.0)
|
||||
+ {
|
||||
+ stat = cublasSetMatrix(rowC, colC, sizeof(double), C, CLDim, devC, rowC);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||
+ }
|
||||
+
|
||||
+ // cublas<t>gemm
|
||||
+ cublasDgemm
|
||||
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
|
||||
+
|
||||
+ // copy matrix C to host
|
||||
+ stat = cublasGetMatrix(rowC, colC, sizeof(double), devC, rowC, C, CLDim);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||
+
|
||||
+ // free
|
||||
+#if USE_CUB
|
||||
+ CubDebugExit(g_allocator.DeviceFree(devA));
|
||||
+#else
|
||||
+ cublasFree(devA);
|
||||
+#endif
|
||||
+ //printf("CUBLAS double done ...\n");
|
||||
+}
|
||||
+
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const scomplex& alpha,
|
||||
+ const scomplex* A, BlasInt ALDim,
|
||||
+ const scomplex* B, BlasInt BLDim,
|
||||
+ const scomplex& beta,
|
||||
+ scomplex* C, BlasInt CLDim )
|
||||
+{
|
||||
+ EL_DEBUG_CSE
|
||||
+ EL_DEBUG_ONLY(
|
||||
+ if( std::toupper(transA) == 'N' )
|
||||
+ {
|
||||
+ if( ALDim < Max(m,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( ALDim < Max(k,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||
+ }
|
||||
+
|
||||
+ if( std::toupper(transB) == 'N' )
|
||||
+ {
|
||||
+ if( BLDim < Max(k,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( BLDim < Max(n,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||
+ }
|
||||
+
|
||||
+ if( CLDim < Max(m,1) )
|
||||
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||
+ )
|
||||
+
|
||||
+ const char fixedTransA = transA;
|
||||
+ const char fixedTransB = transB;
|
||||
+
|
||||
+ const mpi::Comm comm;
|
||||
+ const Int commRank = mpi::Rank( comm );
|
||||
+ if (commRank == 0) {
|
||||
+ //printf("calling cublas Cgemm: m %d n %d k %d\n", m, n, k);
|
||||
+ }
|
||||
+
|
||||
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||
+ // device memory size for A, B and C
|
||||
+ BlasInt sizeA, sizeB, sizeC;
|
||||
+ cuComplex *devA=NULL, *devB=NULL, *devC=NULL;
|
||||
+
|
||||
+ rowA = fixedTransA == 'T' ? k : m;
|
||||
+ colA = fixedTransA == 'T' ? m : k;
|
||||
+ rowB = fixedTransB == 'T' ? n : k;
|
||||
+ colB = fixedTransB == 'T' ? k : n;
|
||||
+ rowC = m;
|
||||
+ colC = n;
|
||||
+ sizeA = rowA * colA;
|
||||
+ sizeB = rowB * colB;
|
||||
+ sizeC = rowC * colC;
|
||||
+
|
||||
+ cublasStatus stat;
|
||||
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuComplex), (void **) &devA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||
+
|
||||
+ devB = devA + sizeA;
|
||||
+ devC = devB + sizeB;
|
||||
+
|
||||
+ // copy matrix A, B and C to device
|
||||
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuComplex), A, ALDim, devA, rowA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||
+
|
||||
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuComplex), B, BLDim, devB, rowB);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||
+
|
||||
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
|
||||
+ {
|
||||
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuComplex), C, CLDim, devC, rowC);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||
+ }
|
||||
+
|
||||
+ // cublas<t>gemm
|
||||
+ cublasCgemm
|
||||
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||
+ *((cuComplex*) &alpha), devA, rowA, devB, rowB, *((cuComplex*) &beta), devC, rowC );
|
||||
+
|
||||
+ // copy matrix C to host
|
||||
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuComplex), devC, rowC, C, CLDim);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||
+
|
||||
+ // free
|
||||
+ cublasFree(devA);
|
||||
+}
|
||||
+
|
||||
+void Gemm
|
||||
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||
+ const dcomplex& alpha,
|
||||
+ const dcomplex* A, BlasInt ALDim,
|
||||
+ const dcomplex* B, BlasInt BLDim,
|
||||
+ const dcomplex& beta,
|
||||
+ dcomplex* C, BlasInt CLDim )
|
||||
+{
|
||||
+ EL_DEBUG_CSE
|
||||
+ EL_DEBUG_ONLY(
|
||||
+ if( std::toupper(transA) == 'N' )
|
||||
+ {
|
||||
+ if( ALDim < Max(m,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( ALDim < Max(k,1) )
|
||||
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||
+ }
|
||||
+
|
||||
+ if( std::toupper(transB) == 'N' )
|
||||
+ {
|
||||
+ if( BLDim < Max(k,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if( BLDim < Max(n,1) )
|
||||
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||
+ }
|
||||
+
|
||||
+ if( CLDim < Max(m,1) )
|
||||
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||
+ )
|
||||
+
|
||||
+ const char fixedTransA = transA;
|
||||
+ const char fixedTransB = transB;
|
||||
+
|
||||
+ const mpi::Comm comm;
|
||||
+ const Int commRank = mpi::Rank( comm );
|
||||
+ if (commRank == 0) {
|
||||
+ //printf("calling cublas Zgemm: m %d n %d k %d\n", m, n, k);
|
||||
+ }
|
||||
+
|
||||
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||
+ // device memory size for A, B and C
|
||||
+ BlasInt sizeA, sizeB, sizeC;
|
||||
+ cuDoubleComplex *devA=NULL, *devB=NULL, *devC=NULL;
|
||||
+
|
||||
+ rowA = fixedTransA == 'T' ? k : m;
|
||||
+ colA = fixedTransA == 'T' ? m : k;
|
||||
+ rowB = fixedTransB == 'T' ? n : k;
|
||||
+ colB = fixedTransB == 'T' ? k : n;
|
||||
+ rowC = m;
|
||||
+ colC = n;
|
||||
+ sizeA = rowA * colA;
|
||||
+ sizeB = rowB * colB;
|
||||
+ sizeC = rowC * colC;
|
||||
+
|
||||
+ cublasStatus stat;
|
||||
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuDoubleComplex), (void **) &devA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||
+
|
||||
+ devB = devA + sizeA;
|
||||
+ devC = devB + sizeB;
|
||||
+
|
||||
+ // copy matrix A, B and C to device
|
||||
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuDoubleComplex), A, ALDim, devA, rowA);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||
+
|
||||
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuDoubleComplex), B, BLDim, devB, rowB);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||
+
|
||||
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
|
||||
+ {
|
||||
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuDoubleComplex), C, CLDim, devC, rowC);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||
+ }
|
||||
+
|
||||
+ cublasZgemm
|
||||
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||
+ *((cuDoubleComplex*) &alpha), devA, rowA, devB, rowB, *((cuDoubleComplex*) &beta),
|
||||
+ devC, rowC );
|
||||
+
|
||||
+ // copy matrix C to host
|
||||
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuDoubleComplex), devC, rowC, C, CLDim);
|
||||
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||
+
|
||||
+ // free
|
||||
+ cublasFree(devA);
|
||||
+}
|
||||
+
|
||||
+} // namespace cublas
|
||||
+} // namespace El
|
||||
+
|
||||
+#endif
|
||||
+
|
@ -1,171 +0,0 @@
|
||||
# Copyright Spack Project Developers. See COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import os
|
||||
|
||||
from spack.package import *
|
||||
|
||||
|
||||
class Elemental(CMakePackage):
    """Elemental: Distributed-memory dense and sparse-direct linear algebra
    and optimization library."""

    homepage = "https://libelemental.org"
    url = "https://github.com/elemental/Elemental/archive/v0.87.7.tar.gz"
    git = "https://github.com/elemental/Elemental.git"

    license("Apache-2.0")

    version("develop", branch="master")
    version("0.87.7", sha256="7becfdbc223e9c72e65ae876d842c48d2037d13f83e9f41cea285e21b840d7d9")
    version("0.87.6", sha256="b597987c99ddd3462e0619524c5b7f711177ae8ae541b1b961e11d96e15afc64")

    depends_on("c", type="build")  # generated
    depends_on("cxx", type="build")  # generated

    variant("shared", default=True, description="Enables the build of shared libraries")
    variant("hybrid", default=True, description="Make use of OpenMP within MPI packing/unpacking")
    variant(
        "openmp_blas", default=False, description="Use OpenMP for threading in the BLAS library"
    )
    variant("c", default=False, description="Build C interface")
    variant("parmetis", default=False, description="Enable ParMETIS")
    variant("quad", default=False, description="Enable quad precision")
    variant("int64", default=False, description="Use 64bit integers")
    variant("cublas", default=False, description="Enable cuBLAS for local BLAS operations")
    # When this variant is set remove the normal dependencies since
    # Elemental has to build BLAS and ScaLAPACK internally
    variant(
        "int64_blas",
        default=False,
        description="Use 64bit integers for BLAS. Requires local build of BLAS library.",
    )
    variant("scalapack", default=False, description="Build with ScaLAPACK library")
    variant(
        "build_type",
        default="Release",
        description="The build type to build",
        values=("Debug", "Release"),
    )
    variant(
        "blas",
        default="openblas",
        values=("openblas", "mkl", "accelerate", "essl"),
        description="Enable the use of OpenBlas/MKL/Accelerate/ESSL",
    )
    # The two adjacent literals used here previously were joined without a
    # separating space ("MPFR'sarbitrary-precision"); use a single literal.
    variant(
        "mpfr",
        default=False,
        description="Support GNU MPFR's arbitrary-precision floating-point arithmetic",
    )

    # Note that #1712 forces us to enumerate the different blas variants
    depends_on("blas", when="~openmp_blas ~int64_blas")
    # Hack to forward variant to openblas package
    depends_on("openblas", when="blas=openblas ~openmp_blas ~int64_blas")
    # Allow Elemental to build internally when using 8-byte ints
    depends_on("openblas threads=openmp", when="blas=openblas +openmp_blas ~int64_blas")

    depends_on("intel-mkl", when="blas=mkl")
    depends_on("intel-mkl threads=openmp", when="blas=mkl +openmp_blas")
    depends_on("intel-mkl@2017.1 +ilp64", when="blas=mkl +int64_blas")

    depends_on("veclibfort", when="blas=accelerate")

    depends_on("essl", when="blas=essl")
    depends_on("essl threads=openmp", when="blas=essl +openmp_blas")

    # Note that this forces us to use OpenBLAS until #1712 is fixed
    depends_on("lapack", when="blas=openblas ~openmp_blas")
    depends_on("netlib-lapack +external-blas", when="blas=essl")

    depends_on("metis")
    depends_on("metis +int64", when="+int64")
    depends_on("mpi")
    # Allow Elemental to build internally when using 8-byte ints
    depends_on("scalapack", when="+scalapack ~int64_blas")
    depends_on("gmp", when="+mpfr")
    depends_on("mpc", when="+mpfr")
    depends_on("mpfr", when="+mpfr")

    patch("elemental_cublas.patch", when="+cublas")
    patch("cmake_0.87.7.patch", when="@0.87.7")

    conflicts("%intel@:17.0.2", when="@:0.87.7")

    @property
    def libs(self):
        """Locate the installed ``libEl`` library under the install prefix.

        Searches recursively and looks for the shared or the static flavour
        depending on whether the ``+shared`` variant is enabled.
        """
        # ``in`` already yields a bool, so no ``True if ... else False`` needed.
        shared = "+shared" in self.spec
        return find_libraries("libEl", root=self.prefix, shared=shared, recursive=True)

    def cmake_args(self):
        """Assemble the list of ``-D`` options passed to CMake.

        The list contains, in order:

        * the MPI compiler wrappers and the boolean feature switches derived
          from the package variants;
        * ``GFORTRAN_LIB`` when a Fortran runtime library could be located for
          the Intel, GCC or XL compilers;
        * either the custom 64-bit BLAS/LAPACK symbol suffixes (``+int64_blas``)
          or the linker flags of the external math libraries (``MATH_LIBS``).
        """
        spec = self.spec
        args = [
            "-DCMAKE_INSTALL_MESSAGE:STRING=LAZY",
            "-DCMAKE_C_COMPILER=%s" % spec["mpi"].mpicc,
            "-DCMAKE_CXX_COMPILER=%s" % spec["mpi"].mpicxx,
            "-DCMAKE_Fortran_COMPILER=%s" % spec["mpi"].mpifc,
            "-DEL_PREFER_OPENBLAS:BOOL=TRUE",
            "-DEL_DISABLE_SCALAPACK:BOOL=%s" % ("~scalapack" in spec),
            "-DBUILD_SHARED_LIBS:BOOL=%s" % ("+shared" in spec),
            "-DEL_HYBRID:BOOL=%s" % ("+hybrid" in spec),
            "-DEL_C_INTERFACE:BOOL=%s" % ("+c" in spec),
            "-DEL_DISABLE_PARMETIS:BOOL=%s" % ("~parmetis" in spec),
            "-DEL_DISABLE_QUAD:BOOL=%s" % ("~quad" in spec),
            "-DEL_USE_64BIT_INTS:BOOL=%s" % ("+int64" in spec),
            "-DEL_USE_64BIT_BLAS_INTS:BOOL=%s" % ("+int64_blas" in spec),
            "-DEL_DISABLE_MPFR:BOOL=%s" % ("~mpfr" in spec),
        ]

        # Find the Fortran runtime library that matches the compiler in use.
        if spec.satisfies("%intel"):
            # Walk up from <root>/bin/<compiler> to <root> and search there.
            ifort = env["SPACK_F77"]
            intel_bin = os.path.dirname(ifort)
            intel_root = os.path.dirname(intel_bin)
            libfortran = find_libraries("libifcoremt", root=intel_root, recursive=True)
        elif spec.satisfies("%gcc"):
            # see <stage_folder>/debian/rules as an example:
            mpif77 = Executable(spec["mpi"].mpif77)
            libfortran = LibraryList(
                mpif77("--print-file-name", "libgfortran.%s" % dso_suffix, output=str).strip()
            )
        elif spec.satisfies("%xl") or spec.satisfies("%xl_r"):
            xl_fort = env["SPACK_F77"]
            xl_bin = os.path.dirname(xl_fort)
            xl_root = os.path.dirname(xl_bin)
            libfortran = find_libraries("libxlf90_r", root=xl_root, recursive=True)
        else:
            libfortran = None

        if libfortran:
            args.append("-DGFORTRAN_LIB=%s" % libfortran.libraries[0])

        # If using 64bit int BLAS libraries, elemental has to build
        # them internally
        if spec.satisfies("+int64_blas"):
            # This branch is only reached with +int64_blas, so the suffix is
            # always "_64_"; the former '"_64_" if ... else "_"' fallback was
            # dead code.
            args.extend(["-DEL_BLAS_SUFFIX:STRING=_64_", "-DCUSTOM_BLAS_SUFFIX:BOOL=TRUE"])
            if spec.satisfies("+scalapack"):
                args.extend(
                    ["-DEL_LAPACK_SUFFIX:STRING=_64_", "-DCUSTOM_LAPACK_SUFFIX:BOOL=TRUE"]
                )
        else:
            math_libs = spec["lapack"].libs + spec["blas"].libs

            if spec.satisfies("+scalapack"):
                # ScaLAPACK is prepended so it precedes LAPACK/BLAS in the flags.
                math_libs = spec["scalapack"].libs + math_libs

            args.extend(["-DMATH_LIBS:STRING={0}".format(math_libs.ld_flags)])

        return args
|
Loading…
Reference in New Issue
Block a user