elemental: remove deprecated package (#49291)

This package has not been maintained since 2016. We maintain an active fork in the hydrogen package, so remove this one.

Signed-off-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
2025-03-05 08:36:05 +01:00
parent 7d62045c30
commit aa9e610fa6
3 changed files with 0 additions and 861 deletions
--- a/var/spack/repos/builtin/packages/elemental/cmake_0.87.7.patch
+++ b/var/spack/repos/builtin/packages/elemental/cmake_0.87.7.patch
@@ -1,22 +0,0 @@
-diff --git a/cmake/configure_files/ElementalConfig.cmake.in b/cmake/configure_files/ElementalConfig.cmake.in
-index d37649f..8511d81 100644
--- a/cmake/configure_files/ElementalConfig.cmake.in
-+++ b/cmake/configure_files/ElementalConfig.cmake.in
-@@ -1,6 +1,8 @@
- set(Elemental_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include")
- set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPI_CXX_INCLUDE_PATH@")
-set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
-+IF(@QD_FOUND@)
-+  set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@QD_INCLUDES@")
-+ENDIF()
- set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPC_INCLUDES@")
- set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@MPFR_INCLUDES@")
- set(Elemental_INCLUDE_DIRS "${Elemental_INCLUDE_DIRS};@GMP_INCLUDES@")
-@@ -13,6 +15,6 @@ set(Elemental_LINK_FLAGS "@EL_LINK_FLAGS@")
- set(Elemental_DEFINITIONS "@Qt5Widgets_DEFINITIONS@")
-
- # Our library dependencies (contains definitions for IMPORTED targets)
-include("@CMAKE_INSTALL_PREFIX@/CMake/ElementalTargets.cmake")
-+include("${CMAKE_CURRENT_LIST_DIR}/ElementalTargets.cmake")
-
- set(Elemental_LIBRARIES El)
--- a/var/spack/repos/builtin/packages/elemental/elemental_cublas.patch
+++ b/var/spack/repos/builtin/packages/elemental/elemental_cublas.patch
@@ -1,668 +0,0 @@
-diff -Naur a/include/El/blas_like/level3.hpp b/include/El/blas_like/level3.hpp
--- a/include/El/blas_like/level3.hpp	2017-06-08 07:30:43.180249917 -0700
-+++ b/include/El/blas_like/level3.hpp	2017-06-08 07:35:27.325434602 -0700
-@@ -31,6 +31,10 @@
- }
- using namespace GemmAlgorithmNS;
- 
-+void GemmUseGPU(int min_M, int min_N, int min_K);
-+
-+void GemmUseCPU();
-+
- template<typename T>
- void Gemm
- ( Orientation orientA, Orientation orientB,
-diff -Naur a/include/El/core/imports/blas.hpp b/include/El/core/imports/blas.hpp
--- a/include/El/core/imports/blas.hpp	2017-06-08 07:30:43.522016908 -0700
-+++ b/include/El/core/imports/blas.hpp	2017-06-08 07:35:06.834030908 -0700
-@@ -916,4 +916,63 @@
- } // namespace blas
- } // namespace El
- 
-+
-+#if defined(EL_USE_CUBLAS)
-+
-+namespace El {
-+
-+#ifdef EL_USE_64BIT_BLAS_INTS
-+typedef long long int BlasInt;
-+#else
-+typedef int BlasInt;
-+#endif
-+
-+namespace cublas {
-+
-+// NOTE: templated routines are custom and not wrappers
-+
-+// Level 3 BLAS
-+// ============
-+template<typename T>
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
-+  const T& alpha,
-+  const T* A, BlasInt ALDim, 
-+  const T* B, BlasInt BLDim,
-+  const T& beta,
-+        T* C, BlasInt CLDim );
-+
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
-+  const float& alpha,
-+  const float* A, BlasInt ALDim, 
-+  const float* B, BlasInt BLDim,
-+  const float& beta,
-+        float* C, BlasInt CLDim );
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
-+  const double& alpha,
-+  const double* A, BlasInt ALDim, 
-+  const double* B, BlasInt BLDim,
-+  const double& beta,
-+        double* C, BlasInt CLDim );
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
-+  const scomplex& alpha,
-+  const scomplex* A, BlasInt ALDim, 
-+  const scomplex* B, BlasInt BLDim,
-+  const scomplex& beta,
-+        scomplex* C, BlasInt CLDim );
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
-+  const dcomplex& alpha,
-+  const dcomplex* A, BlasInt ALDim, 
-+  const dcomplex* B, BlasInt BLDim,
-+  const dcomplex& beta,
-+        dcomplex* C, BlasInt CLDim );
-+
-+} // namespace cublas
-+} // namespace El
-+#endif
-+
- #endif // ifndef EL_IMPORTS_BLAS_DECL_HPP
-diff -Naur a/src/blas_like/level3/Gemm.cpp b/src/blas_like/level3/Gemm.cpp
--- a/src/blas_like/level3/Gemm.cpp	2017-06-08 07:30:44.307096427 -0700
-+++ b/src/blas_like/level3/Gemm.cpp	2017-06-08 07:34:23.062863489 -0700
-@@ -16,6 +16,20 @@
- 
- namespace El {
- 
-+char gemm_cpu_gpu_switch = 'c';
-+int min_M = 0, min_N = 0, min_K = 0;
-+
-+void GemmUseGPU(int _min_M, int _min_N, int _min_K) {
-+   gemm_cpu_gpu_switch = 'g';
-+   min_M = _min_M;
-+   min_N = _min_N;
-+   min_K = _min_K;
-+}
-+
-+void GemmUseCPU() {
-+   gemm_cpu_gpu_switch = 'c';
-+}
-+
- template<typename T>
- void Gemm
- ( Orientation orientA, Orientation orientB,
-@@ -59,11 +73,30 @@
-     const Int k = ( orientA == NORMAL ? A.Width() : A.Height() );
-     if( k != 0 )
-     {
-+#if defined(EL_USE_CUBLAS)
-+        if (gemm_cpu_gpu_switch == 'g' && 
-+            m >= min_M &&
-+            n >= min_N &&
-+            k >= min_K) {
-+          cublas::Gemm
-+          ( transA, transB, m, n, k,
-+            alpha, A.LockedBuffer(), A.LDim(),
-+                   B.LockedBuffer(), B.LDim(),
-+            beta,  C.Buffer(),       C.LDim() );
-+        } else {
-+          blas::Gemm
-+          ( transA, transB, m, n, k,
-+            alpha, A.LockedBuffer(), A.LDim(),
-+                   B.LockedBuffer(), B.LDim(),
-+            beta,  C.Buffer(),       C.LDim() );
-+        }
-+#else
-         blas::Gemm
-         ( transA, transB, m, n, k,
-           alpha, A.LockedBuffer(), A.LDim(),
-                  B.LockedBuffer(), B.LDim(),
-           beta,  C.Buffer(),       C.LDim() );
-+#endif
-     }
-     else
-     {
-diff -Naur a/src/core/imports/blas/Gemm.hpp b/src/core/imports/blas/Gemm.hpp
--- a/src/core/imports/blas/Gemm.hpp	2017-06-08 07:30:45.090529967 -0700
-+++ b/src/core/imports/blas/Gemm.hpp	2017-06-08 07:34:46.503009958 -0700
-@@ -41,6 +41,12 @@
- 
- } // extern "C"
- 
-+
-+#if defined(EL_USE_CUBLAS)
-+#include <cublas.h>
-+#include <cub/util_allocator.cuh>
-+#endif
-+
- namespace El {
- namespace blas {
- 
-@@ -515,3 +521,515 @@
- 
- } // namespace blas
- } // namespace El
-+
-+
-+#if EL_USE_CUBLAS
-+
-+#define USE_CUB 1
-+
-+namespace El {
-+namespace cublas {
-+
-+#if USE_CUB
-+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
-+#endif
-+
-+template<typename T>
-+void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k,
-+  const T& alpha,
-+  const T* A, BlasInt ALDim,
-+  const T* B, BlasInt BLDim,
-+  const T& beta,
-+        T* C, BlasInt CLDim )
-+{
-+   // put something here
-+    printf("integer version \n");
-+}
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Int& alpha,
-+  const Int* A, BlasInt ALDim,
-+  const Int* B, BlasInt BLDim,
-+  const Int& beta,
-+        Int* C, BlasInt CLDim );
-+#ifdef EL_HAVE_QD
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const DoubleDouble& alpha,
-+  const DoubleDouble* A, BlasInt ALDim,
-+  const DoubleDouble* B, BlasInt BLDim,
-+  const DoubleDouble& beta,
-+        DoubleDouble* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const QuadDouble& alpha,
-+  const QuadDouble* A, BlasInt ALDim,
-+  const QuadDouble* B, BlasInt BLDim,
-+  const QuadDouble& beta,
-+        QuadDouble* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Complex<DoubleDouble>& alpha,
-+  const Complex<DoubleDouble>* A, BlasInt ALDim,
-+  const Complex<DoubleDouble>* B, BlasInt BLDim,
-+  const Complex<DoubleDouble>& beta,
-+        Complex<DoubleDouble>* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Complex<QuadDouble>& alpha,
-+  const Complex<QuadDouble>* A, BlasInt ALDim,
-+  const Complex<QuadDouble>* B, BlasInt BLDim,
-+  const Complex<QuadDouble>& beta,
-+        Complex<QuadDouble>* C, BlasInt CLDim );
-+#endif
-+#ifdef EL_HAVE_QUAD
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Quad& alpha,
-+  const Quad* A, BlasInt ALDim,
-+  const Quad* B, BlasInt BLDim,
-+  const Quad& beta,
-+        Quad* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Complex<Quad>& alpha,
-+  const Complex<Quad>* A, BlasInt ALDim, 
-+  const Complex<Quad>* B, BlasInt BLDim,
-+  const Complex<Quad>& beta,
-+        Complex<Quad>* C, BlasInt CLDim );
-+#endif
-+#ifdef EL_HAVE_MPC
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const BigInt& alpha,
-+  const BigInt* A, BlasInt ALDim,
-+  const BigInt* B, BlasInt BLDim,
-+  const BigInt& beta,
-+        BigInt* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const BigFloat& alpha,
-+  const BigFloat* A, BlasInt ALDim,
-+  const BigFloat* B, BlasInt BLDim,
-+  const BigFloat& beta,
-+        BigFloat* C, BlasInt CLDim );
-+template void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const Complex<BigFloat>& alpha,
-+  const Complex<BigFloat>* A, BlasInt ALDim,
-+  const Complex<BigFloat>* B, BlasInt BLDim,
-+  const Complex<BigFloat>& beta,
-+        Complex<BigFloat>* C, BlasInt CLDim );
-+#endif
-+
-+void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const float& alpha,
-+  const float* A, BlasInt ALDim,
-+  const float* B, BlasInt BLDim,
-+  const float& beta,
-+        float* C, BlasInt CLDim )
-+{
-+    EL_DEBUG_CSE
-+    EL_DEBUG_ONLY(
-+      if( std::toupper(transA) == 'N' )
-+      {
-+          if( ALDim < Max(m,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
-+      }
-+      else
-+      {
-+          if( ALDim < Max(k,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
-+      }
-+
-+      if( std::toupper(transB) == 'N' )
-+      {
-+          if( BLDim < Max(k,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
-+      }
-+      else
-+      {
-+          if( BLDim < Max(n,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
-+      }
-+
-+      if( CLDim < Max(m,1) )
-+          LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
-+    )
-+    const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
-+    const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
-+ 
-+    const mpi::Comm comm;
-+    const Int commRank = mpi::Rank( comm );
-+    if (commRank == 0) {
-+       //printf("calling cublas Sgemm: m %d n %d k %d\n", m, n, k);
-+    }
-+
-+    BlasInt rowA, colA, rowB, colB, rowC, colC;
-+    // device memory size for A, B and C
-+    BlasInt sizeA, sizeB, sizeC;
-+    float *devA=NULL, *devB=NULL, *devC=NULL;
-+    
-+    rowA = fixedTransA == 'T' ? k : m;
-+    colA = fixedTransA == 'T' ? m : k;
-+    rowB = fixedTransB == 'T' ? n : k;
-+    colB = fixedTransB == 'T' ? k : n;
-+    rowC = m;
-+    colC = n;
-+    sizeA = rowA * colA;
-+    sizeB = rowB * colB;
-+    sizeC = rowC * colC;
-+
-+    cublasStatus stat;
-+    
-+#if USE_CUB
-+    CubDebugExit(g_allocator.DeviceAllocate((void**)&devA, 
-+                 sizeof(float) * (sizeA+sizeB+sizeC) ));
-+#else
-+    stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(float), (void **) &devA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
-+#endif
-+
-+    devB = devA + sizeA;
-+    devC = devB + sizeB;
-+
-+    // copy matrix A, B and C to device
-+    stat = cublasSetMatrix(rowA, colA, sizeof(float), A, ALDim, devA, rowA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
-+
-+    stat = cublasSetMatrix(rowB, colB, sizeof(float), B, BLDim, devB, rowB);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
-+    
-+    if (beta != 0.0)
-+    {
-+       stat = cublasSetMatrix(rowC, colC, sizeof(float), C, CLDim, devC, rowC);
-+       if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
-+    }
-+    
-+    // cublas<t>gemm
-+    cublasSgemm
-+    ( fixedTransA, fixedTransB, m, n, k,
-+      alpha, devA, rowA, devB, rowB, beta, devC, rowC );
-+
-+    // copy matrix C to host
-+    stat = cublasGetMatrix(rowC, colC, sizeof(float), devC, rowC, C, CLDim);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
-+
-+    // free
-+#if USE_CUB
-+    CubDebugExit(g_allocator.DeviceFree(devA));
-+#else
-+    cublasFree(devA);
-+#endif
-+    //printf("CUBLAS float done ...\n");
-+}
-+
-+void Gemm
-+( char transA, char transB,
-+  BlasInt m, BlasInt n, BlasInt k, 
-+  const double& alpha,
-+  const double* A, BlasInt ALDim, 
-+  const double* B, BlasInt BLDim,
-+  const double& beta,
-+        double* C, BlasInt CLDim )
-+{
-+    EL_DEBUG_CSE
-+    EL_DEBUG_ONLY(
-+      if( std::toupper(transA) == 'N' )
-+      {
-+          if( ALDim < Max(m,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
-+      }
-+      else
-+      {
-+          if( ALDim < Max(k,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
-+      }      
-+
-+      if( std::toupper(transB) == 'N' )
-+      {
-+          if( BLDim < Max(k,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
-+      }
-+      else
-+      {
-+          if( BLDim < Max(n,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
-+      }
-+
-+      if( CLDim < Max(m,1) )
-+          LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
-+    )
-+    const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
-+    const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
-+
-+    const mpi::Comm comm;
-+    const Int commRank = mpi::Rank( comm );
-+    if (commRank == 0) {
-+       //printf("calling cublas Dgemm: m %d n %d k %d\n", m, n, k);
-+    }
-+
-+    BlasInt rowA, colA, rowB, colB, rowC, colC;
-+    // device memory size for A, B and C
-+    BlasInt sizeA, sizeB, sizeC;
-+    double *devA=NULL, *devB=NULL, *devC=NULL;
-+    
-+    rowA = fixedTransA == 'T' ? k : m;
-+    colA = fixedTransA == 'T' ? m : k;
-+    rowB = fixedTransB == 'T' ? n : k;
-+    colB = fixedTransB == 'T' ? k : n;
-+    rowC = m;
-+    colC = n;
-+    sizeA = rowA * colA;
-+    sizeB = rowB * colB;
-+    sizeC = rowC * colC;
-+
-+    cublasStatus stat;
-+
-+#if USE_CUB
-+    CubDebugExit(g_allocator.DeviceAllocate((void**)&devA, 
-+                 sizeof(double) * (sizeA+sizeB+sizeC) ));
-+#else
-+    stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(double), (void **) &devA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
-+#endif
-+
-+    devB = devA + sizeA;
-+    devC = devB + sizeB;
-+
-+    // copy matrix A, B and C to device
-+    stat = cublasSetMatrix(rowA, colA, sizeof(double), A, ALDim, devA, rowA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
-+
-+    stat = cublasSetMatrix(rowB, colB, sizeof(double), B, BLDim, devB, rowB);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
-+    
-+    if (beta != 0.0)
-+    {
-+       stat = cublasSetMatrix(rowC, colC, sizeof(double), C, CLDim, devC, rowC);
-+       if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
-+    }
-+
-+    // cublas<t>gemm
-+    cublasDgemm
-+    ( fixedTransA, fixedTransB, m, n, k,
-+      alpha, devA, rowA, devB, rowB, beta, devC, rowC );
-+    
-+    // copy matrix C to host
-+    stat = cublasGetMatrix(rowC, colC, sizeof(double), devC, rowC, C, CLDim);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
-+
-+    // free
-+#if USE_CUB
-+    CubDebugExit(g_allocator.DeviceFree(devA));
-+#else
-+    cublasFree(devA);
-+#endif
-+    //printf("CUBLAS double done ...\n");
-+}
-+
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k, 
-+  const scomplex& alpha,
-+  const scomplex* A, BlasInt ALDim, 
-+  const scomplex* B, BlasInt BLDim,
-+  const scomplex& beta,
-+        scomplex* C, BlasInt CLDim )
-+{
-+    EL_DEBUG_CSE
-+    EL_DEBUG_ONLY(
-+      if( std::toupper(transA) == 'N' )
-+      {
-+          if( ALDim < Max(m,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
-+      }
-+      else
-+      {
-+          if( ALDim < Max(k,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
-+      }      
-+
-+      if( std::toupper(transB) == 'N' )
-+      {
-+          if( BLDim < Max(k,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
-+      }
-+      else
-+      {
-+          if( BLDim < Max(n,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
-+      }
-+
-+      if( CLDim < Max(m,1) )
-+          LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
-+    )
-+        
-+    const char fixedTransA = transA;
-+    const char fixedTransB = transB;
-+    
-+    const mpi::Comm comm;
-+    const Int commRank = mpi::Rank( comm );
-+    if (commRank == 0) {
-+       //printf("calling cublas Cgemm: m %d n %d k %d\n", m, n, k);
-+    }
-+
-+    BlasInt rowA, colA, rowB, colB, rowC, colC;
-+    // device memory size for A, B and C
-+    BlasInt sizeA, sizeB, sizeC;
-+    cuComplex *devA=NULL, *devB=NULL, *devC=NULL;
-+    
-+    rowA = fixedTransA == 'T' ? k : m;
-+    colA = fixedTransA == 'T' ? m : k;
-+    rowB = fixedTransB == 'T' ? n : k;
-+    colB = fixedTransB == 'T' ? k : n;
-+    rowC = m;
-+    colC = n;
-+    sizeA = rowA * colA;
-+    sizeB = rowB * colB;
-+    sizeC = rowC * colC;
-+
-+    cublasStatus stat;
-+    stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuComplex), (void **) &devA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
-+
-+    devB = devA + sizeA;
-+    devC = devB + sizeB;
-+
-+    // copy matrix A, B and C to device
-+    stat = cublasSetMatrix(rowA, colA, sizeof(cuComplex), A, ALDim, devA, rowA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
-+
-+    stat = cublasSetMatrix(rowB, colB, sizeof(cuComplex), B, BLDim, devB, rowB);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
-+    
-+    if (beta.real() != 0.0 || beta.imag() != 0.0)
-+    {
-+       stat = cublasSetMatrix(rowC, colC, sizeof(cuComplex), C, CLDim, devC, rowC);
-+       if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
-+    }
-+
-+    // cublas<t>gemm
-+    cublasCgemm
-+    ( fixedTransA, fixedTransB, m, n, k,
-+      *((cuComplex*) &alpha), devA, rowA, devB, rowB, *((cuComplex*) &beta), devC, rowC );
-+
-+    // copy matrix C to host
-+    stat = cublasGetMatrix(rowC, colC, sizeof(cuComplex), devC, rowC, C, CLDim);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
-+
-+    // free
-+    cublasFree(devA);
-+}
-+
-+void Gemm
-+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k, 
-+  const dcomplex& alpha,
-+  const dcomplex* A, BlasInt ALDim, 
-+  const dcomplex* B, BlasInt BLDim,
-+  const dcomplex& beta,
-+        dcomplex* C, BlasInt CLDim )
-+{
-+    EL_DEBUG_CSE
-+    EL_DEBUG_ONLY(
-+      if( std::toupper(transA) == 'N' )
-+      {
-+          if( ALDim < Max(m,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
-+      }
-+      else
-+      {
-+          if( ALDim < Max(k,1) )
-+              LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
-+      }      
-+
-+      if( std::toupper(transB) == 'N' )
-+      {
-+          if( BLDim < Max(k,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
-+      }
-+      else
-+      {
-+          if( BLDim < Max(n,1) )
-+              LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
-+      }
-+
-+      if( CLDim < Max(m,1) )
-+          LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
-+    )
-+
-+    const char fixedTransA = transA;
-+    const char fixedTransB = transB;
-+       
-+    const mpi::Comm comm;
-+    const Int commRank = mpi::Rank( comm );
-+    if (commRank == 0) {
-+       //printf("calling cublas Zgemm: m %d n %d k %d\n", m, n, k);
-+    }
-+
-+    BlasInt rowA, colA, rowB, colB, rowC, colC;
-+    // device memory size for A, B and C
-+    BlasInt sizeA, sizeB, sizeC;
-+    cuDoubleComplex *devA=NULL, *devB=NULL, *devC=NULL;
-+    
-+    rowA = fixedTransA == 'T' ? k : m;
-+    colA = fixedTransA == 'T' ? m : k;
-+    rowB = fixedTransB == 'T' ? n : k;
-+    colB = fixedTransB == 'T' ? k : n;
-+    rowC = m;
-+    colC = n;
-+    sizeA = rowA * colA;
-+    sizeB = rowB * colB;
-+    sizeC = rowC * colC;
-+
-+    cublasStatus stat;
-+    stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuDoubleComplex), (void **) &devA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
-+
-+    devB = devA + sizeA;
-+    devC = devB + sizeB;
-+
-+    // copy matrix A, B and C to device
-+    stat = cublasSetMatrix(rowA, colA, sizeof(cuDoubleComplex), A, ALDim, devA, rowA);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
-+
-+    stat = cublasSetMatrix(rowB, colB, sizeof(cuDoubleComplex), B, BLDim, devB, rowB);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
-+    
-+    if (beta.real() != 0.0 || beta.imag() != 0.0)
-+    {
-+       stat = cublasSetMatrix(rowC, colC, sizeof(cuDoubleComplex), C, CLDim, devC, rowC);
-+       if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
-+    }
-+
-+    cublasZgemm
-+    ( fixedTransA, fixedTransB, m, n, k,
-+      *((cuDoubleComplex*) &alpha), devA, rowA, devB, rowB, *((cuDoubleComplex*) &beta), 
-+      devC, rowC );
-+
-+    // copy matrix C to host
-+    stat = cublasGetMatrix(rowC, colC, sizeof(cuDoubleComplex), devC, rowC, C, CLDim);
-+    if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
-+
-+    // free
-+    cublasFree(devA);
-+}
-+
-+} // namespace cublas
-+} // namespace El
-+
-+#endif
-+
--- a/var/spack/repos/builtin/packages/elemental/package.py
+++ b/var/spack/repos/builtin/packages/elemental/package.py
@@ -1,171 +0,0 @@
-# Copyright Spack Project Developers. See COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-
-import os
-
-from spack.package import *
-
-
-class Elemental(CMakePackage):
-    """Elemental: Distributed-memory dense and sparse-direct linear algebra
-    and optimization library."""
-
-    homepage = "https://libelemental.org"
-    url = "https://github.com/elemental/Elemental/archive/v0.87.7.tar.gz"
-    git = "https://github.com/elemental/Elemental.git"
-
-    license("Apache-2.0")
-
-    version("develop", branch="master")
-    version("0.87.7", sha256="7becfdbc223e9c72e65ae876d842c48d2037d13f83e9f41cea285e21b840d7d9")
-    version("0.87.6", sha256="b597987c99ddd3462e0619524c5b7f711177ae8ae541b1b961e11d96e15afc64")
-
-    depends_on("c", type="build")  # generated
-    depends_on("cxx", type="build")  # generated
-
-    variant("shared", default=True, description="Enables the build of shared libraries")
-    variant("hybrid", default=True, description="Make use of OpenMP within MPI packing/unpacking")
-    variant(
-        "openmp_blas", default=False, description="Use OpenMP for threading in the BLAS library"
-    )
-    variant("c", default=False, description="Build C interface")
-    variant("parmetis", default=False, description="Enable ParMETIS")
-    variant("quad", default=False, description="Enable quad precision")
-    variant("int64", default=False, description="Use 64bit integers")
-    variant("cublas", default=False, description="Enable cuBLAS for local BLAS operations")
-    # When this variant is set remove the normal dependencies since
-    # Elemental has to build BLAS and ScaLAPACK internally
-    variant(
-        "int64_blas",
-        default=False,
-        description="Use 64bit integers for BLAS." " Requires local build of BLAS library.",
-    )
-    variant("scalapack", default=False, description="Build with ScaLAPACK library")
-    variant(
-        "build_type",
-        default="Release",
-        description="The build type to build",
-        values=("Debug", "Release"),
-    )
-    variant(
-        "blas",
-        default="openblas",
-        values=("openblas", "mkl", "accelerate", "essl"),
-        description="Enable the use of OpenBlas/MKL/Accelerate/ESSL",
-    )
-    variant(
-        "mpfr",
-        default=False,
-        description="Support GNU MPFR's" "arbitrary-precision floating-point arithmetic",
-    )
-
-    # Note that #1712 forces us to enumerate the different blas variants
-    depends_on("blas", when="~openmp_blas ~int64_blas")
-    # Hack to forward variant to openblas package
-    depends_on("openblas", when="blas=openblas ~openmp_blas ~int64_blas")
-    # Allow Elemental to build internally when using 8-byte ints
-    depends_on("openblas threads=openmp", when="blas=openblas +openmp_blas ~int64_blas")
-
-    depends_on("intel-mkl", when="blas=mkl")
-    depends_on("intel-mkl threads=openmp", when="blas=mkl +openmp_blas")
-    depends_on("intel-mkl@2017.1 +ilp64", when="blas=mkl +int64_blas")
-
-    depends_on("veclibfort", when="blas=accelerate")
-
-    depends_on("essl", when="blas=essl")
-    depends_on("essl threads=openmp", when="blas=essl +openmp_blas")
-
-    # Note that this forces us to use OpenBLAS until #1712 is fixed
-    depends_on("lapack", when="blas=openblas ~openmp_blas")
-    depends_on("netlib-lapack +external-blas", when="blas=essl")
-
-    depends_on("metis")
-    depends_on("metis +int64", when="+int64")
-    depends_on("mpi")
-    # Allow Elemental to build internally when using 8-byte ints
-    depends_on("scalapack", when="+scalapack ~int64_blas")
-    depends_on("gmp", when="+mpfr")
-    depends_on("mpc", when="+mpfr")
-    depends_on("mpfr", when="+mpfr")
-
-    patch("elemental_cublas.patch", when="+cublas")
-    patch("cmake_0.87.7.patch", when="@0.87.7")
-
-    conflicts("%intel@:17.0.2", when="@:0.87.7")
-
-    @property
-    def libs(self):
-        shared = True if "+shared" in self.spec else False
-        return find_libraries("libEl", root=self.prefix, shared=shared, recursive=True)
-
-    def cmake_args(self):
-        spec = self.spec
-        args = [
-            "-DCMAKE_INSTALL_MESSAGE:STRING=LAZY",
-            "-DCMAKE_C_COMPILER=%s" % spec["mpi"].mpicc,
-            "-DCMAKE_CXX_COMPILER=%s" % spec["mpi"].mpicxx,
-            "-DCMAKE_Fortran_COMPILER=%s" % spec["mpi"].mpifc,
-            "-DEL_PREFER_OPENBLAS:BOOL=TRUE",
-            "-DEL_DISABLE_SCALAPACK:BOOL=%s" % ("~scalapack" in spec),
-            "-DBUILD_SHARED_LIBS:BOOL=%s" % ("+shared" in spec),
-            "-DEL_HYBRID:BOOL=%s" % ("+hybrid" in spec),
-            "-DEL_C_INTERFACE:BOOL=%s" % ("+c" in spec),
-            "-DEL_DISABLE_PARMETIS:BOOL=%s" % ("~parmetis" in spec),
-            "-DEL_DISABLE_QUAD:BOOL=%s" % ("~quad" in spec),
-            "-DEL_USE_64BIT_INTS:BOOL=%s" % ("+int64" in spec),
-            "-DEL_USE_64BIT_BLAS_INTS:BOOL=%s" % ("+int64_blas" in spec),
-            "-DEL_DISABLE_MPFR:BOOL=%s" % ("~mpfr" in spec),
-        ]
-
-        if self.spec.satisfies("%intel"):
-            ifort = env["SPACK_F77"]
-            intel_bin = os.path.dirname(ifort)
-            intel_root = os.path.dirname(intel_bin)
-            libfortran = find_libraries("libifcoremt", root=intel_root, recursive=True)
-        elif self.spec.satisfies("%gcc"):
-            # see <stage_folder>/debian/rules as an example:
-            mpif77 = Executable(spec["mpi"].mpif77)
-            libfortran = LibraryList(
-                mpif77("--print-file-name", "libgfortran.%s" % dso_suffix, output=str).strip()
-            )
-        elif self.spec.satisfies("%xl") or self.spec.satisfies("%xl_r"):
-            xl_fort = env["SPACK_F77"]
-            xl_bin = os.path.dirname(xl_fort)
-            xl_root = os.path.dirname(xl_bin)
-            libfortran = find_libraries("libxlf90_r", root=xl_root, recursive=True)
-        else:
-            libfortran = None
-
-        if libfortran:
-            args.append("-DGFORTRAN_LIB=%s" % libfortran.libraries[0])
-
-        # If using 64bit int BLAS libraries, elemental has to build
-        # them internally
-        if spec.satisfies("+int64_blas"):
-            args.extend(
-                [
-                    "-DEL_BLAS_SUFFIX:STRING={0}".format(
-                        ("_64_" if "+int64_blas" in spec else "_")
-                    ),
-                    "-DCUSTOM_BLAS_SUFFIX:BOOL=TRUE",
-                ]
-            )
-            if spec.satisfies("+scalapack"):
-                args.extend(
-                    [
-                        "-DEL_LAPACK_SUFFIX:STRING={0}".format(
-                            ("_64_" if "+int64_blas" in spec else "_")
-                        ),
-                        "-DCUSTOM_LAPACK_SUFFIX:BOOL=TRUE",
-                    ]
-                )
-        else:
-            math_libs = spec["lapack"].libs + spec["blas"].libs
-
-            if spec.satisfies("+scalapack"):
-                math_libs = spec["scalapack"].libs + math_libs
-
-            args.extend(["-DMATH_LIBS:STRING={0}".format(math_libs.ld_flags)])
-
-        return args