
* rocsparse: add spack build test * Fix Navi 1x patch for ROCm 5.2 * Remove bench variant and other cleanup * Fix style
79 lines
2.9 KiB
Diff
79 lines
2.9 KiB
Diff
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
|
|
From: Cory Bloor <Cordell.Bloor@amd.com>
|
|
Date: Fri, 8 Jul 2022 20:53:32 -0600
|
|
Subject: [PATCH] Improve guards for arch-specific instructions (#368)
|
|
|
|
When choosing between a specialized implementation that uses
|
|
architecture-specific functionality and a generic fallback, it is
|
|
usually preferable to make the fallback the default. This will give the
|
|
software the best possible chance of functioning without modification
|
|
on future hardware.
|
|
|
|
Of course, the library will still need code updates to function
|
|
optimally on hardware released after the software was written.
|
|
|
|
rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
|
|
to force the use of the fallback implementation. Or with the value 1 to
|
|
force the use of the specialized __hip_move_dpp implementation.
|
|
|
|
This change fixes the compilation error:
|
|
|
|
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
|
|
|
|
when building for unsupported Navi 1x and Navi 2x GPUs as was
|
|
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
|
|
---
|
|
library/src/include/common.h | 18 +++++++++++++++---
|
|
1 file changed, 15 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/library/src/include/common.h b/library/src/include/common.h
|
|
index 6a4654af..975c5f7d 100644
|
|
--- a/library/src/include/common.h
|
|
+++ b/library/src/include/common.h
|
|
@@ -34,6 +34,18 @@
|
|
|
|
// clang-format off
|
|
|
|
+#ifndef ROCSPARSE_USE_MOVE_DPP
|
|
+#if defined(__gfx803__) || \
|
|
+ defined(__gfx900__) || \
|
|
+ defined(__gfx906__) || \
|
|
+ defined(__gfx908__) || \
|
|
+ defined(__gfx90a__)
|
|
+#define ROCSPARSE_USE_MOVE_DPP 1
|
|
+#else
|
|
+#define ROCSPARSE_USE_MOVE_DPP 0
|
|
+#endif
|
|
+#endif
|
|
+
|
|
// BSR indexing macros
|
|
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
|
|
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
|
|
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
|
|
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
|
|
}
|
|
|
|
-#if (!defined(__gfx1030__)) && (!defined(__gfx1011__))
|
|
+#if ROCSPARSE_USE_MOVE_DPP
|
|
// DPP-based wavefront reduction maximum
|
|
template <unsigned int WFSIZE>
|
|
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
|
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
|
sum = temp_sum.val;
|
|
return sum;
|
|
}
|
|
-#else
|
|
+#else /* ROCSPARSE_USE_MOVE_DPP */
|
|
template <unsigned int WFSIZE>
|
|
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
|
{
|
|
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
|
|
|
return sum;
|
|
}
|
|
-#endif
|
|
+#endif /* ROCSPARSE_USE_MOVE_DPP */
|
|
|
|
// DPP-based complex float wavefront reduction sum
|
|
template <unsigned int WFSIZE>
|