rocalution: fix compilation for Navi 1x and 2x (#32586)
This commit is contained in:
parent
5dc1a9f214
commit
13d872592e
@ -0,0 +1,79 @@
|
|||||||
|
From 9bdff9b0897360a60d21a686f7b988f924aea825 Mon Sep 17 00:00:00 2001
|
||||||
|
From: DorianRudolph <dorianrudo97@googlemail.com>
|
||||||
|
Date: Fri, 9 Sep 2022 07:09:25 +0200
|
||||||
|
Subject: [PATCH] fix compilation for gfx1031 (#150)
|
||||||
|
|
||||||
|
* fix compilation for gfx1031
|
||||||
|
|
||||||
|
* Improve guards for arch-specific instructions
|
||||||
|
|
||||||
|
Default to using the fallback implementation and only use
|
||||||
|
__hip_move_dpp on platforms that are known to support the
|
||||||
|
necessary intrinsics.
|
||||||
|
|
||||||
|
rocALUTION can also be compiled with CXXFLAGS=-DROCALUTION_USE_MOVE_DPP=0
|
||||||
|
to force the use of the fallback implementation (or with the value 1 to
|
||||||
|
force the use of the __hip_move_dpp implementation).
|
||||||
|
|
||||||
|
This change fixes the compilation error:
|
||||||
|
|
||||||
|
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
|
||||||
|
|
||||||
|
when building for unsupported Navi 1x and Navi 2x GPUs.
|
||||||
|
|
||||||
|
Co-authored-by: Cordell Bloor <Cordell.Bloor@amd.com>
|
||||||
|
---
|
||||||
|
src/base/hip/hip_utils.hpp | 18 +++++++++++++++---
|
||||||
|
1 file changed, 15 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/base/hip/hip_utils.hpp b/src/base/hip/hip_utils.hpp
|
||||||
|
index 830f9a5d..5ec4cd00 100644
|
||||||
|
--- a/src/base/hip/hip_utils.hpp
|
||||||
|
+++ b/src/base/hip/hip_utils.hpp
|
||||||
|
@@ -37,6 +37,18 @@
|
||||||
|
#include <hip/hip_complex.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef ROCALUTION_USE_MOVE_DPP
|
||||||
|
+#if defined(__gfx803__) || \
|
||||||
|
+ defined(__gfx900__) || \
|
||||||
|
+ defined(__gfx906__) || \
|
||||||
|
+ defined(__gfx908__) || \
|
||||||
|
+ defined(__gfx90a__)
|
||||||
|
+#define ROCALUTION_USE_MOVE_DPP 1
|
||||||
|
+#else
|
||||||
|
+#define ROCALUTION_USE_MOVE_DPP 0
|
||||||
|
+#endif
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#define ROCBLAS_HANDLE(handle) *static_cast<rocblas_handle*>(handle)
|
||||||
|
#define ROCSPARSE_HANDLE(handle) *static_cast<rocsparse_handle*>(handle)
|
||||||
|
|
||||||
|
@@ -206,7 +218,7 @@ namespace rocalution
|
||||||
|
|
||||||
|
__device__ int __llvm_amdgcn_readlane(int index, int offset) __asm("llvm.amdgcn.readlane");
|
||||||
|
|
||||||
|
-#ifndef __gfx1030__
|
||||||
|
+#if ROCALUTION_USE_MOVE_DPP
|
||||||
|
template <unsigned int WFSIZE>
|
||||||
|
static __device__ __forceinline__ void wf_reduce_sum(int* sum)
|
||||||
|
{
|
||||||
|
@@ -223,7 +235,7 @@ namespace rocalution
|
||||||
|
if(WFSIZE > 32)
|
||||||
|
*sum += __hip_move_dpp(*sum, 0x143, 0xc, 0xf, 0);
|
||||||
|
}
|
||||||
|
-#else
|
||||||
|
+#else /* ROCALUTION_USE_MOVE_DPP */
|
||||||
|
template <unsigned int WFSIZE>
|
||||||
|
static __device__ __forceinline__ void wf_reduce_sum(int* sum)
|
||||||
|
{
|
||||||
|
@@ -232,7 +244,7 @@ namespace rocalution
|
||||||
|
*sum += __shfl_xor(*sum, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
-#endif
|
||||||
|
+#endif /* ROCALUTION_USE_MOVE_DPP */
|
||||||
|
|
||||||
|
template <unsigned int WF_SIZE>
|
||||||
|
static __device__ __forceinline__ void wf_reduce_sum(float* sum)
|
||||||
|
|
@ -152,6 +152,8 @@ class Rocalution(CMakePackage):
|
|||||||
# This fix is added to address the compilation failure and it is
|
# This fix is added to address the compilation failure and it is
|
||||||
# already taken in 5.2.3 rocm release.
|
# already taken in 5.2.3 rocm release.
|
||||||
patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:")
|
patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:")
|
||||||
|
# Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
|
||||||
|
patch("0004-fix-navi-1x.patch", when="@5.2.0:")
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
exe = join_path(self.build_directory, "clients", "staging", "rocalution-test")
|
exe = join_path(self.build_directory, "clients", "staging", "rocalution-test")
|
||||||
|
Loading…
Reference in New Issue
Block a user