rocalution: fix compilation for Navi 1x and 2x (#32586)

This commit is contained in:
Cory Bloor 2022-09-12 10:19:59 -06:00 committed by GitHub
parent 5dc1a9f214
commit 13d872592e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 81 additions and 0 deletions

View File

@ -0,0 +1,79 @@
From 9bdff9b0897360a60d21a686f7b988f924aea825 Mon Sep 17 00:00:00 2001
From: DorianRudolph <dorianrudo97@googlemail.com>
Date: Fri, 9 Sep 2022 07:09:25 +0200
Subject: [PATCH] fix compilation for gfx1031 (#150)
* fix compilation for gfx1031
* Improve guards for arch-specific instructions
Default to using the fallback implementation and only use
__hip_move_dpp on platforms that are known to support the
necessary intrinsics.
rocALUTION can also be compiled with CXXFLAGS=-DROCALUTION_USE_MOVE_DPP=0
to force the use of the fallback implementation (or with the value 1 to
force the use of the __hip_move_dpp implementation).
This change fixes the compilation error:
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
when building for unsupported Navi 1x and Navi 2x GPUs.
Co-authored-by: Cordell Bloor <Cordell.Bloor@amd.com>
---
src/base/hip/hip_utils.hpp | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/src/base/hip/hip_utils.hpp b/src/base/hip/hip_utils.hpp
index 830f9a5d..5ec4cd00 100644
--- a/src/base/hip/hip_utils.hpp
+++ b/src/base/hip/hip_utils.hpp
@@ -37,6 +37,18 @@
#include <hip/hip_complex.h>
#endif
+#ifndef ROCALUTION_USE_MOVE_DPP
+#if defined(__gfx803__) || \
+ defined(__gfx900__) || \
+ defined(__gfx906__) || \
+ defined(__gfx908__) || \
+ defined(__gfx90a__)
+#define ROCALUTION_USE_MOVE_DPP 1
+#else
+#define ROCALUTION_USE_MOVE_DPP 0
+#endif
+#endif
+
#define ROCBLAS_HANDLE(handle) *static_cast<rocblas_handle*>(handle)
#define ROCSPARSE_HANDLE(handle) *static_cast<rocsparse_handle*>(handle)
@@ -206,7 +218,7 @@ namespace rocalution
__device__ int __llvm_amdgcn_readlane(int index, int offset) __asm("llvm.amdgcn.readlane");
-#ifndef __gfx1030__
+#if ROCALUTION_USE_MOVE_DPP
template <unsigned int WFSIZE>
static __device__ __forceinline__ void wf_reduce_sum(int* sum)
{
@@ -223,7 +235,7 @@ namespace rocalution
if(WFSIZE > 32)
*sum += __hip_move_dpp(*sum, 0x143, 0xc, 0xf, 0);
}
-#else
+#else /* ROCALUTION_USE_MOVE_DPP */
template <unsigned int WFSIZE>
static __device__ __forceinline__ void wf_reduce_sum(int* sum)
{
@@ -232,7 +244,7 @@ namespace rocalution
*sum += __shfl_xor(*sum, i);
}
}
-#endif
+#endif /* ROCALUTION_USE_MOVE_DPP */
template <unsigned int WF_SIZE>
static __device__ __forceinline__ void wf_reduce_sum(float* sum)

View File

@ -152,6 +152,8 @@ class Rocalution(CMakePackage):
# This fix addresses the compilation failure and is
# already included in the ROCm 5.2.3 release.
patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:")
# Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
patch("0004-fix-navi-1x.patch", when="@5.2.0:")
def check(self):
exe = join_path(self.build_directory, "clients", "staging", "rocalution-test")