llvm-amdgpu and composable-kernel: fix build failures (#46891)
This commit is contained in:
parent
adaa0a4863
commit
962262a1d3
@ -0,0 +1,88 @@
|
||||
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||
index f4f496fc10..d9e300b737 100644
|
||||
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||
@@ -47,12 +47,12 @@ __global__ void
|
||||
#endif
|
||||
kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3(
|
||||
typename GridwiseGemm::Argument karg,
|
||||
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||
- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
- const index_t num_k_per_block)
|
||||
+ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
+ [[maybe_unused]] const index_t num_k_per_block)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||
defined(__gfx94__))
|
||||
@@ -103,12 +103,12 @@ __global__ void
|
||||
#endif
|
||||
kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3_2lds(
|
||||
typename GridwiseGemm::Argument karg,
|
||||
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||
- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
- const index_t num_k_per_block)
|
||||
+ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
+ [[maybe_unused]] const index_t num_k_per_block)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
|
||||
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||
index 415ae3d496..a4d4a01a01 100644
|
||||
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||
@@ -69,14 +69,15 @@ __global__ void
|
||||
#if CK_USE_LAUNCH_BOUNDS
|
||||
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, MinimumOccupancy)
|
||||
#endif
|
||||
- kernel_grouped_conv_fwd_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg,
|
||||
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
- c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||
- const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||
- const ComputePtrOffset compute_ptr_offset_of_n,
|
||||
- const index_t groups_count)
|
||||
+ kernel_grouped_conv_fwd_xdl_cshuffle_v3(
|
||||
+ typename GridwiseGemm::Argument karg,
|
||||
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
+ c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
|
||||
+ [[maybe_unused]] const index_t groups_count)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
// offset base pointer for each work-group
|
||||
@@ -132,13 +133,13 @@ __global__ void
|
||||
#endif
|
||||
kernel_grouped_conv_fwd_xdl_cshuffle_v3_2lds(
|
||||
typename GridwiseGemm::Argument karg,
|
||||
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||
- const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||
- const ComputePtrOffset compute_ptr_offset_of_n,
|
||||
- const index_t groups_count)
|
||||
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
|
||||
+ [[maybe_unused]] const index_t groups_count)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
// offset base pointer for each work-group
|
@ -78,6 +78,10 @@ class ComposableKernel(CMakePackage):
|
||||
depends_on("llvm-amdgpu@" + ver, when="@" + ver)
|
||||
depends_on("rocm-cmake@" + ver, when="@" + ver, type="build")
|
||||
|
||||
# The build fails because -Werror turns -Wunused-parameter warnings into errors. The patch is part of:
|
||||
# https://github.com/ROCm/composable_kernel/commit/959073842c0db839d45d565eb260fd018c996ce4
|
||||
patch("0001-mark-kernels-maybe-unused.patch", when="@6.2")
|
||||
|
||||
def setup_build_environment(self, env):
|
||||
env.set("CXX", self.spec["hip"].hipcc)
|
||||
|
||||
@ -101,6 +105,8 @@ def cmake_args(self):
|
||||
args.append(self.define("CMAKE_POSITION_INDEPENDENT_CODE", "ON"))
|
||||
if self.spec.satisfies("@:5.7"):
|
||||
args.append(self.define("CMAKE_CXX_FLAGS", "-O3"))
|
||||
if self.spec.satisfies("@6.2:"):
|
||||
args.append(self.define("BUILD_DEV", "OFF"))
|
||||
return args
|
||||
|
||||
def build(self, spec, prefix):
|
||||
|
@ -20,7 +20,7 @@ class LlvmAmdgpu(CMakePackage, CompilerPackage):
|
||||
executables = [r"amdclang", r"amdclang\+\+", r"amdflang", r"clang.*", r"flang.*", "llvm-.*"]
|
||||
generator("ninja")
|
||||
|
||||
maintainers("srekolam", "renjithravindrankannath", "haampie")
|
||||
maintainers("srekolam", "renjithravindrankannath", "haampie", "afzpatel")
|
||||
|
||||
license("Apache-2.0")
|
||||
|
||||
@ -319,6 +319,5 @@ def post_install(self):
|
||||
def setup_dependent_build_environment(self, env, dependent_spec):
|
||||
for root, _, files in os.walk(self.spec["llvm-amdgpu"].prefix):
|
||||
if "libclang_rt.asan-x86_64.so" in files:
|
||||
asan_lib_path = root
|
||||
env.prepend_path("LD_LIBRARY_PATH", asan_lib_path)
|
||||
env.prepend_path("LD_LIBRARY_PATH", root)
|
||||
env.prune_duplicate_paths("LD_LIBRARY_PATH")
|
||||
|
Loading…
Reference in New Issue
Block a user