llvm-amdgpu and composable-kernel: fix build failures (#46891)
This commit is contained in:
parent
adaa0a4863
commit
962262a1d3
@ -0,0 +1,88 @@
|
|||||||
|
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||||
|
index f4f496fc10..d9e300b737 100644
|
||||||
|
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||||
|
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
|
||||||
|
@@ -47,12 +47,12 @@ __global__ void
|
||||||
|
#endif
|
||||||
|
kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3(
|
||||||
|
typename GridwiseGemm::Argument karg,
|
||||||
|
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||||
|
- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||||
|
- const index_t num_k_per_block)
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||||
|
+ [[maybe_unused]] const index_t num_k_per_block)
|
||||||
|
{
|
||||||
|
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||||
|
defined(__gfx94__))
|
||||||
|
@@ -103,12 +103,12 @@ __global__ void
|
||||||
|
#endif
|
||||||
|
kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3_2lds(
|
||||||
|
typename GridwiseGemm::Argument karg,
|
||||||
|
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||||
|
- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||||
|
- const index_t num_k_per_block)
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||||
|
+ [[maybe_unused]] const index_t num_k_per_block)
|
||||||
|
{
|
||||||
|
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||||
|
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
|
||||||
|
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||||
|
index 415ae3d496..a4d4a01a01 100644
|
||||||
|
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||||
|
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
|
||||||
|
@@ -69,14 +69,15 @@ __global__ void
|
||||||
|
#if CK_USE_LAUNCH_BOUNDS
|
||||||
|
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, MinimumOccupancy)
|
||||||
|
#endif
|
||||||
|
- kernel_grouped_conv_fwd_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg,
|
||||||
|
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
- c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||||
|
- const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||||
|
- const ComputePtrOffset compute_ptr_offset_of_n,
|
||||||
|
- const index_t groups_count)
|
||||||
|
+ kernel_grouped_conv_fwd_xdl_cshuffle_v3(
|
||||||
|
+ typename GridwiseGemm::Argument karg,
|
||||||
|
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
+ c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
|
||||||
|
+ [[maybe_unused]] const index_t groups_count)
|
||||||
|
{
|
||||||
|
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||||
|
// offset base pointer for each work-group
|
||||||
|
@@ -132,13 +133,13 @@ __global__ void
|
||||||
|
#endif
|
||||||
|
kernel_grouped_conv_fwd_xdl_cshuffle_v3_2lds(
|
||||||
|
typename GridwiseGemm::Argument karg,
|
||||||
|
- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
+ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
|
||||||
|
+ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
|
||||||
|
+ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
|
||||||
|
c_grid_desc_mblock_mperblock_nblock_nperblock,
|
||||||
|
- const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||||
|
- const ComputePtrOffset compute_ptr_offset_of_n,
|
||||||
|
- const index_t groups_count)
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
|
||||||
|
+ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
|
||||||
|
+ [[maybe_unused]] const index_t groups_count)
|
||||||
|
{
|
||||||
|
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||||
|
// offset base pointer for each work-group
|
@ -78,6 +78,10 @@ class ComposableKernel(CMakePackage):
|
|||||||
depends_on("llvm-amdgpu@" + ver, when="@" + ver)
|
depends_on("llvm-amdgpu@" + ver, when="@" + ver)
|
||||||
depends_on("rocm-cmake@" + ver, when="@" + ver, type="build")
|
depends_on("rocm-cmake@" + ver, when="@" + ver, type="build")
|
||||||
|
|
||||||
|
# Build is breaking on warning, -Werror, -Wunused-parameter. The patch is part of:
|
||||||
|
# https://github.com/ROCm/composable_kernel/commit/959073842c0db839d45d565eb260fd018c996ce4
|
||||||
|
patch("0001-mark-kernels-maybe-unused.patch", when="@6.2")
|
||||||
|
|
||||||
def setup_build_environment(self, env):
|
def setup_build_environment(self, env):
|
||||||
env.set("CXX", self.spec["hip"].hipcc)
|
env.set("CXX", self.spec["hip"].hipcc)
|
||||||
|
|
||||||
@ -101,6 +105,8 @@ def cmake_args(self):
|
|||||||
args.append(self.define("CMAKE_POSITION_INDEPENDENT_CODE", "ON"))
|
args.append(self.define("CMAKE_POSITION_INDEPENDENT_CODE", "ON"))
|
||||||
if self.spec.satisfies("@:5.7"):
|
if self.spec.satisfies("@:5.7"):
|
||||||
args.append(self.define("CMAKE_CXX_FLAGS", "-O3"))
|
args.append(self.define("CMAKE_CXX_FLAGS", "-O3"))
|
||||||
|
if self.spec.satisfies("@6.2:"):
|
||||||
|
args.append(self.define("BUILD_DEV", "OFF"))
|
||||||
return args
|
return args
|
||||||
|
|
||||||
def build(self, spec, prefix):
|
def build(self, spec, prefix):
|
||||||
|
@ -20,7 +20,7 @@ class LlvmAmdgpu(CMakePackage, CompilerPackage):
|
|||||||
executables = [r"amdclang", r"amdclang\+\+", r"amdflang", r"clang.*", r"flang.*", "llvm-.*"]
|
executables = [r"amdclang", r"amdclang\+\+", r"amdflang", r"clang.*", r"flang.*", "llvm-.*"]
|
||||||
generator("ninja")
|
generator("ninja")
|
||||||
|
|
||||||
maintainers("srekolam", "renjithravindrankannath", "haampie")
|
maintainers("srekolam", "renjithravindrankannath", "haampie", "afzpatel")
|
||||||
|
|
||||||
license("Apache-2.0")
|
license("Apache-2.0")
|
||||||
|
|
||||||
@ -319,6 +319,5 @@ def post_install(self):
|
|||||||
def setup_dependent_build_environment(self, env, dependent_spec):
|
def setup_dependent_build_environment(self, env, dependent_spec):
|
||||||
for root, _, files in os.walk(self.spec["llvm-amdgpu"].prefix):
|
for root, _, files in os.walk(self.spec["llvm-amdgpu"].prefix):
|
||||||
if "libclang_rt.asan-x86_64.so" in files:
|
if "libclang_rt.asan-x86_64.so" in files:
|
||||||
asan_lib_path = root
|
env.prepend_path("LD_LIBRARY_PATH", root)
|
||||||
env.prepend_path("LD_LIBRARY_PATH", asan_lib_path)
|
|
||||||
env.prune_duplicate_paths("LD_LIBRARY_PATH")
|
env.prune_duplicate_paths("LD_LIBRARY_PATH")
|
||||||
|
Loading…
Reference in New Issue
Block a user