
* Change recipe to enable ROCm build * Fix flake8 issue * Address review comment
99 lines
4.4 KiB
Diff
99 lines
4.4 KiB
Diff
diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
|
index 9cd678dfb4cc7..4630465115c7c 100644
|
|
--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
|
+++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
|
@@ -67,6 +67,14 @@ namespace at { namespace cuda {
|
|
//
|
|
// HIP doesn't have
|
|
// cuGetErrorString (maps to non-functional hipGetErrorString___)
|
|
+//
|
|
+// HIP from ROCm 3.5 on renamed hipOccupancyMaxActiveBlocksPerMultiprocessor
|
|
+// to hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.
|
|
+#if HIP_VERSION < 305
|
|
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipOccupancyMaxActiveBlocksPerMultiprocessor
|
|
+#else
|
|
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
|
|
+#endif
|
|
|
|
#define AT_FORALL_NVRTC(_) \
|
|
_(nvrtcVersion) \
|
|
@@ -76,7 +84,7 @@ namespace at { namespace cuda {
|
|
_(nvrtcGetPTX) \
|
|
_(cuModuleLoadData) \
|
|
_(cuModuleGetFunction) \
|
|
- _(cuOccupancyMaxActiveBlocksPerMultiprocessor) \
|
|
+ _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
|
|
_(nvrtcGetErrorString) \
|
|
_(nvrtcGetProgramLogSize) \
|
|
_(nvrtcGetProgramLog) \
|
|
diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
|
|
index da1995123ecfc..f935eb4ef3d0e 100644
|
|
--- a/aten/src/ATen/native/cuda/SoftMax.cu
|
|
+++ b/aten/src/ATen/native/cuda/SoftMax.cu
|
|
@@ -127,8 +127,8 @@ void SpatialSoftMax_getLaunchSizes(
|
|
uint32_t block_threads = block.x * block.y;
|
|
smem_size = block.x == 1 ? 0 : block_threads * sizeof(accscalar_t);
|
|
int max_active_blocks;
|
|
-#ifdef __HIP_PLATFORM_HCC__
|
|
- // XXX HIP function signature is not compatible yet.
|
|
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
|
|
+ // HIP function signature is not compatible before ROCm 3.5 (HIP_VERSION < 305).
|
|
uint32_t max_blocks;
|
|
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
|
|
k, block_threads, smem_size);
|
|
diff --git a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
|
index 5586e49919727..27315ee475277 100644
|
|
--- a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
|
+++ b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
|
@@ -140,10 +140,10 @@ FusedKernelCUDA::FusedKernelCUDA(
|
|
nvrtc().cuModuleGetFunction(&function_, module_, name_.c_str()));
|
|
|
|
// Computes max blocks
|
|
-#ifdef __HIP_PLATFORM_HCC__
|
|
- // XXX HIP function signature is not compatible yet
|
|
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
|
|
+ // HIP function signature is not compatible before ROCm 3.5 (HIP_VERSION < 305)
|
|
uint32_t max_blocks;
|
|
- AT_CUDA_DRIVER_CHECK(nvrtc().cuOccupancyMaxActiveBlocksPerMultiprocessor(
|
|
+ AT_CUDA_DRIVER_CHECK(nvrtc().hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
|
&max_blocks, function_, 128, 0));
|
|
maxBlocks_ = max_blocks;
|
|
#else
|
|
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
|
|
index 7e21363cbe6af..26f269d92ae38 100644
|
|
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
|
|
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
|
|
@@ -2890,7 +2890,7 @@
|
|
(
|
|
"cuOccupancyMaxActiveBlocksPerMultiprocessor",
|
|
(
|
|
- "hipOccupancyMaxActiveBlocksPerMultiprocessor",
|
|
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",
|
|
CONV_OCCUPANCY,
|
|
API_DRIVER,
|
|
),
|
|
@@ -2898,7 +2898,7 @@
|
|
(
|
|
"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
|
(
|
|
- "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
|
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
|
CONV_OCCUPANCY,
|
|
API_DRIVER,
|
|
HIP_UNSUPPORTED,
|
|
@@ -2906,12 +2906,12 @@
|
|
),
|
|
(
|
|
"cuOccupancyMaxPotentialBlockSize",
|
|
- ("hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
|
|
+ ("hipModuleOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
|
|
),
|
|
(
|
|
"cuOccupancyMaxPotentialBlockSizeWithFlags",
|
|
(
|
|
- "hipOccupancyMaxPotentialBlockSizeWithFlags",
|
|
+ "hipModuleOccupancyMaxPotentialBlockSizeWithFlags",
|
|
CONV_OCCUPANCY,
|
|
API_DRIVER,
|
|
HIP_UNSUPPORTED,
|