changes to py-torch recipe to enable rocm build (#17410)
* changes to recipe to enable rocm build * fixing flake8 issue * addressed the review comment
This commit is contained in:
parent
4ac1a532f3
commit
d2c2e000a7
@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
from spack import *
|
||||
import os
|
||||
|
||||
|
||||
class PyTorch(PythonPackage, CudaPackage):
|
||||
@ -68,6 +69,7 @@ class PyTorch(PythonPackage, CudaPackage):
|
||||
|
||||
variant('cuda', default=True, description='Build with CUDA')
|
||||
variant('cudnn', default=True, description='Enables the cuDNN build')
|
||||
variant('rocm', default=False, description='Build with ROCm build')
|
||||
variant('magma', default=False, description='Enables the MAGMA build')
|
||||
variant('fbgemm', default=False, description='Enables the FBGEMM build')
|
||||
variant('test', default=False, description='Enables the test build')
|
||||
@ -112,6 +114,7 @@ class PyTorch(PythonPackage, CudaPackage):
|
||||
conflicts('cuda_arch=none', when='+cuda',
|
||||
msg='Must specify CUDA compute capabilities of your GPU, see '
|
||||
'https://developer.nvidia.com/cuda-gpus')
|
||||
conflicts('+rocm', when='+cuda')
|
||||
|
||||
# Required dependencies
|
||||
depends_on('cmake@3.5:', type='build')
|
||||
@ -173,6 +176,9 @@ class PyTorch(PythonPackage, CudaPackage):
|
||||
# Fixes CMake configuration error when XNNPACK is disabled
|
||||
patch('xnnpack.patch', when='@1.5.0:1.5.999')
|
||||
|
||||
# Fixes Build error for when ROCm is enable for pytorch-1.5 release
|
||||
patch('rocm.patch', when='@1.5.0:1.5.999+rocm')
|
||||
|
||||
# https://github.com/pytorch/pytorch/pull/37086
|
||||
# Fixes compilation with Clang 9.0.0 and Apple Clang 11.0.3
|
||||
patch('https://github.com/pytorch/pytorch/commit/e921cd222a8fbeabf5a3e74e83e0d8dfb01aa8b5.patch',
|
||||
@ -244,7 +250,9 @@ def enable_or_disable(variant, keyword='USE', var=None, newer=False):
|
||||
|
||||
enable_or_disable('fbgemm')
|
||||
enable_or_disable('test', keyword='BUILD')
|
||||
|
||||
enable_or_disable('rocm')
|
||||
if '+rocm' in self.spec:
|
||||
env.set('USE_MKLDNN', 0)
|
||||
if '+miopen' in self.spec:
|
||||
env.set('MIOPEN_LIB_DIR', self.spec['miopen'].libs.directories[0])
|
||||
env.set('MIOPEN_INCLUDE_DIR', self.spec['miopen'].prefix.include)
|
||||
@ -297,6 +305,11 @@ def enable_or_disable(variant, keyword='USE', var=None, newer=False):
|
||||
enable_or_disable('zstd', newer=True)
|
||||
enable_or_disable('tbb', newer=True)
|
||||
|
||||
@run_before('install')
|
||||
def build_amd(self):
|
||||
if '+rocm' in self.spec:
|
||||
python(os.path.join('tools', 'amd_build', 'build_amd.py'))
|
||||
|
||||
def install_test(self):
|
||||
with working_dir('test'):
|
||||
python('run_test.py')
|
||||
|
98
var/spack/repos/builtin/packages/py-torch/rocm.patch
Normal file
98
var/spack/repos/builtin/packages/py-torch/rocm.patch
Normal file
@ -0,0 +1,98 @@
|
||||
diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
||||
index 9cd678dfb4cc7..4630465115c7c 100644
|
||||
--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
||||
+++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
||||
@@ -67,6 +67,14 @@ namespace at { namespace cuda {
|
||||
//
|
||||
// HIP doesn't have
|
||||
// cuGetErrorString (maps to non-functional hipGetErrorString___)
|
||||
+//
|
||||
+// HIP from ROCm 3.5 on renamed hipOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
+// to hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.
|
||||
+#if HIP_VERSION < 305
|
||||
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
+#else
|
||||
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
+#endif
|
||||
|
||||
#define AT_FORALL_NVRTC(_) \
|
||||
_(nvrtcVersion) \
|
||||
@@ -76,7 +84,7 @@ namespace at { namespace cuda {
|
||||
_(nvrtcGetPTX) \
|
||||
_(cuModuleLoadData) \
|
||||
_(cuModuleGetFunction) \
|
||||
- _(cuOccupancyMaxActiveBlocksPerMultiprocessor) \
|
||||
+ _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
|
||||
_(nvrtcGetErrorString) \
|
||||
_(nvrtcGetProgramLogSize) \
|
||||
_(nvrtcGetProgramLog) \
|
||||
diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
|
||||
index da1995123ecfc..f935eb4ef3d0e 100644
|
||||
--- a/aten/src/ATen/native/cuda/SoftMax.cu
|
||||
+++ b/aten/src/ATen/native/cuda/SoftMax.cu
|
||||
@@ -127,8 +127,8 @@ void SpatialSoftMax_getLaunchSizes(
|
||||
uint32_t block_threads = block.x * block.y;
|
||||
smem_size = block.x == 1 ? 0 : block_threads * sizeof(accscalar_t);
|
||||
int max_active_blocks;
|
||||
-#ifdef __HIP_PLATFORM_HCC__
|
||||
- // XXX HIP function signature is not compatible yet.
|
||||
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
|
||||
+ // HIP function signature is not compatible yet.
|
||||
uint32_t max_blocks;
|
||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
|
||||
k, block_threads, smem_size);
|
||||
diff --git a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
||||
index 5586e49919727..27315ee475277 100644
|
||||
--- a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
||||
+++ b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
|
||||
@@ -140,10 +140,10 @@ FusedKernelCUDA::FusedKernelCUDA(
|
||||
nvrtc().cuModuleGetFunction(&function_, module_, name_.c_str()));
|
||||
|
||||
// Computes max blocks
|
||||
-#ifdef __HIP_PLATFORM_HCC__
|
||||
- // XXX HIP function signature is not compatible yet
|
||||
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
|
||||
+ // HIP function signature is not compatible yet
|
||||
uint32_t max_blocks;
|
||||
- AT_CUDA_DRIVER_CHECK(nvrtc().cuOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
+ AT_CUDA_DRIVER_CHECK(nvrtc().hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&max_blocks, function_, 128, 0));
|
||||
maxBlocks_ = max_blocks;
|
||||
#else
|
||||
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
index 7e21363cbe6af..26f269d92ae38 100644
|
||||
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
@@ -2890,7 +2890,7 @@
|
||||
(
|
||||
"cuOccupancyMaxActiveBlocksPerMultiprocessor",
|
||||
(
|
||||
- "hipOccupancyMaxActiveBlocksPerMultiprocessor",
|
||||
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",
|
||||
CONV_OCCUPANCY,
|
||||
API_DRIVER,
|
||||
),
|
||||
@@ -2898,7 +2898,7 @@
|
||||
(
|
||||
"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
||||
(
|
||||
- "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
||||
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
|
||||
CONV_OCCUPANCY,
|
||||
API_DRIVER,
|
||||
HIP_UNSUPPORTED,
|
||||
@@ -2906,12 +2906,12 @@
|
||||
),
|
||||
(
|
||||
"cuOccupancyMaxPotentialBlockSize",
|
||||
- ("hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
|
||||
+ ("hipModuleOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
|
||||
),
|
||||
(
|
||||
"cuOccupancyMaxPotentialBlockSizeWithFlags",
|
||||
(
|
||||
- "hipOccupancyMaxPotentialBlockSizeWithFlags",
|
||||
+ "hipModuleOccupancyMaxPotentialBlockSizeWithFlags",
|
||||
CONV_OCCUPANCY,
|
||||
API_DRIVER,
|
||||
HIP_UNSUPPORTED,
|
Loading…
Reference in New Issue
Block a user