From de4a9e867dece7ae756be6ffc7bb3c00ea8e004d Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Wed, 14 May 2025 09:48:58 +0200 Subject: [PATCH] py-torch: rework patches to avoid secondary rate limits (#50455) Signed-off-by: Massimiliano Culpo --- .../builtin/packages/py_torch/59220.patch | 22 ++++ .../builtin/packages/py_torch/gloo_cuda.patch | 115 ++++++++++++++++++ .../builtin/packages/py_torch/package.py | 32 ++--- 3 files changed, 153 insertions(+), 16 deletions(-) create mode 100644 var/spack/repos/spack_repo/builtin/packages/py_torch/59220.patch create mode 100644 var/spack/repos/spack_repo/builtin/packages/py_torch/gloo_cuda.patch diff --git a/var/spack/repos/spack_repo/builtin/packages/py_torch/59220.patch b/var/spack/repos/spack_repo/builtin/packages/py_torch/59220.patch new file mode 100644 index 00000000000..8b270eecacc --- /dev/null +++ b/var/spack/repos/spack_repo/builtin/packages/py_torch/59220.patch @@ -0,0 +1,22 @@ +From 809f3658b4d7d8147f6c2f79f8aa23b50a232247 Mon Sep 17 00:00:00 2001 +From: Alexander Grund +Date: Mon, 31 May 2021 16:31:59 +0200 +Subject: [PATCH] Pass WITH_BLAS option from environment to CMake + +Allows to choose the BLAS backend with Eigen +--- + tools/setup_helpers/cmake.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py +index 2309ad3bdc52a5c728f41d21eb5ff37daf5d1dd6..60b7bbd47ff222ef9c41604ffed1e452860aebc6 100644 +--- a/tools/setup_helpers/cmake.py ++++ b/tools/setup_helpers/cmake.py +@@ -235,6 +235,7 @@ def generate(self, version, cmake_python_library, build_python, build_test, my_e + # CMakeLists.txt. 
+ var: var for var in + ('BLAS', ++ 'WITH_BLAS', + 'BUILDING_WITH_TORCH_LIBS', + 'CUDA_HOST_COMPILER', + 'CUDA_NVCC_EXECUTABLE', diff --git a/var/spack/repos/spack_repo/builtin/packages/py_torch/gloo_cuda.patch b/var/spack/repos/spack_repo/builtin/packages/py_torch/gloo_cuda.patch new file mode 100644 index 00000000000..4c18dbd2697 --- /dev/null +++ b/var/spack/repos/spack_repo/builtin/packages/py_torch/gloo_cuda.patch @@ -0,0 +1,115 @@ +From e9bfe6f07faeaeba252cc426c2539b4b50326796 Mon Sep 17 00:00:00 2001 +From: Nathan Brown +Date: Tue, 4 Feb 2025 15:51:24 +0000 +Subject: [PATCH] gloo: fix building system gloo with CUDA/HIP + +Fix incorrect linking of Gloo's libraries when building with system +Gloo. Previously, either Gloo's native library or Gloo's CUDA library +were linked. However, Gloo had changed such that all users of Gloo must +link the native library, and can optionally link the CUDA or HIP +library for Gloo + CUDA/HIP support. +This had been updated when building/linking with vendored Gloo, but not +when using system Gloo. 
+ +Fixes: #146239 + +Reported-by: Adam J Stewart +Signed-off-by: Nathan Brown +--- + cmake/Dependencies.cmake | 11 ++++++++-- + cmake/Modules/FindGloo.cmake | 39 +++++++++++++++--------------------- + 2 files changed, 25 insertions(+), 25 deletions(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 9342555d9bc7e40086c87fa7c199da18031ce808..c680e4995fb67000f6e545fe09190643dcf7ee25 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1192,10 +1192,17 @@ if(USE_GLOO) + if(NOT Gloo_FOUND) + message(FATAL_ERROR "Cannot find gloo") + endif() +- message("Found gloo: ${Gloo_LIBRARY}") ++ message("Found gloo: ${Gloo_NATIVE_LIBRARY}, cuda lib: ${Gloo_CUDA_LIBRARY}, hip lib: ${Gloo_HIP_LIBRARY}") + message("Found gloo include directories: ${Gloo_INCLUDE_DIRS}") + add_library(gloo SHARED IMPORTED) +- set_target_properties(gloo PROPERTIES IMPORTED_LOCATION ${Gloo_LIBRARY}) ++ set_target_properties(gloo PROPERTIES IMPORTED_LOCATION ${Gloo_NATIVE_LIBRARY}) ++ if(USE_CUDA) ++ add_library(gloo_cuda SHARED IMPORTED) ++ set_target_properties(gloo_cuda PROPERTIES IMPORTED_LOCATION ${Gloo_CUDA_LIBRARY}) ++ elseif(USE_ROCM) ++ add_library(gloo_hip SHARED IMPORTED) ++ set_target_properties(gloo_hip PROPERTIES IMPORTED_LOCATION ${Gloo_HIP_LIBRARY}) ++ endif() + # need to use Gloo_INCLUDE_DIRS over third_party/gloo to find Gloo's auto-generated config.h + include_directories(BEFORE SYSTEM ${Gloo_INCLUDE_DIRS}) + endif() +diff --git a/cmake/Modules/FindGloo.cmake b/cmake/Modules/FindGloo.cmake +index e965326e2e8a0ab006bfe79243a66292ba262b62..944cd4d8d25738125c0f85b4dd9fee2850029339 100644 +--- a/cmake/Modules/FindGloo.cmake ++++ b/cmake/Modules/FindGloo.cmake +@@ -1,7 +1,8 @@ + # Try to find the Gloo library and headers. 
+ # Gloo_FOUND - system has Gloo lib + # Gloo_INCLUDE_DIRS - the Gloo include directory +-# Gloo_LIBRARY/Gloo_NATIVE_LIBRARY - libraries needed to use Gloo ++# Gloo_NATIVE_LIBRARY - base gloo library, needs to be linked ++# Gloo_CUDA_LIBRARY/Gloo_HIP_LIBRARY - CUDA/HIP support library in Gloo + + find_path(Gloo_INCLUDE_DIR + NAMES gloo/common/common.h +@@ -10,40 +11,32 @@ find_path(Gloo_INCLUDE_DIR + + find_library(Gloo_NATIVE_LIBRARY + NAMES gloo +- DOC "The Gloo library (without CUDA)" ++ DOC "The Gloo library" + ) + ++# Gloo has optional CUDA support ++# if Gloo + CUDA is desired, Gloo_CUDA_LIBRARY ++# needs to be linked into desired target + find_library(Gloo_CUDA_LIBRARY + NAMES gloo_cuda +- DOC "The Gloo library (with CUDA)" ++ DOC "Gloo's CUDA support/code" ++) ++ ++# Gloo has optional HIP support ++# if Gloo + HIP is desired, Gloo_HIP_LIBRARY ++# needs to be linked to desired target ++find_library(Gloo_HIP_LIBRARY ++ NAMES gloo_hip ++ DOC "Gloo's HIP support/code" + ) + + set(Gloo_INCLUDE_DIRS ${Gloo_INCLUDE_DIR}) + +-# use the CUDA library depending on the Gloo_USE_CUDA variable +-if (DEFINED Gloo_USE_CUDA) +- if (${Gloo_USE_CUDA}) +- set(Gloo_LIBRARY ${Gloo_CUDA_LIBRARY}) +- set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- else() +- set(Gloo_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- endif() +-else() +- # else try to use the CUDA library if found +- if (${Gloo_CUDA_LIBRARY} STREQUAL "Gloo_CUDA_LIBRARY-NOTFOUND") +- set(Gloo_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- else() +- set(Gloo_LIBRARY ${Gloo_CUDA_LIBRARY}) +- set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY}) +- endif() +-endif() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Gloo + FOUND_VAR Gloo_FOUND +- REQUIRED_VARS Gloo_INCLUDE_DIR Gloo_LIBRARY ++ REQUIRED_VARS Gloo_INCLUDE_DIR Gloo_NATIVE_LIBRARY + ) + + mark_as_advanced(Gloo_FOUND) diff --git 
a/var/spack/repos/spack_repo/builtin/packages/py_torch/package.py b/var/spack/repos/spack_repo/builtin/packages/py_torch/package.py index 699de840732..dd077d280f0 100644 --- a/var/spack/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/var/spack/repos/spack_repo/builtin/packages/py_torch/package.py @@ -333,45 +333,45 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): # https://github.com/pytorch/pytorch/issues/151316 patch( - "https://github.com/pytorch/pytorch/pull/151344.patch?full_index=1", - sha256="edaadfd5f8acee67fee1c77b34145640a1239c9546d77420f3887af24889799e", + "https://github.com/pytorch/pytorch/commit/331423e5c24170b218e743b3392acbad4480340d.patch?full_index=1", + sha256="493cde279804346e13cf21862fddc48040a4d7da65d4e5d3de5f717a15e0aa62", when="@2.7.0", ) patch("apple_clang_17.patch", when="@1.12:2.6") # https://github.com/pytorch/pytorch/issues/146239 patch( - "https://github.com/pytorch/pytorch/pull/146637.patch?full_index=1", + "gloo_cuda.patch", sha256="f93aa66e2cf9c0febdbcf72f44213a213e570e5f860186e81c92c8d2af0857c0", when="@2.6:", ) # Fixes 'FindBLAS.cmake' error: unknown command check_function_exists patch( - "https://github.com/pytorch/pytorch/pull/145849.patch?full_index=1", - sha256="5675ab543b786b8f360de451b27b3eb5d3ce8499d6c1a207f4a854f0c473ab03", + "https://github.com/pytorch/pytorch/commit/8d91bfd9654589c41b3bbb589bcb0bf95443c53e.patch?full_index=1", + sha256="2c9e0c8986c388f38288dacfb3208b457b2eec340963b8c8c8779e9f487adc07", when="@:2.6", ) # https://github.com/pytorch/pytorch/issues/90448 patch( - "https://github.com/pytorch/pytorch/pull/97270.patch?full_index=1", - sha256="beb3fb57746cf8443f5caa6e08b2f8f4d4822c1e11e0c912134bd166c6a0ade7", + "https://github.com/pytorch/pytorch/commit/9a18968253e28ba8d8bdf646731087000c7876b7.patch?full_index=1", + sha256="b4f299f6751e03fcf5d9a318541156edbc49c00e8a9c78785031e438e38f5533", when="@1.10:2.0", ) # Fix BLAS being overridden by MKL # 
https://github.com/pytorch/pytorch/issues/60328 patch( - "https://github.com/pytorch/pytorch/pull/59220.patch?full_index=1", + "59220.patch", sha256="6d5717267f901e8ee493dfacd08734d9bcc48ad29a76ca9ef702368e96bee675", when="@:1.11", ) # Fixes build on older systems with glibc <2.12 patch( - "https://github.com/pytorch/pytorch/pull/55063.patch?full_index=1", - sha256="2229bcbf20fbe88aa9f7318f89c126ec7f527875ffe689a763c78abfa127a65c", + "https://github.com/pytorch/pytorch/commit/13c975684a220ec096216ec6468ccd0dc90ff50a.patch?full_index=1", + sha256="a999e9376a69bbb8620ab358b485d1529c8e1c23a09ca34c5d287f6b77d2f5d9", when="@:1.8.1", ) @@ -505,21 +505,21 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): # Some missing includes # See: https://github.com/pytorch/pytorch/pull/100036 patch( - "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/100036.patch?full_index=1", - sha256="65060b54c31196b26dcff29bbb178fd17d5677e8481a2a06002c0ca4dd37b3d0", + "https://github.com/pytorch/pytorch/commit/9d99d8879cb8a7a5ec94b04e933305b8d24ad6ac.patch?full_index=1", + sha256="8c3a5b22d0dbda2ee45cfc2ae1da446fc20898e498003579490d4efe9241f9ee", when="@2.0.0:2.0.1", ) # See: https://github.com/pytorch/pytorch/pull/100049 patch( - "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/100049.patch?full_index=1", - sha256="673056141c0ea6ff4411f65a26f1a9d7a7c49ad8fe034a01ef0d56ba8a7a9386", + "https://github.com/pytorch/pytorch/commit/aaa3eb059a0294cc01c71f8e74abcebc33404e17.patch?full_index=1", + sha256="8dcbc5cd24b4c0e4a051e2161700b485c6c598b66347e7e90a263d9319c76374", when="@2.0.0:2.0.1", ) # Use correct OpenBLAS include path under prefix patch( - "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/110063.patch?full_index=1", - sha256="23fb4009f7337051fc5303927ff977186a5af960245e7212895406477d8b2f66", + "https://github.com/pytorch/pytorch/commit/21d77bcf808d076f81b5e885a8ce6ca20a08dbed.patch?full_index=1", + 
sha256="c61a6bd8cb9f021bfa122945a332cec223a2c7c6843ac911f9dc23e6facfb0ac", when="@:2.1", )