CachedCMakePackage: Improve finding mpiexec for non-slurm machines (#49033)

* Check for LSF, Flux, and Slurm when determining MPI exec

* Make scheduler/MPI exec helper functions methods of CachedCMakeBuilder

* Remove axom workaround for running MPI on machines with Flux
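
For reviewers, the selection order the new helpers implement is: LSF first (Spectrum MPI always runs under LSF, as does any MPI with the LSF scheduler variant), then the Slurm MPI variants, then a flux executable on PATH, and finally the MPI package's own mpiexec/mpirun. Below is a minimal standalone sketch of that precedence, with shutil.which and plain variant strings standing in for Spack's which_string and spec.satisfies(); the names are illustrative, not the actual builder API.

import shutil
from typing import Optional, Tuple

def pick_launcher(mpi_name, mpi_variants) -> Optional[Tuple[str, str]]:
    """Return (mpiexec, numproc_flag), or None if no launcher is found."""
    # LSF: Spectrum MPI, or an MPI built with the LSF scheduler variant.
    if mpi_name == "spectrum-mpi" or "schedulers=lsf" in mpi_variants:
        lrun = shutil.which("lrun")
        return (lrun, "-n") if lrun else None

    # Slurm: any of the Slurm-flavored MPI variants.
    if any(v in mpi_variants for v in ("+slurm", "schedulers=slurm", "process_managers=slurm")):
        srun = shutil.which("srun")
        return (srun, "-n") if srun else None

    # Flux: checked last so a flux wrapper sitting next to a real
    # Slurm/LSF scheduler does not shadow it.
    flux = shutil.which("flux")
    if flux:
        return (f"{flux};run", "-n")

    return None

For example, pick_launcher("openmpi", ("+slurm",)) would return something like ("/usr/bin/srun", "-n") on a typical Slurm machine.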
Tara Drwenski, 2025-03-31 10:11:09 -06:00, committed by GitHub
parent 46f7737626
commit 0f64f1baec
2 changed files with 64 additions and 38 deletions

lib/spack/spack/build_systems/cached_cmake.py

@@ -2,9 +2,10 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import collections.abc
+import enum
 import os
 import re
-from typing import Tuple
+from typing import Optional, Tuple
 
 import llnl.util.filesystem as fs
 import llnl.util.tty as tty
@@ -13,6 +14,7 @@
 import spack.spec
 import spack.util.prefix
 from spack.directives import depends_on
+from spack.util.executable import which_string
 
 from .cmake import CMakeBuilder, CMakePackage
@@ -178,6 +180,64 @@ def initconfig_compiler_entries(self):
 
         return entries
 
+    class Scheduler(enum.Enum):
+        LSF = enum.auto()
+        SLURM = enum.auto()
+        FLUX = enum.auto()
+
+    def get_scheduler(self) -> Optional[Scheduler]:
+        spec = self.pkg.spec
+
+        # Check for Spectrum MPI, which always uses LSF, or an LSF MPI variant
+        if spec.satisfies("^spectrum-mpi") or spec["mpi"].satisfies("schedulers=lsf"):
+            return self.Scheduler.LSF
+
+        # Check for Slurm MPI variants
+        slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
+        if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
+            return self.Scheduler.SLURM
+
+        # TODO: improve this when MPI implementations support flux
+        # Do this check last to avoid using a flux wrapper present next to Slurm/LSF schedulers
+        if which_string("flux") is not None:
+            return self.Scheduler.FLUX
+
+        return None
+
+    def get_mpi_exec(self) -> Optional[str]:
+        spec = self.pkg.spec
+        scheduler = self.get_scheduler()
+
+        if scheduler == self.Scheduler.LSF:
+            return which_string("lrun")
+
+        elif scheduler == self.Scheduler.SLURM:
+            if spec["mpi"].external:
+                return which_string("srun")
+            else:
+                return os.path.join(spec["slurm"].prefix.bin, "srun")
+
+        elif scheduler == self.Scheduler.FLUX:
+            flux = which_string("flux")
+            return f"{flux};run" if flux else None
+
+        elif hasattr(spec["mpi"].package, "mpiexec"):
+            return spec["mpi"].package.mpiexec
+
+        else:
+            mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
+            if not os.path.exists(mpiexec):
+                mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
+            return mpiexec
+
+    def get_mpi_exec_num_proc(self) -> str:
+        scheduler = self.get_scheduler()
+
+        if scheduler in [self.Scheduler.FLUX, self.Scheduler.LSF, self.Scheduler.SLURM]:
+            return "-n"
+        else:
+            return "-np"
+
     def initconfig_mpi_entries(self):
         spec = self.pkg.spec
@@ -197,27 +257,10 @@ def initconfig_mpi_entries(self):
         if hasattr(spec["mpi"], "mpifc"):
             entries.append(cmake_cache_path("MPI_Fortran_COMPILER", spec["mpi"].mpifc))
 
-        # Check for slurm
-        using_slurm = False
-        slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
-        if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
-            using_slurm = True
-
         # Determine MPIEXEC
-        if using_slurm:
-            if spec["mpi"].external:
-                # Heuristic until we have dependents on externals
-                mpiexec = "/usr/bin/srun"
-            else:
-                mpiexec = os.path.join(spec["slurm"].prefix.bin, "srun")
-        elif hasattr(spec["mpi"].package, "mpiexec"):
-            mpiexec = spec["mpi"].package.mpiexec
-        else:
-            mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
-            if not os.path.exists(mpiexec):
-                mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
+        mpiexec = self.get_mpi_exec()
 
-        if not os.path.exists(mpiexec):
+        if mpiexec is None or not os.path.exists(mpiexec.split(";")[0]):
             msg = "Unable to determine MPIEXEC, %s tests may fail" % self.pkg.name
             entries.append("# {0}\n".format(msg))
             tty.warn(msg)
@@ -230,10 +273,7 @@ def initconfig_mpi_entries(self):
             entries.append(cmake_cache_path("MPIEXEC", mpiexec))
 
         # Determine MPIEXEC_NUMPROC_FLAG
-        if using_slurm:
-            entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
-        else:
-            entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-np"))
+        entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", self.get_mpi_exec_num_proc()))
 
         return entries
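
A note on the Flux value: get_mpi_exec() returns something like /usr/bin/flux;run. The semicolon is CMake's list separator, so a test command written as "${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ./app" expands to flux run -n 2 ./app; it is also why the existence check above splits on ";" and tests only the first element. A quick illustration (the flux path is made up):

import os

mpiexec = "/usr/bin/flux;run"        # illustrative value returned on a Flux machine
executable = mpiexec.split(";")[0]   # "/usr/bin/flux" -- the actual file on disk
print(os.path.exists(executable))    # the check initconfig_mpi_entries performs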

var/spack/repos/builtin/packages/axom/package.py

@@ -8,7 +8,6 @@
 from os.path import join as pjoin
 
 from spack.package import *
-from spack.util.executable import which_string
 
 
 def get_spec_path(spec, package_name, path_replacements={}, use_bin=False):
@@ -452,19 +451,6 @@ def initconfig_mpi_entries(self):
             entries.append(cmake_cache_option("ENABLE_MPI", True))
             if spec["mpi"].name == "spectrum-mpi":
                 entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
-
-            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
-            # TODO: Remove this logic by adding `using_flux` case in
-            # spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
-            # path to srun in same file.
-            if "toss_4" in self._get_sys_type(spec):
-                srun_wrapper = which_string("srun")
-                mpi_exec_index = [
-                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
-                ]
-                if mpi_exec_index:
-                    del entries[mpi_exec_index[0]]
-                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
         else:
             entries.append(cmake_cache_option("ENABLE_MPI", False))