CachedCMakePackage: Improve finding mpiexec for non-slurm machines (#49033)
* Check for LSF, FLux, and Slurm when determing MPI exec * Make scheduler/MPI exec helper functions methods of CachedCMakeBuilder * Remove axom workaround for running mpi on machines with flux
This commit is contained in:
parent
46f7737626
commit
0f64f1baec
@ -2,9 +2,10 @@
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
import collections.abc
|
||||
import enum
|
||||
import os
|
||||
import re
|
||||
from typing import Tuple
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import llnl.util.filesystem as fs
|
||||
import llnl.util.tty as tty
|
||||
@ -13,6 +14,7 @@
|
||||
import spack.spec
|
||||
import spack.util.prefix
|
||||
from spack.directives import depends_on
|
||||
from spack.util.executable import which_string
|
||||
|
||||
from .cmake import CMakeBuilder, CMakePackage
|
||||
|
||||
@ -178,6 +180,64 @@ def initconfig_compiler_entries(self):
|
||||
|
||||
return entries
|
||||
|
||||
class Scheduler(enum.Enum):
|
||||
LSF = enum.auto()
|
||||
SLURM = enum.auto()
|
||||
FLUX = enum.auto()
|
||||
|
||||
def get_scheduler(self) -> Optional[Scheduler]:
|
||||
spec = self.pkg.spec
|
||||
|
||||
# Check for Spectrum-mpi, which always uses LSF or LSF MPI variant
|
||||
if spec.satisfies("^spectrum-mpi") or spec["mpi"].satisfies("schedulers=lsf"):
|
||||
return self.Scheduler.LSF
|
||||
|
||||
# Check for Slurm MPI variants
|
||||
slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
|
||||
if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
|
||||
return self.Scheduler.SLURM
|
||||
|
||||
# TODO improve this when MPI implementations support flux
|
||||
# Do this check last to avoid using a flux wrapper present next to Slurm/ LSF schedulers
|
||||
if which_string("flux") is not None:
|
||||
return self.Scheduler.FLUX
|
||||
|
||||
return None
|
||||
|
||||
def get_mpi_exec(self) -> Optional[str]:
|
||||
spec = self.pkg.spec
|
||||
scheduler = self.get_scheduler()
|
||||
|
||||
if scheduler == self.Scheduler.LSF:
|
||||
return which_string("lrun")
|
||||
|
||||
elif scheduler == self.Scheduler.SLURM:
|
||||
if spec["mpi"].external:
|
||||
return which_string("srun")
|
||||
else:
|
||||
return os.path.join(spec["slurm"].prefix.bin, "srun")
|
||||
|
||||
elif scheduler == self.Scheduler.FLUX:
|
||||
flux = which_string("flux")
|
||||
return f"{flux};run" if flux else None
|
||||
|
||||
elif hasattr(spec["mpi"].package, "mpiexec"):
|
||||
return spec["mpi"].package.mpiexec
|
||||
|
||||
else:
|
||||
mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
|
||||
if not os.path.exists(mpiexec):
|
||||
mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
|
||||
return mpiexec
|
||||
|
||||
def get_mpi_exec_num_proc(self) -> str:
|
||||
scheduler = self.get_scheduler()
|
||||
|
||||
if scheduler in [self.Scheduler.FLUX, self.Scheduler.LSF, self.Scheduler.SLURM]:
|
||||
return "-n"
|
||||
else:
|
||||
return "-np"
|
||||
|
||||
def initconfig_mpi_entries(self):
|
||||
spec = self.pkg.spec
|
||||
|
||||
@ -197,27 +257,10 @@ def initconfig_mpi_entries(self):
|
||||
if hasattr(spec["mpi"], "mpifc"):
|
||||
entries.append(cmake_cache_path("MPI_Fortran_COMPILER", spec["mpi"].mpifc))
|
||||
|
||||
# Check for slurm
|
||||
using_slurm = False
|
||||
slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
|
||||
if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
|
||||
using_slurm = True
|
||||
|
||||
# Determine MPIEXEC
|
||||
if using_slurm:
|
||||
if spec["mpi"].external:
|
||||
# Heuristic until we have dependents on externals
|
||||
mpiexec = "/usr/bin/srun"
|
||||
else:
|
||||
mpiexec = os.path.join(spec["slurm"].prefix.bin, "srun")
|
||||
elif hasattr(spec["mpi"].package, "mpiexec"):
|
||||
mpiexec = spec["mpi"].package.mpiexec
|
||||
else:
|
||||
mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
|
||||
if not os.path.exists(mpiexec):
|
||||
mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
|
||||
mpiexec = self.get_mpi_exec()
|
||||
|
||||
if not os.path.exists(mpiexec):
|
||||
if mpiexec is None or not os.path.exists(mpiexec.split(";")[0]):
|
||||
msg = "Unable to determine MPIEXEC, %s tests may fail" % self.pkg.name
|
||||
entries.append("# {0}\n".format(msg))
|
||||
tty.warn(msg)
|
||||
@ -230,10 +273,7 @@ def initconfig_mpi_entries(self):
|
||||
entries.append(cmake_cache_path("MPIEXEC", mpiexec))
|
||||
|
||||
# Determine MPIEXEC_NUMPROC_FLAG
|
||||
if using_slurm:
|
||||
entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
|
||||
else:
|
||||
entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-np"))
|
||||
entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", self.get_mpi_exec_num_proc()))
|
||||
|
||||
return entries
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
from os.path import join as pjoin
|
||||
|
||||
from spack.package import *
|
||||
from spack.util.executable import which_string
|
||||
|
||||
|
||||
def get_spec_path(spec, package_name, path_replacements={}, use_bin=False):
|
||||
@ -452,19 +451,6 @@ def initconfig_mpi_entries(self):
|
||||
entries.append(cmake_cache_option("ENABLE_MPI", True))
|
||||
if spec["mpi"].name == "spectrum-mpi":
|
||||
entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
|
||||
|
||||
# Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
|
||||
# TODO: Remove this logic by adding `using_flux` case in
|
||||
# spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
|
||||
# path to srun in same file.
|
||||
if "toss_4" in self._get_sys_type(spec):
|
||||
srun_wrapper = which_string("srun")
|
||||
mpi_exec_index = [
|
||||
index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
|
||||
]
|
||||
if mpi_exec_index:
|
||||
del entries[mpi_exec_index[0]]
|
||||
entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
|
||||
else:
|
||||
entries.append(cmake_cache_option("ENABLE_MPI", False))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user