CachedCMakePackage: Improve finding mpiexec for non-slurm machines (#49033)
* Check for LSF, Flux, and Slurm when determining MPIEXEC
* Make the scheduler/MPIEXEC helper functions methods of CachedCMakeBuilder
* Remove the axom workaround for running MPI on machines with Flux
parent 46f7737626
commit 0f64f1baec
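For orientation, here is a minimal sketch of the kind of MPI entries the builder ends up writing into a package's initconfig once a launcher has been resolved. The Slurm outcome and the /usr/bin/srun path are assumed for illustration; cmake_cache_path and cmake_cache_string are the helpers already defined in cached_cmake.py, and running the snippet assumes Spack's modules are importable.

# Illustrative only: roughly what initconfig_mpi_entries() appends on a machine
# where Slurm is detected and srun resolves to /usr/bin/srun (assumed path).
from spack.build_systems.cached_cmake import cmake_cache_path, cmake_cache_string

entries = []
entries.append(cmake_cache_path("MPIEXEC", "/usr/bin/srun"))      # value from get_mpi_exec()
entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))  # value from get_mpi_exec_num_proc()

# Each helper renders a CMake cache line, roughly:
#   set(MPIEXEC "/usr/bin/srun" CACHE PATH "")
#   set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING "")
print("".join(entries))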
lib/spack/spack/build_systems/cached_cmake.py

@@ -2,9 +2,10 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import collections.abc
+import enum
 import os
 import re
-from typing import Tuple
+from typing import Optional, Tuple
 
 import llnl.util.filesystem as fs
 import llnl.util.tty as tty
@@ -13,6 +14,7 @@
 import spack.spec
 import spack.util.prefix
 from spack.directives import depends_on
+from spack.util.executable import which_string
 
 from .cmake import CMakeBuilder, CMakePackage
 
@@ -178,6 +180,64 @@ def initconfig_compiler_entries(self):
 
         return entries
 
+    class Scheduler(enum.Enum):
+        LSF = enum.auto()
+        SLURM = enum.auto()
+        FLUX = enum.auto()
+
+    def get_scheduler(self) -> Optional[Scheduler]:
+        spec = self.pkg.spec
+
+        # Check for Spectrum-mpi, which always uses LSF, or an LSF MPI variant
+        if spec.satisfies("^spectrum-mpi") or spec["mpi"].satisfies("schedulers=lsf"):
+            return self.Scheduler.LSF
+
+        # Check for Slurm MPI variants
+        slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
+        if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
+            return self.Scheduler.SLURM
+
+        # TODO: improve this when MPI implementations support flux
+        # Do this check last to avoid using a flux wrapper present next to Slurm/LSF schedulers
+        if which_string("flux") is not None:
+            return self.Scheduler.FLUX
+
+        return None
+
+    def get_mpi_exec(self) -> Optional[str]:
+        spec = self.pkg.spec
+        scheduler = self.get_scheduler()
+
+        if scheduler == self.Scheduler.LSF:
+            return which_string("lrun")
+
+        elif scheduler == self.Scheduler.SLURM:
+            if spec["mpi"].external:
+                return which_string("srun")
+            else:
+                return os.path.join(spec["slurm"].prefix.bin, "srun")
+
+        elif scheduler == self.Scheduler.FLUX:
+            flux = which_string("flux")
+            return f"{flux};run" if flux else None
+
+        elif hasattr(spec["mpi"].package, "mpiexec"):
+            return spec["mpi"].package.mpiexec
+
+        else:
+            mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
+            if not os.path.exists(mpiexec):
+                mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
+            return mpiexec
+
+    def get_mpi_exec_num_proc(self) -> str:
+        scheduler = self.get_scheduler()
+
+        if scheduler in [self.Scheduler.FLUX, self.Scheduler.LSF, self.Scheduler.SLURM]:
+            return "-n"
+        else:
+            return "-np"
+
     def initconfig_mpi_entries(self):
         spec = self.pkg.spec
 
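As a usage sketch, a hypothetical builder for a downstream package could reuse these helpers when assembling its own entries. MyPackageBuilder and the MY_PKG_* entry names below are made up, and the values shown in the comments are examples only, not guaranteed results.

# Hypothetical subclass reusing the new helpers (sketch, not part of this commit).
from spack.build_systems.cached_cmake import CachedCMakeBuilder, cmake_cache_string


class MyPackageBuilder(CachedCMakeBuilder):
    def initconfig_mpi_entries(self):
        entries = super().initconfig_mpi_entries()

        launcher = self.get_mpi_exec()             # e.g. "/usr/bin/srun", ".../lrun", "<flux>;run", or None
        nproc_flag = self.get_mpi_exec_num_proc()  # "-n" under Slurm/LSF/Flux, "-np" otherwise

        if launcher is not None:
            # Hypothetical package-specific entries derived from the detected launcher.
            entries.append(cmake_cache_string("MY_PKG_MPI_LAUNCHER", launcher))
            entries.append(cmake_cache_string("MY_PKG_MPI_NUMPROC_FLAG", nproc_flag))
        return entries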
@@ -197,27 +257,10 @@ def initconfig_mpi_entries(self):
         if hasattr(spec["mpi"], "mpifc"):
             entries.append(cmake_cache_path("MPI_Fortran_COMPILER", spec["mpi"].mpifc))
 
-        # Check for slurm
-        using_slurm = False
-        slurm_checks = ["+slurm", "schedulers=slurm", "process_managers=slurm"]
-        if any(spec["mpi"].satisfies(variant) for variant in slurm_checks):
-            using_slurm = True
-
         # Determine MPIEXEC
-        if using_slurm:
-            if spec["mpi"].external:
-                # Heuristic until we have dependents on externals
-                mpiexec = "/usr/bin/srun"
-            else:
-                mpiexec = os.path.join(spec["slurm"].prefix.bin, "srun")
-        elif hasattr(spec["mpi"].package, "mpiexec"):
-            mpiexec = spec["mpi"].package.mpiexec
-        else:
-            mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpirun")
-            if not os.path.exists(mpiexec):
-                mpiexec = os.path.join(spec["mpi"].prefix.bin, "mpiexec")
+        mpiexec = self.get_mpi_exec()
 
-        if not os.path.exists(mpiexec):
+        if mpiexec is None or not os.path.exists(mpiexec.split(";")[0]):
             msg = "Unable to determine MPIEXEC, %s tests may fail" % self.pkg.name
             entries.append("# {0}\n".format(msg))
             tty.warn(msg)
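The split on ";" in the new existence check accounts for the Flux case, where get_mpi_exec() returns the flux binary and its run subcommand as a single semicolon-separated value; only the part before the ";" is a filesystem path. A tiny sketch of that check, with an assumed path:

import os

# Assumed example value: what get_mpi_exec() returns when Flux is detected.
mpiexec = "/usr/bin/flux;run"

# Only the executable part (before the ";") is something os.path.exists() can test;
# CMake later expands the ";" into a list, so the launch command becomes `flux run`.
launcher_path = mpiexec.split(";")[0]   # -> "/usr/bin/flux"
print(os.path.exists(launcher_path))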
@@ -230,10 +273,7 @@ def initconfig_mpi_entries(self):
                 entries.append(cmake_cache_path("MPIEXEC", mpiexec))
 
             # Determine MPIEXEC_NUMPROC_FLAG
-            if using_slurm:
-                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
-            else:
-                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-np"))
+            entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", self.get_mpi_exec_num_proc()))
 
         return entries
 
var/spack/repos/builtin/packages/axom/package.py

@@ -8,7 +8,6 @@
 from os.path import join as pjoin
 
 from spack.package import *
-from spack.util.executable import which_string
 
 
 def get_spec_path(spec, package_name, path_replacements={}, use_bin=False):
@@ -452,19 +451,6 @@ def initconfig_mpi_entries(self):
             entries.append(cmake_cache_option("ENABLE_MPI", True))
             if spec["mpi"].name == "spectrum-mpi":
                 entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
-
-            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
-            # TODO: Remove this logic by adding `using_flux` case in
-            # spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
-            # path to srun in same file.
-            if "toss_4" in self._get_sys_type(spec):
-                srun_wrapper = which_string("srun")
-                mpi_exec_index = [
-                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
-                ]
-                if mpi_exec_index:
-                    del entries[mpi_exec_index[0]]
-                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
         else:
             entries.append(cmake_cache_option("ENABLE_MPI", False))
 