Adds MPI_THREAD_MULTIPLE support for OpenMPI to the variant with UCX fabrics (#14194)

* Fixes:
1. MPI_THREAD_MULTIPLE problem with OpenMPI and UCX.

Changes:
1. OpenMPI gains two new depends_on directives that cause UCX to be compiled with multi-threading support: an implicit one for OpenMPI 3.x and newer, where MPI_THREAD_MULTIPLE is enabled by default, and an explicit one for OpenMPI <= 2.x, where MPI_THREAD_MULTIPLE is disabled by default and must be requested with +thread_multiple.
2. Extends the UCX package with a thread_multiple variant ("Enable thread support in UCP and UCT").
3. Adds sha256 sums of UCX releases 1.6.1 and 1.2.0.

More details:
Fixes the issue with OpenMPI where programs which use MPI_THREAD_MULTIPLE will fail to execute because UCP worker didn't support it.
During the OpenMPI package installation, the +thread_multiple spec was not propagated to UCX, nor did the UCX package handle it at all.
Now the OpenMPI package handles the +thread_multiple spec when UCX is requested, and the UCX package correctly handles +thread_multiple by compiling with the --enable-mt option.
Error message during runtime:
pml_ucx.c:226 Error: UCP worker does not support MPI_THREAD_MULTIPLE

* Adapts the spec checks so that they read better and use the form suggested in the docs.

* Explicitly disables multithreading of UCX if +thread_multiple option is not used.
This commit is contained in:
Robert Mijakovic 2020-01-15 19:31:18 +01:00 committed by Adam J. Stewart
parent 0232c820ab
commit a2f961bd6f
2 changed files with 17 additions and 1 deletions

View File

@ -96,6 +96,7 @@ class Openmpi(AutotoolsPackage):
version('3.0.1', sha256='663450d1ee7838b03644507e8a76edfb1fba23e601e9e0b5b2a738e54acd785d') # libmpi.so.40.00.1
version('3.0.0', sha256='f699bff21db0125d8cccfe79518b77641cd83628725a1e1ed3e45633496a82d7') # libmpi.so.40.00.0
# Retired
version('2.1.6', sha256='98b8e1b8597bbec586a0da79fcd54a405388190247aa04d48e8c40944d4ca86e') # libmpi.so.20.10.3
version('2.1.5', sha256='b807ccab801f27c3159a5edf29051cd3331d3792648919f9c4cee48e987e7794') # libmpi.so.20.10.3
version('2.1.4', sha256='3e03695ca8bd663bc2d89eda343c92bb3d4fc79126b178f5ddcb68a8796b24e2') # libmpi.so.20.10.3
@ -104,7 +105,6 @@ class Openmpi(AutotoolsPackage):
version('2.1.1', sha256='bd7badd4ff3afa448c0d7f3ca0ee6ce003b957e9954aa87d8e4435759b5e4d16') # libmpi.so.20.10.1
version('2.1.0', sha256='b169e15f5af81bf3572db764417670f508c0df37ce86ff50deb56bd3acb43957') # libmpi.so.20.10.0
# Retired
version('2.0.4', sha256='4f82d5f7f294becbd737319f74801206b08378188a95b70abe706fdc77a0c20b') # libmpi.so.20.0.4
version('2.0.3', sha256='b52c0204c0e5954c9c57d383bb22b4181c09934f97783292927394d29f2a808a') # libmpi.so.20.0.3
version('2.0.2', sha256='cae396e643f9f91f0a795f8d8694adf7bacfb16f967c22fb39e9e28d477730d3') # libmpi.so.20.0.2
@ -273,6 +273,8 @@ class Openmpi(AutotoolsPackage):
# zlib is only declared as a dependency for versions 3.0.0 and newer.
depends_on('zlib', when='@3.0.0:')
# The memchecker variant pulls in valgrind with its mpi variant disabled.
depends_on('valgrind~mpi', when='+memchecker')
depends_on('ucx', when='fabrics=ucx')
# UCX must be built with thread support whenever MPI_THREAD_MULTIPLE can be
# in use, otherwise programs fail at runtime with
# "UCP worker does not support MPI_THREAD_MULTIPLE".
# Explicit case: the user asked for +thread_multiple (any version).
depends_on('ucx +thread_multiple', when='fabrics=ucx +thread_multiple')
# Implicit case: 3.0.0 and newer enable MPI_THREAD_MULTIPLE by default.
depends_on('ucx +thread_multiple', when='@3.0.0: fabrics=ucx')
depends_on('libfabric', when='fabrics=libfabric')
# Scheduler integrations depend on the matching scheduler package.
depends_on('slurm', when='schedulers=slurm')
depends_on('lsf', when='schedulers=lsf')

View File

@ -14,6 +14,7 @@ class Ucx(AutotoolsPackage):
url = "https://github.com/openucx/ucx/releases/download/v1.3.1/ucx-1.3.1.tar.gz"
# Current
version('1.6.1', sha256='1425648aa03f5fa40e4bc5c4a5a83fe0292e2fe44f6054352fbebbf6d8f342a1')
version('1.6.0', sha256='360e885dd7f706a19b673035a3477397d100a02eb618371697c7f3ee4e143e2c')
version('1.5.2', sha256='1a333853069860e86ba69b8d071ccc9871209603790e2b673ec61f8086913fad')
version('1.5.1', sha256='567119cd80ad2ae6968ecaa4bd1d2a80afadd037ccc988740f668de10d2fdb7e')
@ -25,6 +26,19 @@ class Ucx(AutotoolsPackage):
version('1.3.0', sha256='71e69e6d78a4950cc5a1edcbe59bf7a8f8e38d59c9f823109853927c4d442952')
version('1.2.2', sha256='914d10fee8f970d4fb286079dd656cf8a260ec7d724d5f751b3109ed32a6da63')
version('1.2.1', sha256='fc63760601c03ff60a2531ec3c6637e98f5b743576eb410f245839c84a0ad617')
version('1.2.0', sha256='1e1a62d6d0f89ce59e384b0b5b30b416b8fd8d7cedec4182a5319d0dfddf649c')
# Off by default; configure_args maps this variant to --enable-mt when it is
# set and to --disable-mt otherwise.
variant('thread_multiple', default=False,
description='Enable thread support in UCP and UCT')
# Unconditional dependencies of every UCX build.
depends_on('numactl')
depends_on('rdma-core')
def configure_args(self):
    """Return the configure flags selecting UCX's multi-threading mode.

    The list always contains exactly one flag: ``--enable-mt`` when the
    spec has ``+thread_multiple``, ``--disable-mt`` otherwise.
    """
    # Multi-threading is switched off explicitly rather than left to the
    # configure script's default when the variant is not requested.
    mt_enabled = '+thread_multiple' in self.spec
    return ['--enable-mt' if mt_enabled else '--disable-mt']