Fix performance regressions in OpenMPI v4.0.x through v4.1.0 on x86_64 (#22350)

This commit is contained in:
AMD Toolchain Support 2021-03-19 05:39:20 +05:30 committed by GitHub
parent d07cb59bef
commit 4f1a76a0d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h
index d85ff02bd6a..a465fdae5db 100644
--- a/opal/include/opal/sys/gcc_builtin/atomic.h
+++ b/opal/include/opal/sys/gcc_builtin/atomic.h
@@ -13,8 +13,8 @@
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
- * Copyright (c) 2016-2017 Research Organization for Information Science
- * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2016-2021 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@@ -61,9 +61,8 @@ static inline void opal_atomic_rmb(void)
{
#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64
/* work around a bug in older gcc versions where ACQUIRE seems to get
- * treated as a no-op instead of being equivalent to
- * __asm__ __volatile__("": : :"memory") */
- __atomic_thread_fence (__ATOMIC_SEQ_CST);
+ * treated as a no-op instead of acting as a compiler barrier */
+ __asm__ __volatile__("": : :"memory");
#else
__atomic_thread_fence (__ATOMIC_ACQUIRE);
#endif

View File

@@ -188,6 +188,9 @@ class Openmpi(AutotoolsPackage):
# The second patch was applied starting version v4.0.0 and backported to
# v2.x, v3.0.x, and v3.1.x.
patch('use_mpi_tkr_sizeof/step_2.patch', when='@1.8.4:2.1.3,3:3.0.1')
# Fix performance regressions on x86_64 that were introduced by the
# workaround for a bug in older gcc versions. Refs. open-mpi/ompi#8603
patch('opal_assembly_arch.patch', when='@4.0.0:4.1.1')
variant(
'fabrics',