From a66ef5ccd39f294b79bf31271b47d0120a439a2a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 9 Jan 2025 13:59:21 -0600 Subject: [PATCH] [OpenMP] Use __builtin_bit_cast instead of UB type punning (#122325) Summary: Use a normal bitcast, remove from the shared utils since it's not available in GCC 7.4 --- offload/DeviceRTL/include/DeviceUtils.h | 5 ++++ offload/DeviceRTL/include/Synchronization.h | 32 ++++++++++----------- offload/DeviceRTL/src/Mapping.cpp | 8 +++--- offload/include/Shared/Utils.h | 5 ---- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h index fb00d6c75525571..fa66b973a4f5e7d 100644 --- a/offload/DeviceRTL/include/DeviceUtils.h +++ b/offload/DeviceRTL/include/DeviceUtils.h @@ -60,6 +60,11 @@ struct remove_addrspace : type_identity {}; template using remove_addrspace_t = typename remove_addrspace::type; +template inline To bitCast(From V) { + static_assert(sizeof(To) == sizeof(From), "Bad conversion"); + return __builtin_bit_cast(To, V); +} + /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread /// is identified by \p Mask. int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h index ae065850d824c20..e1968675550d49d 100644 --- a/offload/DeviceRTL/include/Synchronization.h +++ b/offload/DeviceRTL/include/Synchronization.h @@ -98,20 +98,20 @@ template > utils::enable_if_t, V> max(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - max((int32_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - min((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + max((int32_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + min((uint32_t *)Address, utils::bitCast(Val), Ordering)); } template > utils::enable_if_t, V> max(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - max((int64_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - min((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + max((int64_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + min((uint64_t *)Address, utils::bitCast(Val), Ordering)); } template > @@ -126,10 +126,10 @@ template > utils::enable_if_t, V> min(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - min((int32_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - max((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + min((int32_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + max((uint32_t *)Address, utils::bitCast(Val), Ordering)); } // TODO: Implement this with __atomic_fetch_max and remove the duplication. @@ -138,10 +138,10 @@ utils::enable_if_t, V> min(Ty *Address, utils::remove_addrspace_t Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - min((int64_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - max((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + min((int64_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + max((uint64_t *)Address, utils::bitCast(Val), Ordering)); } template > diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp index 881bd12f0340511..8583a539824c82a 100644 --- a/offload/DeviceRTL/src/Mapping.cpp +++ b/offload/DeviceRTL/src/Mapping.cpp @@ -371,8 +371,8 @@ int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width) { float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta, int width) { - return utils::convertViaPun(utils::shuffleDown( - mask, utils::convertViaPun(var), delta, width)); + return utils::bitCast( + utils::shuffleDown(mask, utils::bitCast(var), delta, width)); } long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) { @@ -381,8 +381,8 @@ long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) { double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta, int width) { - return utils::convertViaPun(utils::shuffleDown( - mask, utils::convertViaPun(var), delta, width)); + return utils::bitCast( + utils::shuffleDown(mask, utils::bitCast(var), delta, width)); } } diff --git a/offload/include/Shared/Utils.h b/offload/include/Shared/Utils.h index 83a82678312c132..523e6bc505b81d3 100644 --- a/offload/include/Shared/Utils.h +++ b/offload/include/Shared/Utils.h @@ -68,11 +68,6 @@ inline uint32_t popc(uint64_t V) { return __builtin_popcountl(V); } -template inline DstTy convertViaPun(SrcTy V) { - static_assert(sizeof(DstTy) == sizeof(SrcTy), "Bad conversion"); - return *((DstTy *)(&V)); -} - } // namespace utils #endif // OMPTARGET_SHARED_UTILS_H