vir-simd 0.4.189
Parallelism TS 2 extensions and simd fallback implementation
Loading...
Searching...
No Matches
detail.h
1/* SPDX-License-Identifier: LGPL-3.0-or-later */
2/* Copyright © 2022–2024 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
3 * Matthias Kretz <m.kretz@gsi.de>
4 */
5
6#ifndef VIR_DETAILS_H
7#define VIR_DETAILS_H
8
9#include "simd.h"
10#include "constexpr_wrapper.h"
11#include <type_traits>
// VIR_HAVE_STD_BIT_CAST is 1 iff std::bit_cast can be used.
// The mere presence of <bit> in C++20 mode does not guarantee std::bit_cast (a standard library
// may ship <bit> before it implements bit_cast), therefore the library feature-test macro
// __cpp_lib_bit_cast is consulted after including the header.
#if __has_include (<bit>) && __cplusplus >= 202002L
#include <bit> // for bit_cast
#if defined __cpp_lib_bit_cast && __cpp_lib_bit_cast >= 201806L
#define VIR_HAVE_STD_BIT_CAST 1
#endif
#endif
#ifndef VIR_HAVE_STD_BIT_CAST
#define VIR_HAVE_STD_BIT_CAST 0
#endif
18
// VIR_GLIBCXX_STDX_SIMD is 1 iff both _GLIBCXX_EXPERIMENTAL_SIMD_H and
// __cpp_lib_experimental_parallel_simd are defined (presumably: libstdc++'s <experimental/simd>
// was included by "simd.h" -- confirm there); otherwise it is 0.
#if defined(_GLIBCXX_EXPERIMENTAL_SIMD_H) && defined(__cpp_lib_experimental_parallel_simd)
#  define VIR_GLIBCXX_STDX_SIMD 1
#else
#  define VIR_GLIBCXX_STDX_SIMD 0
#endif
24
// With the libstdc++ simd implementation, the constexpr API is only enabled when GNU extensions
// are active (-std=gnu++17 instead of -std=c++17), i.e. when __STRICT_ANSI__ is not defined.
// Additionally, Clang with libstdc++ is never constexpr enabled.
//
// VIR_SIMD_CONSTEXPR_SIMD:     'constexpr' if simd objects can be constant expressions, else 'const'
// VIR_SIMD_HAVE_CONSTEXPR_API: 1 if the constexpr API is available, else 0
#if !VIR_GLIBCXX_STDX_SIMD || !(defined(__STRICT_ANSI__) || defined(__clang__))
// GCC with libstdc++ and GNU extensions or vir::stdx::simd fallback
#  define VIR_SIMD_CONSTEXPR_SIMD constexpr
#  define VIR_SIMD_HAVE_CONSTEXPR_API 1
#else
#  define VIR_SIMD_CONSTEXPR_SIMD const
#  define VIR_SIMD_HAVE_CONSTEXPR_API 0
#endif
34
// Attribute placed between a lambda's parameter list and its body to force inlining of the call
// operator. Only GCC proper (not Clang) gets the attribute; everywhere else the macro is empty.
#if defined(__GNUC__) && !defined(__clang__)
#  define VIR_LAMBDA_ALWAYS_INLINE __attribute__((__always_inline__))
#else
#  define VIR_LAMBDA_ALWAYS_INLINE
#endif
40
// Builtin detection. Both macros are always defined afterwards, to either 0 or 1:
//   VIR_HAVE_WORKING_SHUFFLEVECTOR: __builtin_shufflevector exists and is trusted
//   VIR_HAVE_BUILTIN_BIT_CAST:      __builtin_bit_cast exists
#ifdef __has_builtin
// Clang 17 miscompiles permuting loads and stores for simdized types. I was unable to pin down the
// cause, but it seems highly likely that some __builtin_shufflevector calls get miscompiled. So
// far, not using __builtin_shufflevector has resolved all failures.
//
// __clang_major__ is only inspected if it is defined, so that non-Clang compilers do not
// evaluate an undefined identifier in #if (which -Wundef diagnoses).
#if __has_builtin(__builtin_shufflevector) \
      && !(defined(__clang_major__) && __clang_major__ == 17)
#define VIR_HAVE_WORKING_SHUFFLEVECTOR 1
#endif
#if __has_builtin(__builtin_bit_cast)
#define VIR_HAVE_BUILTIN_BIT_CAST 1
#endif
#endif
// Defaults for compilers without __has_builtin or without the respective builtin.
#ifndef VIR_HAVE_WORKING_SHUFFLEVECTOR
#define VIR_HAVE_WORKING_SHUFFLEVECTOR 0
#endif
#ifndef VIR_HAVE_BUILTIN_BIT_CAST
#define VIR_HAVE_BUILTIN_BIT_CAST 0
#endif
58
59
62namespace vir::meta
63{
64 template <typename T>
65 using is_simd_or_mask = std::disjunction<stdx::is_simd<T>, stdx::is_simd_mask<T>>;
66
67 template <typename T>
68 inline constexpr bool is_simd_or_mask_v = std::disjunction_v<stdx::is_simd<T>,
69 stdx::is_simd_mask<T>>;
70
/// Identity metafunction: the member alias `type` names T itself.
/// Wrapping a function parameter type in type_identity_t puts it in a non-deduced context.
template <typename T>
  struct type_identity
  {
    using type = T;
  };

/// Shorthand for `typename type_identity<T>::type`, i.e. T.
template <typename T>
  using type_identity_t = typename type_identity<T>::type;
77
/**
 * Metafunction yielding a signed integer type with the same size as T.
 *
 * Candidates are tried in this order: long long, long, int, short, signed char and, if the
 * compiler defines __SIZEOF_INT128__, __int128. The first candidate U with
 * sizeof(U) == sizeof(T) becomes the member alias `type`. If no candidate matches, there is no
 * member `type`.
 */
template <typename T, typename U = long long, bool = (sizeof(T) == sizeof(U))>
  struct as_int;

// Candidate U has the right size: done.
template <typename T, typename U>
  struct as_int<T, U, true>
  {
    using type = U;
  };

// Candidate has the wrong size: continue with the next candidate in the list.
template <typename T>
  struct as_int<T, long long, false> : as_int<T, long>
  {};

template <typename T>
  struct as_int<T, long, false> : as_int<T, int>
  {};

template <typename T>
  struct as_int<T, int, false> : as_int<T, short>
  {};

template <typename T>
  struct as_int<T, short, false> : as_int<T, signed char>
  {};

// __int128 is an extension; silence -Wpedantic for its use.
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#ifdef __SIZEOF_INT128__
template <typename T>
  struct as_int<T, signed char, false> : as_int<T, __int128>
  {};

// End of the list: no match, no member `type`.
template <typename T>
  struct as_int<T, __int128, false>
  {};
#else
// End of the list: no match, no member `type`.
template <typename T>
  struct as_int<T, signed char, false>
  {};
#endif // __SIZEOF_INT128__
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif

/// Shorthand for `typename as_int<T>::type`.
template <typename T>
  using as_int_t = typename as_int<T>::type;
120
/**
 * Metafunction yielding an unsigned integer type with the same size as T.
 *
 * Candidates are tried in this order: unsigned long long, unsigned long, unsigned int,
 * unsigned short, unsigned char and, if the compiler defines __SIZEOF_INT128__,
 * unsigned __int128. The first candidate U with sizeof(U) == sizeof(T) becomes the member alias
 * `type`. If no candidate matches, there is no member `type`.
 */
template <typename T, typename U = unsigned long long, bool = (sizeof(T) == sizeof(U))>
  struct as_unsigned;

// Candidate U has the right size: done.
template <typename T, typename U>
  struct as_unsigned<T, U, true>
  {
    using type = U;
  };

// Candidate has the wrong size: continue with the next candidate in the list.
template <typename T>
  struct as_unsigned<T, unsigned long long, false> : as_unsigned<T, unsigned long>
  {};

template <typename T>
  struct as_unsigned<T, unsigned long, false> : as_unsigned<T, unsigned int>
  {};

template <typename T>
  struct as_unsigned<T, unsigned int, false> : as_unsigned<T, unsigned short>
  {};

template <typename T>
  struct as_unsigned<T, unsigned short, false> : as_unsigned<T, unsigned char>
  {};

// unsigned __int128 is an extension; silence -Wpedantic for its use.
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#ifdef __SIZEOF_INT128__
template <typename T>
  struct as_unsigned<T, unsigned char, false> : as_unsigned<T, unsigned __int128>
  {};

// End of the list: no match, no member `type`.
template <typename T>
  struct as_unsigned<T, unsigned __int128, false>
  {};
#else
// End of the list: no match, no member `type`.
template <typename T>
  struct as_unsigned<T, unsigned char, false>
  {};
#endif // __SIZEOF_INT128__
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif

/// Shorthand for `typename as_unsigned<T>::type`.
template <typename T>
  using as_unsigned_t = typename as_unsigned<T>::type;
163}
164
167namespace vir::detail
168{
/// Constrained alias: FloatingPoint<T> is T if T is a floating-point type; otherwise naming it
/// is a substitution failure (SFINAE).
template <typename T, typename = std::enable_if_t<std::is_floating_point<T>::value>>
  using FloatingPoint = T;
171
172 using namespace vir::stdx;
173
178 template <typename T, int N>
179 using deduced_simd = stdx::simd<T, stdx::simd_abi::deduce_t<T, N>>;
180
181 template <typename T, int N>
182 using deduced_simd_mask = stdx::simd_mask<T, stdx::simd_abi::deduce_t<T, N>>;
183
/**
 * Rounds x up to a power of two: returns x unchanged if at most one bit is set (this includes
 * x == 0, which yields 0), otherwise the next larger power of two.
 *
 * The intermediate computation uses the promoted type of the expression `(x | (x >> 1)) + 1`
 * and the result is converted back to T on return.
 */
template <typename T>
  constexpr T
  bit_ceil(T x)
  {
    using Promoted = decltype((x | (x >> 1)) + 1);
    Promoted value = x;
    // Each step smears the set bits one position downwards and adds one, which moves the value
    // towards the next power of two without ever exceeding it.
    while ((value & (value - 1)) != 0)
      value = (value | (value >> 1)) + 1;
    return static_cast<T>(value);
  }
188
189#ifdef __GNUC__
190 template <typename T, int N, unsigned Bytes = sizeof(T) * bit_ceil(unsigned(N))>
191 using gnu_vector [[gnu::vector_size(Bytes)]] = T;
192
193 template <typename T>
194 std::true_type
195 is_vec_builtin_impl(gnu_vector<std::remove_cv_t<std::remove_reference_t<decltype(T()[0])>>,
196 0, sizeof(T)>);
197#endif
198
199 template <typename T>
200 std::false_type
201 is_vec_builtin_impl(...);
202
203 template <typename T>
204 struct is_vec_builtin
205 : decltype(is_vec_builtin_impl<T>(std::declval<T>()))
206 {};
207
208 template <typename T>
209 constexpr bool is_vec_builtin_v = is_vec_builtin<T>::value;
210
211 namespace test
212 {
213 static_assert(not is_vec_builtin_v<int>);
214#ifdef __GNUC__
215 static_assert(is_vec_builtin_v<gnu_vector<int, 4>>);
216#endif
217 }
218
// internal_data_hack(obj, 0) is an overload set used by the bit_cast fallback.
// The literal 0 prefers the `int` overloads; each of those is only viable if the respective
// accessor call is well-formed and its result has the same size as T. Otherwise the `float`
// overload is selected.

/// Fallback: hands back obj itself (return type T, i.e. a reference if T is a reference type).
template <typename T>
  T
  internal_data_hack(T&& obj, float)
  { return obj; }

/// Selected if `__data(obj)` is well-formed and its result type has the same size as T:
/// returns `__data(obj)`.
template <typename T>
  auto
  internal_data_hack(T&& obj, int)
    -> std::enable_if_t<sizeof(T) == sizeof(decltype(__data(obj))), decltype(__data(obj))>
  { return __data(obj); }

/// Selected if `_data_(obj)` is well-formed and its result type has the same size as T:
/// returns `_data_(obj)`.
template <typename T>
  auto
  internal_data_hack(T&& obj, int)
    -> std::enable_if_t<sizeof(T) == sizeof(decltype(_data_(obj))), decltype(_data_(obj))>
  { return _data_(obj); }
235
236 template <typename To, typename From>
237 constexpr To
238 bit_cast(const From& x)
239 {
240 static_assert(sizeof(To) == sizeof(From));
241#if VIR_HAVE_STD_BIT_CAST
242 return std::bit_cast<To>(x);
243#elif VIR_HAVE_BUILTIN_BIT_CAST
244 return __builtin_bit_cast(To, x);
245#else
246 if constexpr (is_vec_builtin_v<To>)
247 return reinterpret_cast<To>(x);
248 else
249 {
250 To r;
251 std::memcpy(&internal_data_hack(r, 0), &internal_data_hack(x, 0), sizeof(x));
252 return r;
253 }
254#endif
255 }
256
257#if VIR_HAVE_CONSTEXPR_WRAPPER
258 template <int Iterations, auto I = 0, typename F>
259 [[gnu::always_inline, gnu::flatten]]
260 constexpr void
261 unroll(F&& fun)
262 {
263 if constexpr (Iterations != 0)
264 {
265 fun(vir::cw<I>);
266 unroll<Iterations - 1, I + 1>(static_cast<F&&>(fun));
267 }
268 }
269
270 template <int Iterations>
271 [[gnu::always_inline, gnu::flatten]]
272 constexpr void
273 unroll2(auto&& fun0, auto&& fun1)
274 {
275 [&]<int... Is>(std::integer_sequence<int, Is...>) VIR_LAMBDA_ALWAYS_INLINE {
276 [&](auto&&... r0s) VIR_LAMBDA_ALWAYS_INLINE {
277 (fun1(vir::cw<Is>, static_cast<decltype(r0s)>(r0s)), ...);
278 }(fun0(vir::cw<Is>)...);
279 }(std::make_integer_sequence<int, Iterations>());
280 }
281#endif
282}
283
284#endif // VIR_DETAILS_H