vir-simd 0.4.189
Parallelism TS 2 extensions and simd fallback implementation
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
simd.h
1/* SPDX-License-Identifier: LGPL-3.0-or-later */
2/* Copyright © 2022–2024 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
3 * Matthias Kretz <m.kretz@gsi.de>
4 */
5
6#ifndef VIR_SIMD_H_
7#define VIR_SIMD_H_
8
9#ifdef _MSVC_LANG
10#if _MSVC_LANG < 201703L
11#error "simd requires C++17 or later"
12#endif
13#else
14#if __cplusplus < 201703L
15#error "simd requires C++17 or later"
16#endif
17#endif
18
#include "simd_version.h"

#include <cstdio>  // std::fprintf, stderr
#include <cstdlib> // std::abort
22
23#if __has_include (<experimental/simd>) && !defined VIR_DISABLE_STDX_SIMD \
24 && (!defined __clang_major__ || !defined __GNUC__ || __GLIBCXX__ >= 20230525)
25#include <experimental/simd>
26#endif
27
28#ifndef VIR_ALWAYS_INLINE
29#ifdef __GNUC__
30#define VIR_ALWAYS_INLINE [[gnu::always_inline]] inline
31#define VIR_GNU_COLD [[gnu::cold]]
32#define VIR_GNU_ATTR_COLD __attribute__((cold))
33#else
34#define VIR_ALWAYS_INLINE __forceinline
35#define VIR_GNU_COLD
36#define VIR_GNU_ATTR_COLD
37#endif
38#endif
39
40namespace vir::detail
41{
42 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE void
43 unreachable()
44 {
45#if defined __GNUC__
46 __builtin_unreachable();
47#elif defined __has_cpp_attribute and __has_cpp_attribute(assume)
48 [[assume(false)]];
49#else
50 __assume(false);
51#endif
52 }
53
54 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE void
55 trap()
56 {
57#if defined __GNUC__
58 __builtin_trap();
59#else
60 std::abort();
61#endif
62 }
63
64 template <typename... Args>
65 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE void
66 invoke_ub([[maybe_unused]] const char* msg,
67 [[maybe_unused]] const Args&... args)
68 {
69#if VIR_CHECK_PRECONDITIONS < 2
70 unreachable();
71#elif defined __GNUC__ and VIR_CHECK_PRECONDITIONS < 4
72 __builtin_trap();
73#else
74 [&] VIR_GNU_ATTR_COLD () {
75 std::fprintf(stderr, msg, args...);
76 }();
77 std::abort();
78#endif
79 }
80}
81
// Two-step stringification: the extra indirection lets the argument (e.g. __LINE__) be
// macro-expanded before '#' turns it into a string literal.
#define VIR_SIMD_TOSTRING_IMPL(x) #x
#define VIR_SIMD_TOSTRING(x) VIR_SIMD_TOSTRING_IMPL(x)
// String literal of the form "<file>:<line>: " for the place where the macro is expanded.
#define VIR_SIMD_LOC __FILE__ ":" VIR_SIMD_TOSTRING(__LINE__) ": "

/* VIR_CHECK_PRECONDITIONS:
 * 0: Compile-time warning, invoke UB on run-time failure.
 * 1: Compile-time error, invoke UB on run-time failure.
 * 2: Compile-time warning, trap on run-time failure.
 * 3: Compile-time error, trap on run-time failure.
 * 4: Compile-time warning, print error and abort on run-time failure.
 * 5: Compile-time error, print error and abort on run-time failure.
 */

// Level 3 is used unless the macro was already defined.
#ifndef VIR_CHECK_PRECONDITIONS
#define VIR_CHECK_PRECONDITIONS 3
#endif

// Diagnose levels outside the documented range 0..5.
#if VIR_CHECK_PRECONDITIONS > 5 or VIR_CHECK_PRECONDITIONS < 0
#warning "Invalid value for VIR_CHECK_PRECONDITIONS."
#endif

// Compiler-specific spelling of the "signature of the enclosing function" string that is
// inserted into precondition failure messages.
#ifdef __GNUC__
#define VIR_PRETTY_FUNCTION_ __PRETTY_FUNCTION__
#else
#define VIR_PRETTY_FUNCTION_ __FUNCSIG__
#endif
108
// vir_simd_precondition(expr, msg)
// vir_simd_precondition_vaargs(expr, msg, ...)
//
// Check that `expr` holds. `msg` must be a string literal; it is pasted into a printf-style
// format string. The _vaargs variant forwards the extra arguments to that format string
// (after the function name, which fills the leading '%s').
//
// Three implementations are selected below:
//  1. negative level: `expr` is only evaluated and converted to bool, nothing is checked.
//  2. Clang or GCC >= 10: additionally diagnose at compile time when the compiler can prove
//     (via __builtin_constant_p) that the condition is false.
//  3. any other compiler: run-time check only.
#if VIR_CHECK_PRECONDITIONS < 0
#define vir_simd_precondition(expr, msg) \
  (void) bool(expr)

#define vir_simd_precondition_vaargs(expr, msg, ...) \
  (void) bool(expr)

#elif defined __clang__ or __GNUC__ >= 10
// Odd levels select the __error__ attribute, even levels the __warning__ attribute, for the
// helper lambda that is called on a provably failing precondition.
#if (VIR_CHECK_PRECONDITIONS & 1) == 1
#define VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION __error__
#else
#define VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION __warning__
#endif
// __noipa__ is only added for GCC proper (not Clang); note the trailing comma, which makes
// the macro usable inside an attribute list.
#if defined __GNUC__ and not defined __clang__
#define VIR_ATTR_NOIPA __noipa__,
#else
#define VIR_ATTR_NOIPA
#endif
// Provably false condition: call a noinline/noreturn lambda carrying the error/warning
// attribute (its body traps). Otherwise, on run-time failure, hand the formatted message to
// vir::detail::invoke_ub.
#define vir_simd_precondition(expr, msg) \
  do { \
    const bool precondition_result = bool(expr); \
    if (__builtin_constant_p(precondition_result) and not precondition_result) \
      []() __attribute__((__noinline__, __noreturn__, VIR_ATTR_NOIPA \
        VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION("precondition failure." \
        "\n" VIR_SIMD_LOC "note: " msg " (precondition '" #expr "' does not hold)"))) \
      { vir::detail::trap(); }(); \
    else if (__builtin_expect(not precondition_result, false)) \
      vir::detail::invoke_ub( \
        VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
        VIR_PRETTY_FUNCTION_); \
  } while(false)

// Same as above, with the additional format arguments appended after the function name.
#define vir_simd_precondition_vaargs(expr, msg, ...) \
  do { \
    const bool precondition_result = bool(expr); \
    if (__builtin_constant_p(precondition_result) and not precondition_result) \
      []() __attribute__((__noinline__, __noreturn__, VIR_ATTR_NOIPA \
        VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION("precondition failure." \
        "\n" VIR_SIMD_LOC "note: " msg " (precondition '" #expr "' does not hold)"))) \
      { vir::detail::trap(); }(); \
    else if (__builtin_expect(not precondition_result, false)) \
      vir::detail::invoke_ub( \
        VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
        VIR_PRETTY_FUNCTION_, __VA_ARGS__); \
  } while(false)

#else
// Generic fallback: run-time check only, forwarding to vir::detail::invoke_ub on failure.
#define vir_simd_precondition(expr, msg) \
  do { \
    const bool precondition_result = bool(expr); \
    if (not precondition_result) [[unlikely]] \
      vir::detail::invoke_ub( \
        VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
        VIR_PRETTY_FUNCTION_); \
  } while(false)

#define vir_simd_precondition_vaargs(expr, msg, ...) \
  do { \
    const bool precondition_result = bool(expr); \
    if (not precondition_result) [[unlikely]] \
      vir::detail::invoke_ub( \
        VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
        VIR_PRETTY_FUNCTION_, __VA_ARGS__); \
  } while(false)

#endif
175
176
177#if defined __cpp_lib_experimental_parallel_simd && __cpp_lib_experimental_parallel_simd >= 201803
178
179#define VIR_HAVE_STD_SIMD 1
180
// The standard library ships <experimental/simd>: make its names available as vir::stdx.
namespace vir::stdx
{
  using namespace std::experimental::parallelism_v2;
  // NOTE(review): __proposed presumably holds the library's extensions beyond the TS —
  // confirm against the standard library in use.
  using namespace std::experimental::parallelism_v2::__proposed;
}
186
187#else
188
189#include <algorithm>
190#include <cmath>
191#include <cstring>
192#ifdef _GLIBCXX_DEBUG_UB
193#include <cstdio>
194#endif
195#include <functional>
196#include <limits>
197#include <tuple>
198#include <type_traits>
199#include <utility>
200
201#define VIR_HAVE_VIR_SIMD 1
202
203#ifdef VIR_SIMD_TS_DROPIN
204namespace std::experimental
205{
206 inline namespace [[gnu::diagnose_as("virx")]] parallelism_v2
207#else
208namespace vir::stdx
209#endif
210{
211 using std::size_t;
212
namespace detail
{
  // Identity metafunction: type_identity<X>::type is X.
  template <class Type>
    struct type_identity
    {
      using type = Type;
    };

  template <class Type>
    using type_identity_t = typename type_identity<Type>::type;

  // Smallest power of two that is not less than x (1 for x == 0).
  constexpr size_t
  bit_ceil(size_t x)
  {
    size_t pow2 = 1;
    for (; pow2 < x; pow2 <<= 1)
      ;
    return pow2;
  }

  // Value of the most significant set bit of x (0 for x == 0).
  constexpr size_t
  bit_floor(size_t x)
  {
    size_t highest = 0;
    while (x != 0)
      {
        highest = x; // remember the value, then clear its lowest set bit
        x &= x - 1;
      }
    return highest;
  }

  // Overload pair for value_type_or_identity_t: the int overload is the better match for an
  // int argument and yields Type::value_type; if that member does not exist it is removed
  // by SFINAE and the float overload (yielding Type itself) is chosen.
  template <class Type>
    typename Type::value_type
    value_type_or_identity_impl(int);

  template <class Type>
    Type
    value_type_or_identity_impl(float);

  template <class Type>
    using value_type_or_identity_t = decltype(value_type_or_identity_impl<Type>(int{}));

  // bool wrapper whose converting constructor accepts bool; construction from int is deleted.
  class ExactBool
  {
  public:
    constexpr ExactBool(bool b) : data(b) {}

    ExactBool(int) = delete;

    constexpr operator bool() const { return data; }

  private:
    const bool data;
  };

  template <class Type>
    using remove_cvref_t =
      typename std::remove_cv<typename std::remove_reference<Type>::type>::type;

  // Shorthand for std::numeric_limits.
  template <class Type>
    using L = std::numeric_limits<Type>;

  template <bool Value>
    using BoolConstant = std::bool_constant<Value>;

  template <size_t Value>
    using SizeConstant = std::integral_constant<size_t, Value>;

  // Element I of the concatenation of the given simd-like objects (each provides a static
  // size() and operator[]).
  template <size_t I, typename T, typename... Ts>
    constexpr auto
    pack_simd_subscript(const T& x0, const Ts&... xs)
    {
      if constexpr (I < T::size())
        return x0[I];
      else
        return pack_simd_subscript<I - T::size()>(xs...);
    }

  // Vectorizable types: all arithmetic types except bool.
  template <class Type>
    struct is_vectorizable
    : std::is_arithmetic<Type>
    {};

  template <>
    struct is_vectorizable<bool>
    : std::false_type
    {};

  template <class Type>
    inline constexpr bool is_vectorizable_v = is_vectorizable<Type>::value;

  // Base class that cannot be constructed, copied, moved or destroyed unless Type is
  // vectorizable.
  template <class Type, typename Enable = void>
    struct only_vectorizable
    {
      ~only_vectorizable() = delete;
      only_vectorizable() = delete;
      only_vectorizable(only_vectorizable&&) = delete;
      only_vectorizable(const only_vectorizable&) = delete;
    };

  template <class Type>
    struct only_vectorizable<Type, std::enable_if_t<is_vectorizable_v<Type>>>
    {};

  // Constrained aliases: each one names its first argument and is only valid if the
  // constraint in the defaulted second argument holds.

  // ... vectorizable type
  template <typename T, typename = std::enable_if_t<is_vectorizable_v<T>>>
    using Vectorizable = T;

  // ... floating-point type
  template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
    using FloatingPoint = T;

  // ... signed integer type
  template <typename T,
            typename = std::enable_if_t<
                         std::conjunction<std::is_integral<T>, std::is_signed<T>>::value>>
    using SignedIntegral = T;

  // is_higher_integer_rank<Hi, Lo>: Hi has higher or equal integer rank than Lo. The two
  // bool parameters dispatch on the size relation of the types.
  template <class Hi, class Lo, bool Larger = (sizeof(Hi) > sizeof(Lo)),
            bool SameSize = (sizeof(Hi) == sizeof(Lo))>
    struct is_higher_integer_rank;

  // identical types
  template <class Same>
    struct is_higher_integer_rank<Same, Same, false, true>
    : std::true_type
    {};

  // Hi is larger
  template <class Hi, class Lo>
    struct is_higher_integer_rank<Hi, Lo, true, false>
    : std::true_type
    {};

  // Hi is smaller
  template <class Hi, class Lo>
    struct is_higher_integer_rank<Hi, Lo, false, false>
    : std::false_type
    {};

  // Equal size, different types: decided by the type of Hi + Lo.
  // This may fail for char -> short if sizeof(char) == sizeof(short).
  template <class Hi, class Lo>
    struct is_higher_integer_rank<Hi, Lo, false, true>
    : std::is_same<decltype(std::declval<Hi>() + std::declval<Lo>()), Hi>
    {};

  // is_value_preserving<From, To>; the bool parameters record whether From / To are
  // arithmetic.
  template <class From, class To, bool FromIsArithmetic = std::is_arithmetic_v<From>,
            bool ToIsArithmetic = std::is_arithmetic_v<To>>
    struct is_value_preserving;

  // Arithmetic -> arithmetic. Ignore "signed/unsigned mismatch" in the comparisons below;
  // the implicit conversions do the right thing here.
  template <class From, class To>
    struct is_value_preserving<From, To, true, true>
    : BoolConstant<(!(std::is_signed_v<From> && std::is_unsigned_v<To>)
                      && L<From>::digits <= L<To>::digits
                      && L<From>::lowest() >= L<To>::lowest()
                      && L<From>::max() <= L<To>::max())>
    {};

  // Nothing but bool itself converts to bool without loss.
  template <class From>
    struct is_value_preserving<From, bool, true, true>
    : std::false_type
    {};

  template <>
    struct is_value_preserving<bool, bool, true, true>
    : std::true_type
    {};

  // Identity conversion.
  template <class Same>
    struct is_value_preserving<Same, Same, true, true>
    : std::true_type
    {};

  // Non-arithmetic source: fall back to convertibility.
  template <class From, class To>
    struct is_value_preserving<From, To, false, true>
    : std::is_convertible<From, To>
    {};

  // Names From if converting (the cv/ref-stripped) From to To preserves values.
  template <class From, class To,
            class = std::enable_if_t<is_value_preserving<remove_cvref_t<From>, To>::value>>
    using ValuePreserving = From;

  // Names From if From converts to To and the stripped source type is To itself, int,
  // unsigned (only for unsigned To), or value-preserving.
  template <class From, class To, class DecayedFrom = remove_cvref_t<From>,
            class = std::enable_if_t<std::conjunction_v<
                      std::is_convertible<From, To>,
                      std::disjunction<
                        std::is_same<DecayedFrom, To>,
                        std::is_same<DecayedFrom, int>,
                        std::conjunction<std::is_same<DecayedFrom, unsigned>,
                                         std::is_unsigned<To>>,
                        is_value_preserving<DecayedFrom, To>>>>>
    using ValuePreservingOrInt = From;

  // LoadStorePtr / is_possible_loadstore_conversion: both types vectorizable, or both bool.
  template <class Ptr, class ValueType>
    struct is_possible_loadstore_conversion
    : std::conjunction<is_vectorizable<Ptr>, is_vectorizable<ValueType>>
    {};

  template <>
    struct is_possible_loadstore_conversion<bool, bool>
    : std::true_type
    {};

  // Names Ptr if it is allowed for load/store with the given value type.
  template <class Ptr, class ValueType,
            class = std::enable_if_t<is_possible_loadstore_conversion<Ptr, ValueType>::value>>
    using LoadStorePtr = Ptr;
}
413
namespace simd_abi
{
  // ABI tag: one element.
  struct scalar
  {};

  // Largest element count offered for fixed_size, independent of the element type.
  template <typename>
    inline constexpr int max_fixed_size = 32;

  // ABI tag: exactly N elements.
  template <int N>
    struct fixed_size
    {};

  // ABI chosen for the "native" width: scalar for element types wider than 8 bytes,
  // otherwise as many elements as fit into the byte width selected from the AVX-512 /
  // AVX2 / AVX feature macros (16 bytes when none is defined).
  template <class T>
    using native =
      std::conditional_t<(sizeof(T) > 8),
                         scalar,
                         fixed_size<
#ifdef __AVX512F__
                           64
#elif defined __AVX2__
                           32
#elif defined __AVX__
                           // The parentheses are required: without them the division
                           // below binds to the '16' branch only, and floating-point
                           // types would get 32 *elements* instead of 32 bytes.
                           (std::is_floating_point_v<T> ? 32 : 16)
#else
                           16
#endif
                           / sizeof(T)
                         >
                        >;

  // ABI with a fixed 16-byte width (scalar for element types wider than 8 bytes).
  template <class T>
    using compatible = std::conditional_t<(sizeof(T) > 8),
                                          scalar,
                                          fixed_size<16 / sizeof(T)>>;

  // ABI tag for N elements of T: scalar for N == 1, fixed_size<N> otherwise. Additional
  // ABI arguments are accepted and ignored.
  template <typename T, size_t N, typename...>
    struct deduce
    { using type = std::conditional_t<N == 1, scalar, fixed_size<int(N)>>; };

  template <typename T, size_t N, typename... Abis>
    using deduce_t = typename deduce<T, N, Abis...>::type;
}
456
457 // flags //
// Each flag is an empty tag type paired with a ready-made constant of that type.
struct element_aligned_tag {};
inline constexpr element_aligned_tag element_aligned = {};

struct vector_aligned_tag {};
inline constexpr vector_aligned_tag vector_aligned = {};

// The over-alignment flag is parameterized on a size_t value.
template <size_t Alignment>
  struct overaligned_tag {};

template <size_t Alignment>
  inline constexpr overaligned_tag<Alignment> overaligned = {};
474
475 // fwd decls //
476 template <class T, class A = simd_abi::compatible<T>>
477 class simd
478 {
479 simd() = delete;
480 simd(const simd&) = delete;
481 ~simd() = delete;
482 };
483
484 template <class T, class A = simd_abi::compatible<T>>
485 class simd_mask
486 {
487 simd_mask() = delete;
488 simd_mask(const simd_mask&) = delete;
489 ~simd_mask() = delete;
490 };
491
492 // aliases //
493 template <class T>
494 using native_simd = simd<T, simd_abi::native<T>>;
495
496 template <class T>
497 using native_simd_mask = simd_mask<T, simd_abi::native<T>>;
498
499 template <class T, int N>
500 using fixed_size_simd = simd<T, simd_abi::fixed_size<N>>;
501
502 template <class T, int N>
503 using fixed_size_simd_mask = simd_mask<T, simd_abi::fixed_size<N>>;
504
505 // Traits //
506 template <class T>
507 struct is_abi_tag : std::false_type
508 {};
509
510 template <class T>
511 inline constexpr bool is_abi_tag_v = is_abi_tag<T>::value;
512
513 template <>
514 struct is_abi_tag<simd_abi::scalar> : std::true_type
515 {};
516
517 template <int N>
518 struct is_abi_tag<simd_abi::fixed_size<N>> : std::true_type
519 {};
520
521 template <class T>
522 struct is_simd : std::false_type
523 {};
524
525 template <class T>
526 inline constexpr bool is_simd_v = is_simd<T>::value;
527
528 template <class T, class A>
529 struct is_simd<simd<T, A>>
530 : std::conjunction<detail::is_vectorizable<T>, is_abi_tag<A>>
531 {};
532
533 template <class T>
534 struct is_simd_mask : std::false_type
535 {};
536
537 template <class T>
538 inline constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
539
540 template <class T, class A>
541 struct is_simd_mask<simd_mask<T, A>>
542 : std::conjunction<detail::is_vectorizable<T>, is_abi_tag<A>>
543 {};
544
// The flag tag types are defined earlier in this header. Redeclaring them is harmless and
// keeps the trait together with the specializations that depend on them.
struct element_aligned_tag;
struct vector_aligned_tag;
template <size_t>
  struct overaligned_tag;

// is_simd_flag_type: true for the load/store flag tag types (element_aligned_tag,
// vector_aligned_tag and every overaligned_tag<N>), false for everything else.
template <class T>
  struct is_simd_flag_type : std::false_type
  {};

template <>
  struct is_simd_flag_type<element_aligned_tag> : std::true_type
  {};

template <>
  struct is_simd_flag_type<vector_aligned_tag> : std::true_type
  {};

template <size_t N>
  struct is_simd_flag_type<overaligned_tag<N>> : std::true_type
  {};

template <class T>
  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;
551
552 template <class T, class A = simd_abi::compatible<T>>
553 struct simd_size;
554
555 template <class T, class A = simd_abi::compatible<T>>
556 inline constexpr size_t simd_size_v = simd_size<T, A>::value;
557
558 template <class T>
559 struct simd_size<detail::Vectorizable<T>, simd_abi::scalar>
560 : std::integral_constant<size_t, 1>
561 {};
562
563 template <class T, int N>
564 struct simd_size<detail::Vectorizable<T>, simd_abi::fixed_size<N>>
565 : std::integral_constant<size_t, N>
566 {};
567
568 template <class T, class U = typename T::value_type>
569 struct memory_alignment;
570
571 template <class T, class U = typename T::value_type>
572 inline constexpr size_t memory_alignment_v = memory_alignment<T, U>::value;
573
574 template <class T, class A, class U>
575 struct memory_alignment<simd<T, A>, detail::Vectorizable<U>>
576 : std::integral_constant<size_t, alignof(U)>
577 {};
578
579 template <class T, class A>
580 struct memory_alignment<simd_mask<T, A>, bool>
581 : std::integral_constant<size_t, alignof(bool)>
582 {};
583
584 template <class T, class V,
585 class = typename std::conjunction<detail::is_vectorizable<T>,
586 std::disjunction<is_simd<V>, is_simd_mask<V>>>::type>
587 struct rebind_simd;
588
589 template <class T, class V>
590 using rebind_simd_t = typename rebind_simd<T, V>::type;
591
592 template <class T, class U, class A>
593 struct rebind_simd<T, simd<U, A>, std::true_type>
594 { using type = simd<T, A>; };
595
596 template <class T, class U, class A>
597 struct rebind_simd<T, simd_mask<U, A>, std::true_type>
598 { using type = simd_mask<T, A>; };
599
600 template <int N, class V,
601 class = typename std::conjunction<
602 detail::BoolConstant<(N > 0)>,
603 std::disjunction<is_simd<V>, is_simd_mask<V>>
604 >::type>
605 struct resize_simd;
606
607 template <int N, class V>
608 using resize_simd_t = typename resize_simd<N, V>::type;
609
610 template <int N, class T, class A>
611 struct resize_simd<N, simd<T, A>, std::true_type>
612 {
613 using type = simd<T, std::conditional_t<N == 1, simd_abi::scalar, simd_abi::fixed_size<N>>>;
614 };
615
616 template <int N, class T, class A>
617 struct resize_simd<N, simd_mask<T, A>, std::true_type>
618 {
619 using type = simd_mask<T, std::conditional_t<
620 N == 1, simd_abi::scalar, simd_abi::fixed_size<N>>>;
621 };
622
623 // simd_mask (scalar)
624 template <class T>
625 class simd_mask<detail::Vectorizable<T>, simd_abi::scalar>
626 : public detail::only_vectorizable<T>
627 {
628 bool data;
629
630 public:
631 using value_type = bool;
632 using reference = bool&;
633 using abi_type = simd_abi::scalar;
634 using simd_type = simd<T, abi_type>;
635
636 static constexpr size_t size() noexcept
637 { return 1; }
638
639 constexpr simd_mask() = default;
640 constexpr simd_mask(const simd_mask&) = default;
641 constexpr simd_mask(simd_mask&&) noexcept = default;
642 constexpr simd_mask& operator=(const simd_mask&) = default;
643 constexpr simd_mask& operator=(simd_mask&&) noexcept = default;
644
645 // explicit broadcast constructor
646 explicit constexpr
647 simd_mask(bool x)
648 : data(x) {}
649
650 template <typename F>
651 explicit constexpr
652 simd_mask(F&& gen, std::enable_if_t<
653 std::is_same_v<decltype(std::declval<F>()(detail::SizeConstant<0>())),
654 value_type>>* = nullptr)
655 : data(gen(detail::SizeConstant<0>()))
656 {}
657
658 // load constructor
659 template <typename Flags>
660 constexpr
661 simd_mask(const value_type* mem, Flags)
662 : data(mem[0])
663 {}
664
665 template <typename Flags>
666 constexpr
667 simd_mask(const value_type* mem, simd_mask k, Flags)
668 : data(k ? mem[0] : false)
669 {}
670
671 // loads [simd_mask.load]
672 template <typename Flags>
673 constexpr void
674 copy_from(const value_type* mem, Flags)
675 { data = mem[0]; }
676
677 // stores [simd_mask.store]
678 template <typename Flags>
679 constexpr void
680 copy_to(value_type* mem, Flags) const
681 { mem[0] = data; }
682
683 // scalar access
684 constexpr reference
685 operator[](size_t i)
686 {
687 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
688 i, size() - 1);
689 return data;
690 }
691
692 constexpr value_type
693 operator[](size_t i) const
694 {
695 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
696 i, size() - 1);
697 return data;
698 }
699
700 // negation
701 constexpr simd_mask
702 operator!() const
703 { return simd_mask(not data); }
704
705 // simd_mask binary operators [simd_mask.binary]
706 friend constexpr simd_mask
707 operator&&(const simd_mask& x, const simd_mask& y)
708 { return simd_mask(x.data && y.data); }
709
710 friend constexpr simd_mask
711 operator||(const simd_mask& x, const simd_mask& y)
712 { return simd_mask(x.data || y.data); }
713
714 friend constexpr simd_mask
715 operator&(const simd_mask& x, const simd_mask& y)
716 { return simd_mask(x.data & y.data); }
717
718 friend constexpr simd_mask
719 operator|(const simd_mask& x, const simd_mask& y)
720 { return simd_mask(x.data | y.data); }
721
722 friend constexpr simd_mask
723 operator^(const simd_mask& x, const simd_mask& y)
724 { return simd_mask(x.data ^ y.data); }
725
726 friend constexpr simd_mask&
727 operator&=(simd_mask& x, const simd_mask& y)
728 {
729 x.data &= y.data;
730 return x;
731 }
732
733 friend constexpr simd_mask&
734 operator|=(simd_mask& x, const simd_mask& y)
735 {
736 x.data |= y.data;
737 return x;
738 }
739
740 friend constexpr simd_mask&
741 operator^=(simd_mask& x, const simd_mask& y)
742 {
743 x.data ^= y.data;
744 return x;
745 }
746
747 // simd_mask compares [simd_mask.comparison]
748 friend constexpr simd_mask
749 operator==(const simd_mask& x, const simd_mask& y)
750 { return simd_mask(x.data == y.data); }
751
752 friend constexpr simd_mask
753 operator!=(const simd_mask& x, const simd_mask& y)
754 { return simd_mask(x.data != y.data); }
755 };
756
757 // simd_mask (fixed_size)
758 template <class T, int N>
759 class simd_mask<detail::Vectorizable<T>, simd_abi::fixed_size<N>>
760 : public detail::only_vectorizable<T>
761 {
762 private:
763 template <typename V, int M, size_t Parts>
764 friend constexpr
765 std::enable_if_t<M == Parts * V::size() && is_simd_mask_v<V>, std::array<V, Parts>>
766 split(const simd_mask<typename V::simd_type::value_type, simd_abi::fixed_size<M>>&);
767
768 bool data[N];
769
770 template <typename F, size_t... Is>
771 constexpr
772 simd_mask(std::index_sequence<Is...>, F&& init)
773 : data {init(detail::SizeConstant<Is>())...}
774 {}
775
776 public:
777 using value_type = bool;
778 using reference = bool&;
779 using abi_type = simd_abi::fixed_size<N>;
780 using simd_type = simd<T, abi_type>;
781
782 static constexpr size_t size() noexcept
783 { return N; }
784
785 constexpr simd_mask() = default;
786 constexpr simd_mask(const simd_mask&) = default;
787 constexpr simd_mask(simd_mask&&) noexcept = default;
788 constexpr simd_mask& operator=(const simd_mask&) = default;
789 constexpr simd_mask& operator=(simd_mask&&) noexcept = default;
790
791 // explicit broadcast constructor
792 explicit constexpr
793 simd_mask(bool x)
794 : simd_mask(std::make_index_sequence<N>(), [x](size_t) { return x; })
795 {}
796
797 template <typename F>
798 explicit constexpr
799 simd_mask(F&& gen, std::enable_if_t<
800 std::is_same_v<decltype(std::declval<F>()(detail::SizeConstant<0>())),
801 value_type>>* = nullptr)
802 : simd_mask(std::make_index_sequence<N>(), std::forward<F>(gen))
803 {}
804
805 // implicit conversions
806 template <typename U>
807 constexpr
808 simd_mask(const simd_mask<U, abi_type>& x)
809 : simd_mask(std::make_index_sequence<N>(), [&x](size_t i) { return x[i]; })
810 {}
811
812 // load constructor
813 template <typename Flags>
814 simd_mask(const value_type* mem, Flags)
815 : simd_mask(std::make_index_sequence<N>(), [mem](size_t i) { return mem[i]; })
816 {}
817
818 template <typename Flags>
819 simd_mask(const value_type* mem, const simd_mask& k, Flags)
820 : simd_mask(std::make_index_sequence<N>(),
821 [mem, &k](size_t i) { return k[i] ? mem[i] : false; })
822 {}
823
824 // loads [simd_mask.load]
825 template <typename Flags>
826 void
827 copy_from(const value_type* mem, Flags)
828 { std::memcpy(data, mem, N * sizeof(bool)); }
829
830 // stores [simd_mask.store]
831 template <typename Flags>
832 void
833 copy_to(value_type* mem, Flags) const
834 { std::memcpy(mem, data, N * sizeof(bool)); }
835
836 // scalar access
837 constexpr reference
838 operator[](size_t i)
839 {
840 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
841 i, size() - 1);
842 return data[i];
843 }
844
845 constexpr value_type
846 operator[](size_t i) const
847 {
848 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
849 i, size() - 1);
850 return data[i];
851 }
852
853 // negation
854 constexpr simd_mask
855 operator!() const
856 {
857 simd_mask r {};
858 for (int i = 0; i < N; ++i)
859 r.data[i] = !data[i];
860 return r;
861 }
862
863 // simd_mask binary operators [simd_mask.binary]
864 friend constexpr simd_mask
865 operator&&(const simd_mask& x, const simd_mask& y)
866 {
867 simd_mask r {};
868 for (int i = 0; i < N; ++i)
869 r.data[i] = x.data[i] & y.data[i];
870 return r;
871 }
872
873 friend constexpr simd_mask
874 operator||(const simd_mask& x, const simd_mask& y)
875 {
876 simd_mask r {};
877 for (int i = 0; i < N; ++i)
878 r.data[i] = x.data[i] | y.data[i];
879 return r;
880 }
881
882 friend constexpr simd_mask
883 operator&(const simd_mask& x, const simd_mask& y)
884 {
885 simd_mask r {};
886 for (int i = 0; i < N; ++i)
887 r.data[i] = x.data[i] & y.data[i];
888 return r;
889 }
890
891 friend constexpr simd_mask
892 operator|(const simd_mask& x, const simd_mask& y)
893 {
894 simd_mask r {};
895 for (int i = 0; i < N; ++i)
896 r.data[i] = x.data[i] | y.data[i];
897 return r;
898 }
899
900 friend constexpr simd_mask
901 operator^(const simd_mask& x, const simd_mask& y)
902 {
903 simd_mask r {};
904 for (int i = 0; i < N; ++i)
905 r.data[i] = x.data[i] ^ y.data[i];
906 return r;
907 }
908
909 friend constexpr simd_mask&
910 operator&=(simd_mask& x, const simd_mask& y)
911 {
912 for (int i = 0; i < N; ++i)
913 x.data[i] &= y.data[i];
914 return x;
915 }
916
917 friend constexpr simd_mask&
918 operator|=(simd_mask& x, const simd_mask& y)
919 {
920 for (int i = 0; i < N; ++i)
921 x.data[i] |= y.data[i];
922 return x;
923 }
924
925 friend constexpr simd_mask&
926 operator^=(simd_mask& x, const simd_mask& y)
927 {
928 for (int i = 0; i < N; ++i)
929 x.data[i] ^= y.data[i];
930 return x;
931 }
932
933 // simd_mask compares [simd_mask.comparison]
934 friend constexpr simd_mask
935 operator==(const simd_mask& x, const simd_mask& y)
936 {
937 simd_mask r {};
938 for (int i = 0; i < N; ++i)
939 r.data[i] = x.data[i] == y.data[i];
940 return r;
941 }
942
943 friend constexpr simd_mask
944 operator!=(const simd_mask& x, const simd_mask& y)
945 {
946 simd_mask r {};
947 for (int i = 0; i < N; ++i)
948 r.data[i] = x.data[i] != y.data[i];
949 return r;
950 }
951 };
952
953 // simd_mask reductions [simd_mask.reductions]
954 template <typename T>
955 constexpr bool
956 all_of(simd_mask<T, simd_abi::scalar> k) noexcept
957 { return k[0]; }
958
959 template <typename T>
960 constexpr bool
961 any_of(simd_mask<T, simd_abi::scalar> k) noexcept
962 { return k[0]; }
963
964 template <typename T>
965 constexpr bool
966 none_of(simd_mask<T, simd_abi::scalar> k) noexcept
967 { return not k[0]; }
968
969 template <typename T>
970 constexpr bool
971 some_of(simd_mask<T, simd_abi::scalar>) noexcept
972 { return false; }
973
974 template <typename T>
975 constexpr int
976 popcount(simd_mask<T, simd_abi::scalar> k) noexcept
977 { return static_cast<int>(k[0]); }
978
979 template <typename T>
980 constexpr int
981 find_first_set(simd_mask<T, simd_abi::scalar> k) noexcept
982 {
983 vir_simd_precondition(k[0], "find_first_set(empty mask) is UB");
984 return 0;
985 }
986
987 template <typename T>
988 constexpr int
989 find_last_set(simd_mask<T, simd_abi::scalar> k) noexcept
990 {
991 vir_simd_precondition(k[0], "find_last_set(empty mask) is UB");
992 return 0;
993 }
994
995 template <typename T, int N>
996 constexpr bool
997 all_of(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
998 {
999 for (int i = 0; i < N; ++i)
1000 {
1001 if (not k[i])
1002 return false;
1003 }
1004 return true;
1005 }
1006
1007 template <typename T, int N>
1008 constexpr bool
1009 any_of(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1010 {
1011 for (int i = 0; i < N; ++i)
1012 {
1013 if (k[i])
1014 return true;
1015 }
1016 return false;
1017 }
1018
1019 template <typename T, int N>
1020 constexpr bool
1021 none_of(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1022 {
1023 for (int i = 0; i < N; ++i)
1024 {
1025 if (k[i])
1026 return false;
1027 }
1028 return true;
1029 }
1030
1031 template <typename T, int N>
1032 constexpr bool
1033 some_of(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1034 {
1035 bool last = k[0];
1036 for (int i = 1; i < N; ++i)
1037 {
1038 if (last != k[i])
1039 return true;
1040 }
1041 return false;
1042 }
1043
1044 template <typename T, int N>
1045 constexpr int
1046 popcount(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1047 {
1048 int cnt = k[0];
1049 for (int i = 1; i < N; ++i)
1050 cnt += k[i];
1051 return cnt;
1052 }
1053
1054 template <typename T, int N>
1055 constexpr int
1056 find_first_set(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1057 {
1058 vir_simd_precondition(any_of(k), "find_first_set(empty mask) is UB");
1059 for (int i = 0; i < N; ++i)
1060 {
1061 if (k[i])
1062 return i;
1063 }
1064 vir::detail::unreachable();
1065 }
1066
1067 template <typename T, int N>
1068 constexpr int
1069 find_last_set(const simd_mask<T, simd_abi::fixed_size<N>>& k) noexcept
1070 {
1071 vir_simd_precondition(any_of(k), "find_last_set(empty mask) is UB");
1072 for (int i = N - 1; i >= 0; --i)
1073 {
1074 if (k[i])
1075 return i;
1076 }
1077 vir::detail::unreachable();
1078 }
1079
1080 constexpr bool
1081 all_of(detail::ExactBool x) noexcept
1082 { return x; }
1083
1084 constexpr bool
1085 any_of(detail::ExactBool x) noexcept
1086 { return x; }
1087
1088 constexpr bool
1089 none_of(detail::ExactBool x) noexcept
1090 { return !x; }
1091
1092 constexpr bool
1093 some_of(detail::ExactBool) noexcept
1094 { return false; }
1095
1096 constexpr int
1097 popcount(detail::ExactBool x) noexcept
1098 { return x; }
1099
1100 constexpr int
1101 find_first_set(detail::ExactBool)
1102 { return 0; }
1103
1104 constexpr int
1105 find_last_set(detail::ExactBool)
1106 { return 0; }
1107
1108 // scalar_simd_int_base
1109 template <class T, bool = std::is_integral_v<T>>
1110 class scalar_simd_int_base
1111 {};
1112
1113 template <class T>
1114 class scalar_simd_int_base<T, true>
1115 {
1116 using Derived = simd<T, simd_abi::scalar>;
1117
1118 constexpr T&
1119 d() noexcept
1120 { return static_cast<Derived*>(this)->data; }
1121
1122 constexpr const T&
1123 d() const noexcept
1124 { return static_cast<const Derived*>(this)->data; }
1125
1126 public:
1127 friend constexpr Derived&
1128 operator%=(Derived& lhs, Derived x)
1129 {
1130 lhs.d() %= x.d();
1131 return lhs;
1132 }
1133
1134 friend constexpr Derived&
1135 operator&=(Derived& lhs, Derived x)
1136 {
1137 lhs.d() &= x.d();
1138 return lhs;
1139 }
1140
1141 friend constexpr Derived&
1142 operator|=(Derived& lhs, Derived x)
1143 {
1144 lhs.d() |= x.d();
1145 return lhs;
1146 }
1147
1148 friend constexpr Derived&
1149 operator^=(Derived& lhs, Derived x)
1150 {
1151 lhs.d() ^= x.d();
1152 return lhs;
1153 }
1154
1155 friend constexpr Derived&
1156 operator<<=(Derived& lhs, Derived x)
1157 {
1158 lhs.d() <<= x.d();
1159 return lhs;
1160 }
1161
1162 friend constexpr Derived&
1163 operator>>=(Derived& lhs, Derived x)
1164 {
1165 lhs.d() >>= x.d();
1166 return lhs;
1167 }
1168
1169 friend constexpr Derived
1170 operator%(Derived x, Derived y)
1171 {
1172 x.d() %= y.d();
1173 return x;
1174 }
1175
1176 friend constexpr Derived
1177 operator&(Derived x, Derived y)
1178 {
1179 x.d() &= y.d();
1180 return x;
1181 }
1182
1183 friend constexpr Derived
1184 operator|(Derived x, Derived y)
1185 {
1186 x.d() |= y.d();
1187 return x;
1188 }
1189
1190 friend constexpr Derived
1191 operator^(Derived x, Derived y)
1192 {
1193 x.d() ^= y.d();
1194 return x;
1195 }
1196
1197 friend constexpr Derived
1198 operator<<(Derived x, Derived y)
1199 {
1200 x.d() <<= y.d();
1201 return x;
1202 }
1203
1204 friend constexpr Derived
1205 operator>>(Derived x, Derived y)
1206 {
1207 x.d() >>= y.d();
1208 return x;
1209 }
1210
1211 friend constexpr Derived
1212 operator<<(Derived x, int y)
1213 {
1214 x.d() <<= y;
1215 return x;
1216 }
1217
1218 friend constexpr Derived
1219 operator>>(Derived x, int y)
1220 {
1221 x.d() >>= y;
1222 return x;
1223 }
1224
1225 constexpr Derived
1226 operator~() const
1227 { return Derived(static_cast<T>(~d())); }
1228 };
1229
1230 // simd (scalar)
1231 template <class T>
1232 class simd<T, simd_abi::scalar>
1233 : public scalar_simd_int_base<T>, public detail::only_vectorizable<T>
1234 {
1235 friend class scalar_simd_int_base<T>;
1236
1237 T data;
1238
1239 friend constexpr T&
1240 _data_(simd& x)
1241 { return x.data; }
1242
1243 friend constexpr const T&
1244 _data_(const simd& x)
1245 { return x.data; }
1246
1247 public:
1248 using value_type = T;
1249 using reference = T&;
1250 using abi_type = simd_abi::scalar;
1251 using mask_type = simd_mask<T, abi_type>;
1252
1253 static constexpr size_t size() noexcept
1254 { return 1; }
1255
1256 constexpr simd() = default;
1257 constexpr simd(const simd&) = default;
1258 constexpr simd(simd&&) noexcept = default;
1259 constexpr simd& operator=(const simd&) = default;
1260 constexpr simd& operator=(simd&&) noexcept = default;
1261
1262 // simd constructors
1263 template <typename U>
1264 constexpr
1265 simd(detail::ValuePreservingOrInt<U, value_type>&& value) noexcept
1266 : data(value)
1267 {}
1268
1269 // generator constructor
1270 template <typename F>
1271 explicit constexpr
1272 simd(F&& gen, detail::ValuePreservingOrInt<
1273 decltype(std::declval<F>()(std::declval<detail::SizeConstant<0>&>())),
1274 value_type>* = nullptr)
1275 : data(gen(detail::SizeConstant<0>()))
1276 {}
1277
1278 // load constructor
1279 template <typename U, typename Flags>
1280 constexpr
1281 simd(const U* mem, Flags)
1282 : data(mem[0])
1283 {}
1284
1285 // loads [simd.load]
1286 template <typename U, typename Flags>
1287 constexpr void
1288 copy_from(const detail::Vectorizable<U>* mem, Flags)
1289 { data = mem[0]; }
1290
1291 // stores [simd.store]
1292 template <typename U, typename Flags>
1293 constexpr void
1294 copy_to(detail::Vectorizable<U>* mem, Flags) const
1295 { mem[0] = data; }
1296
1297 // scalar access
1298 constexpr reference
1299 operator[](size_t i)
1300 {
1301 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
1302 i, size() - 1);
1303 return data;
1304 }
1305
1306 constexpr value_type
1307 operator[](size_t i) const
1308 {
1309 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
1310 i, size() - 1);
1311 return data;
1312 }
1313
1314 // increment and decrement:
1315 constexpr simd&
1316 operator++()
1317 {
1318 ++data;
1319 return *this;
1320 }
1321
1322 constexpr simd
1323 operator++(int)
1324 {
1325 simd r = *this;
1326 ++data;
1327 return r;
1328 }
1329
1330 constexpr simd&
1331 operator--()
1332 {
1333 --data;
1334 return *this;
1335 }
1336
1337 constexpr simd
1338 operator--(int)
1339 {
1340 simd r = *this;
1341 --data;
1342 return r;
1343 }
1344
1345 // unary operators
1346 constexpr mask_type
1347 operator!() const
1348 { return mask_type(not data); }
1349
1350 constexpr simd
1351 operator+() const
1352 { return *this; }
1353
1354 constexpr simd
1355 operator-() const
1356 { return -data; }
1357
1358 // compound assignment [simd.cassign]
1359 constexpr friend simd&
1360 operator+=(simd& lhs, const simd& x)
1361 { return lhs = lhs + x; }
1362
1363 constexpr friend simd&
1364 operator-=(simd& lhs, const simd& x)
1365 { return lhs = lhs - x; }
1366
1367 constexpr friend simd&
1368 operator*=(simd& lhs, const simd& x)
1369 { return lhs = lhs * x; }
1370
1371 constexpr friend simd&
1372 operator/=(simd& lhs, const simd& x)
1373 { return lhs = lhs / x; }
1374
1375 // binary operators [simd.binary]
1376 constexpr friend simd
1377 operator+(const simd& x, const simd& y)
1378 { simd r = x; r.data += y.data; return r; }
1379
1380 constexpr friend simd
1381 operator-(const simd& x, const simd& y)
1382 { simd r = x; r.data -= y.data; return r; }
1383
1384 constexpr friend simd
1385 operator*(const simd& x, const simd& y)
1386 { simd r = x; r.data *= y.data; return r; }
1387
1388 constexpr friend simd
1389 operator/(const simd& x, const simd& y)
1390 { simd r = x; r.data /= y.data; return r; }
1391
1392 // compares [simd.comparison]
1393 constexpr friend mask_type
1394 operator==(const simd& x, const simd& y)
1395 { return mask_type(x.data == y.data); }
1396
1397 constexpr friend mask_type
1398 operator!=(const simd& x, const simd& y)
1399 { return mask_type(x.data != y.data); }
1400
1401 constexpr friend mask_type
1402 operator<(const simd& x, const simd& y)
1403 { return mask_type(x.data < y.data); }
1404
1405 constexpr friend mask_type
1406 operator<=(const simd& x, const simd& y)
1407 { return mask_type(x.data <= y.data); }
1408
1409 constexpr friend mask_type
1410 operator>(const simd& x, const simd& y)
1411 { return mask_type(x.data > y.data); }
1412
1413 constexpr friend mask_type
1414 operator>=(const simd& x, const simd& y)
1415 { return mask_type(x.data >= y.data); }
1416 };
1417
1418 // fixed_simd_int_base
// Primary template: chosen whenever the third argument is false, which by
// default means T is not an integral type. It is intentionally empty and
// therefore contributes no operators.
template <class T, int N, bool IsIntegral = std::is_integral_v<T>>
  class fixed_simd_int_base
  {
  };
1422
1423 template <class T, int N>
1424 class fixed_simd_int_base<T, N, true>
1425 {
1426 using Derived = simd<T, simd_abi::fixed_size<N>>;
1427
1428 constexpr T&
1429 d(int i) noexcept
1430 { return static_cast<Derived*>(this)->data[i]; }
1431
1432 constexpr const T&
1433 d(int i) const noexcept
1434 { return static_cast<const Derived*>(this)->data[i]; }
1435
1436 public:
1437 friend constexpr Derived&
1438 operator%=(Derived& lhs, const Derived& x)
1439 {
1440 for (int i = 0; i < N; ++i)
1441 lhs.d(i) %= x.d(i);
1442 return lhs;
1443 }
1444
1445 friend constexpr Derived&
1446 operator&=(Derived& lhs, const Derived& x)
1447 {
1448 for (int i = 0; i < N; ++i)
1449 lhs.d(i) &= x.d(i);
1450 return lhs;
1451 }
1452
1453 friend constexpr Derived&
1454 operator|=(Derived& lhs, const Derived& x)
1455 {
1456 for (int i = 0; i < N; ++i)
1457 lhs.d(i) |= x.d(i);
1458 return lhs;
1459 }
1460
1461 friend constexpr Derived&
1462 operator^=(Derived& lhs, const Derived& x)
1463 {
1464 for (int i = 0; i < N; ++i)
1465 lhs.d(i) ^= x.d(i);
1466 return lhs;
1467 }
1468
1469 friend constexpr Derived&
1470 operator<<=(Derived& lhs, const Derived& x)
1471 {
1472 for (int i = 0; i < N; ++i)
1473 lhs.d(i) <<= x.d(i);
1474 return lhs;
1475 }
1476
1477 friend constexpr Derived&
1478 operator>>=(Derived& lhs, const Derived& x)
1479 {
1480 for (int i = 0; i < N; ++i)
1481 lhs.d(i) >>= x.d(i);
1482 return lhs;
1483 }
1484
1485 friend constexpr Derived
1486 operator%(const Derived& x, const Derived& y)
1487 { return Derived([&](size_t i) -> T { return x[i] % y[i]; }); }
1488
1489 friend constexpr Derived
1490 operator&(const Derived& x, const Derived& y)
1491 { return Derived([&](size_t i) -> T { return x[i] & y[i]; }); }
1492
1493 friend constexpr Derived
1494 operator|(const Derived& x, const Derived& y)
1495 { return Derived([&](size_t i) -> T { return x[i] | y[i]; }); }
1496
1497 friend constexpr Derived
1498 operator^(const Derived& x, const Derived& y)
1499 { return Derived([&](size_t i) -> T { return x[i] ^ y[i]; }); }
1500
1501 friend constexpr Derived
1502 operator<<(const Derived& x, const Derived& y)
1503 { return Derived([&](size_t i) -> T { return x[i] << y[i]; }); }
1504
1505 friend constexpr Derived
1506 operator>>(const Derived& x, const Derived& y)
1507 { return Derived([&](size_t i) -> T { return x[i] >> y[i]; }); }
1508
1509 friend constexpr Derived
1510 operator<<(const Derived& x, int y)
1511 { return Derived([&](size_t i) -> T { return x[i] << y; }); }
1512
1513 friend constexpr Derived
1514 operator>>(const Derived& x, int y)
1515 { return Derived([&](size_t i) -> T { return x[i] >> y; }); }
1516
1517 constexpr Derived
1518 operator~() const
1519 { return Derived([&](size_t i) -> T { return ~d(i); }); }
1520 };
1521
1522 // simd (fixed_size)
1523 template <class T, int N>
1524 class simd<T, simd_abi::fixed_size<N>>
1525 : public fixed_simd_int_base<T, N>, public detail::only_vectorizable<T>
1526 {
1527 private:
1528 friend class fixed_simd_int_base<T, N>;
1529
1530 template <typename V, int M, size_t Parts>
1531 friend constexpr
1532 std::enable_if_t<M == Parts * V::size() && is_simd_v<V>, std::array<V, Parts>>
1533 split(const simd<typename V::value_type, simd_abi::fixed_size<M>>&);
1534
1535 template <size_t... Sizes, typename U>
1536 friend constexpr
1537 std::tuple<simd<U, simd_abi::deduce_t<U, int(Sizes)>>...>
1538 split(const simd<U, simd_abi::fixed_size<int((Sizes + ...))>>&);
1539
1540 T data[N];
1541
1542 using _data_type_ = T[N];
1543
1544 friend constexpr _data_type_&
1545 _data_(simd& x)
1546 { return x.data; }
1547
1548 friend constexpr const _data_type_&
1549 _data_(const simd& x)
1550 { return x.data; }
1551
1552 template <typename F, size_t... Is>
1553 constexpr
1554 simd(std::index_sequence<Is...>, F&& init)
1555 : data {static_cast<value_type>(init(detail::SizeConstant<Is>()))...}
1556 {}
1557
1558 public:
1559 using value_type = T;
1560 using reference = T&;
1561 using abi_type = simd_abi::fixed_size<N>;
1562 using mask_type = simd_mask<T, abi_type>;
1563
1564 static constexpr size_t size() noexcept
1565 { return N; }
1566
1567 constexpr simd() = default;
1568 constexpr simd(const simd&) = default;
1569 constexpr simd(simd&&) noexcept = default;
1570 constexpr simd& operator=(const simd&) = default;
1571 constexpr simd& operator=(simd&&) noexcept = default;
1572
1573 // simd constructors
1574 template <typename U>
1575 constexpr
1576 simd(detail::ValuePreservingOrInt<U, value_type>&& value) noexcept
1577 : simd(std::make_index_sequence<N>(),
1578 [v = static_cast<value_type>(value)](size_t) { return v; })
1579 {}
1580
1581 // conversion constructors
1582 template <typename U,
1583 typename = std::enable_if_t<
1584 std::conjunction_v<detail::is_value_preserving<U, value_type>,
1585 detail::is_higher_integer_rank<value_type, U>>>>
1586 constexpr
1587 simd(const simd<U, abi_type>& x)
1588 : simd(std::make_index_sequence<N>(),
1589 [&x](size_t i) { return static_cast<value_type>(x[i]); })
1590 {}
1591
1592 // generator constructor
1593 template <typename F>
1594 explicit constexpr
1595 simd(F&& gen, detail::ValuePreservingOrInt<
1596 decltype(std::declval<F>()(std::declval<detail::SizeConstant<0>&>())),
1597 value_type>* = nullptr)
1598 : simd(std::make_index_sequence<N>(), std::forward<F>(gen))
1599 {}
1600
1601 // load constructor
1602 template <typename U, typename Flags>
1603 constexpr
1604 simd(const U* mem, Flags)
1605 : simd(std::make_index_sequence<N>(), [mem](size_t i) -> value_type { return mem[i]; })
1606 {}
1607
1608 // loads [simd.load]
1609 template <typename U, typename Flags>
1610 constexpr void
1611 copy_from(const detail::Vectorizable<U>* mem, Flags)
1612 {
1613 for (int i = 0; i < N; ++i)
1614 data[i] = mem[i];
1615 }
1616
1617 // stores [simd.store]
1618 template <typename U, typename Flags>
1619 constexpr void
1620 copy_to(detail::Vectorizable<U>* mem, Flags) const
1621 {
1622 for (int i = 0; i < N; ++i)
1623 mem[i] = data[i];
1624 }
1625
1626 // scalar access
1627 constexpr reference
1628 operator[](size_t i)
1629 {
1630 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
1631 i, size() - 1);
1632 return data[i];
1633 }
1634
1635 constexpr value_type
1636 operator[](size_t i) const
1637 {
1638 vir_simd_precondition_vaargs(i < size(), "Subscript %zu is out of range [0, %zu]",
1639 i, size() - 1);
1640 return data[i];
1641 }
1642
1643 // increment and decrement:
1644 constexpr simd&
1645 operator++()
1646 {
1647 for (int i = 0; i < N; ++i)
1648 ++data[i];
1649 return *this;
1650 }
1651
1652 constexpr simd
1653 operator++(int)
1654 {
1655 simd r = *this;
1656 for (int i = 0; i < N; ++i)
1657 ++data[i];
1658 return r;
1659 }
1660
1661 constexpr simd&
1662 operator--()
1663 {
1664 for (int i = 0; i < N; ++i)
1665 --data[i];
1666 return *this;
1667 }
1668
1669 constexpr simd
1670 operator--(int)
1671 {
1672 simd r = *this;
1673 for (int i = 0; i < N; ++i)
1674 --data[i];
1675 return r;
1676 }
1677
1678 // unary operators
1679 constexpr mask_type
1680 operator!() const
1681 { return mask_type([&](size_t i) { return !data[i]; }); }
1682
1683 constexpr simd
1684 operator+() const
1685 { return *this; }
1686
1687 constexpr simd
1688 operator-() const
1689 { return simd([&](size_t i) -> value_type { return -data[i]; }); }
1690
1691 // compound assignment [simd.cassign]
1692 constexpr friend simd&
1693 operator+=(simd& lhs, const simd& x)
1694 {
1695 for (int i = 0; i < N; ++i)
1696 lhs.data[i] += x.data[i];
1697 return lhs;
1698 }
1699
1700 constexpr friend simd&
1701 operator-=(simd& lhs, const simd& x)
1702 {
1703 for (int i = 0; i < N; ++i)
1704 lhs.data[i] -= x.data[i];
1705 return lhs;
1706 }
1707
1708 constexpr friend simd&
1709 operator*=(simd& lhs, const simd& x)
1710 {
1711 for (int i = 0; i < N; ++i)
1712 lhs.data[i] *= x.data[i];
1713 return lhs;
1714 }
1715
1716 constexpr friend simd&
1717 operator/=(simd& lhs, const simd& x)
1718 {
1719 for (int i = 0; i < N; ++i)
1720 lhs.data[i] /= x.data[i];
1721 return lhs;
1722 }
1723
1724 // binary operators [simd.binary]
1725 constexpr friend simd
1726 operator+(const simd& x, const simd& y)
1727 { return simd([&](size_t i) { return x.data[i] + y.data[i]; }); }
1728
1729 constexpr friend simd
1730 operator-(const simd& x, const simd& y)
1731 { return simd([&](size_t i) { return x.data[i] - y.data[i]; }); }
1732
1733 constexpr friend simd
1734 operator*(const simd& x, const simd& y)
1735 { return simd([&](size_t i) { return x.data[i] * y.data[i]; }); }
1736
1737 constexpr friend simd
1738 operator/(const simd& x, const simd& y)
1739 { return simd([&](size_t i) { return x.data[i] / y.data[i]; }); }
1740
1741 // compares [simd.comparison]
1742 constexpr friend mask_type
1743 operator==(const simd& x, const simd& y)
1744 { return mask_type([&](size_t i) { return x.data[i] == y.data[i]; }); }
1745
1746 constexpr friend mask_type
1747 operator!=(const simd& x, const simd& y)
1748 { return mask_type([&](size_t i) { return x.data[i] != y.data[i]; }); }
1749
1750 constexpr friend mask_type
1751 operator<(const simd& x, const simd& y)
1752 { return mask_type([&](size_t i) { return x.data[i] < y.data[i]; }); }
1753
1754 constexpr friend mask_type
1755 operator<=(const simd& x, const simd& y)
1756 { return mask_type([&](size_t i) { return x.data[i] <= y.data[i]; }); }
1757
1758 constexpr friend mask_type
1759 operator>(const simd& x, const simd& y)
1760 { return mask_type([&](size_t i) { return x.data[i] > y.data[i]; }); }
1761
1762 constexpr friend mask_type
1763 operator>=(const simd& x, const simd& y)
1764 { return mask_type([&](size_t i) { return x.data[i] >= y.data[i]; }); }
1765 };
1766
1767 // casts [simd.casts]
1768 // static_simd_cast
1769 template <typename T, typename U, typename A,
1770 typename = std::enable_if_t<detail::is_vectorizable_v<T>>>
1771 constexpr simd<T, A>
1772 static_simd_cast(const simd<U, A>& x)
1773 { return simd<T, A>([&x](size_t i) { return static_cast<T>(x[i]); }); }
1774
1775 template <typename V, typename U, typename A,
1776 typename = std::enable_if_t<is_simd_v<V>>>
1777 constexpr V
1778 static_simd_cast(const simd<U, A>& x)
1779 { return V([&x](size_t i) { return static_cast<typename V::value_type>(x[i]); }); }
1780
1781 template <typename T, typename U, typename A,
1782 typename = std::enable_if_t<detail::is_vectorizable_v<T>>>
1783 constexpr simd_mask<T, A>
1784 static_simd_cast(const simd_mask<U, A>& x)
1785 { return simd_mask<T, A>([&x](size_t i) { return x[i]; }); }
1786
1787 template <typename M, typename U, typename A,
1788 typename = std::enable_if_t<M::size() == simd_size_v<U, A>>>
1789 constexpr M
1790 static_simd_cast(const simd_mask<U, A>& x)
1791 { return M([&x](size_t i) { return x[i]; }); }
1792
1793 // simd_cast
1794 template <typename T, typename U, typename A,
1795 typename To = detail::value_type_or_identity_t<T>>
1796 constexpr auto
1797 simd_cast(const simd<detail::ValuePreserving<U, To>, A>& x)
1798 -> decltype(static_simd_cast<T>(x))
1799 { return static_simd_cast<T>(x); }
1800
1801 // to_fixed_size
1802 template <typename T, int N>
1803 constexpr fixed_size_simd<T, N>
1804 to_fixed_size(const fixed_size_simd<T, N>& x)
1805 { return x; }
1806
1807 template <typename T, int N>
1808 constexpr fixed_size_simd_mask<T, N>
1809 to_fixed_size(const fixed_size_simd_mask<T, N>& x)
1810 { return x; }
1811
1812 template <typename T>
1813 constexpr fixed_size_simd<T, 1>
1814 to_fixed_size(const simd<T> x)
1815 { return x[0]; }
1816
1817 template <typename T>
1818 constexpr fixed_size_simd_mask<T, 1>
1819 to_fixed_size(const simd_mask<T> x)
1820 { return fixed_size_simd_mask<T, 1>(x[0]); }
1821
1822 // to_native
1823 template <typename T>
1824 constexpr simd<T>
1825 to_native(const fixed_size_simd<T, 1> x)
1826 { return x[0]; }
1827
1828 template <typename T>
1829 constexpr simd_mask<T>
1830 to_native(const fixed_size_simd_mask<T, 1> x)
1831 { return simd_mask<T>(x[0]); }
1832
1833 // to_compatible
1834 template <typename T>
1835 constexpr simd<T>
1836 to_compatible(const fixed_size_simd<T, 1> x)
1837 { return x[0]; }
1838
1839 template <typename T>
1840 constexpr simd_mask<T>
1841 to_compatible(const fixed_size_simd_mask<T, 1> x)
1842 { return simd_mask<T>(x[0]); }
1843
1844 // split(simd)
1845 template <typename V, int N, size_t Parts = N / V::size()>
1846 constexpr
1847 std::enable_if_t<N == Parts * V::size() && is_simd_v<V>, std::array<V, Parts>>
1848 split(const simd<typename V::value_type, simd_abi::fixed_size<N>>& x)
1849 {
1850 const auto* data = x.data;
1851 return [&]<size_t... Is>(std::index_sequence<Is...>)
1852 -> std::array<V, Parts> {
1853 return {V(data + Is * V::size(), element_aligned)...};
1854 }(std::make_index_sequence<Parts>());
1855 }
1856
1857 // split(simd_mask)
1858 template <typename V, int N, size_t Parts = N / V::size()>
1859 constexpr
1860 std::enable_if_t<N == Parts * V::size() && is_simd_mask_v<V>, std::array<V, Parts>>
1861 split(const simd_mask<typename V::simd_type::value_type, simd_abi::fixed_size<N>>& x)
1862 {
1863 const auto* data = x.data;
1864 return [&]<size_t... Is>(std::index_sequence<Is...>)
1865 -> std::array<V, Parts> {
1866 return {V(data + Is * V::size(), element_aligned)...};
1867 }(std::make_index_sequence<Parts>());
1868 }
1869
1870 // split<Sizes...>
1871 template <size_t... Sizes, typename T>
1872 constexpr
1873 std::tuple<simd<T, simd_abi::deduce_t<T, int(Sizes)>>...>
1874 split(const simd<T, simd_abi::fixed_size<int((Sizes + ...))>>& x)
1875 {
1876 using R = std::tuple<simd<T, simd_abi::deduce_t<T, int(Sizes)>>...>;
1877 const auto* data = x.data;
1878 return [&]<size_t... Is>(std::index_sequence<Is...>) -> R {
1879 constexpr size_t offsets[sizeof...(Sizes)] = {
1880 []<size_t... Js>(std::index_sequence<Js...>) {
1881 constexpr size_t sizes[sizeof...(Sizes)] = {Sizes...};
1882 return (sizes[Js] + ... + 0);
1883 }(std::make_index_sequence<Is>())...
1884 };
1885 return {simd<T, simd_abi::deduce_t<T, int(Sizes)>>(data + offsets[Is],
1886 element_aligned)...};
1887 }(std::make_index_sequence<sizeof...(Sizes)>());
1888 }
1889
1890 // split<V>(V)
1891 template <typename V>
1892 constexpr
1893 std::enable_if_t<std::disjunction_v<is_simd<V>, is_simd_mask<V>>, std::array<V, 1>>
1894 split(const V& x)
1895 { return {x}; }
1896
1897 // concat(simd...)
1898 template <typename T, typename... As>
1899 inline constexpr
1900 simd<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>
1901 concat(const simd<T, As>&... xs)
1902 {
1903 using R = simd<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>;
1904 return R([&](auto i) {
1905 return detail::pack_simd_subscript<i>(xs...);
1906 });
1907 }
1908
1909 // concat(simd_mask...)
1910 template <typename T, typename... As>
1911 inline constexpr
1912 simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>
1913 concat(const simd_mask<T, As>&... xs)
1914 {
1915 using R = simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>;
1916 return R([&](auto i) -> bool {
1917 return detail::pack_simd_subscript<i>(xs...);
1918 });
1919 }
1920
1921 // concat(array<simd>)
1922 template <typename T, typename A, size_t N>
1923 inline constexpr
1924 simd<T, simd_abi::deduce_t<T, N * simd_size_v<T, A>>>
1925 concat(const std::array<simd<T, A>, N>& x)
1926 {
1927 constexpr int K = simd_size_v<T, A>;
1928 using R = simd<T, simd_abi::deduce_t<T, N * K>>;
1929 return R([&](size_t i) {
1930 return x[i / K][i % K];
1931 });
1932 }
1933
1934 // concat(array<simd_mask>)
1935 template <typename T, typename A, size_t N>
1936 inline constexpr
1937 simd_mask<T, simd_abi::deduce_t<T, N * simd_size_v<T, A>>>
1938 concat(const std::array<simd_mask<T, A>, N>& x)
1939 {
1940 constexpr int K = simd_size_v<T, A>;
1941 using R = simd_mask<T, simd_abi::deduce_t<T, N * K>>;
1942 return R([&](size_t i) -> bool {
1943 return x[i / K][i % K];
1944 });
1945 }
1946
1947 // const_where_expression<M, T>
1948 template <typename M, typename V>
1949 class const_where_expression
1950 {
1951 static_assert(std::is_same_v<V, detail::remove_cvref_t<V>>);
1952
1953 struct Wrapper { using value_type = V; };
1954
1955 protected:
1956 using value_type =
1957 typename std::conditional_t<std::is_arithmetic_v<V>, Wrapper, V>::value_type;
1958
1959 friend const M&
1960 get_mask(const const_where_expression& x)
1961 { return x.m_k; }
1962
1963 friend const V&
1964 get_lvalue(const const_where_expression& x)
1965 { return x.m_value; }
1966
1967 const M& m_k;
1968 V& m_value;
1969
1970 public:
1971 const_where_expression(const const_where_expression&) = delete;
1972 const_where_expression& operator=(const const_where_expression&) = delete;
1973
1974 constexpr const_where_expression(const M& kk, const V& dd)
1975 : m_k(kk), m_value(const_cast<V&>(dd)) {}
1976
1977 constexpr V
1978 operator-() const &&
1979 {
1980 return V([&](size_t i) {
1981 return m_k[i] ? static_cast<value_type>(-m_value[i]) : m_value[i];
1982 });
1983 }
1984
1985 template <typename Up, typename Flags>
1986 [[nodiscard]] constexpr V
1987 copy_from(const detail::LoadStorePtr<Up, value_type>* mem, Flags) const &&
1988 {
1989 return V([&](size_t i) {
1990 return m_k[i] ? static_cast<value_type>(mem[i]) : m_value[i];
1991 });
1992 }
1993
1994 template <typename Up, typename Flags>
1995 constexpr void
1996 copy_to(detail::LoadStorePtr<Up, value_type>* mem, Flags) const &&
1997 {
1998 for (size_t i = 0; i < V::size(); ++i)
1999 {
2000 if (m_k[i])
2001 mem[i] = static_cast<Up>(m_value[i]);
2002 }
2003 }
2004 };
2005
2006 // const_where_expression<bool, T>
2007 template <typename V>
2008 class const_where_expression<bool, V>
2009 {
2010 using M = bool;
2011
2012 static_assert(std::is_same_v<V, detail::remove_cvref_t<V>>);
2013
2014 struct Wrapper { using value_type = V; };
2015
2016 protected:
2017 using value_type =
2018 typename std::conditional_t<std::is_arithmetic_v<V>, Wrapper, V>::value_type;
2019
2020 friend const M&
2021 get_mask(const const_where_expression& x)
2022 { return x.m_k; }
2023
2024 friend const V&
2025 get_lvalue(const const_where_expression& x)
2026 { return x.m_value; }
2027
2028 const bool m_k;
2029 V& m_value;
2030
2031 public:
2032 const_where_expression(const const_where_expression&) = delete;
2033 const_where_expression& operator=(const const_where_expression&) = delete;
2034
2035 constexpr const_where_expression(const bool kk, const V& dd)
2036 : m_k(kk), m_value(const_cast<V&>(dd)) {}
2037
2038 constexpr V
2039 operator-() const &&
2040 { return m_k ? -m_value : m_value; }
2041
2042 template <typename Up, typename Flags>
2043 [[nodiscard]] constexpr V
2044 copy_from(const detail::LoadStorePtr<Up, value_type>* mem, Flags) const &&
2045 { return m_k ? static_cast<V>(mem[0]) : m_value; }
2046
2047 template <typename Up, typename Flags>
2048 constexpr void
2049 copy_to(detail::LoadStorePtr<Up, value_type>* mem, Flags) const &&
2050 {
2051 if (m_k)
2052 mem[0] = m_value;
2053 }
2054 };
2055
2056 // where_expression<M, T>
2057 template <typename M, typename V>
2058 class where_expression : public const_where_expression<M, V>
2059 {
2060 static_assert(not std::is_const_v<V>,
2061 "where_expression may only be instantiated with a non-const V parameter");
2062
2063 using typename const_where_expression<M, V>::value_type;
2064 using const_where_expression<M, V>::m_k;
2065 using const_where_expression<M, V>::m_value;
2066
2067 static_assert(std::is_same_v<typename M::abi_type, typename V::abi_type>);
2068 static_assert(M::size() == V::size());
2069
2070 friend V&
2071 get_lvalue(where_expression& x)
2072 { return x.m_value; }
2073
2074 template <typename Up>
2075 constexpr auto
2076 as_simd(Up&& x)
2077 {
2078 using UU = detail::remove_cvref_t<Up>;
2079 if constexpr (std::is_same_v<V, UU>)
2080 return x;
2081 else if constexpr (std::is_convertible_v<Up&&, value_type>)
2082 return V(static_cast<value_type>(static_cast<Up&&>(x)));
2083 else if constexpr (std::is_convertible_v<Up&&, V>)
2084 return static_cast<V>(static_cast<Up&&>(x));
2085 else
2086 return static_simd_cast<V>(static_cast<Up&&>(x));
2087 }
2088
2089 public:
2090 where_expression(const where_expression&) = delete;
2091 where_expression& operator=(const where_expression&) = delete;
2092
2093 constexpr where_expression(const M& kk, V& dd)
2094 : const_where_expression<M, V>(kk, dd)
2095 {}
2096
2097 template <typename Up>
2098 constexpr void
2099 operator=(Up&& x) &&
2100 {
2101 const V& rhs = as_simd(x);
2102 for (size_t i = 0; i < V::size(); ++i)
2103 {
2104 if (m_k[i])
2105 m_value[i] = rhs[i];
2106 }
2107 }
2108
2109#define SIMD_OP_(op) \
2110 template <typename Up> \
2111 constexpr void \
2112 operator op##=(Up&& x) && \
2113 { \
2114 const V& rhs = as_simd(x); \
2115 for (size_t i = 0; i < V::size(); ++i) \
2116 { \
2117 if (m_k[i]) \
2118 m_value[i] op##= rhs[i]; \
2119 } \
2120 } \
2121 static_assert(true)
2122 SIMD_OP_(+);
2123 SIMD_OP_(-);
2124 SIMD_OP_(*);
2125 SIMD_OP_(/);
2126 SIMD_OP_(%);
2127 SIMD_OP_(&);
2128 SIMD_OP_(|);
2129 SIMD_OP_(^);
2130 SIMD_OP_(<<);
2131 SIMD_OP_(>>);
2132#undef SIMD_OP_
2133
2134 constexpr void operator++() &&
2135 {
2136 for (size_t i = 0; i < V::size(); ++i)
2137 {
2138 if (m_k[i])
2139 ++m_value[i];
2140 }
2141 }
2142
2143 constexpr void operator++(int) &&
2144 {
2145 for (size_t i = 0; i < V::size(); ++i)
2146 {
2147 if (m_k[i])
2148 ++m_value[i];
2149 }
2150 }
2151
2152 constexpr void operator--() &&
2153 {
2154 for (size_t i = 0; i < V::size(); ++i)
2155 {
2156 if (m_k[i])
2157 --m_value[i];
2158 }
2159 }
2160
2161 constexpr void operator--(int) &&
2162 {
2163 for (size_t i = 0; i < V::size(); ++i)
2164 {
2165 if (m_k[i])
2166 --m_value[i];
2167 }
2168 }
2169
2170 // intentionally hides const_where_expression::copy_from
2171 template <typename Up, typename Flags>
2172 constexpr void
2173 copy_from(const detail::LoadStorePtr<Up, value_type>* mem, Flags) &&
2174 {
2175 for (size_t i = 0; i < V::size(); ++i)
2176 {
2177 if (m_k[i])
2178 m_value[i] = mem[i];
2179 }
2180 }
2181 };
2182
2183 // where_expression<bool, T>
2184 template <typename V>
2185 class where_expression<bool, V> : public const_where_expression<bool, V>
2186 {
2187 using M = bool;
2188 using typename const_where_expression<M, V>::value_type;
2189 using const_where_expression<M, V>::m_k;
2190 using const_where_expression<M, V>::m_value;
2191
2192 public:
2193 where_expression(const where_expression&) = delete;
2194 where_expression& operator=(const where_expression&) = delete;
2195
2196 constexpr where_expression(const M& kk, V& dd)
2197 : const_where_expression<M, V>(kk, dd) {}
2198
2199#define SIMD_OP_(op) \
2200 template <typename Up> \
2201 constexpr void operator op(Up&& x) && \
2202 { if (m_k) m_value op static_cast<Up&&>(x); }
2203
2204 SIMD_OP_(=)
2205 SIMD_OP_(+=)
2206 SIMD_OP_(-=)
2207 SIMD_OP_(*=)
2208 SIMD_OP_(/=)
2209 SIMD_OP_(%=)
2210 SIMD_OP_(&=)
2211 SIMD_OP_(|=)
2212 SIMD_OP_(^=)
2213 SIMD_OP_(<<=)
2214 SIMD_OP_(>>=)
2215#undef SIMD_OP_
2216
2217 constexpr void operator++() &&
2218 { if (m_k) ++m_value; }
2219
2220 constexpr void operator++(int) &&
2221 { if (m_k) ++m_value; }
2222
2223 constexpr void operator--() &&
2224 { if (m_k) --m_value; }
2225
2226 constexpr void operator--(int) &&
2227 { if (m_k) --m_value; }
2228
2229 // intentionally hides const_where_expression::copy_from
2230 template <typename Up, typename Flags>
2231 constexpr void
2232 copy_from(const detail::LoadStorePtr<Up, value_type>* mem, Flags) &&
2233 { if (m_k) m_value = mem[0]; }
2234 };
2235
2236 // where
2237 template <typename Tp, typename Ap>
2238 constexpr where_expression<simd_mask<Tp, Ap>, simd<Tp, Ap>>
2239 where(const typename simd<Tp, Ap>::mask_type& k, simd<Tp, Ap>& value)
2240 { return {k, value}; }
2241
2242 template <typename Tp, typename Ap>
2243 constexpr const_where_expression<simd_mask<Tp, Ap>, simd<Tp, Ap>>
2244 where(const typename simd<Tp, Ap>::mask_type& k,
2245 const simd<Tp, Ap>& value)
2246 { return {k, value}; }
2247
2248 template <typename Tp, typename Ap>
2249 constexpr where_expression<simd_mask<Tp, Ap>, simd_mask<Tp, Ap>>
2250 where(const std::remove_const_t<simd_mask<Tp, Ap>>& k,
2251 simd_mask<Tp, Ap>& value)
2252 { return {k, value}; }
2253
2254 template <typename Tp, typename Ap>
2255 constexpr const_where_expression<simd_mask<Tp, Ap>, simd_mask<Tp, Ap>>
2256 where(const std::remove_const_t<simd_mask<Tp, Ap>>& k,
2257 const simd_mask<Tp, Ap>& value)
2258 { return {k, value}; }
2259
2260 template <typename Tp>
2261 constexpr where_expression<bool, Tp>
2262 where(detail::ExactBool k, Tp& value)
2263 { return {k, value}; }
2264
2265 template <typename Tp>
2266 constexpr const_where_expression<bool, Tp>
2267 where(detail::ExactBool k, const Tp& value)
2268 { return {k, value}; }
2269
2270 template <typename Tp, typename Ap>
2271 constexpr void
2272 where(bool k, simd<Tp, Ap>& value) = delete;
2273
2274 template <typename Tp, typename Ap>
2275 constexpr void
2276 where(bool k, const simd<Tp, Ap>& value) = delete;
2277
2278 // reductions [simd.reductions]
2279 template <typename T, typename A, typename BinaryOperation = std::plus<>>
2280 constexpr T
2281 reduce(const simd<T, A>& v,
2282 BinaryOperation binary_op = BinaryOperation())
2283 {
2284 constexpr int N = simd_size_v<T, A>;
2285 if constexpr (N > 3)
2286 {
2287 constexpr int N2 = detail::bit_floor(N / 2);
2288 constexpr int NRem = N - 2 * N2;
2289 if constexpr (NRem > 0)
2290 {
2291 const auto [l, r, rem] = split<N2, N2, N - 2 * N2>(v);
2292 return binary_op(reduce(binary_op(l, r), binary_op), reduce(rem, binary_op));
2293 }
2294 else
2295 {
2296 const auto [l, r] = split<N2, N2>(v);
2297 return reduce(binary_op(l, r), binary_op);
2298 }
2299 }
2300 else
2301 {
2302 T r = v[0];
2303 for (size_t i = 1; i < simd_size_v<T, A>; ++i)
2304 r = binary_op(r, v[i]);
2305 return r;
2306 }
2307 }
2308
2309 template <typename M, typename V, typename BinaryOperation = std::plus<>>
2310 constexpr typename V::value_type
2311 reduce(const const_where_expression<M, V>& x,
2312 typename V::value_type identity_element,
2313 BinaryOperation binary_op)
2314 {
2315 const M& k = get_mask(x);
2316 const V& v = get_lvalue(x);
2317 auto r = identity_element;
2318 if (any_of(k)) [[likely]]
2319 {
2320 for (size_t i = 0; i < V::size(); ++i)
2321 if (k[i])
2322 r = binary_op(r, v[i]);
2323 }
2324 return r;
2325 }
2326
2327 template <typename M, typename V>
2328 constexpr typename V::value_type
2329 reduce(const const_where_expression<M, V>& x, std::plus<> binary_op = {})
2330 { return reduce(x, 0, binary_op); }
2331
2332 template <typename M, typename V>
2333 constexpr typename V::value_type
2334 reduce(const const_where_expression<M, V>& x, std::multiplies<> binary_op)
2335 { return reduce(x, 1, binary_op); }
2336
2337 template <typename M, typename V>
2338 constexpr typename V::value_type
2339 reduce(const const_where_expression<M, V>& x, std::bit_and<> binary_op)
2340 { return reduce(x, ~typename V::value_type(), binary_op); }
2341
2342 template <typename M, typename V>
2343 constexpr typename V::value_type
2344 reduce(const const_where_expression<M, V>& x, std::bit_or<> binary_op)
2345 { return reduce(x, 0, binary_op); }
2346
2347 template <typename M, typename V>
2348 constexpr typename V::value_type
2349 reduce(const const_where_expression<M, V>& x, std::bit_xor<> binary_op)
2350 { return reduce(x, 0, binary_op); }
2351
2352 template <typename T, typename A>
2353 constexpr T
2354 hmin(const simd<T, A>& v) noexcept
2355 {
2356 return reduce(v, [](const auto& l, const auto& r) {
2357 using std::min;
2358 return min(l, r);
2359 });
2360 }
2361
2362 template <typename T, typename A>
2363 constexpr T
2364 hmax(const simd<T, A>& v) noexcept
2365 {
2366 return reduce(v, [](const auto& l, const auto& r) {
2367 using std::max;
2368 return max(l, r);
2369 });
2370 }
2371
2372 template <typename M, typename V>
2373 constexpr typename V::value_type
2374 hmin(const const_where_expression<M, V>& x) noexcept
2375 {
2376 using T = typename V::value_type;
2377 constexpr T id_elem =
2378#ifdef __FINITE_MATH_ONLY__
2379 std::numeric_limits<T>::max();
2380#else
2381 std::numeric_limits<T>::infinity();
2382#endif
2383 return reduce(x, id_elem, [](const auto& l, const auto& r) {
2384 using std::min;
2385 return min(l, r);
2386 });
2387 }
2388
2389 template <typename M, typename V>
2390 constexpr
2391 typename V::value_type
2392 hmax(const const_where_expression<M, V>& x) noexcept
2393 {
2394 using T = typename V::value_type;
2395 constexpr T id_elem =
2396#ifdef __FINITE_MATH_ONLY__
2397 std::numeric_limits<T>::lowest();
2398#else
2399 -std::numeric_limits<T>::infinity();
2400#endif
2401 return reduce(x, id_elem, [](const auto& l, const auto& r) {
2402 using std::max;
2403 return max(l, r);
2404 });
2405 }
2406
2407 // algorithms [simd.alg]
2408 template <typename T, typename A>
2409 constexpr simd<T, A>
2410 min(const simd<T, A>& a, const simd<T, A>& b)
2411 { return simd<T, A>([&](size_t i) { return std::min(a[i], b[i]); }); }
2412
2413 template <typename T, typename A>
2414 constexpr simd<T, A>
2415 max(const simd<T, A>& a, const simd<T, A>& b)
2416 { return simd<T, A>([&](size_t i) { return std::max(a[i], b[i]); }); }
2417
2418 template <typename T, typename A>
2419 constexpr
2420 std::pair<simd<T, A>, simd<T, A>>
2421 minmax(const simd<T, A>& a, const simd<T, A>& b)
2422 { return {min(a, b), max(a, b)}; }
2423
2424 template <typename T, typename A>
2425 constexpr simd<T, A>
2426 clamp(const simd<T, A>& v, const simd<T, A>& lo,
2427 const simd<T, A>& hi)
2428 { return simd<T, A>([&](size_t i) { return std::clamp(v[i], lo[i], hi[i]); }); }
2429
2430 // math
// Defines func(simd) -> Ret<T, A>, applying std::func to every element.
// Ret is the result class template (e.g. simd or simd_mask).
// NOTE(review): detail::FloatingPoint<T> presumably restricts T to
// floating-point types -- confirm at its definition.
#define SIMD_MATH_1ARG(func, Ret)                                              \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const simd<detail::FloatingPoint<T>, A>& x) noexcept                  \
    {                                                                          \
      return Ret<T, A>([&x](size_t lane) {                                     \
               return std::func(x[lane]);                                      \
             });                                                               \
    }
2436
// Defines func(simd) -> fixed_size_simd<Elem, N>, applying std::func to every
// element; for functions whose scalar result type (Elem) differs from T.
// NOTE(review): detail::FloatingPoint<T> presumably restricts T to
// floating-point types -- confirm at its definition.
#define SIMD_MATH_1ARG_FIXED(func, Elem)                                       \
  template <typename T, typename A>                                            \
    constexpr fixed_size_simd<Elem, simd_size_v<T, A>>                         \
    func(const simd<detail::FloatingPoint<T>, A>& x) noexcept                  \
    {                                                                          \
      return fixed_size_simd<Elem, simd_size_v<T, A>>([&x](size_t lane) {      \
               return std::func(x[lane]);                                      \
             });                                                               \
    }
2442
// Defines three overloads of func(x, y) -> Ret<T, A>, each applying std::func
// element-wise. The first deduces T and A from both arguments; the other two
// deduce from one argument only and take the other through
// detail::type_identity_t (a non-deduced context).
// NOTE(review): detail::FloatingPoint<T> presumably restricts T to
// floating-point types -- confirm at its definition.
#define SIMD_MATH_2ARG(func, Ret)                                              \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const simd<T, A>& y) noexcept                                         \
    {                                                                          \
      return Ret<T, A>([&x, &y](size_t lane) {                                 \
               return std::func(x[lane], y[lane]);                             \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const detail::type_identity_t<simd<T, A>>& y) noexcept                \
    {                                                                          \
      return Ret<T, A>([&x, &y](size_t lane) {                                 \
               return std::func(x[lane], y[lane]);                             \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const detail::type_identity_t<simd<T, A>>& x,                         \
         const simd<detail::FloatingPoint<T>, A>& y) noexcept                  \
    {                                                                          \
      return Ret<T, A>([&x, &y](size_t lane) {                                 \
               return std::func(x[lane], y[lane]);                             \
             });                                                               \
    }
2460
// Defines four overloads of func(x, y, z) -> Ret<T, A>, each applying
// std::func element-wise. The first deduces T and A from all three arguments;
// the remaining three deduce from exactly one argument (x, y or z in turn)
// and take the other two through detail::type_identity_t (non-deduced).
// NOTE(review): detail::FloatingPoint<T> presumably restricts T to
// floating-point types -- confirm at its definition.
#define SIMD_MATH_3ARG(func, Ret)                                              \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const simd<T, A>& y, const simd<T, A>& z) noexcept                    \
    {                                                                          \
      return Ret<T, A>([&x, &y, &z](size_t lane) {                             \
               return std::func(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const detail::type_identity_t<simd<T, A>>& y,                         \
         const detail::type_identity_t<simd<T, A>>& z) noexcept                \
    {                                                                          \
      return Ret<T, A>([&x, &y, &z](size_t lane) {                             \
               return std::func(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const detail::type_identity_t<simd<T, A>>& x,                         \
         const simd<detail::FloatingPoint<T>, A>& y,                           \
         const detail::type_identity_t<simd<T, A>>& z) noexcept                \
    {                                                                          \
      return Ret<T, A>([&x, &y, &z](size_t lane) {                             \
               return std::func(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr Ret<T, A>                                                        \
    func(const detail::type_identity_t<simd<T, A>>& x,                         \
         const detail::type_identity_t<simd<T, A>>& y,                         \
         const simd<detail::FloatingPoint<T>, A>& z) noexcept                  \
    {                                                                          \
      return Ret<T, A>([&x, &y, &z](size_t lane) {                             \
               return std::func(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }
2488
2489 template <typename T, typename A, typename U = detail::SignedIntegral<T>>
2490 constexpr simd<T, A>
2491 abs(const simd<T, A>& x) noexcept
2492 { return simd<T, A>([&x](size_t i) { return std::abs(x[i]); }); }
2493
// Floating-point abs and the classification functions, each generated as an
// element-wise call to the std:: function of the same name. The second macro
// argument is the result template: simd for values, simd_mask for predicates.
2494 SIMD_MATH_1ARG(abs, simd)
2495 SIMD_MATH_1ARG(isnan, simd_mask)
2496 SIMD_MATH_1ARG(isfinite, simd_mask)
2497 SIMD_MATH_1ARG(isinf, simd_mask)
2498 SIMD_MATH_1ARG(isnormal, simd_mask)
2499 SIMD_MATH_1ARG(signbit, simd_mask)
// fpclassify yields an int per element, so the result is fixed_size_simd<int, N>.
2500 SIMD_MATH_1ARG_FIXED(fpclassify, int)
2501
// hypot is generated in both its two-argument and three-argument forms.
2502 SIMD_MATH_2ARG(hypot, simd)
2503 SIMD_MATH_3ARG(hypot, simd)
2504
2505 template <typename T, typename A>
2506 constexpr simd<T, A>
2507 remquo(const simd<T, A>& x, const simd<T, A>& y,
2508 fixed_size_simd<int, simd_size_v<T, A>>* quo) noexcept
2509 { return simd<T, A>([&x, &y, quo](size_t i) { return std::remquo(x[i], y[i], &(*quo)[i]); }); }
2510
// Error and gamma functions: one simd argument, simd result.
2511 SIMD_MATH_1ARG(erf, simd)
2512 SIMD_MATH_1ARG(erfc, simd)
2513 SIMD_MATH_1ARG(tgamma, simd)
2514 SIMD_MATH_1ARG(lgamma, simd)
2515
// Two-argument functions. Those instantiated with simd return values; the
// comparison functions instantiated with simd_mask return a mask.
2516 SIMD_MATH_2ARG(pow, simd)
2517 SIMD_MATH_2ARG(fmod, simd)
2518 SIMD_MATH_2ARG(remainder, simd)
2519 SIMD_MATH_2ARG(nextafter, simd)
2520 SIMD_MATH_2ARG(copysign, simd)
2521 SIMD_MATH_2ARG(fdim, simd)
2522 SIMD_MATH_2ARG(fmax, simd)
2523 SIMD_MATH_2ARG(fmin, simd)
2524 SIMD_MATH_2ARG(isgreater, simd_mask)
2525 SIMD_MATH_2ARG(isgreaterequal, simd_mask)
2526 SIMD_MATH_2ARG(isless, simd_mask)
2527 SIMD_MATH_2ARG(islessequal, simd_mask)
2528 SIMD_MATH_2ARG(islessgreater, simd_mask)
2529 SIMD_MATH_2ARG(isunordered, simd_mask)
2530
2531 template <typename T, typename A>
2532 constexpr simd<T, A>
2533 modf(const simd<detail::FloatingPoint<T>, A>& x, simd<T, A>* iptr) noexcept
2534 { return simd<T, A>([&x, iptr](size_t i) { return std::modf(x[i], &(*iptr)[i]); }); }
2535
2536 template <typename T, typename A>
2537 constexpr simd<T, A>
2538 frexp(const simd<detail::FloatingPoint<T>, A>& x,
2539 fixed_size_simd<int, simd_size_v<T, A>>* exp) noexcept
2540 { return simd<T, A>([&x, exp](size_t i) { return std::frexp(x[i], &(*exp)[i]); }); }
2541
2542 template <typename T, typename A>
2543 constexpr simd<T, A>
2544 scalbln(const simd<detail::FloatingPoint<T>, A>& x,
2545 const fixed_size_simd<long int, simd_size_v<T, A>>& exp) noexcept
2546 { return simd<T, A>([&x, &exp](size_t i) { return std::scalbln(x[i], exp[i]); }); }
2547
2548 template <typename T, typename A>
2549 constexpr simd<T, A>
2550 scalbn(const simd<detail::FloatingPoint<T>, A>& x,
2551 const fixed_size_simd<int, simd_size_v<T, A>>& exp) noexcept
2552 { return simd<T, A>([&x, &exp](size_t i) { return std::scalbn(x[i], exp[i]); }); }
2553
2554 template <typename T, typename A>
2555 constexpr simd<T, A>
2556 ldexp(const simd<detail::FloatingPoint<T>, A>& x,
2557 const fixed_size_simd<int, simd_size_v<T, A>>& exp) noexcept
2558 { return simd<T, A>([&x, &exp](size_t i) { return std::ldexp(x[i], exp[i]); }); }
2559
// Square root and fused multiply-add, applied element-wise.
2560 SIMD_MATH_1ARG(sqrt, simd)
2561
2562 SIMD_MATH_3ARG(fma, simd)
2563
// Rounding functions. The l*/ll* variants and ilogb are generated with
// SIMD_MATH_1ARG_FIXED because their element type (long, long long, int)
// differs from the input element type.
2564 SIMD_MATH_1ARG(trunc, simd)
2565 SIMD_MATH_1ARG(ceil, simd)
2566 SIMD_MATH_1ARG(floor, simd)
2567 SIMD_MATH_1ARG(round, simd)
2568 SIMD_MATH_1ARG_FIXED(lround, long)
2569 SIMD_MATH_1ARG_FIXED(llround, long long)
2570 SIMD_MATH_1ARG(nearbyint, simd)
2571 SIMD_MATH_1ARG(rint, simd)
2572 SIMD_MATH_1ARG_FIXED(lrint, long)
2573 SIMD_MATH_1ARG_FIXED(llrint, long long)
2574 SIMD_MATH_1ARG_FIXED(ilogb, int)
2575
2576 // trig functions
// Trigonometric, inverse trigonometric and hyperbolic functions, applied
// element-wise; atan2 is the only two-argument one.
2577 SIMD_MATH_1ARG(sin, simd)
2578 SIMD_MATH_1ARG(cos, simd)
2579 SIMD_MATH_1ARG(tan, simd)
2580 SIMD_MATH_1ARG(asin, simd)
2581 SIMD_MATH_1ARG(acos, simd)
2582 SIMD_MATH_1ARG(atan, simd)
2583 SIMD_MATH_2ARG(atan2, simd)
2584 SIMD_MATH_1ARG(sinh, simd)
2585 SIMD_MATH_1ARG(cosh, simd)
2586 SIMD_MATH_1ARG(tanh, simd)
2587 SIMD_MATH_1ARG(asinh, simd)
2588 SIMD_MATH_1ARG(acosh, simd)
2589 SIMD_MATH_1ARG(atanh, simd)
2590
2591 // logarithms
// Logarithm family, applied element-wise.
2592 SIMD_MATH_1ARG(log, simd)
2593 SIMD_MATH_1ARG(log10, simd)
2594 SIMD_MATH_1ARG(log1p, simd)
2595 SIMD_MATH_1ARG(log2, simd)
2596 SIMD_MATH_1ARG(logb, simd)
2597
// The generator macros are only needed above; undefine them so they are not
// visible to code that includes this header.
2598#undef SIMD_MATH_1ARG
2599#undef SIMD_MATH_1ARG_FIXED
2600#undef SIMD_MATH_2ARG
2601#undef SIMD_MATH_3ARG
2602}
2603#ifdef VIR_SIMD_TS_DROPIN
2604}
2605
// Only compiled when VIR_SIMD_TS_DROPIN is defined: makes every name of
// std::experimental::parallelism_v2 reachable through vir::stdx.
2606namespace vir::stdx
2607{
2608 using namespace std::experimental::parallelism_v2;
2609}
2610#endif
2611
2612#endif
2613#endif // VIR_SIMD_H_
multiplies
constexpr simd_policy simd
SIMD execution policy.
Definition simd_execution.h:528
Version macros and version constant.