10#if _MSVC_LANG < 201703L
11#error "simd requires C++17 or later"
14#if __cplusplus < 201703L
15#error "simd requires C++17 or later"
23#if __has_include (<experimental/simd>) && !defined VIR_DISABLE_STDX_SIMD \
24 && (!defined __clang_major__ || !defined __GNUC__ || __GLIBCXX__ >= 20230525)
25#include <experimental/simd>
28#ifndef VIR_ALWAYS_INLINE
30#define VIR_ALWAYS_INLINE [[gnu::always_inline]] inline
31#define VIR_GNU_COLD [[gnu::cold]]
32#define VIR_GNU_ATTR_COLD __attribute__((cold))
34#define VIR_ALWAYS_INLINE __forceinline
36#define VIR_GNU_ATTR_COLD
42 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE
void
46 __builtin_unreachable();
47#elif defined __has_cpp_attribute and __has_cpp_attribute(assume)
54 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE
void
64 template <
typename... Args>
65 [[noreturn]] VIR_GNU_COLD VIR_ALWAYS_INLINE
void
66 invoke_ub([[maybe_unused]]
const char* msg,
67 [[maybe_unused]]
const Args&... args)
69#if VIR_CHECK_PRECONDITIONS < 2
71#elif defined __GNUC__ and VIR_CHECK_PRECONDITIONS < 4
74 [&] VIR_GNU_ATTR_COLD () {
75 std::fprintf(stderr, msg, args...);
// Two-level stringification: the extra indirection makes sure that a macro
// argument such as __LINE__ is expanded before `#` turns it into a literal.
#define VIR_SIMD_TOSTRING_IMPL(x) #x
#define VIR_SIMD_TOSTRING(x) VIR_SIMD_TOSTRING_IMPL(x)
// String literal "<file>:<line>: " that prefixes the precondition diagnostics.
#define VIR_SIMD_LOC __FILE__ ":" VIR_SIMD_TOSTRING(__LINE__) ": "
95#ifndef VIR_CHECK_PRECONDITIONS
96#define VIR_CHECK_PRECONDITIONS 3
99#if VIR_CHECK_PRECONDITIONS > 5 or VIR_CHECK_PRECONDITIONS < 0
100#warning "Invalid value for VIR_CHECK_PRECONDITIONS."
104#define VIR_PRETTY_FUNCTION_ __PRETTY_FUNCTION__
106#define VIR_PRETTY_FUNCTION_ __FUNCSIG__
109#if VIR_CHECK_PRECONDITIONS < 0
110#define vir_simd_precondition(expr, msg) \
113#define vir_simd_precondition_vaargs(expr, msg, ...) \
116#elif defined __clang__ or __GNUC__ >= 10
117#if (VIR_CHECK_PRECONDITIONS & 1) == 1
118#define VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION __error__
120#define VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION __warning__
122#if defined __GNUC__ and not defined __clang__
123#define VIR_ATTR_NOIPA __noipa__,
125#define VIR_ATTR_NOIPA
127#define vir_simd_precondition(expr, msg) \
129 const bool precondition_result = bool(expr); \
130 if (__builtin_constant_p(precondition_result) and not precondition_result) \
131 []() __attribute__((__noinline__, __noreturn__, VIR_ATTR_NOIPA \
132 VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION("precondition failure." \
133 "\n" VIR_SIMD_LOC "note: " msg " (precondition '" #expr "' does not hold)"))) \
134 { vir::detail::trap(); }(); \
135 else if (__builtin_expect(not precondition_result, false)) \
136 vir::detail::invoke_ub( \
137 VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
138 VIR_PRETTY_FUNCTION_); \
141#define vir_simd_precondition_vaargs(expr, msg, ...) \
143 const bool precondition_result = bool(expr); \
144 if (__builtin_constant_p(precondition_result) and not precondition_result) \
145 []() __attribute__((__noinline__, __noreturn__, VIR_ATTR_NOIPA \
146 VIR_CONSTPROP_PRECONDITION_FAILURE_ACTION("precondition failure." \
147 "\n" VIR_SIMD_LOC "note: " msg " (precondition '" #expr "' does not hold)"))) \
148 { vir::detail::trap(); }(); \
149 else if (__builtin_expect(not precondition_result, false)) \
150 vir::detail::invoke_ub( \
151 VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
152 VIR_PRETTY_FUNCTION_, __VA_ARGS__); \
156#define vir_simd_precondition(expr, msg) \
158 const bool precondition_result = bool(expr); \
159 if (not precondition_result) [[unlikely]] \
160 vir::detail::invoke_ub( \
161 VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
162 VIR_PRETTY_FUNCTION_); \
165#define vir_simd_precondition_vaargs(expr, msg, ...) \
167 const bool precondition_result = bool(expr); \
168 if (not precondition_result) [[unlikely]] \
169 vir::detail::invoke_ub( \
170 VIR_SIMD_LOC "precondition failure in '%s': " msg " ('" #expr "' does not hold)\n", \
171 VIR_PRETTY_FUNCTION_, __VA_ARGS__); \
177#if defined __cpp_lib_experimental_parallel_simd && __cpp_lib_experimental_parallel_simd >= 201803
179#define VIR_HAVE_STD_SIMD 1
183 using namespace std::experimental::parallelism_v2;
184 using namespace std::experimental::parallelism_v2::__proposed;
192#ifdef _GLIBCXX_DEBUG_UB
198#include <type_traits>
201#define VIR_HAVE_VIR_SIMD 1
203#ifdef VIR_SIMD_TS_DROPIN
204namespace std::experimental
206 inline namespace [[gnu::diagnose_as(
"virx")]] parallelism_v2
215 template <
typename T>
219 template <
typename T>
220 using type_identity_t =
typename type_identity<T>::type;
242 template <
typename T>
243 typename T::value_type
244 value_type_or_identity_impl(
int);
246 template <
typename T>
248 value_type_or_identity_impl(
float);
250 template <
typename T>
251 using value_type_or_identity_t
252 =
decltype(value_type_or_identity_impl<T>(
int()));
259 constexpr ExactBool(
bool b) : data(b) {}
261 ExactBool(
int) =
delete;
263 constexpr operator bool()
const {
return data; }
// Strips the reference from T first and then its top-level cv-qualifiers.
template <typename T>
  using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
// Abbreviation for std::numeric_limits<T>.
template <typename T>
  using L = std::numeric_limits<T>;
// std::integral_constant carrying the bool value B.
template <bool B>
  using BoolConstant = std::integral_constant<bool, B>;
// std::integral_constant carrying the size_t value X.
template <size_t X>
  using SizeConstant = std::integral_constant<size_t, X>;
278 template <
size_t I,
typename T,
typename... Ts>
280 pack_simd_subscript(
const T& x0,
const Ts&... xs)
282 if constexpr (I >= T::size())
283 return pack_simd_subscript<I - T::size()>(xs...);
289 struct is_vectorizable : std::is_arithmetic<T>
293 struct is_vectorizable<bool> : std::false_type
297 inline constexpr bool is_vectorizable_v = is_vectorizable<T>::value;
299 template <
class T,
typename =
void>
300 struct only_vectorizable
302 only_vectorizable() =
delete;
303 only_vectorizable(
const only_vectorizable&) =
delete;
304 only_vectorizable(only_vectorizable&&) =
delete;
305 ~only_vectorizable() =
delete;
309 struct only_vectorizable<T, std::enable_if_t<is_vectorizable_v<T>>>
314 template <
typename T,
typename = std::enable_if_t<is_vectorizable_v<T>>>
315 using Vectorizable = T;
// Alias for T that is well-formed only if T is a floating-point type;
// otherwise substitution fails.
template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
  using FloatingPoint = T;
322 template <
typename T,
typename = std::enable_if_t<std::conjunction_v<std::is_
integral<T>,
324 using SignedIntegral = T;
// Primary template. The two defaulted bool parameters record
// sizeof(T) > sizeof(U) and sizeof(T) == sizeof(U); the partial
// specializations below dispatch on them.
template <typename T, typename U, bool = (sizeof(T) > sizeof(U)),
          bool = (sizeof(T) == sizeof(U))>
  struct is_higher_integer_rank;

// T and U are the same type.
template <typename T>
  struct is_higher_integer_rank<T, T, false, true>
  : public std::true_type
  {};

// T is larger than U.
template <typename T, typename U>
  struct is_higher_integer_rank<T, U, true, false>
  : public std::true_type
  {};

// T is smaller than U.
template <typename T, typename U>
  struct is_higher_integer_rank<T, U, false, false>
  : public std::false_type
  {};

// Equal size but distinct types: true if the type of `T + U` is T.
template <typename T, typename U>
  struct is_higher_integer_rank<T, U, false, true>
  : public std::is_same<decltype(std::declval<T>() + std::declval<U>()), T>
  {};
353 template <
typename From,
typename To,
bool = std::is_arithmetic_v<From>,
354 bool = std::is_arithmetic_v<To>>
355 struct is_value_preserving;
359 template <
typename From,
typename To>
360 struct is_value_preserving<From, To, true, true>
361 :
public BoolConstant<L<From>::digits <= L<To>::digits
362 && L<From>::max() <= L<To>::max()
363 && L<From>::lowest() >= L<To>::lowest()
364 && !(std::is_signed_v<From> && std::is_unsigned_v<To>)> {};
366 template <typename T>
367 struct is_value_preserving<T, bool, true, true>
368 : public std::false_type {};
371 struct is_value_preserving<bool, bool, true, true>
372 : public std::true_type {};
374 template <typename T>
375 struct is_value_preserving<T, T, true, true>
376 : public std::true_type {};
378 template <typename From, typename To>
379 struct is_value_preserving<From, To, false, true>
380 : public std::is_convertible<From, To> {};
382 template <typename From, typename To,
383 typename = std::enable_if_t<is_value_preserving<remove_cvref_t<From>, To>::value>>
384 using ValuePreserving = From;
386 template <typename From, typename To,
387 typename DecayedFrom = remove_cvref_t<From>,
388 typename = std::enable_if_t<std::conjunction<
389 std::is_convertible<From, To>,
391 std::is_same<DecayedFrom, To>,
392 std::is_same<DecayedFrom, int>,
393 std::conjunction<std::is_same<DecayedFrom, unsigned>,
394 std::is_unsigned<To>>,
395 is_value_preserving<DecayedFrom, To>>>::value>>
396 using ValuePreservingOrInt = From;
399 template <typename Ptr, typename ValueType>
400 struct is_possible_loadstore_conversion
401 : std::conjunction<is_vectorizable<Ptr>, is_vectorizable<ValueType>>
405 struct is_possible_loadstore_conversion<bool, bool> : std::true_type {};
408 template <typename Ptr, typename ValueType,
409 typename = std::enable_if_t<
410 is_possible_loadstore_conversion<Ptr, ValueType>::value>>
411 using LoadStorePtr = Ptr;
// NOTE(review): presumably the largest N supported for fixed_size<N>;
// confirm against the users of this constant.
inline constexpr int max_fixed_size = 32;
428 std::conditional_t<(sizeof(T) > 8),
433#elif defined __AVX2__
436 std::is_floating_point_v<T> ? 32 : 16
445 using compatible = std::conditional_t<(sizeof(T) > 8),
447 fixed_size<16 / sizeof(T)>>;
449 template <typename T, size_t N, typename...>
451 {
using type = std::conditional_t<N == 1, scalar, fixed_size<int(N)>>; };
453 template <
typename T,
size_t N,
typename... Abis>
454 using deduce_t =
typename deduce<T, N, Abis...>::type;
458 struct element_aligned_tag
461 struct vector_aligned_tag
465 struct overaligned_tag
468 inline constexpr element_aligned_tag element_aligned{};
470 inline constexpr vector_aligned_tag vector_aligned{};
473 inline constexpr overaligned_tag<N> overaligned{};
476 template <
class T,
class A = simd_abi::compatible<T>>
480 simd(
const simd&) =
delete;
484 template <
class T,
class A = simd_abi::compatible<T>>
487 simd_mask() =
delete;
488 simd_mask(
const simd_mask&) =
delete;
489 ~simd_mask() =
delete;
494 using native_simd = simd<T, simd_abi::native<T>>;
497 using native_simd_mask = simd_mask<T, simd_abi::native<T>>;
499 template <
class T,
int N>
500 using fixed_size_simd = simd<T, simd_abi::fixed_size<N>>;
502 template <
class T,
int N>
503 using fixed_size_simd_mask = simd_mask<T, simd_abi::fixed_size<N>>;
507 struct is_abi_tag : std::false_type
511 inline constexpr bool is_abi_tag_v = is_abi_tag<T>::value;
514 struct is_abi_tag<simd_abi::scalar> : std::true_type
518 struct is_abi_tag<simd_abi::fixed_size<N>> : std::true_type
522 struct is_simd : std::false_type
526 inline constexpr bool is_simd_v = is_simd<T>::value;
528 template <
class T,
class A>
529 struct is_simd<
simd<T, A>>
530 : std::conjunction<detail::is_vectorizable<T>, is_abi_tag<A>>
534 struct is_simd_mask : std::false_type
538 inline constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
540 template <
class T,
class A>
541 struct is_simd_mask<simd_mask<T, A>>
542 : std::conjunction<detail::is_vectorizable<T>, is_abi_tag<A>>
546 struct is_simd_flag_type : std::false_type
550 inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;
552 template <
class T,
class A = simd_abi::compatible<T>>
555 template <
class T,
class A = simd_abi::compatible<T>>
556 inline constexpr size_t simd_size_v = simd_size<T, A>::value;
559 struct simd_size<detail::Vectorizable<T>, simd_abi::scalar>
560 : std::integral_constant<size_t, 1>
563 template <
class T,
int N>
564 struct simd_size<detail::Vectorizable<T>, simd_abi::fixed_size<N>>
565 : std::integral_constant<size_t, N>
// Primary template (declaration only); U defaults to T::value_type.
template <class T, class U = typename T::value_type>
  struct memory_alignment;
571 template <
class T,
class U =
typename T::value_type>
572 inline constexpr size_t memory_alignment_v = memory_alignment<T, U>::value;
574 template <
class T,
class A,
class U>
575 struct memory_alignment<
simd<T, A>, detail::Vectorizable<U>>
576 : std::integral_constant<size_t, alignof(U)>
579 template <
class T,
class A>
580 struct memory_alignment<simd_mask<T, A>, bool>
581 : std::integral_constant<size_t, alignof(bool)>
584 template <
class T,
class V,
585 class =
typename std::conjunction<detail::is_vectorizable<T>,
586 std::disjunction<is_simd<V>, is_simd_mask<V>>>::type>
589 template <
class T,
class V>
590 using rebind_simd_t =
typename rebind_simd<T, V>::type;
592 template <
class T,
class U,
class A>
593 struct rebind_simd<T,
simd<U, A>, std::true_type>
594 {
using type = simd<T, A>; };
596 template <
class T,
class U,
class A>
597 struct rebind_simd<T, simd_mask<U, A>, std::true_type>
598 {
using type = simd_mask<T, A>; };
600 template <
int N,
class V,
601 class =
typename std::conjunction<
602 detail::BoolConstant<(N > 0)>,
603 std::disjunction<is_simd<V>, is_simd_mask<V>>
607 template <
int N,
class V>
608 using resize_simd_t =
typename resize_simd<N, V>::type;
610 template <
int N,
class T,
class A>
611 struct resize_simd<N, simd<T, A>, std::true_type>
613 using type = simd<T, std::conditional_t<N == 1, simd_abi::scalar, simd_abi::fixed_size<N>>>;
616 template <
int N,
class T,
class A>
617 struct resize_simd<N, simd_mask<T, A>, std::true_type>
619 using type = simd_mask<T, std::conditional_t<
620 N == 1, simd_abi::scalar, simd_abi::fixed_size<N>>>;
625 class simd_mask<detail::Vectorizable<T>, simd_abi::scalar>
626 :
public detail::only_vectorizable<T>
631 using value_type = bool;
632 using reference =
bool&;
633 using abi_type = simd_abi::scalar;
634 using simd_type = simd<T, abi_type>;
636 static constexpr size_t size() noexcept
639 constexpr simd_mask() =
default;
640 constexpr simd_mask(
const simd_mask&) =
default;
641 constexpr simd_mask(simd_mask&&) noexcept = default;
642 constexpr simd_mask& operator=(const simd_mask&) = default;
643 constexpr simd_mask& operator=(simd_mask&&) noexcept = default;
650 template <
typename F>
652 simd_mask(F&& gen, std::enable_if_t<
653 std::is_same_v<
decltype(std::declval<F>()(detail::SizeConstant<0>())),
654 value_type>>* =
nullptr)
655 : data(gen(detail::SizeConstant<0>()))
659 template <
typename Flags>
661 simd_mask(
const value_type* mem, Flags)
665 template <
typename Flags>
667 simd_mask(
const value_type* mem, simd_mask k, Flags)
668 : data(k ? mem[0] : false)
672 template <
typename Flags>
674 copy_from(
const value_type* mem, Flags)
678 template <
typename Flags>
680 copy_to(value_type* mem, Flags)
const
687 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
693 operator[](
size_t i)
const
695 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
703 {
return simd_mask(not data); }
706 friend constexpr simd_mask
707 operator&&(
const simd_mask& x,
const simd_mask& y)
708 {
return simd_mask(x.data && y.data); }
710 friend constexpr simd_mask
711 operator||(
const simd_mask& x,
const simd_mask& y)
712 {
return simd_mask(x.data || y.data); }
714 friend constexpr simd_mask
715 operator&(
const simd_mask& x,
const simd_mask& y)
716 {
return simd_mask(x.data & y.data); }
718 friend constexpr simd_mask
719 operator|(
const simd_mask& x,
const simd_mask& y)
720 {
return simd_mask(x.data | y.data); }
722 friend constexpr simd_mask
723 operator^(
const simd_mask& x,
const simd_mask& y)
724 {
return simd_mask(x.data ^ y.data); }
726 friend constexpr simd_mask&
727 operator&=(simd_mask& x,
const simd_mask& y)
733 friend constexpr simd_mask&
734 operator|=(simd_mask& x,
const simd_mask& y)
740 friend constexpr simd_mask&
741 operator^=(simd_mask& x,
const simd_mask& y)
748 friend constexpr simd_mask
749 operator==(
const simd_mask& x,
const simd_mask& y)
750 {
return simd_mask(x.data == y.data); }
752 friend constexpr simd_mask
753 operator!=(
const simd_mask& x,
const simd_mask& y)
754 {
return simd_mask(x.data != y.data); }
758 template <
class T,
int N>
759 class simd_mask<detail::Vectorizable<T>, simd_abi::fixed_size<N>>
760 :
public detail::only_vectorizable<T>
763 template <
typename V,
int M,
size_t Parts>
765 std::enable_if_t<M == Parts * V::size() && is_simd_mask_v<V>, std::array<V, Parts>>
766 split(
const simd_mask<
typename V::simd_type::value_type, simd_abi::fixed_size<M>>&);
770 template <
typename F,
size_t... Is>
772 simd_mask(std::index_sequence<Is...>, F&& init)
773 : data {init(detail::SizeConstant<Is>())...}
777 using value_type = bool;
778 using reference =
bool&;
779 using abi_type = simd_abi::fixed_size<N>;
780 using simd_type = simd<T, abi_type>;
782 static constexpr size_t size() noexcept
785 constexpr simd_mask() =
default;
786 constexpr simd_mask(
const simd_mask&) =
default;
787 constexpr simd_mask(simd_mask&&) noexcept = default;
788 constexpr simd_mask& operator=(const simd_mask&) = default;
789 constexpr simd_mask& operator=(simd_mask&&) noexcept = default;
794 : simd_mask(std::make_index_sequence<N>(), [x](
size_t) {
return x; })
797 template <
typename F>
799 simd_mask(F&& gen, std::enable_if_t<
800 std::is_same_v<
decltype(std::declval<F>()(detail::SizeConstant<0>())),
801 value_type>>* =
nullptr)
802 : simd_mask(std::make_index_sequence<N>(), std::forward<F>(gen))
806 template <
typename U>
808 simd_mask(
const simd_mask<U, abi_type>& x)
809 : simd_mask(std::make_index_sequence<N>(), [&x](size_t i) {
return x[i]; })
813 template <
typename Flags>
814 simd_mask(
const value_type* mem, Flags)
815 : simd_mask(std::make_index_sequence<N>(), [mem](size_t i) {
return mem[i]; })
818 template <
typename Flags>
819 simd_mask(
const value_type* mem,
const simd_mask& k, Flags)
820 : simd_mask(std::make_index_sequence<N>(),
821 [mem, &k](size_t i) {
return k[i] ? mem[i] :
false; })
825 template <
typename Flags>
827 copy_from(
const value_type* mem, Flags)
828 { std::memcpy(data, mem, N *
sizeof(
bool)); }
831 template <
typename Flags>
833 copy_to(value_type* mem, Flags)
const
834 { std::memcpy(mem, data, N *
sizeof(
bool)); }
840 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
846 operator[](
size_t i)
const
848 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
858 for (
int i = 0; i < N; ++i)
859 r.data[i] = !data[i];
864 friend constexpr simd_mask
865 operator&&(
const simd_mask& x,
const simd_mask& y)
868 for (
int i = 0; i < N; ++i)
869 r.data[i] = x.data[i] & y.data[i];
873 friend constexpr simd_mask
874 operator||(
const simd_mask& x,
const simd_mask& y)
877 for (
int i = 0; i < N; ++i)
878 r.data[i] = x.data[i] | y.data[i];
882 friend constexpr simd_mask
883 operator&(
const simd_mask& x,
const simd_mask& y)
886 for (
int i = 0; i < N; ++i)
887 r.data[i] = x.data[i] & y.data[i];
891 friend constexpr simd_mask
892 operator|(
const simd_mask& x,
const simd_mask& y)
895 for (
int i = 0; i < N; ++i)
896 r.data[i] = x.data[i] | y.data[i];
900 friend constexpr simd_mask
901 operator^(
const simd_mask& x,
const simd_mask& y)
904 for (
int i = 0; i < N; ++i)
905 r.data[i] = x.data[i] ^ y.data[i];
909 friend constexpr simd_mask&
910 operator&=(simd_mask& x,
const simd_mask& y)
912 for (
int i = 0; i < N; ++i)
913 x.data[i] &= y.data[i];
917 friend constexpr simd_mask&
918 operator|=(simd_mask& x,
const simd_mask& y)
920 for (
int i = 0; i < N; ++i)
921 x.data[i] |= y.data[i];
925 friend constexpr simd_mask&
926 operator^=(simd_mask& x,
const simd_mask& y)
928 for (
int i = 0; i < N; ++i)
929 x.data[i] ^= y.data[i];
934 friend constexpr simd_mask
935 operator==(
const simd_mask& x,
const simd_mask& y)
938 for (
int i = 0; i < N; ++i)
939 r.data[i] = x.data[i] == y.data[i];
943 friend constexpr simd_mask
944 operator!=(
const simd_mask& x,
const simd_mask& y)
947 for (
int i = 0; i < N; ++i)
948 r.data[i] = x.data[i] != y.data[i];
954 template <
typename T>
956 all_of(simd_mask<T, simd_abi::scalar> k)
noexcept
959 template <
typename T>
961 any_of(simd_mask<T, simd_abi::scalar> k)
noexcept
964 template <
typename T>
966 none_of(simd_mask<T, simd_abi::scalar> k)
noexcept
969 template <
typename T>
971 some_of(simd_mask<T, simd_abi::scalar>)
noexcept
974 template <
typename T>
976 popcount(simd_mask<T, simd_abi::scalar> k)
noexcept
977 {
return static_cast<int>(k[0]); }
979 template <
typename T>
981 find_first_set(simd_mask<T, simd_abi::scalar> k)
noexcept
983 vir_simd_precondition(k[0],
"find_first_set(empty mask) is UB");
987 template <
typename T>
989 find_last_set(simd_mask<T, simd_abi::scalar> k)
noexcept
991 vir_simd_precondition(k[0],
"find_last_set(empty mask) is UB");
995 template <
typename T,
int N>
997 all_of(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
999 for (
int i = 0; i < N; ++i)
1007 template <
typename T,
int N>
1009 any_of(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1011 for (
int i = 0; i < N; ++i)
1019 template <
typename T,
int N>
1021 none_of(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1023 for (
int i = 0; i < N; ++i)
1031 template <
typename T,
int N>
1033 some_of(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1036 for (
int i = 1; i < N; ++i)
1044 template <
typename T,
int N>
1046 popcount(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1049 for (
int i = 1; i < N; ++i)
1054 template <
typename T,
int N>
1056 find_first_set(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1058 vir_simd_precondition(any_of(k),
"find_first_set(empty mask) is UB");
1059 for (
int i = 0; i < N; ++i)
1064 vir::detail::unreachable();
1067 template <
typename T,
int N>
1069 find_last_set(
const simd_mask<T, simd_abi::fixed_size<N>>& k)
noexcept
1071 vir_simd_precondition(any_of(k),
"find_last_set(empty mask) is UB");
1072 for (
int i = N - 1; i >= 0; --i)
1077 vir::detail::unreachable();
1081 all_of(detail::ExactBool x)
noexcept
1085 any_of(detail::ExactBool x)
noexcept
1089 none_of(detail::ExactBool x)
noexcept
1093 some_of(detail::ExactBool)
noexcept
1097 popcount(detail::ExactBool x)
noexcept
1101 find_first_set(detail::ExactBool)
1105 find_last_set(detail::ExactBool)
1109 template <
class T,
bool = std::is_
integral_v<T>>
1110 class scalar_simd_int_base
1114 class scalar_simd_int_base<T, true>
1116 using Derived = simd<T, simd_abi::scalar>;
1120 {
return static_cast<Derived*
>(
this)->data; }
1124 {
return static_cast<const Derived*
>(
this)->data; }
1127 friend constexpr Derived&
1128 operator%=(Derived& lhs, Derived x)
1134 friend constexpr Derived&
1135 operator&=(Derived& lhs, Derived x)
1141 friend constexpr Derived&
1142 operator|=(Derived& lhs, Derived x)
1148 friend constexpr Derived&
1149 operator^=(Derived& lhs, Derived x)
1155 friend constexpr Derived&
1156 operator<<=(Derived& lhs, Derived x)
1162 friend constexpr Derived&
1163 operator>>=(Derived& lhs, Derived x)
1169 friend constexpr Derived
1170 operator%(Derived x, Derived y)
1176 friend constexpr Derived
1177 operator&(Derived x, Derived y)
1183 friend constexpr Derived
1184 operator|(Derived x, Derived y)
1190 friend constexpr Derived
1191 operator^(Derived x, Derived y)
1197 friend constexpr Derived
1198 operator<<(Derived x, Derived y)
1204 friend constexpr Derived
1205 operator>>(Derived x, Derived y)
1211 friend constexpr Derived
1212 operator<<(Derived x,
int y)
1218 friend constexpr Derived
1219 operator>>(Derived x,
int y)
1227 {
return Derived(
static_cast<T
>(~d())); }
1232 class simd<T, simd_abi::scalar>
1233 :
public scalar_simd_int_base<T>,
public detail::only_vectorizable<T>
1235 friend class scalar_simd_int_base<T>;
1243 friend constexpr const T&
1244 _data_(
const simd& x)
1248 using value_type = T;
1249 using reference = T&;
1250 using abi_type = simd_abi::scalar;
1251 using mask_type = simd_mask<T, abi_type>;
1253 static constexpr size_t size() noexcept
1256 constexpr simd() =
default;
1257 constexpr simd(
const simd&) =
default;
1258 constexpr simd(simd&&) noexcept = default;
1259 constexpr simd& operator=(const simd&) = default;
1260 constexpr simd& operator=(simd&&) noexcept = default;
1263 template <typename U>
1265 simd(detail::ValuePreservingOrInt<U, value_type>&& value) noexcept
1270 template <
typename F>
1272 simd(F&& gen, detail::ValuePreservingOrInt<
1273 decltype(std::declval<F>()(std::declval<detail::SizeConstant<0>&>())),
1274 value_type>* =
nullptr)
1275 : data(gen(detail::SizeConstant<0>()))
1279 template <
typename U,
typename Flags>
1281 simd(
const U* mem, Flags)
1286 template <
typename U,
typename Flags>
1288 copy_from(
const detail::Vectorizable<U>* mem, Flags)
1292 template <
typename U,
typename Flags>
1294 copy_to(detail::Vectorizable<U>* mem, Flags)
const
1299 operator[](
size_t i)
1301 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
1306 constexpr value_type
1307 operator[](
size_t i)
const
1309 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
1348 {
return mask_type(not data); }
1359 constexpr friend simd&
1360 operator+=(simd& lhs,
const simd& x)
1361 {
return lhs = lhs + x; }
1363 constexpr friend simd&
1364 operator-=(simd& lhs,
const simd& x)
1365 {
return lhs = lhs - x; }
1367 constexpr friend simd&
1368 operator*=(simd& lhs,
const simd& x)
1369 {
return lhs = lhs * x; }
1371 constexpr friend simd&
1372 operator/=(simd& lhs,
const simd& x)
1373 {
return lhs = lhs / x; }
1376 constexpr friend simd
1377 operator+(
const simd& x,
const simd& y)
1378 {
simd r = x; r.data += y.data;
return r; }
1380 constexpr friend simd
1381 operator-(
const simd& x,
const simd& y)
1382 {
simd r = x; r.data -= y.data;
return r; }
1384 constexpr friend simd
1385 operator*(
const simd& x,
const simd& y)
1386 {
simd r = x; r.data *= y.data;
return r; }
1388 constexpr friend simd
1389 operator/(
const simd& x,
const simd& y)
1390 {
simd r = x; r.data /= y.data;
return r; }
1393 constexpr friend mask_type
1394 operator==(
const simd& x,
const simd& y)
1395 {
return mask_type(x.data == y.data); }
1397 constexpr friend mask_type
1398 operator!=(
const simd& x,
const simd& y)
1399 {
return mask_type(x.data != y.data); }
1401 constexpr friend mask_type
1402 operator<(
const simd& x,
const simd& y)
1403 {
return mask_type(x.data < y.data); }
1405 constexpr friend mask_type
1406 operator<=(
const simd& x,
const simd& y)
1407 {
return mask_type(x.data <= y.data); }
1409 constexpr friend mask_type
1410 operator>(
const simd& x,
const simd& y)
1411 {
return mask_type(x.data > y.data); }
1413 constexpr friend mask_type
1414 operator>=(
const simd& x,
const simd& y)
1415 {
return mask_type(x.data >= y.data); }
1419 template <
class T,
int N,
bool = std::is_
integral_v<T>>
1420 class fixed_simd_int_base
1423 template <
class T,
int N>
1424 class fixed_simd_int_base<T, N, true>
1426 using Derived = simd<T, simd_abi::fixed_size<N>>;
1430 {
return static_cast<Derived*
>(
this)->data[i]; }
1433 d(
int i)
const noexcept
1434 {
return static_cast<const Derived*
>(
this)->data[i]; }
1437 friend constexpr Derived&
1438 operator%=(Derived& lhs,
const Derived& x)
1440 for (
int i = 0; i < N; ++i)
1445 friend constexpr Derived&
1446 operator&=(Derived& lhs,
const Derived& x)
1448 for (
int i = 0; i < N; ++i)
1453 friend constexpr Derived&
1454 operator|=(Derived& lhs,
const Derived& x)
1456 for (
int i = 0; i < N; ++i)
1461 friend constexpr Derived&
1462 operator^=(Derived& lhs,
const Derived& x)
1464 for (
int i = 0; i < N; ++i)
1469 friend constexpr Derived&
1470 operator<<=(Derived& lhs,
const Derived& x)
1472 for (
int i = 0; i < N; ++i)
1473 lhs.d(i) <<= x.d(i);
1477 friend constexpr Derived&
1478 operator>>=(Derived& lhs,
const Derived& x)
1480 for (
int i = 0; i < N; ++i)
1481 lhs.d(i) >>= x.d(i);
1485 friend constexpr Derived
1486 operator%(
const Derived& x,
const Derived& y)
1487 {
return Derived([&](
size_t i) -> T {
return x[i] % y[i]; }); }
1489 friend constexpr Derived
1490 operator&(
const Derived& x,
const Derived& y)
1491 {
return Derived([&](
size_t i) -> T {
return x[i] & y[i]; }); }
1493 friend constexpr Derived
1494 operator|(
const Derived& x,
const Derived& y)
1495 {
return Derived([&](
size_t i) -> T {
return x[i] | y[i]; }); }
1497 friend constexpr Derived
1498 operator^(
const Derived& x,
const Derived& y)
1499 {
return Derived([&](
size_t i) -> T {
return x[i] ^ y[i]; }); }
1501 friend constexpr Derived
1502 operator<<(
const Derived& x,
const Derived& y)
1503 {
return Derived([&](
size_t i) -> T {
return x[i] << y[i]; }); }
1505 friend constexpr Derived
1506 operator>>(
const Derived& x,
const Derived& y)
1507 {
return Derived([&](
size_t i) -> T {
return x[i] >> y[i]; }); }
1509 friend constexpr Derived
1510 operator<<(
const Derived& x,
int y)
1511 {
return Derived([&](
size_t i) -> T {
return x[i] << y; }); }
1513 friend constexpr Derived
1514 operator>>(
const Derived& x,
int y)
1515 {
return Derived([&](
size_t i) -> T {
return x[i] >> y; }); }
1519 {
return Derived([&](
size_t i) -> T {
return ~d(i); }); }
1523 template <
class T,
int N>
1524 class simd<T, simd_abi::fixed_size<N>>
1525 :
public fixed_simd_int_base<T, N>,
public detail::only_vectorizable<T>
1528 friend class fixed_simd_int_base<T, N>;
1530 template <
typename V,
int M,
size_t Parts>
1532 std::enable_if_t<M == Parts * V::size() && is_simd_v<V>, std::array<V, Parts>>
1533 split(
const simd<
typename V::value_type, simd_abi::fixed_size<M>>&);
1535 template <
size_t... Sizes,
typename U>
1537 std::tuple<
simd<U, simd_abi::deduce_t<U, int(Sizes)>>...>
1538 split(
const simd<U, simd_abi::fixed_size<
int((Sizes + ...))>>&);
1542 using _data_type_ = T[N];
1544 friend constexpr _data_type_&
1548 friend constexpr const _data_type_&
1549 _data_(
const simd& x)
1552 template <
typename F,
size_t... Is>
1554 simd(std::index_sequence<Is...>, F&& init)
1555 : data {static_cast<value_type>(init(detail::SizeConstant<Is>()))...}
1559 using value_type = T;
1560 using reference = T&;
1561 using abi_type = simd_abi::fixed_size<N>;
1562 using mask_type = simd_mask<T, abi_type>;
1564 static constexpr size_t size() noexcept
1567 constexpr simd() =
default;
1568 constexpr simd(
const simd&) =
default;
1569 constexpr simd(simd&&) noexcept = default;
1570 constexpr simd& operator=(const simd&) = default;
1571 constexpr simd& operator=(simd&&) noexcept = default;
1574 template <typename U>
1576 simd(detail::ValuePreservingOrInt<U, value_type>&& value) noexcept
1577 : simd(std::make_index_sequence<N>(),
1578 [v = static_cast<value_type>(value)](
size_t) {
return v; })
1582 template <
typename U,
1583 typename = std::enable_if_t<
1584 std::conjunction_v<detail::is_value_preserving<U, value_type>,
1585 detail::is_higher_integer_rank<value_type, U>>>>
1587 simd(
const simd<U, abi_type>& x)
1588 :
simd(std::make_index_sequence<N>(),
1589 [&x](size_t i) {
return static_cast<value_type
>(x[i]); })
1593 template <
typename F>
1595 simd(F&& gen, detail::ValuePreservingOrInt<
1596 decltype(std::declval<F>()(std::declval<detail::SizeConstant<0>&>())),
1597 value_type>* =
nullptr)
1598 :
simd(std::make_index_sequence<N>(), std::forward<F>(gen))
1602 template <
typename U,
typename Flags>
1604 simd(
const U* mem, Flags)
1605 :
simd(std::make_index_sequence<N>(), [mem](size_t i) -> value_type { return mem[i]; })
1609 template <
typename U,
typename Flags>
1611 copy_from(
const detail::Vectorizable<U>* mem, Flags)
1613 for (
int i = 0; i < N; ++i)
1618 template <
typename U,
typename Flags>
1620 copy_to(detail::Vectorizable<U>* mem, Flags)
const
1622 for (
int i = 0; i < N; ++i)
1628 operator[](
size_t i)
1630 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
1635 constexpr value_type
1636 operator[](
size_t i)
const
1638 vir_simd_precondition_vaargs(i < size(),
"Subscript %zu is out of range [0, %zu]",
1647 for (
int i = 0; i < N; ++i)
1656 for (
int i = 0; i < N; ++i)
1664 for (
int i = 0; i < N; ++i)
1673 for (
int i = 0; i < N; ++i)
1681 {
return mask_type([&](
size_t i) {
return !data[i]; }); }
1689 {
return simd([&](
size_t i) -> value_type {
return -data[i]; }); }
1692 constexpr friend simd&
1693 operator+=(simd& lhs,
const simd& x)
1695 for (
int i = 0; i < N; ++i)
1696 lhs.data[i] += x.data[i];
1700 constexpr friend simd&
1701 operator-=(simd& lhs,
const simd& x)
1703 for (
int i = 0; i < N; ++i)
1704 lhs.data[i] -= x.data[i];
1708 constexpr friend simd&
1709 operator*=(simd& lhs,
const simd& x)
1711 for (
int i = 0; i < N; ++i)
1712 lhs.data[i] *= x.data[i];
1716 constexpr friend simd&
1717 operator/=(simd& lhs,
const simd& x)
1719 for (
int i = 0; i < N; ++i)
1720 lhs.data[i] /= x.data[i];
1725 constexpr friend simd
1726 operator+(
const simd& x,
const simd& y)
1727 {
return simd([&](
size_t i) {
return x.data[i] + y.data[i]; }); }
1729 constexpr friend simd
1730 operator-(
const simd& x,
const simd& y)
1731 {
return simd([&](
size_t i) {
return x.data[i] - y.data[i]; }); }
1733 constexpr friend simd
1734 operator*(
const simd& x,
const simd& y)
1735 {
return simd([&](
size_t i) {
return x.data[i] * y.data[i]; }); }
1737 constexpr friend simd
1738 operator/(
const simd& x,
const simd& y)
1739 {
return simd([&](
size_t i) {
return x.data[i] / y.data[i]; }); }
1742 constexpr friend mask_type
1743 operator==(
const simd& x,
const simd& y)
1744 {
return mask_type([&](
size_t i) {
return x.data[i] == y.data[i]; }); }
1746 constexpr friend mask_type
1747 operator!=(
const simd& x,
const simd& y)
1748 {
return mask_type([&](
size_t i) {
return x.data[i] != y.data[i]; }); }
1750 constexpr friend mask_type
1751 operator<(
const simd& x,
const simd& y)
1752 {
return mask_type([&](
size_t i) {
return x.data[i] < y.data[i]; }); }
1754 constexpr friend mask_type
1755 operator<=(
const simd& x,
const simd& y)
1756 {
return mask_type([&](
size_t i) {
return x.data[i] <= y.data[i]; }); }
1758 constexpr friend mask_type
1759 operator>(
const simd& x,
const simd& y)
1760 {
return mask_type([&](
size_t i) {
return x.data[i] > y.data[i]; }); }
1762 constexpr friend mask_type
1763 operator>=(
const simd& x,
const simd& y)
1764 {
return mask_type([&](
size_t i) {
return x.data[i] >= y.data[i]; }); }
1769 template <
typename T,
typename U,
typename A,
1770 typename = std::enable_if_t<detail::is_vectorizable_v<T>>>
1771 constexpr simd<T, A>
1772 static_simd_cast(
const simd<U, A>& x)
1773 {
return simd<T, A>([&x](
size_t i) {
return static_cast<T
>(x[i]); }); }
1775 template <
typename V,
typename U,
typename A,
1776 typename = std::enable_if_t<is_simd_v<V>>>
1778 static_simd_cast(
const simd<U, A>& x)
1779 {
return V([&x](
size_t i) {
return static_cast<typename V::value_type
>(x[i]); }); }
1781 template <
typename T,
typename U,
typename A,
1782 typename = std::enable_if_t<detail::is_vectorizable_v<T>>>
1783 constexpr simd_mask<T, A>
1784 static_simd_cast(
const simd_mask<U, A>& x)
1785 {
return simd_mask<T, A>([&x](
size_t i) {
return x[i]; }); }
1787 template <
typename M,
typename U,
typename A,
1788 typename = std::enable_if_t<M::size() == simd_size_v<U, A>>>
1790 static_simd_cast(
const simd_mask<U, A>& x)
1791 {
return M([&x](
size_t i) {
return x[i]; }); }
1794 template <
typename T,
typename U,
typename A,
1795 typename To = detail::value_type_or_identity_t<T>>
1797 simd_cast(
const simd<detail::ValuePreserving<U, To>, A>& x)
1798 ->
decltype(static_simd_cast<T>(x))
1799 {
return static_simd_cast<T>(x); }
1802 template <
typename T,
int N>
1803 constexpr fixed_size_simd<T, N>
1804 to_fixed_size(
const fixed_size_simd<T, N>& x)
1807 template <
typename T,
int N>
1808 constexpr fixed_size_simd_mask<T, N>
1809 to_fixed_size(
const fixed_size_simd_mask<T, N>& x)
1812 template <
typename T>
1813 constexpr fixed_size_simd<T, 1>
1814 to_fixed_size(
const simd<T> x)
1817 template <
typename T>
1818 constexpr fixed_size_simd_mask<T, 1>
1819 to_fixed_size(
const simd_mask<T> x)
1820 {
return fixed_size_simd_mask<T, 1>(x[0]); }
1823 template <
typename T>
1825 to_native(
const fixed_size_simd<T, 1> x)
1828 template <
typename T>
1829 constexpr simd_mask<T>
1830 to_native(
const fixed_size_simd_mask<T, 1> x)
1831 {
return simd_mask<T>(x[0]); }
1834 template <
typename T>
1836 to_compatible(
const fixed_size_simd<T, 1> x)
1839 template <
typename T>
1840 constexpr simd_mask<T>
1841 to_compatible(
const fixed_size_simd_mask<T, 1> x)
1842 {
return simd_mask<T>(x[0]); }
1845 template <
typename V,
int N,
size_t Parts = N / V::size()>
1847 std::enable_if_t<N == Parts * V::size() && is_simd_v<V>, std::array<V, Parts>>
1848 split(
const simd<
typename V::value_type, simd_abi::fixed_size<N>>& x)
1850 const auto* data = x.data;
1851 return [&]<
size_t... Is>(std::index_sequence<Is...>)
1852 -> std::array<V, Parts> {
1853 return {V(data + Is * V::size(), element_aligned)...};
1854 }(std::make_index_sequence<Parts>());
1858 template <
typename V,
int N,
size_t Parts = N / V::size()>
1860 std::enable_if_t<N == Parts * V::size() && is_simd_mask_v<V>, std::array<V, Parts>>
1861 split(
const simd_mask<
typename V::simd_type::value_type, simd_abi::fixed_size<N>>& x)
1863 const auto* data = x.data;
1864 return [&]<
size_t... Is>(std::index_sequence<Is...>)
1865 -> std::array<V, Parts> {
1866 return {V(data + Is * V::size(), element_aligned)...};
1867 }(std::make_index_sequence<Parts>());
1871 template <
size_t... Sizes,
typename T>
1873 std::tuple<
simd<T, simd_abi::deduce_t<T, int(Sizes)>>...>
1874 split(
const simd<T, simd_abi::fixed_size<
int((Sizes + ...))>>& x)
1876 using R = std::tuple<
simd<T, simd_abi::deduce_t<T, int(Sizes)>>...>;
1877 const auto* data = x.data;
1878 return [&]<
size_t... Is>(std::index_sequence<Is...>) -> R {
1879 constexpr size_t offsets[
sizeof...(Sizes)] = {
1880 []<
size_t... Js>(std::index_sequence<Js...>) {
1881 constexpr size_t sizes[
sizeof...(Sizes)] = {Sizes...};
1882 return (sizes[Js] + ... + 0);
1883 }(std::make_index_sequence<Is>())...
1885 return {
simd<T, simd_abi::deduce_t<T, int(Sizes)>>(data + offsets[Is],
1886 element_aligned)...};
1887 }(std::make_index_sequence<
sizeof...(Sizes)>());
1891 template <
typename V>
1893 std::enable_if_t<std::disjunction_v<is_simd<V>, is_simd_mask<V>>, std::array<V, 1>>
1898 template <
typename T,
typename... As>
1900 simd<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>
1901 concat(
const simd<T, As>&... xs)
1903 using R =
simd<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>;
1904 return R([&](
auto i) {
1905 return detail::pack_simd_subscript<i>(xs...);
1910 template <
typename T,
typename... As>
1912 simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>
1913 concat(
const simd_mask<T, As>&... xs)
1915 using R = simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, As> + ...)>>;
1916 return R([&](
auto i) ->
bool {
1917 return detail::pack_simd_subscript<i>(xs...);
1922 template <
typename T,
typename A,
size_t N>
1924 simd<T, simd_abi::deduce_t<T, N * simd_size_v<T, A>>>
1925 concat(
const std::array<simd<T, A>, N>& x)
1927 constexpr int K = simd_size_v<T, A>;
1928 using R = simd<T, simd_abi::deduce_t<T, N * K>>;
1929 return R([&](
size_t i) {
1930 return x[i / K][i % K];
1935 template <
typename T,
typename A,
size_t N>
1937 simd_mask<T, simd_abi::deduce_t<T, N * simd_size_v<T, A>>>
1938 concat(
const std::array<simd_mask<T, A>, N>& x)
1940 constexpr int K = simd_size_v<T, A>;
1941 using R = simd_mask<T, simd_abi::deduce_t<T, N * K>>;
1942 return R([&](
size_t i) ->
bool {
1943 return x[i / K][i % K];
1948 template <
typename M,
typename V>
1949 class const_where_expression
1951 static_assert(std::is_same_v<V, detail::remove_cvref_t<V>>);
1953 struct Wrapper {
using value_type = V; };
1957 typename std::conditional_t<std::is_arithmetic_v<V>, Wrapper, V>::value_type;
1960 get_mask(
const const_where_expression& x)
1964 get_lvalue(
const const_where_expression& x)
1965 {
return x.m_value; }
1971 const_where_expression(
const const_where_expression&) =
delete;
1972 const_where_expression& operator=(
const const_where_expression&) =
delete;
1974 constexpr const_where_expression(
const M& kk,
const V& dd)
1975 : m_k(kk), m_value(const_cast<V&>(dd)) {}
1978 operator-() const &&
1980 return V([&](
size_t i) {
1981 return m_k[i] ?
static_cast<value_type
>(-m_value[i]) : m_value[i];
1985 template <
typename Up,
typename Flags>
1986 [[nodiscard]]
constexpr V
1987 copy_from(
const detail::LoadStorePtr<Up, value_type>* mem, Flags)
const &&
1989 return V([&](
size_t i) {
1990 return m_k[i] ?
static_cast<value_type
>(mem[i]) : m_value[i];
1994 template <
typename Up,
typename Flags>
1996 copy_to(detail::LoadStorePtr<Up, value_type>* mem, Flags)
const &&
1998 for (
size_t i = 0; i < V::size(); ++i)
2001 mem[i] =
static_cast<Up
>(m_value[i]);
2007 template <
typename V>
2008 class const_where_expression<bool, V>
2012 static_assert(std::is_same_v<V, detail::remove_cvref_t<V>>);
2014 struct Wrapper {
using value_type = V; };
2018 typename std::conditional_t<std::is_arithmetic_v<V>, Wrapper, V>::value_type;
2021 get_mask(
const const_where_expression& x)
2025 get_lvalue(
const const_where_expression& x)
2026 {
return x.m_value; }
2032 const_where_expression(
const const_where_expression&) =
delete;
2033 const_where_expression& operator=(
const const_where_expression&) =
delete;
2035 constexpr const_where_expression(
const bool kk,
const V& dd)
2036 : m_k(kk), m_value(const_cast<V&>(dd)) {}
2039 operator-() const &&
2040 {
return m_k ? -m_value : m_value; }
2042 template <
typename Up,
typename Flags>
2043 [[nodiscard]]
constexpr V
2044 copy_from(
const detail::LoadStorePtr<Up, value_type>* mem, Flags)
const &&
2045 {
return m_k ?
static_cast<V
>(mem[0]) : m_value; }
2047 template <
typename Up,
typename Flags>
2049 copy_to(detail::LoadStorePtr<Up, value_type>* mem, Flags)
const &&
2057 template <
typename M,
typename V>
2058 class where_expression :
public const_where_expression<M, V>
2060 static_assert(not std::is_const_v<V>,
2061 "where_expression may only be instantiated with a non-const V parameter");
2063 using typename const_where_expression<M, V>::value_type;
2064 using const_where_expression<M, V>::m_k;
2065 using const_where_expression<M, V>::m_value;
2067 static_assert(std::is_same_v<typename M::abi_type, typename V::abi_type>);
2068 static_assert(M::size() == V::size());
2071 get_lvalue(where_expression& x)
2072 {
return x.m_value; }
2074 template <
typename Up>
2078 using UU = detail::remove_cvref_t<Up>;
2079 if constexpr (std::is_same_v<V, UU>)
2081 else if constexpr (std::is_convertible_v<Up&&, value_type>)
2082 return V(
static_cast<value_type
>(
static_cast<Up&&
>(x)));
2083 else if constexpr (std::is_convertible_v<Up&&, V>)
2084 return static_cast<V
>(
static_cast<Up&&
>(x));
2086 return static_simd_cast<V>(
static_cast<Up&&
>(x));
2090 where_expression(
const where_expression&) =
delete;
2091 where_expression& operator=(
const where_expression&) =
delete;
2093 constexpr where_expression(
const M& kk, V& dd)
2094 : const_where_expression<M, V>(kk, dd)
2097 template <
typename Up>
2099 operator=(Up&& x) &&
2101 const V& rhs = as_simd(x);
2102 for (
size_t i = 0; i < V::size(); ++i)
2105 m_value[i] = rhs[i];
2109#define SIMD_OP_(op) \
2110 template <typename Up> \
2112 operator op##=(Up&& x) && \
2114 const V& rhs = as_simd(x); \
2115 for (size_t i = 0; i < V::size(); ++i) \
2118 m_value[i] op##= rhs[i]; \
2134 constexpr void operator++() &&
2136 for (
size_t i = 0; i < V::size(); ++i)
2143 constexpr void operator++(
int) &&
2145 for (
size_t i = 0; i < V::size(); ++i)
2152 constexpr void operator--() &&
2154 for (
size_t i = 0; i < V::size(); ++i)
2161 constexpr void operator--(
int) &&
2163 for (
size_t i = 0; i < V::size(); ++i)
2171 template <
typename Up,
typename Flags>
2173 copy_from(
const detail::LoadStorePtr<Up, value_type>* mem, Flags) &&
2175 for (
size_t i = 0; i < V::size(); ++i)
2178 m_value[i] = mem[i];
2184 template <
typename V>
2185 class where_expression<bool, V> :
public const_where_expression<bool, V>
2188 using typename const_where_expression<M, V>::value_type;
2189 using const_where_expression<M, V>::m_k;
2190 using const_where_expression<M, V>::m_value;
2193 where_expression(
const where_expression&) =
delete;
2194 where_expression& operator=(
const where_expression&) =
delete;
2196 constexpr where_expression(
const M& kk, V& dd)
2197 : const_where_expression<M, V>(kk, dd) {}
2199#define SIMD_OP_(op) \
2200 template <typename Up> \
2201 constexpr void operator op(Up&& x) && \
2202 { if (m_k) m_value op static_cast<Up&&>(x); }
2217 constexpr void operator++() &&
2218 {
if (m_k) ++m_value; }
2220 constexpr void operator++(
int) &&
2221 {
if (m_k) ++m_value; }
2223 constexpr void operator--() &&
2224 {
if (m_k) --m_value; }
2226 constexpr void operator--(
int) &&
2227 {
if (m_k) --m_value; }
2230 template <
typename Up,
typename Flags>
2232 copy_from(
const detail::LoadStorePtr<Up, value_type>* mem, Flags) &&
2233 {
if (m_k) m_value = mem[0]; }
2237 template <
typename Tp,
typename Ap>
2238 constexpr where_expression<simd_mask<Tp, Ap>, simd<Tp, Ap>>
2239 where(
const typename simd<Tp, Ap>::mask_type& k, simd<Tp, Ap>& value)
2240 {
return {k, value}; }
2242 template <
typename Tp,
typename Ap>
2243 constexpr const_where_expression<simd_mask<Tp, Ap>, simd<Tp, Ap>>
2244 where(
const typename simd<Tp, Ap>::mask_type& k,
2245 const simd<Tp, Ap>& value)
2246 {
return {k, value}; }
2248 template <
typename Tp,
typename Ap>
2249 constexpr where_expression<simd_mask<Tp, Ap>, simd_mask<Tp, Ap>>
2250 where(
const std::remove_const_t<simd_mask<Tp, Ap>>& k,
2251 simd_mask<Tp, Ap>& value)
2252 {
return {k, value}; }
2254 template <
typename Tp,
typename Ap>
2255 constexpr const_where_expression<simd_mask<Tp, Ap>, simd_mask<Tp, Ap>>
2256 where(
const std::remove_const_t<simd_mask<Tp, Ap>>& k,
2257 const simd_mask<Tp, Ap>& value)
2258 {
return {k, value}; }
2260 template <
typename Tp>
2261 constexpr where_expression<bool, Tp>
2262 where(detail::ExactBool k, Tp& value)
2263 {
return {k, value}; }
2265 template <
typename Tp>
2266 constexpr const_where_expression<bool, Tp>
2267 where(detail::ExactBool k,
const Tp& value)
2268 {
return {k, value}; }
2270 template <
typename Tp,
typename Ap>
2272 where(
bool k, simd<Tp, Ap>& value) =
delete;
2274 template <
typename Tp,
typename Ap>
2276 where(
bool k,
const simd<Tp, Ap>& value) =
delete;
2279 template <
typename T,
typename A,
typename BinaryOperation = std::plus<>>
2281 reduce(
const simd<T, A>& v,
2282 BinaryOperation binary_op = BinaryOperation())
2284 constexpr int N = simd_size_v<T, A>;
2285 if constexpr (N > 3)
2287 constexpr int N2 = detail::bit_floor(N / 2);
2288 constexpr int NRem = N - 2 * N2;
2289 if constexpr (NRem > 0)
2291 const auto [l, r, rem] = split<N2, N2, N - 2 * N2>(v);
2292 return binary_op(
reduce(binary_op(l, r), binary_op),
reduce(rem, binary_op));
2296 const auto [l, r] = split<N2, N2>(v);
2297 return reduce(binary_op(l, r), binary_op);
2303 for (
size_t i = 1; i < simd_size_v<T, A>; ++i)
2304 r = binary_op(r, v[i]);
2309 template <
typename M,
typename V,
typename BinaryOperation = std::plus<>>
2310 constexpr typename V::value_type
2311 reduce(
const const_where_expression<M, V>& x,
2312 typename V::value_type identity_element,
2313 BinaryOperation binary_op)
2315 const M& k = get_mask(x);
2316 const V& v = get_lvalue(x);
2317 auto r = identity_element;
2318 if (any_of(k)) [[likely]]
2320 for (
size_t i = 0; i < V::size(); ++i)
2322 r = binary_op(r, v[i]);
2327 template <
typename M,
typename V>
2328 constexpr typename V::value_type
2330 {
return reduce(x, 0, binary_op); }
2332 template <
typename M,
typename V>
2333 constexpr typename V::value_type
2335 {
return reduce(x, 1, binary_op); }
2337 template <
typename M,
typename V>
2338 constexpr typename V::value_type
2339 reduce(
const const_where_expression<M, V>& x, std::bit_and<> binary_op)
2340 {
return reduce(x, ~
typename V::value_type(), binary_op); }
2342 template <
typename M,
typename V>
2343 constexpr typename V::value_type
2344 reduce(
const const_where_expression<M, V>& x, std::bit_or<> binary_op)
2345 {
return reduce(x, 0, binary_op); }
2347 template <
typename M,
typename V>
2348 constexpr typename V::value_type
2349 reduce(
const const_where_expression<M, V>& x, std::bit_xor<> binary_op)
2350 {
return reduce(x, 0, binary_op); }
2352 template <
typename T,
typename A>
2354 hmin(
const simd<T, A>& v)
noexcept
2356 return reduce(v, [](
const auto& l,
const auto& r) {
2362 template <
typename T,
typename A>
2364 hmax(
const simd<T, A>& v)
noexcept
2366 return reduce(v, [](
const auto& l,
const auto& r) {
2372 template <
typename M,
typename V>
2373 constexpr typename V::value_type
2374 hmin(
const const_where_expression<M, V>& x)
noexcept
2376 using T =
typename V::value_type;
2377 constexpr T id_elem =
2378#ifdef __FINITE_MATH_ONLY__
2379 std::numeric_limits<T>::max();
2381 std::numeric_limits<T>::infinity();
2383 return reduce(x, id_elem, [](
const auto& l,
const auto& r) {
2389 template <
typename M,
typename V>
2391 typename V::value_type
2392 hmax(
const const_where_expression<M, V>& x)
noexcept
2394 using T =
typename V::value_type;
2395 constexpr T id_elem =
2396#ifdef __FINITE_MATH_ONLY__
2397 std::numeric_limits<T>::lowest();
2399 -std::numeric_limits<T>::infinity();
2401 return reduce(x, id_elem, [](
const auto& l,
const auto& r) {
2408 template <
typename T,
typename A>
2409 constexpr simd<T, A>
2410 min(
const simd<T, A>& a,
const simd<T, A>& b)
2411 {
return simd<T, A>([&](
size_t i) {
return std::min(a[i], b[i]); }); }
2413 template <
typename T,
typename A>
2414 constexpr simd<T, A>
2415 max(
const simd<T, A>& a,
const simd<T, A>& b)
2416 {
return simd<T, A>([&](
size_t i) {
return std::max(a[i], b[i]); }); }
2418 template <
typename T,
typename A>
2420 std::pair<simd<T, A>, simd<T, A>>
2421 minmax(
const simd<T, A>& a,
const simd<T, A>& b)
2422 {
return {min(a, b), max(a, b)}; }
2424 template <
typename T,
typename A>
2425 constexpr simd<T, A>
2426 clamp(
const simd<T, A>& v,
const simd<T, A>& lo,
2427 const simd<T, A>& hi)
2428 {
return simd<T, A>([&](
size_t i) {
return std::clamp(v[i], lo[i], hi[i]); }); }
// Defines a unary math function `name` for simd: std::name is applied to
// every element of x and the results are collected in return_temp<T, A>
// (return_temp is a template taking <T, A>, e.g. simd or simd_mask).
#define SIMD_MATH_1ARG(name, return_temp)                                      \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const simd<detail::FloatingPoint<T>, A>& x) noexcept                  \
    {                                                                          \
      return return_temp<T, A>(                                                \
               [&x](size_t lane) { return std::name(x[lane]); });              \
    }
// Defines a unary math function `name` whose per-element result has the fixed
// type R: std::name is applied to every element of x and the results are
// collected in fixed_size_simd<R, simd_size_v<T, A>>.
#define SIMD_MATH_1ARG_FIXED(name, R)                                          \
  template <typename T, typename A>                                            \
    constexpr fixed_size_simd<R, simd_size_v<T, A>>                            \
    name(const simd<detail::FloatingPoint<T>, A>& x) noexcept                  \
    {                                                                          \
      return fixed_size_simd<R, simd_size_v<T, A>>(                            \
               [&x](size_t lane) { return std::name(x[lane]); });              \
    }
// Defines a binary math function `name` for simd: std::name(x[i], y[i]) is
// evaluated per element and collected in return_temp<T, A>.
// Three overloads are generated. In the second and third, one argument is
// wrapped in detail::type_identity_t so that T and A are deduced from the
// other argument only.
#define SIMD_MATH_2ARG(name, return_temp)                                      \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const simd<T, A>& y) noexcept                                         \
    {                                                                          \
      return return_temp<T, A>(                                                \
               [&](size_t lane) { return std::name(x[lane], y[lane]); });      \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const detail::type_identity_t<simd<T, A>>& y) noexcept                \
    {                                                                          \
      return return_temp<T, A>(                                                \
               [&](size_t lane) { return std::name(x[lane], y[lane]); });      \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const detail::type_identity_t<simd<T, A>>& x,                         \
         const simd<detail::FloatingPoint<T>, A>& y) noexcept                  \
    {                                                                          \
      return return_temp<T, A>(                                                \
               [&](size_t lane) { return std::name(x[lane], y[lane]); });      \
    }
// Defines a ternary math function `name` for simd: std::name(x[i], y[i], z[i])
// is evaluated per element and collected in return_temp<T, A>.
// Four overloads are generated. They differ in which arguments are wrapped in
// detail::type_identity_t and therefore excluded from deduction of T and A.
#define SIMD_MATH_3ARG(name, return_temp)                                      \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const simd<T, A>& y, const simd<T, A> &z) noexcept                    \
    {                                                                          \
      return return_temp<T, A>([&](size_t lane) {                              \
               return std::name(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const simd<detail::FloatingPoint<T>, A>& x,                           \
         const detail::type_identity_t<simd<T, A>>& y,                         \
         const detail::type_identity_t<simd<T, A>> &z) noexcept                \
    {                                                                          \
      return return_temp<T, A>([&](size_t lane) {                              \
               return std::name(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const detail::type_identity_t<simd<T, A>>& x,                         \
         const simd<detail::FloatingPoint<T>, A>& y,                           \
         const detail::type_identity_t<simd<T, A>> &z) noexcept                \
    {                                                                          \
      return return_temp<T, A>([&](size_t lane) {                              \
               return std::name(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }                                                                          \
                                                                               \
  template <typename T, typename A>                                            \
    constexpr return_temp<T, A>                                                \
    name(const detail::type_identity_t<simd<T, A>>& x,                         \
         const detail::type_identity_t<simd<T, A>>& y,                         \
         const simd<detail::FloatingPoint<T>, A> &z) noexcept                  \
    {                                                                          \
      return return_temp<T, A>([&](size_t lane) {                              \
               return std::name(x[lane], y[lane], z[lane]);                    \
             });                                                               \
    }
2489 template <
typename T,
typename A,
typename U = detail::SignedIntegral<T>>
2490 constexpr simd<T, A>
2491 abs(
const simd<T, A>& x)
noexcept
2492 {
return simd<T, A>([&x](
size_t i) {
return std::abs(x[i]); }); }
2494 SIMD_MATH_1ARG(abs, simd)
2495 SIMD_MATH_1ARG(isnan, simd_mask)
2496 SIMD_MATH_1ARG(isfinite, simd_mask)
2497 SIMD_MATH_1ARG(isinf, simd_mask)
2498 SIMD_MATH_1ARG(isnormal, simd_mask)
2499 SIMD_MATH_1ARG(signbit, simd_mask)
2500 SIMD_MATH_1ARG_FIXED(fpclassify,
int)
2502 SIMD_MATH_2ARG(hypot, simd)
2503 SIMD_MATH_3ARG(hypot, simd)
2505 template <
typename T,
typename A>
2506 constexpr simd<T, A>
2507 remquo(
const simd<T, A>& x,
const simd<T, A>& y,
2508 fixed_size_simd<
int, simd_size_v<T, A>>* quo)
noexcept
2509 {
return simd<T, A>([&x, &y, quo](
size_t i) {
return std::remquo(x[i], y[i], &(*quo)[i]); }); }
2511 SIMD_MATH_1ARG(erf, simd)
2512 SIMD_MATH_1ARG(erfc, simd)
2513 SIMD_MATH_1ARG(tgamma, simd)
2514 SIMD_MATH_1ARG(lgamma, simd)
2516 SIMD_MATH_2ARG(pow, simd)
2517 SIMD_MATH_2ARG(fmod, simd)
2518 SIMD_MATH_2ARG(remainder, simd)
2519 SIMD_MATH_2ARG(nextafter, simd)
2520 SIMD_MATH_2ARG(copysign, simd)
2521 SIMD_MATH_2ARG(fdim, simd)
2522 SIMD_MATH_2ARG(fmax, simd)
2523 SIMD_MATH_2ARG(fmin, simd)
2524 SIMD_MATH_2ARG(isgreater, simd_mask)
2525 SIMD_MATH_2ARG(isgreaterequal, simd_mask)
2526 SIMD_MATH_2ARG(isless, simd_mask)
2527 SIMD_MATH_2ARG(islessequal, simd_mask)
2528 SIMD_MATH_2ARG(islessgreater, simd_mask)
2529 SIMD_MATH_2ARG(isunordered, simd_mask)
2531 template <
typename T,
typename A>
2532 constexpr simd<T, A>
2533 modf(
const simd<detail::FloatingPoint<T>, A>& x, simd<T, A>* iptr)
noexcept
2534 {
return simd<T, A>([&x, iptr](
size_t i) {
return std::modf(x[i], &(*iptr)[i]); }); }
2536 template <
typename T,
typename A>
2537 constexpr simd<T, A>
2538 frexp(
const simd<detail::FloatingPoint<T>, A>& x,
2539 fixed_size_simd<
int, simd_size_v<T, A>>* exp)
noexcept
2540 {
return simd<T, A>([&x, exp](
size_t i) {
return std::frexp(x[i], &(*exp)[i]); }); }
2542 template <
typename T,
typename A>
2543 constexpr simd<T, A>
2544 scalbln(
const simd<detail::FloatingPoint<T>, A>& x,
2545 const fixed_size_simd<
long int, simd_size_v<T, A>>& exp)
noexcept
2546 {
return simd<T, A>([&x, &exp](
size_t i) {
return std::scalbln(x[i], exp[i]); }); }
2548 template <
typename T,
typename A>
2549 constexpr simd<T, A>
2550 scalbn(
const simd<detail::FloatingPoint<T>, A>& x,
2551 const fixed_size_simd<
int, simd_size_v<T, A>>& exp)
noexcept
2552 {
return simd<T, A>([&x, &exp](
size_t i) {
return std::scalbn(x[i], exp[i]); }); }
2554 template <
typename T,
typename A>
2555 constexpr simd<T, A>
2556 ldexp(
const simd<detail::FloatingPoint<T>, A>& x,
2557 const fixed_size_simd<
int, simd_size_v<T, A>>& exp)
noexcept
2558 {
return simd<T, A>([&x, &exp](
size_t i) {
return std::ldexp(x[i], exp[i]); }); }
2560 SIMD_MATH_1ARG(sqrt, simd)
2562 SIMD_MATH_3ARG(fma, simd)
2564 SIMD_MATH_1ARG(trunc, simd)
2565 SIMD_MATH_1ARG(ceil, simd)
2566 SIMD_MATH_1ARG(floor, simd)
2567 SIMD_MATH_1ARG(round, simd)
2568 SIMD_MATH_1ARG_FIXED(lround,
long)
2569 SIMD_MATH_1ARG_FIXED(llround,
long long)
2570 SIMD_MATH_1ARG(nearbyint, simd)
2571 SIMD_MATH_1ARG(rint, simd)
2572 SIMD_MATH_1ARG_FIXED(lrint,
long)
2573 SIMD_MATH_1ARG_FIXED(llrint,
long long)
2574 SIMD_MATH_1ARG_FIXED(ilogb,
int)
2577 SIMD_MATH_1ARG(sin, simd)
2578 SIMD_MATH_1ARG(cos, simd)
2579 SIMD_MATH_1ARG(tan, simd)
2580 SIMD_MATH_1ARG(asin, simd)
2581 SIMD_MATH_1ARG(acos, simd)
2582 SIMD_MATH_1ARG(atan, simd)
2583 SIMD_MATH_2ARG(atan2, simd)
2584 SIMD_MATH_1ARG(sinh, simd)
2585 SIMD_MATH_1ARG(cosh, simd)
2586 SIMD_MATH_1ARG(tanh, simd)
2587 SIMD_MATH_1ARG(asinh, simd)
2588 SIMD_MATH_1ARG(acosh, simd)
2589 SIMD_MATH_1ARG(atanh, simd)
2592 SIMD_MATH_1ARG(log, simd)
2593 SIMD_MATH_1ARG(log10, simd)
2594 SIMD_MATH_1ARG(log1p, simd)
2595 SIMD_MATH_1ARG(log2, simd)
2596 SIMD_MATH_1ARG(logb, simd)
2598#undef SIMD_MATH_1ARG
2599#undef SIMD_MATH_1ARG_FIXED
2600#undef SIMD_MATH_2ARG
2601#undef SIMD_MATH_3ARG
2603#ifdef VIR_SIMD_TS_DROPIN
2608 using namespace std::experimental::parallelism_v2;
constexpr simd_policy simd
SIMD execution policy.
Definition simd_execution.h:528
Version macros and version constant.