8#ifndef VIR_SIMD_PERMUTE_H_
9#define VIR_SIMD_PERMUTE_H_
17#if VIR_HAVE_SIMD_CONCEPTS
18#define VIR_HAVE_SIMD_PERMUTE 1
22#include "constexpr_wrapper.h"
30 concept index_permutation_function_nosize =
requires(F
const& f)
32 { f(vir::cw<0>) } -> std::integral;
34 typename std::integral_constant<int, f(vir::cw<0>)>;
38 template <
typename F, std::
size_t Size>
39 concept index_permutation_function_size =
requires(F
const& f)
41 { f(vir::cw<0>, vir::cw<Size>) } -> std::integral;
43 typename std::integral_constant<int, f(vir::cw<0>, vir::cw<Size>)>;
47 template <
typename F, std::
size_t Size>
48 concept index_permutation_function
49 = index_permutation_function_size<F, Size> or index_permutation_function_nosize<F>;
// VIR_CONSTEVAL expands to `consteval`, except on Clang releases up to and including 13,
// where it falls back to `constexpr`.
// The version test must use __clang_major__: __clang__ is always defined as 1 by Clang, so
// comparing it against 13 would select the fallback on every Clang version.
#if defined __clang__ and __clang_major__ <= 13
#define VIR_CONSTEVAL constexpr
#else
#define VIR_CONSTEVAL consteval
#endif
69 VIR_CONSTEVAL
unsigned
70 operator()(
unsigned i)
const
79 VIR_CONSTEVAL
unsigned
80 operator()(
unsigned i)
const
90 VIR_CONSTEVAL
unsigned
91 operator()(
unsigned i,
auto size)
const
93 static_assert(size % (2 * N) == 0,
94 "swap_neighbors<N> permutation requires a multiple of 2N elements");
95 if (std::has_single_bit(N))
97 else if (i % (2 * N) >= N)
109 template <
unsigned N = 1u>
112 template <
int Position>
116 operator()(
int)
const
121 template <
int Position>
133 operator()(
int i)
const
143 static constexpr int Offset = O;
144 static constexpr bool is_even_rotation = Offset % 2 == 0;
147 operator()(
int i,
auto size)
const
148 {
return (i + Offset) % size.value; }
152 template <
int Offset>
153 inline constexpr Rotate<Offset>
rotate {};
155 template <
int Offset>
159 operator()(
int i,
int size)
const
161 const int j = i + Offset;
162 if constexpr (Offset >= 0)
170 template <
int Offset>
171 inline constexpr Shift<Offset>
shift {};
179 detail::index_permutation_function<V::size()> F>
180 VIR_ALWAYS_INLINE
constexpr stdx::resize_simd_t<N == 0 ? V::size() : N, V>
183 using T =
typename V::value_type;
184 using R = stdx::resize_simd_t<N == 0 ? V::size() : N, V>;
187 if (not std::is_constant_evaluated())
188 if constexpr (std::has_single_bit(
sizeof(V)) and V::size() <= stdx::native_simd<T>::size())
191 using v4df [[gnu::vector_size(32)]] = double;
192 if constexpr (std::same_as<T, float> and std::is_trivially_copyable_v<V>
193 and
sizeof(v4df) ==
sizeof(V)
197 { std::bool_constant<F::is_even_rotation>() }
198 -> std::same_as<std::true_type>;
201 const v4df intrin = detail::bit_cast<v4df>(v);
202 constexpr int control = ((F::Offset / 2) << 0)
203 | (((F::Offset / 2 + 1) % 4) << 2)
204 | (((F::Offset / 2 + 2) % 4) << 4)
205 | (((F::Offset / 2 + 3) % 4) << 6);
206 return detail::bit_cast<R>(__builtin_ia32_permdf256(intrin, control));
209#if VIR_HAVE_WORKING_SHUFFLEVECTOR
210 if constexpr (std::has_single_bit(
sizeof(V)) and std::has_single_bit(
sizeof(R)))
212 using VBuiltin [[gnu::vector_size(
sizeof(V))]] = T;
213 using RBuiltin [[gnu::vector_size(
sizeof(R))]] = T;
214 if constexpr (std::is_trivially_copyable_v<V> and std::is_trivially_copyable_v<R>
215 and
sizeof(VBuiltin) ==
sizeof(V) and
sizeof(RBuiltin) ==
sizeof(R))
217 const VBuiltin vec = detail::bit_cast<VBuiltin>(v);
218 constexpr auto idx_perm2 = [=](constexpr_value
auto i) {
219 if constexpr (detail::index_permutation_function_nosize<F>)
220 return vir::cw<idx_perm(i)>;
222 return vir::cw<idx_perm(i, vir::cw<V::size()>)>;
224 constexpr auto adj_idx = [](constexpr_value
auto i) {
227 return vir::cw<V::size()>;
230 else if constexpr (j < 0)
232 static_assert (-j <= int(V::size()));
233 return vir::cw<int(V::size()) + j>;
237 static_assert (j < int(V::size()));
241 return [&]<std::size_t... Is>(std::index_sequence<Is...>) {
242 return detail::bit_cast<R>(
243 __builtin_shufflevector(vec, VBuiltin{},
244 adj_idx(idx_perm2(vir::cw<Is>)).value...));
245 }(std::make_index_sequence<R::size()>());
252 return R([&](
auto i) -> T {
253 constexpr int j = [&] {
254 if constexpr (detail::index_permutation_function_nosize<F>)
257 return idx_perm(i, vir::cw<V::size()>);
266 else if constexpr (j < 0)
268 static_assert(-j <= int(V::size()));
269 return v[v.size() + j];
273 static_assert(j < int(V::size()));
280 template <std::
size_t N = 0, vir::vectorizable T, detail::index_permutation_function<1> F>
281 VIR_ALWAYS_INLINE
constexpr
282 std::conditional_t<N <= 1, T, stdx::resize_simd_t<N == 0 ? 1 : N, stdx::simd<T>>>
285 if constexpr (N <= 1)
287 constexpr auto i = vir::cw<0>;
288 constexpr int j = [&] {
289 if constexpr (detail::index_permutation_function_nosize<F>)
292 return idx_perm(i, vir::cw<std::size_t(1)>);
303 static_assert(j == 0 or j == -1);
308 return simd_permute<N>(stdx::simd<T, stdx::simd_abi::scalar>(v), idx_perm);
312 template <
int Offset, vir::any_simd_or_mask V>
313 VIR_ALWAYS_INLINE
constexpr V
314 simd_shift_in(V
const& a, std::convertible_to<V>
auto const&... more)
noexcept
316 return V([&](
auto i) ->
typename V::value_type {
317 constexpr int ninputs = 1 +
sizeof...(more);
318 constexpr int w = V::size();
319 constexpr int j = Offset + int(i);
320 if constexpr (j >= w * ninputs)
322 else if constexpr (j >= 0)
324 const V tmp[] = {a, more...};
325 return tmp[j / w][j % w];
327 else if constexpr (j < -w)
Satisfied if V is either a simd or a simd_mask.
Definition simd_concepts.h:61
Predefined permutations.
Definition simd_permute.h:66
constexpr Reverse reverse
Reverse the elements.
Definition simd_permute.h:138
constexpr SwapNeighbors< N > swap_neighbors
Swaps N neighboring elements.
Definition simd_permute.h:110
constexpr DuplicateOdd duplicate_odd
Copies odd elements into even elements.
Definition simd_permute.h:85
constexpr Shift< Offset > shift
Shift the elements by Offset.
Definition simd_permute.h:171
constexpr Broadcast<-1 > broadcast_last
Copy the last element into all elements.
Definition simd_permute.h:128
constexpr Broadcast< Position > broadcast
Copy element at index Position into all elements.
Definition simd_permute.h:122
constexpr DuplicateEven duplicate_even
Copies even elements into odd elements.
Definition simd_permute.h:75
constexpr Broadcast< 0 > broadcast_first
Copy the first element into all elements.
Definition simd_permute.h:125
constexpr Rotate< Offset > rotate
Rotate the elements by Offset.
Definition simd_permute.h:153
This namespace collects libraries and tools authored by Matthias Kretz.
Definition constexpr_wrapper.h:21
constexpr int simd_permute_zero
Constant that requests a zero value instead of one of the input values.
Definition simd_permute.h:53
constexpr stdx::resize_simd_t< N==0 ? V::size() :N, V > simd_permute(V const &v, F const idx_perm) noexcept
Permute the elements of v using the index permutation function idx_perm.
Definition simd_permute.h:181
constexpr int simd_permute_uninit
Constant that allows an arbitrary value instead of one of the input values.
Definition simd_permute.h:56
constexpr V simd_shift_in(V const &a, std::convertible_to< V > auto const &... more) noexcept
Concatenate a, more..., shift by Offset, and return the first V::size() elements.
Definition simd_permute.h:314
C++20 concepts extending the Parallelism TS 2 (which is limited to C++17).