libstdc++
simd_fixed_size.h
1 // Simd fixed_size ABI specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2023 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /*
26  * The fixed_size ABI gives the following guarantees:
27  * - simd objects are passed via the stack
28  * - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
29  * - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is a
30  * power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
31  * if the alignment were to exceed the system/compiler maximum, it is bounded
32  * to that maximum)
33  * - simd_mask objects are passed like bitset<_Np>
34  * - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
35  * - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
36  * `bitset<_Np>`
37  */
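/*
 * Illustrative sketch (not part of this header): what the guarantees above mean
 * for user code, assuming <experimental/simd> is included and
 * namespace stdx = std::experimental.
 *
 *   using V = stdx::fixed_size_simd<float, 5>;
 *   static_assert(sizeof(V) >= 5 * sizeof(float)); // array-equivalent value layout
 *   // alignment follows the rule above: 5 * sizeof(float) rounded up to the
 *   // next power of 2 (here 32), unless capped by the system/compiler maximum
 *   float out[5];
 *   V v([](int i) { return float(i); });           // 0, 1, 2, 3, 4
 *   v.copy_to(out, stdx::element_aligned);         // contiguous, like array<float, 5>
 */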
38 
39 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
40 #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
41 
42 #if __cplusplus >= 201703L
43 
44 #include <array>
45 
46 _GLIBCXX_SIMD_BEGIN_NAMESPACE
47 
48 // __simd_tuple_element {{{
49 template <size_t _I, typename _Tp>
50  struct __simd_tuple_element;
51 
52 template <typename _Tp, typename _A0, typename... _As>
53  struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>>
54  { using type = simd<_Tp, _A0>; };
55 
56 template <size_t _I, typename _Tp, typename _A0, typename... _As>
57  struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>>
58  { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; };
59 
60 template <size_t _I, typename _Tp>
61  using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type;
62 
63 // }}}
64 // __simd_tuple_concat {{{
65 
66 template <typename _Tp, typename... _A0s, typename... _A1s>
67  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...>
68  __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left,
69  const _SimdTuple<_Tp, _A1s...>& __right)
70  {
71  if constexpr (sizeof...(_A0s) == 0)
72  return __right;
73  else if constexpr (sizeof...(_A1s) == 0)
74  return __left;
75  else
76  return {__left.first, __simd_tuple_concat(__left.second, __right)};
77  }
78 
79 template <typename _Tp, typename _A10, typename... _A1s>
80  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...>
81  __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right)
82  { return {__left, __right}; }
83 
84 // }}}
85 // __simd_tuple_pop_front {{{
86 // Returns the next _SimdTuple in __x that has _Np fewer elements.
87 // Precondition: _Np must match the number of elements in __first (recursively)
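// e.g. for a (hypothetical) _SimdTuple<float, _A8, _A4, simd_abi::scalar> holding
// 8 + 4 + 1 elements, __simd_tuple_pop_front<8>(__x) yields __x.second and
// __simd_tuple_pop_front<12>(__x) yields __x.second.second.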
88 template <size_t _Np, typename _Tp>
89  _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
90  __simd_tuple_pop_front(_Tp&& __x)
91  {
92  if constexpr (_Np == 0)
93  return static_cast<_Tp&&>(__x);
94  else
95  {
96  using _Up = __remove_cvref_t<_Tp>;
97  static_assert(_Np >= _Up::_S_first_size);
98  return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second);
99  }
100  }
101 
102 // }}}
103 // __get_simd_at<_Np> {{{1
104 struct __as_simd {};
105 
106 struct __as_simd_tuple {};
107 
108 template <typename _Tp, typename _A0, typename... _Abis>
109  _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0>
110  __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>)
111  { return {__private_init, __t.first}; }
112 
113 template <typename _Tp, typename _A0, typename... _Abis>
114  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
115  __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t,
116  _SizeConstant<0>)
117  { return __t.first; }
118 
119 template <typename _Tp, typename _A0, typename... _Abis>
120  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
121  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>)
122  { return __t.first; }
123 
124 template <typename _R, size_t _Np, typename _Tp, typename... _Abis>
125  _GLIBCXX_SIMD_INTRINSIC constexpr auto
126  __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>)
127  { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); }
128 
129 template <size_t _Np, typename _Tp, typename... _Abis>
130  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
131  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>)
132  { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); }
133 
134 template <size_t _Np, typename _Tp, typename... _Abis>
135  _GLIBCXX_SIMD_INTRINSIC constexpr auto
136  __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t)
137  { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); }
138 
139 // }}}
140 // __get_tuple_at<_Np> {{{
141 template <size_t _Np, typename _Tp, typename... _Abis>
142  _GLIBCXX_SIMD_INTRINSIC constexpr auto
143  __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t)
144  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }
145 
146 template <size_t _Np, typename _Tp, typename... _Abis>
147  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
148  __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t)
149  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }
150 
151 // __tuple_element_meta {{{1
152 template <typename _Tp, typename _Abi, size_t _Offset>
153  struct __tuple_element_meta : public _Abi::_SimdImpl
154  {
155  static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type,
156  _Abi>); // this fails e.g. when _SimdImpl is an
157  // alias for _SimdImplBuiltin<_DifferentAbi>
158  using value_type = _Tp;
159  using abi_type = _Abi;
160  using _Traits = _SimdTraits<_Tp, _Abi>;
161  using _MaskImpl = typename _Abi::_MaskImpl;
162  using _MaskMember = typename _Traits::_MaskMember;
163  using simd_type = simd<_Tp, _Abi>;
164  static constexpr size_t _S_offset = _Offset;
165  static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; }
166  static constexpr _MaskImpl _S_mask_impl = {};
167 
168  template <size_t _Np, bool _Sanitized>
169  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
170  _S_submask(_BitMask<_Np, _Sanitized> __bits)
171  { return __bits.template _M_extract<_Offset, _S_size()>(); }
172 
173  template <size_t _Np, bool _Sanitized>
174  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
175  _S_make_mask(_BitMask<_Np, _Sanitized> __bits)
176  {
177  return _MaskImpl::template _S_convert<_Tp>(
178  __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized());
179  }
180 
181  _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong
182  _S_mask_to_shifted_ullong(_MaskMember __k)
183  { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; }
184  };
185 
186 template <size_t _Offset, typename _Tp, typename _Abi, typename... _As>
187  _GLIBCXX_SIMD_INTRINSIC constexpr
188  __tuple_element_meta<_Tp, _Abi, _Offset>
189  __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&)
190  { return {}; }
191 
192 // }}}1
193 // _WithOffset wrapper class {{{
194 template <size_t _Offset, typename _Base>
195  struct _WithOffset : public _Base
196  {
197  static inline constexpr size_t _S_offset = _Offset;
198 
199  _GLIBCXX_SIMD_INTRINSIC char*
200  _M_as_charptr()
201  { return reinterpret_cast<char*>(this) + _S_offset * sizeof(typename _Base::value_type); }
202 
203  _GLIBCXX_SIMD_INTRINSIC const char*
204  _M_as_charptr() const
205  { return reinterpret_cast<const char*>(this) + _S_offset * sizeof(typename _Base::value_type); }
206  };
207 
208 // make _WithOffset<_WithOffset> ill-formed to use:
209 template <size_t _O0, size_t _O1, typename _Base>
210  struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {};
211 
212 template <size_t _Offset, typename _Tp>
213  _GLIBCXX_SIMD_INTRINSIC
214  decltype(auto)
215  __add_offset(_Tp& __base)
216  { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }
217 
218 template <size_t _Offset, typename _Tp>
219  _GLIBCXX_SIMD_INTRINSIC
220  decltype(auto)
221  __add_offset(const _Tp& __base)
222  { return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }
223 
224 template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
225  _GLIBCXX_SIMD_INTRINSIC
226  decltype(auto)
227  __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base)
228  { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); }
229 
230 template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
231  _GLIBCXX_SIMD_INTRINSIC
232  decltype(auto)
233  __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base)
234  {
235  return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>(
236  static_cast<const _Tp&>(__base));
237  }
238 
239 template <typename _Tp>
240  constexpr inline size_t __offset = 0;
241 
242 template <size_t _Offset, typename _Tp>
243  constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>>
244  = _WithOffset<_Offset, _Tp>::_S_offset;
245 
246 template <typename _Tp>
247  constexpr inline size_t __offset<const _Tp> = __offset<_Tp>;
248 
249 template <typename _Tp>
250  constexpr inline size_t __offset<_Tp&> = __offset<_Tp>;
251 
252 template <typename _Tp>
253  constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>;
254 
255 // }}}
256 // _SimdTuple specializations {{{1
257 // empty {{{2
258 template <typename _Tp>
259  struct _SimdTuple<_Tp>
260  {
261  using value_type = _Tp;
262  static constexpr size_t _S_tuple_size = 0;
263  static constexpr size_t _S_size() { return 0; }
264  };
265 
266 // _SimdTupleData {{{2
267 template <typename _FirstType, typename _SecondType>
268  struct _SimdTupleData
269  {
270  _FirstType first;
271  _SecondType second;
272 
273  _GLIBCXX_SIMD_INTRINSIC
274  constexpr bool
275  _M_is_constprop() const
276  {
277  if constexpr (is_class_v<_FirstType>)
278  return first._M_is_constprop() && second._M_is_constprop();
279  else
280  return __builtin_constant_p(first) && second._M_is_constprop();
281  }
282  };
283 
284 template <typename _FirstType, typename _Tp>
285  struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>>
286  {
287  _FirstType first;
288  static constexpr _SimdTuple<_Tp> second = {};
289 
290  _GLIBCXX_SIMD_INTRINSIC
291  constexpr bool
292  _M_is_constprop() const
293  {
294  if constexpr (is_class_v<_FirstType>)
295  return first._M_is_constprop();
296  else
297  return __builtin_constant_p(first);
298  }
299  };
300 
301 // 1 or more {{{2
302 template <typename _Tp, typename _Abi0, typename... _Abis>
303  struct _SimdTuple<_Tp, _Abi0, _Abis...>
304  : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
305  _SimdTuple<_Tp, _Abis...>>
306  {
307  static_assert(!__is_fixed_size_abi_v<_Abi0>);
308  using value_type = _Tp;
309  using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember;
310  using _FirstAbi = _Abi0;
311  using _SecondType = _SimdTuple<_Tp, _Abis...>;
312  static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1;
313 
314  static constexpr size_t _S_size()
315  { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); }
316 
317  static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>;
318  static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...);
319 
320  using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
321  _SimdTuple<_Tp, _Abis...>>;
322  using _Base::first;
323  using _Base::second;
324 
325  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default;
326  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default;
327  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&)
328  = default;
329 
330  template <typename _Up>
331  _GLIBCXX_SIMD_INTRINSIC constexpr
332  _SimdTuple(_Up&& __x)
333  : _Base{static_cast<_Up&&>(__x)} {}
334 
335  template <typename _Up, typename _Up2>
336  _GLIBCXX_SIMD_INTRINSIC constexpr
337  _SimdTuple(_Up&& __x, _Up2&& __y)
338  : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {}
339 
340  template <typename _Up>
341  _GLIBCXX_SIMD_INTRINSIC constexpr
342  _SimdTuple(_Up&& __x, _SimdTuple<_Tp>)
343  : _Base{static_cast<_Up&&>(__x)} {}
344 
345  _GLIBCXX_SIMD_INTRINSIC char*
346  _M_as_charptr()
347  { return reinterpret_cast<char*>(this); }
348 
349  _GLIBCXX_SIMD_INTRINSIC const char*
350  _M_as_charptr() const
351  { return reinterpret_cast<const char*>(this); }
352 
353  template <size_t _Np>
354  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
355  _M_at()
356  {
357  if constexpr (_Np == 0)
358  return first;
359  else
360  return second.template _M_at<_Np - 1>();
361  }
362 
363  template <size_t _Np>
364  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
365  _M_at() const
366  {
367  if constexpr (_Np == 0)
368  return first;
369  else
370  return second.template _M_at<_Np - 1>();
371  }
372 
373  template <size_t _Np>
374  _GLIBCXX_SIMD_INTRINSIC constexpr auto
375  _M_simd_at() const
376  {
377  if constexpr (_Np == 0)
378  return simd<_Tp, _Abi0>(__private_init, first);
379  else
380  return second.template _M_simd_at<_Np - 1>();
381  }
382 
383  template <size_t _Offset = 0, typename _Fp>
384  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple
385  _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {})
386  {
387  auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>());
388  if constexpr (_S_tuple_size == 1)
389  return {__first};
390  else
391  return {__first,
392  _SecondType::_S_generate(
393  static_cast<_Fp&&>(__gen),
394  _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())};
395  }
396 
397  template <size_t _Offset = 0, typename _Fp, typename... _More>
398  _GLIBCXX_SIMD_INTRINSIC _SimdTuple
399  _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const
400  {
401  auto&& __first
402  = __fun(__make_meta<_Offset>(*this), first, __more.first...);
403  if constexpr (_S_tuple_size == 1)
404  return {__first};
405  else
406  return {
407  __first,
408  second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>(
409  static_cast<_Fp&&>(__fun), __more.second...)};
410  }
411 
412  template <typename _Tup>
413  _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
414  _M_extract_argument(_Tup&& __tup) const
415  {
416  using _TupT = typename __remove_cvref_t<_Tup>::value_type;
417  if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>)
418  return __tup.first;
419  else if (__builtin_is_constant_evaluated())
420  return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate(
421  [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
422  return __meta._S_generator(
423  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
424  return __tup[__i];
425  }, static_cast<_TupT*>(nullptr));
426  });
427  else
428  return [&]() { // not always_inline; allow the compiler to decide
429  __fixed_size_storage_t<_TupT, _S_first_size> __r;
430  __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(),
431  sizeof(__r));
432  return __r;
433  }();
434  }
435 
436  template <typename _Tup>
437  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
438  _M_skip_argument(_Tup&& __tup) const
439  {
440  static_assert(_S_tuple_size > 1);
441  using _Up = __remove_cvref_t<_Tup>;
442  constexpr size_t __off = __offset<_Up>;
443  if constexpr (_S_first_size == _Up::_S_first_size && __off == 0)
444  return __tup.second;
445  else if constexpr (_S_first_size > _Up::_S_first_size
446  && _S_first_size % _Up::_S_first_size == 0
447  && __off == 0)
448  return __simd_tuple_pop_front<_S_first_size>(__tup);
449  else if constexpr (_S_first_size + __off < _Up::_S_first_size)
450  return __add_offset<_S_first_size>(__tup);
451  else if constexpr (_S_first_size + __off == _Up::_S_first_size)
452  return __tup.second;
453  else
454  __assert_unreachable<_Tup>();
455  }
456 
457  template <size_t _Offset, typename... _More>
458  _GLIBCXX_SIMD_INTRINSIC constexpr void
459  _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) &
460  {
461  static_assert(_Offset == 0);
462  first = __x.first;
463  if constexpr (sizeof...(_More) > 0)
464  {
465  static_assert(sizeof...(_Abis) >= sizeof...(_More));
466  second.template _M_assign_front<0>(__x.second);
467  }
468  }
469 
470  template <size_t _Offset>
471  _GLIBCXX_SIMD_INTRINSIC constexpr void
472  _M_assign_front(const _FirstType& __x) &
473  {
474  static_assert(_Offset == 0);
475  first = __x;
476  }
477 
478  template <size_t _Offset, typename... _As>
479  _GLIBCXX_SIMD_INTRINSIC constexpr void
480  _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) &
481  {
482  __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type),
483  __x._M_as_charptr(),
484  sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size());
485  }
486 
487  /*
488  * Iterate over the `first` members of this _SimdTuple and call __fun for each
489  * of them. If additional arguments are passed via __more, chunk them into
490  * _SimdTuple or __vector_type_t objects holding the same number of values.
491  */
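// A typical call (cf. _SimdImplFixedSize::_S_min below) applies a binary
// operation chunk-wise to two identically partitioned _SimdTuple objects:
//
//   __a._M_apply_per_chunk(
//     [](auto __impl, auto __aa, auto __bb) { return __impl._S_min(__aa, __bb); },
//     __b);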
492  template <typename _Fp, typename... _More>
493  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple
494  _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const
495  {
496  if constexpr ((...
497  || conjunction_v<
498  is_lvalue_reference<_More>,
499  negation<is_const<remove_reference_t<_More>>>>) )
500  {
501  // need to write back at least one of __more after calling __fun
502  auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
503  auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
504  __args...);
505  [[maybe_unused]] auto&& __ignore_me = {(
506  [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
507  if constexpr (is_assignable_v<decltype(__dst),
508  decltype(__dst)>)
509  {
510  __dst.template _M_assign_front<__offset<decltype(__dst)>>(
511  __src);
512  }
513  }(static_cast<_More&&>(__more), __args),
514  0)...};
515  return __r;
516  }(_M_extract_argument(__more)...);
517  if constexpr (_S_tuple_size == 1)
518  return {__first};
519  else
520  return {__first,
521  second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
522  _M_skip_argument(__more)...)};
523  }
524  else
525  {
526  auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
527  _M_extract_argument(__more)...);
528  if constexpr (_S_tuple_size == 1)
529  return {__first};
530  else
531  return {__first,
532  second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
533  _M_skip_argument(__more)...)};
534  }
535  }
536 
537  template <typename _R = _Tp, typename _Fp, typename... _More>
538  _GLIBCXX_SIMD_INTRINSIC constexpr auto
539  _M_apply_r(_Fp&& __fun, const _More&... __more) const
540  {
541  auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
542  __more.first...);
543  if constexpr (_S_tuple_size == 1)
544  return __first;
545  else
546  return __simd_tuple_concat<_R>(
547  __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun),
548  __more.second...));
549  }
550 
551  template <typename _Fp, typename... _More>
552  _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()>
553  _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more)
554  {
555  const _SanitizedBitMask<_S_first_size> __first
556  = _Abi0::_MaskImpl::_S_to_bits(
557  __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first,
558  __more.first...));
559  if constexpr (_S_tuple_size == 1)
560  return __first;
561  else
562  return _M_test(__fun, __x.second, __more.second...)
563  ._M_prepend(__first);
564  }
565 
566  template <typename _Up, _Up _I>
567  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
568  operator[](integral_constant<_Up, _I>) const noexcept
569  {
570  if constexpr (_I < simd_size_v<_Tp, _Abi0>)
571  return _M_subscript_read(_I);
572  else
573  return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()];
574  }
575 
576  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
577  operator[](size_t __i) const noexcept
578  {
579  if constexpr (_S_tuple_size == 1)
580  return _M_subscript_read(__i);
581 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
582  else if (not __builtin_is_constant_evaluated())
583  return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i];
584 #endif
585  else if constexpr (__is_scalar_abi<_Abi0>())
586  {
587  const _Tp* ptr = &first;
588  return ptr[__i];
589  }
590  else
591  return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i)
592  : second[__i - simd_size_v<_Tp, _Abi0>];
593  }
594 
595  _GLIBCXX_SIMD_INTRINSIC constexpr void
596  _M_set(size_t __i, _Tp __val) noexcept
597  {
598  if constexpr (_S_tuple_size == 1)
599  return _M_subscript_write(__i, __val);
600 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
601  else if (not __builtin_is_constant_evaluated())
602  reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val;
603 #endif
604  else if (__i < simd_size_v<_Tp, _Abi0>)
605  _M_subscript_write(__i, __val);
606  else
607  second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val);
608  }
609 
610  private:
611  // _M_subscript_read/_write {{{
612  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
613  _M_subscript_read([[maybe_unused]] size_t __i) const noexcept
614  {
615  if constexpr (__is_vectorizable_v<_FirstType>)
616  return first;
617  else
618  return first[__i];
619  }
620 
621  _GLIBCXX_SIMD_INTRINSIC constexpr void
622  _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept
623  {
624  if constexpr (__is_vectorizable_v<_FirstType>)
625  first = __y;
626  else
627  first._M_set(__i, __y);
628  }
629 
630  // }}}
631  };
632 
633 // __make_simd_tuple {{{1
634 template <typename _Tp, typename _A0>
635  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0>
636  __make_simd_tuple(simd<_Tp, _A0> __x0)
637  { return {__data(__x0)}; }
638 
639 template <typename _Tp, typename _A0, typename... _As>
640  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...>
641  __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
642  { return {__data(__x0), __make_simd_tuple(__xs...)}; }
643 
644 template <typename _Tp, typename _A0>
645  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0>
646  __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0)
647  { return {__arg0}; }
648 
649 template <typename _Tp, typename _A0, typename _A1, typename... _Abis>
650  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...>
651  __make_simd_tuple(
652  const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0,
653  const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1,
654  const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args)
655  { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; }
656 
657 // __to_simd_tuple {{{1
658 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
659  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
660  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX);
661 
662 template <typename _Tp, size_t _Np,
663  size_t _Offset = 0, // skip this many elements in __from0
664  typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0,
665  typename _V0VT = _VectorTraits<_V0>, typename... _VX>
666  _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX)
667  {
668  static_assert(is_same_v<typename _V0VT::value_type, _Tp>);
669  static_assert(_Offset < _V0VT::_S_full_size);
670  using _R0 = __vector_type_t<_Tp, _R::_S_first_size>;
671  if constexpr (_R::_S_tuple_size == 1)
672  {
673  if constexpr (_Np == 1)
674  return _R{__from0[_Offset]};
675  else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np)
676  return _R{__intrin_bitcast<_R0>(__from0)};
677  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
678  && _V0VT::_S_full_size / 2 >= _Np)
679  return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))};
680  else if constexpr (_Offset * 4 == _V0VT::_S_full_size
681  && _V0VT::_S_full_size / 4 >= _Np)
682  return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))};
683  else
684  __assert_unreachable<_Tp>();
685  }
686  else
687  {
688  if constexpr (1 == _R::_S_first_size)
689  { // extract one scalar and recurse
690  if constexpr (_Offset + 1 < _V0VT::_S_full_size)
691  return _R{__from0[_Offset],
692  __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0,
693  __fromX...)};
694  else
695  return _R{__from0[_Offset],
696  __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)};
697  }
698 
699  // place __from0 into _R::first and recurse for __fromX -> _R::second
700  else if constexpr (_V0VT::_S_full_size == _R::_S_first_size
701  && _Offset == 0)
702  return _R{__from0,
703  __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)};
704 
705  // place lower part of __from0 into _R::first and recurse with _Offset
706  else if constexpr (_V0VT::_S_full_size > _R::_S_first_size
707  && _Offset == 0)
708  return _R{__intrin_bitcast<_R0>(__from0),
709  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
710  _R::_S_first_size>(__from0, __fromX...)};
711 
712  // place lower part of second quarter of __from0 into _R::first and
713  // recurse with _Offset
714  else if constexpr (_Offset * 4 == _V0VT::_S_full_size
715  && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
716  return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
717  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
718  _Offset + _R::_S_first_size>(__from0,
719  __fromX...)};
720 
721  // place lower half of high half of __from0 into _R::first and recurse
722  // with _Offset
723  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
724  && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
725  return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
726  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
727  _Offset + _R::_S_first_size>(__from0,
728  __fromX...)};
729 
730  // place high half of __from0 into _R::first and recurse with __fromX
731  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
732  && _V0VT::_S_full_size / 2 >= _R::_S_first_size)
733  return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)),
734  __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>(
735  __fromX...)};
736 
737  // ill-formed if some unforeseen pattern is needed
738  else
739  __assert_unreachable<_Tp>();
740  }
741  }
742 
743 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
744  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
745  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX)
746  {
747  if constexpr (is_same_v<_Tp, _V>)
748  {
749  static_assert(
750  sizeof...(_VX) == 0,
751  "An array of scalars must be the last argument to __to_simd_tuple");
752  return __call_with_subscripts(
753  __from, make_index_sequence<_NV>(),
754  [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
755  return __simd_tuple_concat(
756  _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>());
757  });
758  }
759  else
760  return __call_with_subscripts(
761  __from, make_index_sequence<_NV>(),
762  [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
763  return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...);
764  });
765  }
766 
767 template <size_t, typename _Tp>
768  using __to_tuple_helper = _Tp;
769 
770 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
771  size_t... _Indexes>
772  _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut>
773  __to_simd_tuple_impl(index_sequence<_Indexes...>,
774  const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args)
775  {
776  return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>(
777  __args[_Indexes]...);
778  }
779 
780 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
781  typename _R = __fixed_size_storage_t<_Tp, _NOut>>
782  _GLIBCXX_SIMD_INTRINSIC _R
783  __to_simd_tuple_sized(
784  const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args)
785  {
786  static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut);
787  return __to_simd_tuple_impl<_Tp, _A0, _NOut>(
788  make_index_sequence<_R::_S_tuple_size>(), __args);
789  }
790 
791 // __optimize_simd_tuple {{{1
792 template <typename _Tp>
793  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp>
794  __optimize_simd_tuple(const _SimdTuple<_Tp>)
795  { return {}; }
796 
797 template <typename _Tp, typename _Ap>
798  _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>&
799  __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x)
800  { return __x; }
801 
802 template <typename _Tp, typename _A0, typename _A1, typename... _Abis,
803  typename _R = __fixed_size_storage_t<
804  _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>>
805  _GLIBCXX_SIMD_INTRINSIC constexpr _R
806  __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x)
807  {
808  using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>;
809  if constexpr (is_same_v<_R, _Tup>)
810  return __x;
811  else if constexpr (is_same_v<typename _R::_FirstType,
812  typename _Tup::_FirstType>)
813  return {__x.first, __optimize_simd_tuple(__x.second)};
814  else if constexpr (__is_scalar_abi<_A0>()
815  || _A0::template _S_is_partial<_Tp>)
816  return {__generate_from_n_evaluations<_R::_S_first_size,
817  typename _R::_FirstType>(
818  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }),
819  __optimize_simd_tuple(
820  __simd_tuple_pop_front<_R::_S_first_size>(__x))};
821  else if constexpr (is_same_v<_A0, _A1>
822  && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>)
823  return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()),
824  __optimize_simd_tuple(__x.second.second)};
825  else if constexpr (sizeof...(_Abis) >= 2
826  && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>)
827  && simd_size_v<_Tp, _A0> == __simd_tuple_element_t<
828  (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size())
829  return {
830  __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()),
831  __concat(__x.template _M_at<2>(), __x.template _M_at<3>())),
832  __optimize_simd_tuple(__x.second.second.second.second)};
833  else
834  {
835  static_assert(sizeof(_R) == sizeof(__x));
836  _R __r;
837  __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(),
838  sizeof(_Tp) * _R::_S_size());
839  return __r;
840  }
841  }
842 
843 // __for_each(const _SimdTuple &, Fun) {{{1
844 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
845  _GLIBCXX_SIMD_INTRINSIC constexpr void
846  __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
847  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }
848 
849 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
850  typename... _As, typename _Fp>
851  _GLIBCXX_SIMD_INTRINSIC constexpr void
852  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
853  {
854  __fun(__make_meta<_Offset>(__t), __t.first);
855  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
856  static_cast<_Fp&&>(__fun));
857  }
858 
859 // __for_each(_SimdTuple &, Fun) {{{1
860 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
861  _GLIBCXX_SIMD_INTRINSIC constexpr void
862  __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
863  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }
864 
865 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
866  typename... _As, typename _Fp>
867  _GLIBCXX_SIMD_INTRINSIC constexpr void
868  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
869  {
870  __fun(__make_meta<_Offset>(__t), __t.first);
871  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
872  static_cast<_Fp&&>(__fun));
873  }
874 
875 // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1
876 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
877  _GLIBCXX_SIMD_INTRINSIC constexpr void
878  __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun)
879  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }
880 
881 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
882  typename... _As, typename _Fp>
883  _GLIBCXX_SIMD_INTRINSIC constexpr void
884  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a,
885  const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
886  {
887  __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
888  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
889  static_cast<_Fp&&>(__fun));
890  }
891 
892 // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1
893 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
894  _GLIBCXX_SIMD_INTRINSIC constexpr void
895  __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun)
896  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }
897 
898 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
899  typename... _As, typename _Fp>
900  _GLIBCXX_SIMD_INTRINSIC constexpr void
901  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a,
902  const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
903  {
904  __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
905  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
906  static_cast<_Fp&&>(__fun));
907  }
908 
909 // }}}1
910 // __extract_part(_SimdTuple) {{{
911 template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0, typename... _As>
912  _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple
913  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x)
914  {
915  // worst cases:
916  // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4)
917  // (b) 2, 2, 2 => 3, 3 (_Total = 2)
918  // (c) 4, 2 => 2, 2, 2 (_Total = 3)
919  using _Tuple = _SimdTuple<_Tp, _A0, _As...>;
920  static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1);
921  constexpr size_t _Np = _Tuple::_S_size();
922  static_assert(_Np >= _Total && _Np % _Total == 0);
923  constexpr size_t __values_per_part = _Np / _Total;
924  [[maybe_unused]] constexpr size_t __values_to_skip
925  = _Index * __values_per_part;
926  constexpr size_t __return_size = __values_per_part * _Combine;
927  using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>;
928 
929  // handle (optimize) the simple cases
930  if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size)
931  return __x.first._M_data;
932  else if constexpr (_Index == 0 && _Total == _Combine)
933  return __x;
934  else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size)
935  return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>(
936  __as_vector(__x.first));
937 
938  // recurse to skip unused data members at the beginning of _SimdTuple
939  else if constexpr (__values_to_skip >= _Tuple::_S_first_size)
940  { // recurse
941  if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
942  {
943  constexpr int __parts_in_first
944  = _Tuple::_S_first_size / __values_per_part;
945  return __extract_part<_Index - __parts_in_first,
946  _Total - __parts_in_first, _Combine>(
947  __x.second);
948  }
949  else
950  return __extract_part<__values_to_skip - _Tuple::_S_first_size,
951  _Np - _Tuple::_S_first_size, __return_size>(
952  __x.second);
953  }
954 
955  // extract from multiple _SimdTuple data members
956  else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip)
957  {
958 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
959  const __may_alias<_Tp>* const element_ptr
960  = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip;
961  return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned));
962 #else
963  [[maybe_unused]] constexpr size_t __offset = __values_to_skip;
964  return __as_vector(simd<_Tp, _RetAbi>(
965  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
966  constexpr _SizeConstant<__i + __offset> __k;
967  return __x[__k];
968  }));
969 #endif
970  }
971 
972  // all of the return values are in __x.first
973  else if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
974  return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part,
975  _Combine>(__x.first);
976  else
977  return __extract_part<__values_to_skip, _Tuple::_S_first_size,
978  _Combine * __values_per_part>(__x.first);
979  }
980 
981 // }}}
982 // __fixed_size_storage_t<_Tp, _Np>{{{
983 template <typename _Tp, int _Np, typename _Tuple,
984  typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>,
985  int _Remain = _Np - int(_Next::size())>
986  struct __fixed_size_storage_builder;
987 
988 template <typename _Tp, int _Np>
989  struct __fixed_size_storage
990  : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {};
991 
992 template <typename _Tp, int _Np, typename... _As, typename _Next>
993  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
994  0>
995  { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; };
996 
997 template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain>
998  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
999  _Remain>
1000  {
1001  using type = typename __fixed_size_storage_builder<
1002  _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type;
1003  };
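// In other words, the builder repeatedly picks the best native ABI for the
// remaining element count and appends its tag until no elements remain. As a
// target-dependent illustration: on a target whose widest native float vector
// holds 4 elements, __fixed_size_storage_t<float, 7> might decompose into
// chunks of 4 + 2 + 1 or 4 + 3 elements, depending on which partial native
// ABIs _AllNativeAbis::_BestAbi offers for the remainder.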
1004 
1005 // }}}
1006 // __autocvt_to_simd {{{
1007 template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
1008  struct __autocvt_to_simd
1009  {
1010  _Tp _M_data;
1011  using _TT = __remove_cvref_t<_Tp>;
1012 
1013  _GLIBCXX_SIMD_INTRINSIC constexpr
1014  operator _TT()
1015  { return _M_data; }
1016 
1017  _GLIBCXX_SIMD_INTRINSIC constexpr
1018  operator _TT&()
1019  {
1020  static_assert(is_lvalue_reference<_Tp>::value, "");
1021  static_assert(!is_const<_Tp>::value, "");
1022  return _M_data;
1023  }
1024 
1025  _GLIBCXX_SIMD_INTRINSIC constexpr
1026  operator _TT*()
1027  {
1028  static_assert(is_lvalue_reference<_Tp>::value, "");
1029  static_assert(!is_const<_Tp>::value, "");
1030  return &_M_data;
1031  }
1032 
1033  _GLIBCXX_SIMD_INTRINSIC constexpr
1034  __autocvt_to_simd(_Tp dd) : _M_data(dd) {}
1035 
1036  template <typename _Abi>
1037  _GLIBCXX_SIMD_INTRINSIC constexpr
1038  operator simd<typename _TT::value_type, _Abi>()
1039  { return {__private_init, _M_data}; }
1040 
1041  template <typename _Abi>
1042  _GLIBCXX_SIMD_INTRINSIC constexpr
1043  operator simd<typename _TT::value_type, _Abi>&()
1044  { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); }
1045 
1046  template <typename _Abi>
1047  _GLIBCXX_SIMD_INTRINSIC constexpr
1048  operator simd<typename _TT::value_type, _Abi>*()
1049  { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); }
1050  };
1051 
1052 template <typename _Tp>
1053  __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>;
1054 
1055 template <typename _Tp>
1056  struct __autocvt_to_simd<_Tp, true>
1057  {
1058  using _TT = __remove_cvref_t<_Tp>;
1059  _Tp _M_data;
1060  fixed_size_simd<_TT, 1> _M_fd;
1061 
1062  _GLIBCXX_SIMD_INTRINSIC
1063  constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {}
1064 
1065  _GLIBCXX_SIMD_INTRINSIC
1066  ~__autocvt_to_simd()
1067  { _M_data = __data(_M_fd).first; }
1068 
1069  _GLIBCXX_SIMD_INTRINSIC constexpr
1070  operator fixed_size_simd<_TT, 1>()
1071  { return _M_fd; }
1072 
1073  _GLIBCXX_SIMD_INTRINSIC constexpr
1074  operator fixed_size_simd<_TT, 1> &()
1075  {
1076  static_assert(is_lvalue_reference<_Tp>::value, "");
1077  static_assert(!is_const<_Tp>::value, "");
1078  return _M_fd;
1079  }
1080 
1081  _GLIBCXX_SIMD_INTRINSIC constexpr
1082  operator fixed_size_simd<_TT, 1> *()
1083  {
1084  static_assert(is_lvalue_reference<_Tp>::value, "");
1085  static_assert(!is_const<_Tp>::value, "");
1086  return &_M_fd;
1087  }
1088  };
1089 
1090 // }}}
1091 
1092 struct _CommonImplFixedSize;
1093 template <int _Np, typename = __detail::__odr_helper> struct _SimdImplFixedSize;
1094 template <int _Np, typename = __detail::__odr_helper> struct _MaskImplFixedSize;
1095 // simd_abi::_Fixed {{{
1096 template <int _Np>
1097  struct simd_abi::_Fixed
1098  {
1099  template <typename _Tp> static constexpr size_t _S_size = _Np;
1100  template <typename _Tp> static constexpr size_t _S_full_size = _Np;
1101  // validity traits {{{
1102  struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {};
1103 
1104  template <typename _Tp>
1105  struct _IsValidSizeFor
1106  : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {};
1107 
1108  template <typename _Tp>
1109  struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
1110  _IsValidSizeFor<_Tp>> {};
1111 
1112  template <typename _Tp>
1113  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
1114 
1115  // }}}
1116  // _S_masked {{{
1117  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1118  _S_masked(_BitMask<_Np> __x)
1119  { return __x._M_sanitized(); }
1120 
1121  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1122  _S_masked(_SanitizedBitMask<_Np> __x)
1123  { return __x; }
1124 
1125  // }}}
1126  // _*Impl {{{
1127  using _CommonImpl = _CommonImplFixedSize;
1128  using _SimdImpl = _SimdImplFixedSize<_Np>;
1129  using _MaskImpl = _MaskImplFixedSize<_Np>;
1130 
1131  // }}}
1132  // __traits {{{
1133  template <typename _Tp, bool = _S_is_valid_v<_Tp>>
1134  struct __traits : _InvalidTraits {};
1135 
1136  template <typename _Tp>
1137  struct __traits<_Tp, true>
1138  {
1139  using _IsValid = true_type;
1140  using _SimdImpl = _SimdImplFixedSize<_Np>;
1141  using _MaskImpl = _MaskImplFixedSize<_Np>;
1142 
1143  // simd and simd_mask member types {{{
1144  using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;
1145  using _MaskMember = _SanitizedBitMask<_Np>;
1146 
1147  static constexpr size_t _S_simd_align
1148  = std::__bit_ceil(_Np * sizeof(_Tp));
1149 
1150  static constexpr size_t _S_mask_align = alignof(_MaskMember);
1151 
1152  // }}}
1153  // _SimdBase / base class for simd, providing extra conversions {{{
1154  struct _SimdBase
1155  {
1156  // The following ensures that function arguments are passed via the stack.
1157  // This is important for ABI compatibility across TU boundaries.
1158  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
1159  _SimdBase(const _SimdBase&) {}
1160 
1161  _SimdBase() = default;
1162 
1163  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit
1164  operator const _SimdMember &() const
1165  { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; }
1166 
1167  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit
1168  operator array<_Tp, _Np>() const
1169  {
1170  array<_Tp, _Np> __r;
1171  // _SimdMember can be larger because of higher alignment
1172  static_assert(sizeof(__r) <= sizeof(_SimdMember), "");
1173  __builtin_memcpy(__r.data(), &static_cast<const _SimdMember&>(*this),
1174  sizeof(__r));
1175  return __r;
1176  }
1177  };
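// Illustrative user-side sketch (assumes namespace stdx = std::experimental):
// the explicit conversion to array defined above enables, e.g.,
//   stdx::fixed_size_simd<int, 4> v{};
//   auto arr = static_cast<std::array<int, 4>>(v); // copies the element values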
1178 
1179  // }}}
1180  // _MaskBase {{{
1181  // empty. The bitset interface suffices
1182  struct _MaskBase {};
1183 
1184  // }}}
1185  // _SimdCastType {{{
1186  struct _SimdCastType
1187  {
1188  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
1189  _SimdCastType(const array<_Tp, _Np>&);
1190 
1191  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
1192  _SimdCastType(const _SimdMember& dd) : _M_data(dd) {}
1193 
1194  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit
1195  operator const _SimdMember &() const { return _M_data; }
1196 
1197  private:
1198  const _SimdMember& _M_data;
1199  };
1200 
1201  // }}}
1202  // _MaskCastType {{{
1203  class _MaskCastType
1204  {
1205  _MaskCastType() = delete;
1206  };
1207  // }}}
1208  };
1209  // }}}
1210  };
1211 
1212 // }}}
1213 // _CommonImplFixedSize {{{
1214 struct _CommonImplFixedSize
1215 {
1216  // _S_store {{{
1217  template <typename _Tp, typename... _As>
1218  _GLIBCXX_SIMD_INTRINSIC static void
1219  _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr)
1220  {
1221  constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size();
1222  __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp));
1223  }
1224 
1225  // }}}
1226 };
1227 
1228 // }}}
1229 // _SimdImplFixedSize {{{1
1230 // fixed_size should not inherit from _SimdMathFallback, so that the
1231 // specializations of the ABIs used in the _SimdTuple get used instead
1232 template <int _Np, typename>
1233  struct _SimdImplFixedSize
1234  {
1235  // member types {{{2
1236  using _MaskMember = _SanitizedBitMask<_Np>;
1237 
1238  template <typename _Tp>
1239  using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;
1240 
1241  template <typename _Tp>
1242  static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size;
1243 
1244  template <typename _Tp>
1245  using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
1246 
1247  template <typename _Tp>
1248  using _TypeTag = _Tp*;
1249 
1250  // broadcast {{{2
1251  template <typename _Tp>
1252  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1253  _S_broadcast(_Tp __x) noexcept
1254  {
1255  return _SimdMember<_Tp>::_S_generate(
1256  [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1257  return __meta._S_broadcast(__x);
1258  });
1259  }
1260 
1261  // _S_generator {{{2
1262  template <typename _Fp, typename _Tp>
1263  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1264  _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
1265  {
1266  return _SimdMember<_Tp>::_S_generate(
1267  [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1268  return __meta._S_generator(
1269  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1270  return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>())
1271  : 0;
1272  },
1273  _TypeTag<_Tp>());
1274  });
1275  }
1276 
1277  // _S_load {{{2
1278  template <typename _Tp, typename _Up>
1279  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1280  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
1281  {
1282  return _SimdMember<_Tp>::_S_generate(
1283  [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1284  return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>());
1285  });
1286  }
1287 
1288  // _S_masked_load {{{2
1289  template <typename _Tp, typename... _As, typename _Up>
1290  _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...>
1291  _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
1292  const _MaskMember __bits, const _Up* __mem) noexcept
1293  {
1294  auto __merge = __old;
1295  __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1296  if (__meta._S_submask(__bits).any())
1297 #pragma GCC diagnostic push
1298  // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3).
1299  // It is the responsibility of the caller of the masked load (via the mask's value) to
1300  // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the
1301  // pointer arithmetic is UB.
1302 #pragma GCC diagnostic ignored "-Warray-bounds"
1303  __native
1304  = __meta._S_masked_load(__native, __meta._S_make_mask(__bits),
1305  __mem + __meta._S_offset);
1306 #pragma GCC diagnostic pop
1307  });
1308  return __merge;
1309  }
1310 
1311  // _S_store {{{2
1312  template <typename _Tp, typename _Up>
1313  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1314  _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept
1315  {
1316  __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1317  __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
1318  });
1319  }
1320 
1321  // _S_masked_store {{{2
1322  template <typename _Tp, typename... _As, typename _Up>
1323  _GLIBCXX_SIMD_INTRINSIC static void
1324  _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem,
1325  const _MaskMember __bits) noexcept
1326  {
1327  __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1328  if (__meta._S_submask(__bits).any())
1329 #pragma GCC diagnostic push
1330  // __mem + __meta._S_offset could be UB ([expr.add]/4.3), but the
1331  // responsibility for avoiding UB lies with the caller of the masked
1332  // store (via the mask's value). Consequently, the compiler may assume
1333  // this branch is unreachable, if the pointer arithmetic is UB.
1334 #pragma GCC diagnostic ignored "-Warray-bounds"
1335  __meta._S_masked_store(__native, __mem + __meta._S_offset,
1336  __meta._S_make_mask(__bits));
1337 #pragma GCC diagnostic pop
1338  });
1339  }
1340 
1341  // negation {{{2
1342  template <typename _Tp, typename... _As>
1343  static constexpr inline _MaskMember
1344  _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept
1345  {
1346  _MaskMember __bits = 0;
1347  __for_each(
1348  __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1349  __bits
1350  |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native));
1351  });
1352  return __bits;
1353  }
1354 
1355  // reductions {{{2
1356  template <typename _Tp, typename _BinaryOperation>
1357  static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x,
1358  const _BinaryOperation& __binary_op)
1359  {
1360  using _Tup = _SimdMember<_Tp>;
1361  const _Tup& __tup = __data(__x);
1362  if constexpr (_Tup::_S_tuple_size == 1)
1363  return _Tup::_FirstAbi::_SimdImpl::_S_reduce(
1364  __tup.template _M_simd_at<0>(), __binary_op);
1365  else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2
1366  && _Tup::_SecondType::_S_size() == 1)
1367  {
1368  return __binary_op(simd<_Tp, simd_abi::scalar>(
1369  reduce(__tup.template _M_simd_at<0>(),
1370  __binary_op)),
1371  __tup.template _M_simd_at<1>())[0];
1372  }
1373  else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4
1374  && _Tup::_SecondType::_S_size() == 2)
1375  {
1376  return __binary_op(
1377  simd<_Tp, simd_abi::scalar>(
1378  reduce(__tup.template _M_simd_at<0>(), __binary_op)),
1379  simd<_Tp, simd_abi::scalar>(
1380  reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0];
1381  }
1382  else
1383  {
1384  const auto& __x2 = __call_with_n_evaluations<
1385  __div_roundup(_Tup::_S_tuple_size, 2)>(
1386  [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1387  if constexpr (sizeof...(__remaining) == 0)
1388  return __first_simd;
1389  else
1390  {
1391  using _Tup2
1392  = _SimdTuple<_Tp,
1393  typename decltype(__first_simd)::abi_type,
1394  typename decltype(__remaining)::abi_type...>;
1395  return fixed_size_simd<_Tp, _Tup2::_S_size()>(
1396  __private_init,
1397  __make_simd_tuple(__first_simd, __remaining...));
1398  }
1399  },
1400  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1401  auto __left = __tup.template _M_simd_at<2 * __i>();
1402  if constexpr (2 * __i + 1 == _Tup::_S_tuple_size)
1403  return __left;
1404  else
1405  {
1406  auto __right = __tup.template _M_simd_at<2 * __i + 1>();
1407  using _LT = decltype(__left);
1408  using _RT = decltype(__right);
1409  if constexpr (_LT::size() == _RT::size())
1410  return __binary_op(__left, __right);
1411  else
1412  {
1413  _GLIBCXX_SIMD_USE_CONSTEXPR_API
1414  typename _LT::mask_type __k(
1415  __private_init,
1416  [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1417  return __j < _RT::size();
1418  });
1419  _LT __ext_right = __left;
1420  where(__k, __ext_right)
1421  = __proposed::resizing_simd_cast<_LT>(__right);
1422  where(__k, __left) = __binary_op(__left, __ext_right);
1423  return __left;
1424  }
1425  }
1426  });
1427  return reduce(__x2, __binary_op);
1428  }
1429  }
1430 
1431  // _S_min, _S_max {{{2
1432  template <typename _Tp, typename... _As>
1433  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1434  _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b)
1435  {
1436  return __a._M_apply_per_chunk(
1437  [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1438  return __impl._S_min(__aa, __bb);
1439  },
1440  __b);
1441  }
1442 
1443  template <typename _Tp, typename... _As>
1444  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1445  _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b)
1446  {
1447  return __a._M_apply_per_chunk(
1448  [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1449  return __impl._S_max(__aa, __bb);
1450  },
1451  __b);
1452  }
1453 
1454  // _S_complement {{{2
1455  template <typename _Tp, typename... _As>
1456  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1457  _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept
1458  {
1459  return __x._M_apply_per_chunk(
1460  [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1461  return __impl._S_complement(__xx);
1462  });
1463  }
1464 
1465  // _S_unary_minus {{{2
1466  template <typename _Tp, typename... _As>
1467  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1468  _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept
1469  {
1470  return __x._M_apply_per_chunk(
1471  [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1472  return __impl._S_unary_minus(__xx);
1473  });
1474  }
1475 
1476  // arithmetic operators {{{2
1477 
1478 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \
1479  template <typename _Tp, typename... _As> \
1480  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \
1481  const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \
1482  { \
1483  return __x._M_apply_per_chunk( \
1484  [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
1485  return __impl.name_(__xx, __yy); \
1486  }, \
1487  __y); \
1488  }
1489 
1490  _GLIBCXX_SIMD_FIXED_OP(_S_plus, +)
1491  _GLIBCXX_SIMD_FIXED_OP(_S_minus, -)
1492  _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *)
1493  _GLIBCXX_SIMD_FIXED_OP(_S_divides, /)
1494  _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %)
1495  _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &)
1496  _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |)
1497  _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^)
1498  _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<)
1499  _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>)
1500 #undef _GLIBCXX_SIMD_FIXED_OP
1501 
1502  template <typename _Tp, typename... _As>
1503  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1504  _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y)
1505  {
1506  return __x._M_apply_per_chunk(
1507  [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1508  return __impl._S_bit_shift_left(__xx, __y);
1509  });
1510  }
1511 
1512  template <typename _Tp, typename... _As>
1513  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1514  _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y)
1515  {
1516  return __x._M_apply_per_chunk(
1517  [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1518  return __impl._S_bit_shift_right(__xx, __y);
1519  });
1520  }
1521 
1522  // math {{{2
1523 #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \
1524  template <typename _Tp, typename... _As, typename... _More> \
1525  static inline __fixed_size_storage_t<_RetTp, _Np> \
1526  _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \
1527  const _More&... __more) \
1528  { \
1529  if constexpr (sizeof...(_More) == 0) \
1530  { \
1531  if constexpr (is_same_v<_Tp, _RetTp>) \
1532  return __x._M_apply_per_chunk( \
1533  [](auto __impl, auto __xx) \
1534  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1535  { \
1536  using _V = typename decltype(__impl)::simd_type; \
1537  return __data(__name(_V(__private_init, __xx))); \
1538  }); \
1539  else \
1540  return __optimize_simd_tuple( \
1541  __x.template _M_apply_r<_RetTp>( \
1542  [](auto __impl, auto __xx) \
1543  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1544  { return __impl._S_##__name(__xx); })); \
1545  } \
1546  else if constexpr ( \
1547  is_same_v< \
1548  _Tp, \
1549  _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \
1550  return __x._M_apply_per_chunk( \
1551  [](auto __impl, auto __xx, auto... __pack) \
1552  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1553  { \
1554  using _V = typename decltype(__impl)::simd_type; \
1555  return __data(__name(_V(__private_init, __xx), \
1556  _V(__private_init, __pack)...)); \
1557  }, __more...); \
1558  else if constexpr (is_same_v<_Tp, _RetTp>) \
1559  return __x._M_apply_per_chunk( \
1560  [](auto __impl, auto __xx, auto... __pack) \
1561  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1562  { \
1563  using _V = typename decltype(__impl)::simd_type; \
1564  return __data(__name(_V(__private_init, __xx), \
1565  __autocvt_to_simd(__pack)...)); \
1566  }, __more...); \
1567  else \
1568  __assert_unreachable<_Tp>(); \
1569  }
1570 
1571  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos)
1572  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin)
1573  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan)
1574  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2)
1575  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos)
1576  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin)
1577  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan)
1578  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh)
1579  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh)
1580  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh)
1581  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh)
1582  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh)
1583  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh)
1584  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp)
1585  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2)
1586  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1)
1587  _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb)
1588  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log)
1589  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10)
1590  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p)
1591  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2)
1592  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb)
1593  // modf implemented in simd_math.h
1594  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp,
1595  scalbn) // double scalbn(double x, int exp);
1596  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln)
1597  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt)
1598  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs)
1599  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs)
1600  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow)
1601  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt)
1602  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf)
1603  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc)
1604  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma)
1605  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma)
1606  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc)
1607  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil)
1608  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor)
1609  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint)
1610 
1611  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint)
1612  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint)
1613  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint)
1614 
1615  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round)
1616  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround)
1617  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround)
1618 
1619  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp)
1620  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod)
1621  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder)
1622  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign)
1623  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter)
1624  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim)
1625  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax)
1626  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin)
1627  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma)
1628  _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify)
1629 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE
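  // Illustrative example: sin(fixed_size_simd<double, 5>) reaches _S_sin,
  // which wraps each chunk in its native simd type and calls the matching
  // simd overload of sin; functions whose return type differs from _Tp
  // (ilogb, lrint, llround, fpclassify, ...) instead go through
  // _M_apply_r<_RetTp> and __optimize_simd_tuple.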
1630 
1631  template <typename _Tp, typename... _Abis>
1632  static inline _SimdTuple<_Tp, _Abis...>
1633  _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y,
1634  __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z)
1635  {
1636  return __x._M_apply_per_chunk(
1637  [](auto __impl, const auto __xx, const auto __yy, auto& __zz)
1638  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1639  { return __impl._S_remquo(__xx, __yy, &__zz); },
1640  __y, *__z);
1641  }
1642 
1643  template <typename _Tp, typename... _As>
1644  static inline _SimdTuple<_Tp, _As...>
1645  _S_frexp(const _SimdTuple<_Tp, _As...>& __x,
1646  __fixed_size_storage_t<int, _Np>& __exp) noexcept
1647  {
1648  return __x._M_apply_per_chunk(
1649  [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1650  return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a),
1651  __autocvt_to_simd(__b)));
1652  }, __exp);
1653  }
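  // frexp above forwards each chunk of __x together with the matching chunk
  // of the int exponent storage; __autocvt_to_simd wraps that chunk so the
  // per-chunk frexp call can write the exponents back.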
1654 
1655 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \
1656  template <typename _Tp, typename... _As> \
1657  static inline _MaskMember \
1658  _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \
1659  { \
1660  return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
1661  return __impl._S_##name_(__xx); \
1662  }, __x); \
1663  }
1664 
1665  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf)
1666  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite)
1667  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan)
1668  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal)
1669  _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit)
1670 #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_
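  // Illustrative example: isnan(fixed_size_simd<float, 7>) reaches _S_isnan,
  // which evaluates the test on each chunk and combines the per-chunk results
  // into a single _Np-bit _MaskMember via _M_test.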
1671 
1672  // _S_increment & _S_decrement{{{2
1673  template <typename... _Ts>
1674  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1675  _S_increment(_SimdTuple<_Ts...>& __x)
1676  {
1677  __for_each(
 1678  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 1679  __meta._S_increment(__native);
1680  });

1681  }
1682 
1683  template <typename... _Ts>
1684  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1685  _S_decrement(_SimdTuple<_Ts...>& __x)
1686  {
1687  __for_each(
 1688  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 1689  __meta._S_decrement(__native);
1690  });
1691  }
1692 
1693  // compares {{{2
1694 #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \
1695  template <typename _Tp, typename... _As> \
1696  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \
1697  __cmp(const _SimdTuple<_Tp, _As...>& __x, \
1698  const _SimdTuple<_Tp, _As...>& __y) \
1699  { \
1700  return _M_test([](auto __impl, auto __xx, auto __yy) \
1701  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1702  { return __impl.__cmp(__xx, __yy); }, \
1703  __x, __y); \
1704  }
1705 
1706  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to)
1707  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to)
1708  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less)
1709  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal)
1710  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless)
1711  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal)
1712  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater)
1713  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal)
1714  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater)
1715  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered)
1716 #undef _GLIBCXX_SIMD_CMP_OPERATIONS
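  // The comparisons follow the same pattern: e.g. `a < b` dispatches to
  // _S_less, and the per-chunk mask results are concatenated into one bitmask
  // by _M_test.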
1717 
1718  // smart_reference access {{{2
1719  template <typename _Tp, typename... _As, typename _Up>
1720  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1721  _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept
1722  { __v._M_set(__i, static_cast<_Up&&>(__x)); }
1723 
1724  // _S_masked_assign {{{2
1725  template <typename _Tp, typename... _As>
1726  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1727  _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1728  const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs)
1729  {
1730  __for_each(__lhs, __rhs,
1731  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
1732  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1733  {
1734  __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
1735  __native_rhs);
1736  });
1737  }
1738 
1739  // Optimization for the case where the RHS is a scalar. No need to broadcast
1740  // the scalar to a simd first.
1741  template <typename _Tp, typename... _As>
1742  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1743  _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1744  const __type_identity_t<_Tp> __rhs)
1745  {
1746  __for_each(
1747  __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1748  __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
1749  __rhs);
1750  });
1751  }
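  // Illustrative example: `where(k, v) = 1.f;` can take the overload above,
  // handing the scalar 1.f directly to every chunk instead of first
  // broadcasting it into a _SimdTuple.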
1752 
1753  // _S_masked_cassign {{{2
1754  template <typename _Op, typename _Tp, typename... _As>
1755  static constexpr inline void
1756  _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1757  const _SimdTuple<_Tp, _As...>& __rhs, _Op __op)
1758  {
1759  __for_each(__lhs, __rhs,
1760  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
1761  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1762  {
1763  __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
1764  __native_lhs, __native_rhs, __op);
1765  });
1766  }
1767 
1768  // Optimization for the case where the RHS is a scalar. No need to broadcast
1769  // the scalar to a simd first.
1770  template <typename _Op, typename _Tp, typename... _As>
1771  static constexpr inline void
1772  _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1773  const _Tp& __rhs, _Op __op)
1774  {
1775  __for_each(
1776  __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1777  __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
1778  __native_lhs, __rhs, __op);
1779  });
1780  }
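  // Illustrative example: `where(k, v) += 1.f;` can take the overload above
  // with _Op selecting addition; again the scalar right-hand side is not
  // broadcast first.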
1781 
1782  // _S_masked_unary {{{2
1783  template <template <typename> class _Op, typename _Tp, typename... _As>
1784  static constexpr inline _SimdTuple<_Tp, _As...>
1785  _S_masked_unary(const _MaskMember __bits, const _SimdTuple<_Tp, _As...>& __v)
1786  {
1787  return __v._M_apply_wrapped([&__bits](auto __meta,
1788  auto __native) constexpr {
1789  return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask(
1790  __bits),
1791  __native);
1792  });
1793  }
1794 
1795  // }}}2
1796  };
1797 
1798 // _MaskImplFixedSize {{{1
1799 template <int _Np, typename>
1800  struct _MaskImplFixedSize
1801  {
1802  static_assert(
1803  sizeof(_ULLong) * __CHAR_BIT__ >= _Np,
1804  "The fixed_size implementation relies on one _ULLong being able to store "
1805  "all boolean elements."); // required in load & store
1806 
1807  // member types {{{
1808  using _Abi = simd_abi::fixed_size<_Np>;
1809 
1810  using _MaskMember = _SanitizedBitMask<_Np>;
1811 
1812  template <typename _Tp>
1813  using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi;
1814 
1815  template <typename _Tp>
1816  using _TypeTag = _Tp*;
1817 
1818  // }}}
1819  // _S_broadcast {{{
1820  template <typename>
1821  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1822  _S_broadcast(bool __x)
1823  { return __x ? ~_MaskMember() : _MaskMember(); }
1824 
1825  // }}}
1826  // _S_load {{{
1827  template <typename>
1828  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1829  _S_load(const bool* __mem)
1830  {
1831  if (__builtin_is_constant_evaluated())
1832  {
1833  _MaskMember __r{};
1834  for (size_t __i = 0; __i < _Np; ++__i)
1835  __r.set(__i, __mem[__i]);
1836  return __r;
1837  }
1838  using _Ip = __int_for_sizeof_t<bool>;
1839  // the following load uses element_aligned and relies on __mem already
1840  // carrying alignment information from when this load function was
1841  // called.
1842  const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>(
1843  __mem),
1844  element_aligned);
1845  return __data(__bools != 0);
1846  }
1847 
1848  // }}}
1849  // _S_to_bits {{{
1850  template <bool _Sanitized>
1851  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1852  _S_to_bits(_BitMask<_Np, _Sanitized> __x)
1853  {
1854  if constexpr (_Sanitized)
1855  return __x;
1856  else
1857  return __x._M_sanitized();
1858  }
1859 
1860  // }}}
1861  // _S_convert {{{
1862  template <typename _Tp, typename _Up, typename _UAbi>
1863  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1864  _S_convert(simd_mask<_Up, _UAbi> __x)
1865  {
1866  return _UAbi::_MaskImpl::_S_to_bits(__data(__x))
1867  .template _M_extract<0, _Np>();
1868  }
1869 
1870  // }}}
1871  // _S_from_bitmask {{{2
1872  template <typename _Tp>
1873  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1874  _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept
1875  { return __bits; }
1876 
1877  // _S_load {{{2
1878  static constexpr inline _MaskMember
1879  _S_load(const bool* __mem) noexcept
1880  {
 1881  // TODO: _UChar is not necessarily the best type to use here. For smaller
 1882  // _Np, _UShort, _UInt, _ULLong, float, and double can be more efficient.
1883  _ULLong __r = 0;
1884  using _Vs = __fixed_size_storage_t<_UChar, _Np>;
1885  __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1886  __r |= __meta._S_mask_to_shifted_ullong(
1887  __meta._S_mask_impl._S_load(&__mem[__meta._S_offset],
1888  _SizeConstant<__meta._S_size()>()));
1889  });
1890  return __r;
1891  }
1892 
1893  // _S_masked_load {{{2
1894  static constexpr inline _MaskMember
1895  _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept
1896  {
1897  _BitOps::_S_bit_iteration(__mask.to_ullong(),
1898  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1899  __merge.set(__i, __mem[__i]);
1900  });
1901  return __merge;
1902  }
1903 
1904  // _S_store {{{2
1905  static constexpr inline void
1906  _S_store(const _MaskMember __bitmask, bool* __mem) noexcept
1907  {
1908  if constexpr (_Np == 1)
1909  __mem[0] = __bitmask[0];
1910  else
1911  _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem);
1912  }
1913 
1914  // _S_masked_store {{{2
1915  static constexpr inline void
1916  _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept
1917  {
1918  _BitOps::_S_bit_iteration(
1919  __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
1920  }
1921 
1922  // logical and bitwise operators {{{2
1923  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1924  _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
1925  { return __x & __y; }
1926 
1927  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1928  _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
1929  { return __x | __y; }
1930 
1931  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1932  _S_bit_not(const _MaskMember& __x) noexcept
1933  { return ~__x; }
1934 
1935  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1936  _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
1937  { return __x & __y; }
1938 
1939  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1940  _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
1941  { return __x | __y; }
1942 
1943  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1944  _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept
1945  { return __x ^ __y; }
1946 
1947  // smart_reference access {{{2
1948  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1949  _S_set(_MaskMember& __k, int __i, bool __x) noexcept
1950  { __k.set(__i, __x); }
1951 
1952  // _S_masked_assign {{{2
1953  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1954  _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs)
1955  { __lhs = (__lhs & ~__k) | (__rhs & __k); }
1956 
1957  // Optimization for the case where the RHS is a scalar.
1958  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1959  _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs)
1960  {
1961  if (__rhs)
1962  __lhs |= __k;
1963  else
1964  __lhs &= ~__k;
1965  }
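  // Illustrative example: `where(k, m) = true;` simply ORs the selected bits
  // into the destination bitmask (and `= false` clears them).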
1966 
1967  // }}}2
1968  // _S_all_of {{{
1969  template <typename _Tp>
1970  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
1971  _S_all_of(simd_mask<_Tp, _Abi> __k)
1972  { return __data(__k).all(); }
1973 
1974  // }}}
1975  // _S_any_of {{{
1976  template <typename _Tp>
1977  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
1978  _S_any_of(simd_mask<_Tp, _Abi> __k)
1979  { return __data(__k).any(); }
1980 
1981  // }}}
1982  // _S_none_of {{{
1983  template <typename _Tp>
1984  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
1985  _S_none_of(simd_mask<_Tp, _Abi> __k)
1986  { return __data(__k).none(); }
1987 
1988  // }}}
1989  // _S_some_of {{{
1990  template <typename _Tp>
1991  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
1992  _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k)
1993  {
1994  if constexpr (_Np == 1)
1995  return false;
1996  else
1997  return __data(__k).any() && !__data(__k).all();
1998  }
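  // With a single element the mask is always either none or all set, so
  // "some of" can never hold; hence the constant false for _Np == 1.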
1999 
2000  // }}}
2001  // _S_popcount {{{
2002  template <typename _Tp>
2003  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2004  _S_popcount(simd_mask<_Tp, _Abi> __k)
2005  { return __data(__k).count(); }
2006 
2007  // }}}
2008  // _S_find_first_set {{{
2009  template <typename _Tp>
2010  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2011  _S_find_first_set(simd_mask<_Tp, _Abi> __k)
2012  { return std::__countr_zero(__data(__k).to_ullong()); }
2013 
2014  // }}}
2015  // _S_find_last_set {{{
2016  template <typename _Tp>
2017  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2018  _S_find_last_set(simd_mask<_Tp, _Abi> __k)
2019  { return std::__bit_width(__data(__k).to_ullong()) - 1; }
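  // Illustrative example: for a mask whose bit pattern is 0b0110,
  // _S_find_first_set returns 1 (countr_zero) and _S_find_last_set returns 2
  // (bit_width - 1).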
2020 
2021  // }}}
2022  };
2023 // }}}1
2024 
2025 _GLIBCXX_SIMD_END_NAMESPACE
2026 #endif // __cplusplus >= 201703L
2027 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
2028 
2029 // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80