#pragma once

#include <array>
#include <cstdint>

// SeqAn3 dependencies; the trailing comments preserve the upstream "Provides ..."
// briefs, and the include paths assume the current SeqAn3 source layout.
#include <seqan3/utility/simd/concept.hpp>                        // Provides seqan3::simd::simd_concept.
#include <seqan3/utility/simd/detail/builtin_simd.hpp>            // Provides seqan3::detail::builtin_simd,
                                                                  // seqan3::detail::is_builtin_simd and
                                                                  // seqan3::simd::simd_traits<...
#include <seqan3/utility/simd/detail/builtin_simd_intrinsics.hpp> // Provides intrinsics include for builtin simd.
#include <seqan3/utility/simd/simd_traits.hpp>                    // Provides seqan3::simd::simd_traits.

namespace seqan3::detail
{
//!\brief Loads simd_t size bits of integral data from memory (unaligned load).
template <simd::simd_concept simd_t>
constexpr simd_t load_avx512(void const * mem_addr);
//!\brief Stores simd_t size bits of integral data to memory (unaligned store).
template <simd::simd_concept simd_t>
constexpr void store_avx512(void * mem_addr, simd_t const & simd_vec);
//!\brief Transposes the given byte matrix in place.
template <simd::simd_concept simd_t>
inline void transpose_matrix_avx512(std::array<simd_t, simd_traits<simd_t>::length> & matrix);
//!\brief Upcasts the source vector into the target vector using sign extension.
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed_avx512(source_simd_t const & src);
//!\brief Upcasts the source vector into the target vector using zero extension.
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const & src);
//!\brief Extracts one half (256 bits) of the given simd vector into the lower half of the result.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_half_avx512(simd_t const & src);
//!\brief Extracts one quarter (128 bits) of the given simd vector into the lower quarter of the result.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_avx512(simd_t const & src);
//!\brief Extracts one eighth (64 bits) of the given simd vector into the lower eighth of the result.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_avx512(simd_t const & src);

} // namespace seqan3::detail
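// Note: These functions live in namespace seqan3::detail and serve as the AVX512
// backend of SeqAn3's generic simd algorithms; user code would normally go through
// the public seqan3::simd interface rather than calling them directly.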
// ----------------------------------------------------------------------------
// Implementation
// ----------------------------------------------------------------------------

#ifdef __AVX512F__

namespace seqan3::detail
{
template <simd::simd_concept simd_t>
constexpr simd_t load_avx512(void const * mem_addr)
{
    return reinterpret_cast<simd_t>(_mm512_loadu_si512(mem_addr));
}
template <simd::simd_concept simd_t>
constexpr void store_avx512(void * mem_addr, simd_t const & simd_vec)
{
    _mm512_storeu_si512(mem_addr, reinterpret_cast<__m512i const &>(simd_vec));
}
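// Usage sketch (illustrative; assumes the alias template seqan3::simd::simd_type_t
// from SeqAn3's public simd API). Both helpers accept unaligned addresses, since
// they wrap the unaligned intrinsics _mm512_loadu_si512 / _mm512_storeu_si512:
//
//     using simd_t = seqan3::simd::simd_type_t<int8_t, 64>; // 64 lanes x 8 bit = 512 bit
//     std::array<int8_t, 64> buffer{};
//     simd_t vec = seqan3::detail::load_avx512<simd_t>(buffer.data());
//     seqan3::detail::store_avx512(buffer.data(), vec);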
#    if defined(__AVX512BW__)
template <simd::simd_concept simd_t>
inline void transpose_matrix_avx512(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
{
    // Transposes a 64x64 byte matrix in 6 rounds of 64 unpack operations each.
    // Note: the _mm512_unpack*_epi* intrinsics interleave within 128-bit lanes,
    // which is why the last round scatters the rows via reverse_idx_mask below.

    // Step 1: Interleave 8-bit operands.
    std::array<__m512i, 64> tmp1;
    for (int i = 0; i < 32; ++i)
    {
        tmp1[i] = _mm512_unpacklo_epi8(reinterpret_cast<__m512i const &>(matrix[2 * i]),
                                       reinterpret_cast<__m512i const &>(matrix[2 * i + 1]));
        tmp1[i + 32] = _mm512_unpackhi_epi8(reinterpret_cast<__m512i const &>(matrix[2 * i]),
                                            reinterpret_cast<__m512i const &>(matrix[2 * i + 1]));
    }
    // Step 2: Interleave 16-bit operands.
    std::array<__m512i, 64> tmp2;
    for (int i = 0; i < 32; ++i)
    {
        tmp2[i] = _mm512_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
        tmp2[i + 32] = _mm512_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
    }
    // Step 3: Interleave 32-bit operands.
    for (int i = 0; i < 32; ++i)
    {
        tmp1[i] = _mm512_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
        tmp1[i + 32] = _mm512_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
    }
    // Step 4: Interleave 64-bit operands.
    for (int i = 0; i < 32; ++i)
    {
        tmp2[i] = _mm512_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
        tmp2[i + 32] = _mm512_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
    }
    // Step 5: Interleave 128-bit operands. There is no 128-bit unpack intrinsic,
    // so it is emulated with a cross-lane 64-bit permute: indices 0..7 of the
    // permute mask select from `a`, indices 8..15 select from `b`.
    auto _mm512_unpacklo_epi128 = [](__m512i const & a, __m512i const & b)
    {
        constexpr std::array<uint64_t, 8> lo_mask{0, 1, 8, 9, 4, 5, 12, 13};
        return _mm512_permutex2var_epi64(a, reinterpret_cast<__m512i const &>(*lo_mask.data()), b);
    };

    auto _mm512_unpackhi_epi128 = [](__m512i const & a, __m512i const & b)
    {
        constexpr std::array<uint64_t, 8> hi_mask{2, 3, 10, 11, 6, 7, 14, 15};
        return _mm512_permutex2var_epi64(a, reinterpret_cast<__m512i const &>(*hi_mask.data()), b);
    };

    for (int i = 0; i < 32; ++i)
    {
        tmp1[i] = _mm512_unpacklo_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
        tmp1[i + 32] = _mm512_unpackhi_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
    }
    // Step 6: Interleave 256-bit operands, emulated via _mm512_shuffle_i64x2:
    // 0b0100'0100 selects the low 256 bits of both inputs, 0b1110'1110 the high ones.
    auto _mm512_unpacklo_epi256 = [](__m512i const & a, __m512i const & b)
    {
        return _mm512_shuffle_i64x2(a, b, 0b0100'0100);
    };

    auto _mm512_unpackhi_epi256 = [](__m512i const & a, __m512i const & b)
    {
        return _mm512_shuffle_i64x2(a, b, 0b1110'1110);
    };

    // Look-up table that scatters the interleaved vectors back to their final
    // row positions in the transposed matrix.
    constexpr std::array<uint32_t, 64> reverse_idx_mask{
        // 0   1  2   3  4   5   6   7  8   9  10  11  12  13  14  15
        0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
        // 16 17 18  19  20  21  22  23 24  25  26  27  28  29  30  31
        1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
        // 32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47
        32, 48, 40, 56, 36, 52, 44, 60, 34, 50, 42, 58, 38, 54, 46, 62,
        // 48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
        33, 49, 41, 57, 37, 53, 45, 61, 35, 51, 43, 59, 39, 55, 47, 63};

    for (int i = 0; i < 32; ++i)
    {
        int const idx = i * 2;
        matrix[reverse_idx_mask[idx]] = reinterpret_cast<simd_t>(_mm512_unpacklo_epi256(tmp1[idx], tmp1[idx + 1]));
        matrix[reverse_idx_mask[idx + 1]] = reinterpret_cast<simd_t>(_mm512_unpackhi_epi256(tmp1[idx], tmp1[idx + 1]));
    }
}
#    endif // defined(__AVX512BW__)
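// Semantics sketch (hedged): viewing `matrix` as a 64x64 byte matrix in which
// matrix[i][j] is byte j of row vector i, the function transposes in place, so
// afterwards matrix[i][j] holds the former matrix[j][i]:
//
//     auto original = matrix; // copy for comparison
//     seqan3::detail::transpose_matrix_avx512(matrix);
//     // matrix[i][j] == original[j][i] for all i, j in [0, 64)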
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed_avx512(source_simd_t const & src)
{
    __m512i const & tmp = reinterpret_cast<__m512i const &>(src);
    if constexpr (simd_traits<source_simd_t>::length == 64) // cast from epi8 ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 32) // ... to epi16
            return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi16(_mm512_castsi512_si256(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 16) // ... to epi32
            return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi32(_mm512_castsi512_si128(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to epi64
            return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi64(_mm512_castsi512_si128(tmp)));
    }
    else if constexpr (simd_traits<source_simd_t>::length == 32) // cast from epi16 ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 16) // ... to epi32
            return reinterpret_cast<target_simd_t>(_mm512_cvtepi16_epi32(_mm512_castsi512_si256(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to epi64
            return reinterpret_cast<target_simd_t>(_mm512_cvtepi16_epi64(_mm512_castsi512_si128(tmp)));
    }
    else // cast from epi32 to epi64
    {
        static_assert(simd_traits<source_simd_t>::length == 16, "Expected 32 bit scalar type.");
        return reinterpret_cast<target_simd_t>(_mm512_cvtepi32_epi64(_mm512_castsi512_si256(tmp)));
    }
}
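// Example (illustrative sketch; seqan3::simd::simd_type_t and seqan3::simd::fill
// are assumed from SeqAn3's public simd API): widening 64x int8_t to 32x int16_t
// replicates the sign bit, and only the low 32 source lanes are consumed, matching
// the _mm512_castsi512_si256 truncation above:
//
//     using src_t = seqan3::simd::simd_type_t<int8_t, 64>;
//     using tgt_t = seqan3::simd::simd_type_t<int16_t, 32>;
//     src_t src = seqan3::simd::fill<src_t>(-1);
//     tgt_t dst = seqan3::detail::upcast_signed_avx512<tgt_t>(src);
//     // dst[i] == -1 for all i in [0, 32)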
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const & src)
{
    __m512i const & tmp = reinterpret_cast<__m512i const &>(src);
    if constexpr (simd_traits<source_simd_t>::length == 64) // cast from epu8 ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 32) // ... to epi16
            return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi16(_mm512_castsi512_si256(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 16) // ... to epi32
            return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi32(_mm512_castsi512_si128(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to epi64
            return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi64(_mm512_castsi512_si128(tmp)));
    }
    else if constexpr (simd_traits<source_simd_t>::length == 32) // cast from epu16 ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 16) // ... to epi32
            return reinterpret_cast<target_simd_t>(_mm512_cvtepu16_epi32(_mm512_castsi512_si256(tmp)));
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to epi64
            return reinterpret_cast<target_simd_t>(_mm512_cvtepu16_epi64(_mm512_castsi512_si128(tmp)));
    }
    else // cast from epu32 to epi64
    {
        static_assert(simd_traits<source_simd_t>::length == 16, "Expected 32 bit scalar type.");
        return reinterpret_cast<target_simd_t>(_mm512_cvtepu32_epi64(_mm512_castsi512_si256(tmp)));
    }
}
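// Counterpart sketch for the unsigned variant (reusing the aliases from the sketch
// above): zero extension instead of sign extension, so a source byte of 0xFF widens
// to 0x00FF (255) rather than to -1:
//
//     src_t src = seqan3::simd::fill<src_t>(static_cast<int8_t>(0xFF));
//     tgt_t dst = seqan3::detail::upcast_unsigned_avx512<tgt_t>(src);
//     // dst[i] == 255 for all i in [0, 32)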
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_half_avx512(simd_t const & src)
{
    return reinterpret_cast<simd_t>(
        _mm512_castsi256_si512(_mm512_extracti64x4_epi64(reinterpret_cast<__m512i const &>(src), index)));
}
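// Example (illustrative sketch): with 64x int8_t lanes, extract_half_avx512<1>(src)
// moves the upper 256 bits of src into the lower half of the result; the upper half
// of the result is left undefined by _mm512_castsi256_si512:
//
//     simd_t upper = seqan3::detail::extract_half_avx512<1>(src);
//     // upper[i] == src[i + 32] for i in [0, 32)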
#    if defined(__AVX512DQ__)
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_avx512(simd_t const & src)
{
    return reinterpret_cast<simd_t>(
        _mm512_castsi128_si512(_mm512_extracti64x2_epi64(reinterpret_cast<__m512i const &>(src), index)));
}
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_avx512(simd_t const & src)
{
    __m512i tmp = reinterpret_cast<__m512i const &>(src);

    // 128 bits is the smallest granularity the extract intrinsics offer, so for an
    // odd eighth we first swap the 64-bit halves of every 128-bit lane: the control
    // 0b0100'1110 selects the 32-bit elements [2, 3, 0, 1] within each lane ...
    if constexpr (index % 2 == 1)
        tmp = _mm512_shuffle_epi32(tmp, static_cast<_MM_PERM_ENUM>(0b0100'1110));

    // ... and then extract the 128-bit block that now holds the requested eighth.
    return reinterpret_cast<simd_t>(_mm512_castsi128_si512(_mm512_extracti64x2_epi64(tmp, index / 2)));
}
#    endif // defined(__AVX512DQ__)

} // namespace seqan3::detail

#endif // __AVX512F__
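// Example (illustrative sketch): for 64x int8_t lanes, eighth index 3 covers bytes
// [24, 32). Because 3 is odd, the shuffle in extract_eighth_avx512 first moves those
// bytes into the lower 64 bits of their 128-bit lane; index / 2 == 1 then extracts
// that lane:
//
//     simd_t e = seqan3::detail::extract_eighth_avx512<3>(src);
//     // e[i] == src[i + 24] for i in [0, 8)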