Visible to Intel only — GUID: GUID-57F1B16C-887A-466C-97E6-1B6D6867E2A8
Visible to Intel only — GUID: GUID-57F1B16C-887A-466C-97E6-1B6D6867E2A8
Intrinsics for Integer Permutation Operations
The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file.
To use these intrinsics, include the immintrin.h file as follows:
#include <immintrin.h>
Intrinsic Name |
Operation |
Corresponding Intel® AVX-512 Instruction |
---|---|---|
_mm512_permutex2var_epi32, _mm512_mask_permutex2var_epi32, _mm512_mask2_permutex2var_epi32, _mm512_maskz_permutex2var_epi32 |
Shuffle int32 elements across lanes. |
VPERMI2D, VPERMT2D |
_mm512_permutex2var_epi64, _mm512_mask_permutex2var_epi64, _mm512_mask2_permutex2var_epi64, _mm512_maskz_permutex2var_epi64 |
Shuffle int64 elements across lanes. |
VPERMI2Q, VPERMT2Q |
_mm512_permutevar_epi32, _mm512_mask_permutevar_epi32, _mm512_permutexvar_epi32, _mm512_mask_permutexvar_epi32, _mm512_maskz_permutexvar_epi32 |
Shuffle int32 elements across lanes. |
VPERMD |
_mm512_permutex_epi64, _mm512_mask_permutex_epi64, _mm512_maskz_permutex_epi64, _mm512_permutexvar_epi64, _mm512_mask_permutexvar_epi64, _mm512_maskz_permutexvar_epi64 |
Shuffle int64 elements across lanes. |
VPERMQ |
variable | definition |
---|---|
k | writemask used as a selector |
a | first source vector element |
src | source element to use based on writemask result |
idx | int32 vector containing indices in memory |
_mm512_permutevar_epi32
extern __m512i __cdecl _mm512_permutevar_epi32(__m512i a, __m512i idx);
Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result.
This intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
_mm512_mask_permutevar_epi32
extern __m512i __cdecl _mm512_mask_permutevar_epi32(__m512i src, __mmask16 k, __m512i a, __m512i idx);
Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
This intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
_mm512_permutexvar_epi32
extern __m512i __cdecl _mm512_permutexvar_epi32(__m512i idx, __m512i a);
Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result.
_mm512_mask_permutexvar_epi32
extern __m512i __cdecl _mm512_mask_permutexvar_epi32(__m512i src, __mmask16 k, __m512i idx, __m512i a);
Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutexvar_epi32
extern __m512i __cdecl _mm512_maskz_permutexvar_epi32(__mmask16 k, __m512i idx, __m512i a);
Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutex2var_epi32
extern __m512i __cdecl _mm512_permutex2var_epi32(__m512i a, __m512i idx, __m512i b);
Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.
_mm512_mask_permutex2var_epi32
extern __m512i __cdecl _mm512_mask_permutex2var_epi32(__m512i a, __mmask16 k, __m512i idx, __m512i b);
Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).
_mm512_mask2_permutex2var_epi32
extern __m512i __cdecl _mm512_mask2_permutex2var_epi32(__m512i a, __m512i idx, __mmask16 k, __m512i b);
Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).
_mm512_maskz_permutex2var_epi32
extern __m512i __cdecl _mm512_maskz_permutex2var_epi32(__mmask16 k, __m512i a, __m512i idx, __m512i b);
Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutex2var_epi64
extern __m512i __cdecl _mm512_permutex2var_epi64(__m512i a, __m512i idx, __m512i b);
Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.
_mm512_mask_permutex2var_epi64
extern __m512i __cdecl _mm512_mask_permutex2var_epi64(__m512i a, __mmask8 k, __m512i idx, __m512i b);
Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).
_mm512_mask2_permutex2var_epi64
extern __m512i __cdecl _mm512_mask2_permutex2var_epi64(__m512i a, __m512i idx, __mmask8 k, __m512i b);
Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).
_mm512_maskz_permutex2var_epi64
extern __m512i __cdecl _mm512_maskz_permutex2var_epi64(__mmask8 k, __m512i a, __m512i idx, __m512i b);
Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutex_epi64
extern __m512i __cdecl _mm512_permutex_epi64(__m512i a, const int imm);
Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result.
_mm512_mask_permutex_epi64
extern __m512i __cdecl _mm512_mask_permutex_epi64(__m512i src, __mmask8 k, __m512i a, const int imm);
Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutex_epi64
extern __m512i __cdecl _mm512_maskz_permutex_epi64(__mmask8 k, __m512i a, const int imm);
Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutexvar_epi64
extern __m512i __cdecl _mm512_permutexvar_epi64(__m512i idx, __m512i a);
Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result.
_mm512_mask_permutexvar_epi64
extern __m512i __cdecl _mm512_mask_permutexvar_epi64(__m512i src, __mmask8 k, __m512i idx, __m512i a);
Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutexvar_epi64
extern __m512i __cdecl _mm512_maskz_permutexvar_epi64(__mmask8 k, __m512i idx, __m512i a);
Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).