Visible to Intel only — GUID: GUID-6FCDAACE-3EFA-4FEC-AC69-9C22F269F32B
Visible to Intel only — GUID: GUID-6FCDAACE-3EFA-4FEC-AC69-9C22F269F32B
Intrinsics for FP Permutation Operations
The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file.
To use these intrinsics, include the immintrin.h file as follows:
#include <immintrin.h>
Intrinsic Name |
Operation |
Corresponding Intel® AVX-512 Instruction |
---|---|---|
_mm512_permutex2var_pd, _mm512_mask_permutex2var_pd, _mm512_mask2_permutex2var_pd, _mm512_maskz_permutex2var_pd |
Shuffle float64 elements across lanes. |
VPERMI2PD |
_mm512_permutex2var_ps, _mm512_mask_permutex2var_ps, _mm512_mask2_permutex2var_ps, _mm512_maskz_permutex2var_ps |
Shuffle float32 elements across lanes. |
VPERMI2PS |
_mm512_permute_pd, _mm512_mask_permute_pd, _mm512_maskz_permute_pd |
Shuffle float64 elements within 128-bit lanes. |
VPERMILPD, VPERMPD |
_mm512_permutevar_pd, _mm512_mask_permutevar_pd, _mm512_maskz_permutevar_pd |
Shuffle float64 elements within 128-bit lanes. |
VPERMILPD |
_mm512_permutex_pd, _mm512_mask_permutex_pd, _mm512_maskz_permutex_pd |
Shuffle float64 elements within 256-bit lanes. |
VPERMPD |
_mm512_permutexvar_pd, _mm512_mask_permutexvar_pd, _mm512_maskz_permutexvar_pd |
Shuffle float64 elements across lanes. |
VPERMPD |
_mm512_permute_ps, _mm512_mask_permute_ps, _mm512_maskz_permute_ps |
Shuffle float32 elements within 128-bit lanes. |
VPERMILPS |
_mm512_permutevar_ps, _mm512_mask_permutevar_ps, _mm512_maskz_permutevar_ps |
Shuffle float32 elements within 128-bit lanes. |
VPERMPS, VPERMILPS |
_mm512_permutexvar_ps, _mm512_mask_permutexvar_ps, _mm512_maskz_permutexvar_ps |
Shuffle float32 elements across lanes. |
VPERMPS |
variable | definition |
---|---|
k | writemask used as a selector |
a | first source vector element |
b | second source vector element |
src | source element to use based on writemask result |
idx | vector of indices used to select source elements
imm | 8-bit immediate value used as the shuffle control
_mm512_permutex2var_pd
extern __m512d __cdecl _mm512_permutex2var_pd(__m512d a, __m512i idx, __m512d b);
Shuffles float64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.
_mm512_mask_permutex2var_pd
extern __m512d __cdecl _mm512_mask_permutex2var_pd(__m512d a, __mmask8 k, __m512i idx, __m512d b);
Shuffles float64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).
_mm512_mask2_permutex2var_pd
extern __m512d __cdecl _mm512_mask2_permutex2var_pd(__m512d a, __m512i idx, __mmask8 k, __m512d b);
Shuffles float64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).
_mm512_maskz_permutex2var_pd
extern __m512d __cdecl _mm512_maskz_permutex2var_pd(__mmask8 k, __m512d a, __m512i idx, __m512d b);
Shuffles float64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutex2var_ps
extern __m512 __cdecl _mm512_permutex2var_ps(__m512 a, __m512i idx, __m512 b);
Shuffles float32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.
_mm512_mask2_permutex2var_ps
extern __m512 __cdecl _mm512_mask2_permutex2var_ps(__m512 a, __m512i idx, __mmask16 k, __m512 b);
Shuffles float32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).
_mm512_mask_permutex2var_ps
extern __m512 __cdecl _mm512_mask_permutex2var_ps(__m512 a, __mmask16 k, __m512i idx, __m512 b);
Shuffles float32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).
_mm512_maskz_permutex2var_ps
extern __m512 __cdecl _mm512_maskz_permutex2var_ps(__mmask16 k, __m512 a, __m512i idx, __m512 b);
Shuffles float32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permute_pd
extern __m512d __cdecl _mm512_permute_pd(__m512d a, const int imm);
Shuffles float64 elements in a within 128-bit lanes using the control in imm, and stores the result.
_mm512_mask_permute_pd
extern __m512d __cdecl _mm512_mask_permute_pd(__m512d src, __mmask8 k, __m512d a, const int imm);
Shuffles float64 elements in a within 128-bit lanes using the control in imm, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permute_pd
extern __m512d __cdecl _mm512_maskz_permute_pd(__mmask8 k, __m512d a, const int imm);
Shuffles float64 elements in a within 128-bit lanes using the control in imm, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutevar_pd
extern __m512d __cdecl _mm512_permutevar_pd(__m512d a, __m512i b);
Shuffles float64 elements in a within 128-bit lanes using the control in b, and stores the result.
_mm512_mask_permutevar_pd
extern __m512d __cdecl _mm512_mask_permutevar_pd(__m512d src, __mmask8 k, __m512d a, __m512i b);
Shuffles float64 elements in a within 128-bit lanes using the control in b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutevar_pd
extern __m512d __cdecl _mm512_maskz_permutevar_pd(__mmask8 k, __m512d a, __m512i b);
Shuffles float64 elements in a within 128-bit lanes using the control in b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permute_ps
extern __m512 __cdecl _mm512_permute_ps(__m512 a, const int imm);
Shuffles float32 elements in a within 128-bit lanes using the control in imm, and stores the result.
_mm512_mask_permute_ps
extern __m512 __cdecl _mm512_mask_permute_ps(__m512 src, __mmask16 k, __m512 a, const int imm);
Shuffles float32 elements in a within 128-bit lanes using the control in imm, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permute_ps
extern __m512 __cdecl _mm512_maskz_permute_ps(__mmask16 k, __m512 a, const int imm);
Shuffles float32 elements in a within 128-bit lanes using the control in imm, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutevar_ps
extern __m512 __cdecl _mm512_permutevar_ps(__m512 a, __m512i b);
Shuffles float32 elements in a within 128-bit lanes using the control in b, and stores the result.
_mm512_mask_permutevar_ps
extern __m512 __cdecl _mm512_mask_permutevar_ps(__m512 src, __mmask16 k, __m512 a, __m512i b);
Shuffles float32 elements in a within 128-bit lanes using the control in b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutevar_ps
extern __m512 __cdecl _mm512_maskz_permutevar_ps(__mmask16 k, __m512 a, __m512i b);
Shuffles float32 elements in a within 128-bit lanes using the control in b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutex_pd
extern __m512d __cdecl _mm512_permutex_pd(__m512d a, const int imm);
Shuffles float64 elements in a within 256-bit lanes using the control in imm, and stores the result.
_mm512_mask_permutex_pd
extern __m512d __cdecl _mm512_mask_permutex_pd(__m512d src, __mmask8 k, __m512d a, const int imm);
Shuffles float64 elements in a within 256-bit lanes using the control in imm, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutex_pd
extern __m512d __cdecl _mm512_maskz_permutex_pd(__mmask8 k, __m512d a, const int imm);
Shuffles float64 elements in a within 256-bit lanes using the control in imm, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutexvar_pd
extern __m512d __cdecl _mm512_permutexvar_pd(__m512i idx, __m512d a);
Shuffles float64 elements in a across lanes using the corresponding index in idx, and stores the result.
_mm512_mask_permutexvar_pd
extern __m512d __cdecl _mm512_mask_permutexvar_pd(__m512d src, __mmask8 k, __m512i idx, __m512d a);
Shuffles float64 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutexvar_pd
extern __m512d __cdecl _mm512_maskz_permutexvar_pd(__mmask8 k, __m512i idx, __m512d a);
Shuffles float64 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
_mm512_permutexvar_ps
extern __m512 __cdecl _mm512_permutexvar_ps(__m512i idx, __m512 a);
Shuffles float32 elements in a across lanes using the corresponding index in idx, and stores the result.
_mm512_mask_permutexvar_ps
extern __m512 __cdecl _mm512_mask_permutexvar_ps(__m512 src, __mmask16 k, __m512i idx, __m512 a);
Shuffles float32 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
_mm512_maskz_permutexvar_ps
extern __m512 __cdecl _mm512_maskz_permutexvar_ps(__mmask16 k, __m512i idx, __m512 a);
Shuffles float32 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).