Visible to Intel only — GUID: GUID-AFA947A7-8490-443B-9946-C7B16C8E6244
Visible to Intel only — GUID: GUID-AFA947A7-8490-443B-9946-C7B16C8E6244
Miscellaneous Intrinsics
Intel® Streaming SIMD Extensions 2 (Intel® SSE2) intrinsics for miscellaneous operations are listed in the following table followed by descriptions.
The prototypes for Intel® SSE2 intrinsics are in the emmintrin.h header file.
To use these intrinsics, include the immintrin.h file as follows:
#include <immintrin.h>
Intrinsic |
Operation |
Corresponding Intel® SSE2 Instruction |
---|---|---|
_mm_packs_epi16 |
Packed Saturation |
PACKSSWB |
_mm_packs_epi32 |
Packed Saturation |
PACKSSDW |
_mm_packus_epi16 |
Packed Saturation |
PACKUSWB |
_mm_extract_epi16 |
Extraction |
PEXTRW |
_mm_insert_epi16 |
Insertion |
PINSRW |
_mm_movemask_epi8 |
Mask Creation |
PMOVMSKB |
_mm_shuffle_epi32 |
Shuffle |
PSHUFD |
_mm_shufflehi_epi16 |
Shuffle |
PSHUFHW |
_mm_shufflelo_epi16 |
Shuffle |
PSHUFLW |
_mm_unpackhi_epi8 |
Interleave |
PUNPCKHBW |
_mm_unpackhi_epi16 |
Interleave |
PUNPCKHWD |
_mm_unpackhi_epi32 |
Interleave |
PUNPCKHDQ |
_mm_unpackhi_epi64 |
Interleave |
PUNPCKHQDQ |
_mm_unpacklo_epi8 |
Interleave |
PUNPCKLBW |
_mm_unpacklo_epi16 |
Interleave |
PUNPCKLWD |
_mm_unpacklo_epi32 |
Interleave |
PUNPCKLDQ |
_mm_unpacklo_epi64 |
Interleave |
PUNPCKLQDQ |
_mm_movepi64_pi64 |
Move |
MOVDQ2Q |
_mm_movpi64_epi64 |
Move |
MOVQ2DQ |
_mm_move_epi64 |
Move |
MOVQ |
_mm_unpackhi_pd |
Interleave |
UNPCKHPD |
_mm_unpacklo_pd |
Interleave |
UNPCKLPD |
_mm_movemask_pd |
Create mask |
MOVMSKPD |
_mm_shuffle_pd |
Select values |
SHUFPD |
_mm_packs_epi16
__m128i _mm_packs_epi16(__m128i a, __m128i b);
Packs the 16 signed 16-bit integers from a and b into signed 8-bit integers and saturates.
R0 |
... |
R7 |
R8 |
... |
R15 |
---|---|---|---|---|---|
Signed Saturate(a0) |
... |
Signed Saturate(a7) |
Signed Saturate(b0) |
... |
Signed Saturate(b7) |
_mm_packs_epi32
__m128i _mm_packs_epi32(__m128i a, __m128i b);
Packs the eight signed 32-bit integers from a and b into signed 16-bit integers and saturates.
R0 |
... |
R3 |
R4 |
... |
R7 |
---|---|---|---|---|---|
Signed Saturate(a0) |
... |
Signed Saturate(a3) |
Signed Saturate(b0) |
... |
Signed Saturate(b3) |
_mm_packus_epi16
__m128i _mm_packus_epi16(__m128i a, __m128i b);
Packs the 16 signed 16-bit integers from a and b into 8-bit unsigned integers and saturates.
R0 |
... |
R7 |
R8 |
... |
R15 |
---|---|---|---|---|---|
Unsigned Saturate(a0) |
... |
Unsigned Saturate(a7) |
Unsigned Saturate(b0) |
... |
Unsigned Saturate(b7) |
_mm_extract_epi16
int _mm_extract_epi16(__m128i a, int imm);
Extracts the selected signed or unsigned 16-bit integer from a and zero extends. The selector imm must be an immediate.
R0 |
---|
(imm == 0) ? a0: ( (imm == 1) ? a1: ... (imm==7) ? a7) |
_mm_insert_epi16
__m128i _mm_insert_epi16(__m128i a, int b, int imm);
Inserts the least significant 16 bits of b into the selected 16-bit integer of a. The selector imm must be an immediate.
R0 |
R1 |
... |
R7 |
---|---|---|---|
(imm == 0) ? b : a0; |
(imm == 1) ? b : a1; |
... |
(imm == 7) ? b : a7; |
_mm_movemask_epi8
int _mm_movemask_epi8(__m128i a);
Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits.
R0 |
---|
a15[7] << 15 | a14[7] << 14 | ... a1[7] << 1 | a0[7] |
_mm_shuffle_epi32
__m128i _mm_shuffle_epi32(__m128i a, int imm);
Shuffles the four signed or unsigned 32-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
_mm_shufflehi_epi16
__m128i _mm_shufflehi_epi16(__m128i a, int imm);
Shuffles the upper four signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
_mm_shufflelo_epi16
__m128i _mm_shufflelo_epi16(__m128i a, int imm);
Shuffles the lower four signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.
_mm_unpackhi_epi8
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
Interleaves the upper eight signed or unsigned 8-bit integers in a with the upper eight signed or unsigned 8-bit integers in b.
R0 |
R1 |
R2 |
R3 |
... |
R14 |
R15 |
---|---|---|---|---|---|---|
a8 |
b8 |
a9 |
b9 |
... |
a15 |
b15 |
_mm_unpackhi_epi16
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
Interleaves the upper four signed or unsigned 16-bit integers in a with the upper four signed or unsigned 16-bit integers in b.
R0 |
R1 |
R2 |
R3 |
R4 |
R5 |
R6 |
R7 |
---|---|---|---|---|---|---|---|
a4 |
b4 |
a5 |
b5 |
a6 |
b6 |
a7 |
b7 |
_mm_unpackhi_epi32
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
Interleaves the upper two signed or unsigned 32-bit integers in a with the upper two signed or unsigned 32-bit integers in b.
R0 |
R1 |
R2 |
R3 |
---|---|---|---|
a2 |
b2 |
a3 |
b3 |
_mm_unpackhi_epi64
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
Interleaves the upper signed or unsigned 64-bit integer in a with the upper signed or unsigned 64-bit integer in b.
R0 |
R1 |
---|---|
a1 |
b1 |
_mm_unpacklo_epi8
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
Interleaves the lower eight signed or unsigned 8-bit integers in a with the lower eight signed or unsigned 8-bit integers in b.
R0 |
R1 |
R2 |
R3 |
... |
R14 |
R15 |
---|---|---|---|---|---|---|
a0 |
b0 |
a1 |
b1 |
... |
a7 |
b7 |
_mm_unpacklo_epi16
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
Interleaves the lower four signed or unsigned 16-bit integers in a with the lower four signed or unsigned 16-bit integers in b.
R0 |
R1 |
R2 |
R3 |
R4 |
R5 |
R6 |
R7 |
---|---|---|---|---|---|---|---|
a0 |
b0 |
a1 |
b1 |
a2 |
b2 |
a3 |
b3 |
_mm_unpacklo_epi32
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
Interleaves the lower two signed or unsigned 32-bit integers in a with the lower two signed or unsigned 32-bit integers in b.
R0 |
R1 |
R2 |
R3 |
---|---|---|---|
a0 |
b0 |
a1 |
b1 |
_mm_unpacklo_epi64
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
Interleaves the lower signed or unsigned 64-bit integer in a with the lower signed or unsigned 64-bit integer in b.
R0 |
R1 |
---|---|
a0 |
b0 |
_mm_movepi64_pi64
__m64 _mm_movepi64_pi64(__m128i a);
Returns the lower 64 bits of a as an __m64 type.
R0 |
---|
a0 |
_mm_movpi64_epi64
__m128i _mm_movpi64_epi64(__m64 a);
Moves the 64 bits of a to the lower 64 bits of the result, zeroing the upper bits.
R0 |
R1 |
---|---|
a0 |
0x0 |
_mm_move_epi64
__m128i _mm_move_epi64(__m128i a);
Moves the lower 64 bits of a to the lower 64 bits of the result, zeroing the upper bits.
R0 |
R1 |
---|---|
a0 |
0x0 |
_mm_unpackhi_pd
__m128d _mm_unpackhi_pd(__m128d a, __m128d b);
Interleaves the upper DP FP values of a and b.
R0 |
R1 |
---|---|
a1 |
b1 |
_mm_unpacklo_pd
__m128d _mm_unpacklo_pd(__m128d a, __m128d b);
Interleaves the lower DP FP values of a and b.
R0 |
R1 |
---|---|
a0 |
b0 |
_mm_movemask_pd
int _mm_movemask_pd(__m128d a);
Creates a two-bit mask from the sign bits of the two DP FP values of a.
R0 |
---|
sign(a1) << 1 | sign(a0) |
_mm_shuffle_pd
__m128d _mm_shuffle_pd(__m128d a, __m128d b, int i);
Selects two specific DP FP values from a and b, based on the mask i. The mask must be an immediate. See Macro Function for Shuffle for a description of the shuffle semantics.