The miscellaneous intrinsics for Streaming SIMD Extensions 2 (SSE2) are listed in the following table followed by their descriptions.

The prototypes for SSE2 intrinsics are in the emmintrin.h header file.

Intrinsic | Corresponding Instruction | Operation |
---|---|---|

_mm_packs_epi16 | PACKSSWB | Packed Saturation |

_mm_packs_epi32 | PACKSSDW | Packed Saturation |

_mm_packus_epi16 | PACKUSWB | Packed Saturation |

_mm_extract_epi16 | PEXTRW | Extraction |

_mm_insert_epi16 | PINSRW | Insertion |

_mm_movemask_epi8 | PMOVMSKB | Mask Creation |

_mm_shuffle_epi32 | PSHUFD | Shuffle |

_mm_shufflehi_epi16 | PSHUFHW | Shuffle |

_mm_shufflelo_epi16 | PSHUFLW | Shuffle |

_mm_unpackhi_epi8 | PUNPCKHBW | Interleave |

_mm_unpackhi_epi16 | PUNPCKHWD | Interleave |

_mm_unpackhi_epi32 | PUNPCKHDQ | Interleave |

_mm_unpackhi_epi64 | PUNPCKHQDQ | Interleave |

_mm_unpacklo_epi8 | PUNPCKLBW | Interleave |

_mm_unpacklo_epi16 | PUNPCKLWD | Interleave |

_mm_unpacklo_epi32 | PUNPCKLDQ | Interleave |

_mm_unpacklo_epi64 | PUNPCKLQDQ | Interleave |

_mm_movepi64_pi64 | MOVDQ2Q | Move |

_mm_movpi64_epi64 | MOVQ2DQ | Move |

_mm_move_epi64 | MOVQ | Move |

__m128i _mm_packs_epi16(__m128i a, __m128i b)

Packs the 16 signed 16-bit integers from a
and b into signed 8-bit integers and saturates.

r0 := SignedSaturate(a0)

r1 := SignedSaturate(a1)

...

r7 := SignedSaturate(a7)

r8 := SignedSaturate(b0)

r9 := SignedSaturate(b1)

...

r15 := SignedSaturate(b7)

__m128i _mm_packs_epi32(__m128i a, __m128i b)

Packs the 8 signed 32-bit integers from a
and b into signed 16-bit integers and saturates.

r0 := SignedSaturate(a0)

r1 := SignedSaturate(a1)

r2 := SignedSaturate(a2)

r3 := SignedSaturate(a3)

r4 := SignedSaturate(b0)

r5 := SignedSaturate(b1)

r6 := SignedSaturate(b2)

r7 := SignedSaturate(b3)

__m128i _mm_packus_epi16(__m128i a, __m128i b)

Packs the 16 signed 16-bit integers from a
and b into 8-bit unsigned integers and saturates.

r0 := UnsignedSaturate(a0)

r1 := UnsignedSaturate(a1)

...

r7 := UnsignedSaturate(a7)

r8 := UnsignedSaturate(b0)

r9 := UnsignedSaturate(b1)

...

r15 := UnsignedSaturate(b7)

int _mm_extract_epi16(__m128i a, int imm)

Extracts the selected signed or unsigned 16-bit integer
from a and zero extends. The selector imm
must be an immediate.

r := (imm == 0) ? a0 :

( (imm == 1) ? a1 :

...

(imm == 7) ? a7 )

__m128i _mm_insert_epi16(__m128i a, int b, int imm)

Inserts the least significant 16 bits of b
into the selected 16-bit integer of a. The selector
imm must be an immediate.

r0 := (imm == 0) ? b : a0;

r1 := (imm == 1) ? b : a1;

...

r7 := (imm == 7) ? b : a7;

int _mm_movemask_epi8(__m128i a)

Creates a 16-bit mask from the most significant bits
of the 16 signed or unsigned 8-bit integers in a
and zero extends the upper bits.

r := a15[7] << 15 |

a14[7] << 14 |

...

a1[7] << 1 |

a0[7]

__m128i _mm_shuffle_epi32(__m128i a, int imm)

Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

__m128i _mm_shufflehi_epi16(__m128i a, int imm)

Shuffles the upper 4 signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

__m128i _mm_shufflelo_epi16(__m128i a, int imm)

Shuffles the lower 4 signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)

Interleaves the upper 8 signed or unsigned 8-bit integers
in a with the upper 8 signed or unsigned 8-bit
integers in b.

r0 := a8 ; r1 := b8

r2 := a9 ; r3 := b9

...

r14 := a15 ; r15 := b15

__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)

Interleaves the upper 4 signed or unsigned 16-bit integers
in a with the upper 4 signed or unsigned 16-bit
integers in b.

r0 := a4 ; r1 := b4

r2 := a5 ; r3 := b5

r4 := a6 ; r5 := b6

r6 := a7 ; r7 := b7

__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)

Interleaves the upper 2 signed or unsigned 32-bit integers
in a with the upper 2 signed or unsigned 32-bit
integers in b.

r0 := a2 ; r1 := b2

r2 := a3 ; r3 := b3

__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)

Interleaves the upper signed or unsigned 64-bit integer
in a with the upper signed or unsigned 64-bit
integer in b.

r0 := a1 ; r1 := b1

__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)

Interleaves the lower 8 signed or unsigned 8-bit integers
in a with the lower 8 signed or unsigned 8-bit
integers in b.

r0 := a0 ; r1 := b0

r2 := a1 ; r3 := b1

...

r14 := a7 ; r15 := b7

__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)

Interleaves the lower 4 signed or unsigned 16-bit integers
in a with the lower 4 signed or unsigned 16-bit
integers in b.

r0 := a0 ; r1 := b0

r2 := a1 ; r3 := b1

r4 := a2 ; r5 := b2

r6 := a3 ; r7 := b3

__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)

Interleaves the lower 2 signed or unsigned 32-bit integers
in a with the lower 2 signed or unsigned 32-bit
integers in b.

r0 := a0 ; r1 := b0

r2 := a1 ; r3 := b1

__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)

Interleaves the lower signed or unsigned 64-bit integer
in a with the lower signed or unsigned 64-bit
integer in b.

r0 := a0 ; r1 := b0

__m64 _mm_movepi64_pi64(__m128i a)

Returns the lower 64 bits of a
as an __m64 type.

r0 := a0 ;

__m128i _mm_movpi64_epi64(__m64 a)

Moves the 64 bits of a to the
lower 64 bits of the result, zeroing the upper bits.

r0 := a0 ; r1 := 0x0 ;

__m128i _mm_move_epi64(__m128i a)

Moves the lower 64 bits of a to the lower 64 bits of the
result, zeroing the upper bits.

r0 := a0 ; r1 := 0x0 ;

The prototypes for Streaming SIMD Extensions 2 (SSE2) intrinsics are in the emmintrin.h header file.

__m128d _mm_unpackhi_pd(__m128d a, __m128d b)

(uses UNPCKHPD) Interleaves
the upper DP FP values of a and b.

r0 := a1

r1 := b1

__m128d _mm_unpacklo_pd(__m128d a, __m128d b)

(uses UNPCKLPD) Interleaves
the lower DP FP values of a and b.

r0 := a0

r1 := b0

int _mm_movemask_pd(__m128d a)

(uses MOVMSKPD) Creates a two-bit
mask from the sign bits of the two DP FP values of a.

r := sign(a1) << 1 | sign(a0)

__m128d _mm_shuffle_pd(__m128d a, __m128d b, int i)

(uses SHUFPD) Selects two specific DP FP values from a and b, based on the mask i. The mask must be an immediate. See Macro Function for Shuffle for a description of the shuffle semantics.

This version of the Intel C++ Compiler supports casting between various SP, DP, and INT vector types. These intrinsics do not convert values; they just change the type.

extern __m128 _mm_castpd_ps(__m128d in);

extern __m128i _mm_castpd_si128(__m128d in);

extern __m128d _mm_castps_pd(__m128 in);

extern __m128i _mm_castps_si128(__m128 in);

extern __m128 _mm_castsi128_ps(__m128i in);

extern __m128d _mm_castsi128_pd(__m128i in);