Floating-point Vector Intrinsics for Streaming SIMD Extensions 3

The floating-point intrinsics listed here are designed for the Intel® Pentium® 4 processor with Streaming SIMD Extensions 3 (SSE3).

The prototypes for these intrinsics are in the pmmintrin.h header file.

Single-precision Floating-point Vector Intrinsics

extern __m128 _mm_addsub_ps(__m128 a, __m128 b);

Subtracts even vector elements while adding odd vector elements.
r0 := a0 - b0;

r1 := a1 + b1;

r2 := a2 - b2;

r3 := a3 + b3;

extern __m128 _mm_hadd_ps(__m128 a, __m128 b);

Adds adjacent vector elements.
r0 := a0 + a1;

r1 := a2 + a3;

r2 := b0 + b1;

r3 := b2 + b3;

extern __m128 _mm_hsub_ps(__m128 a, __m128 b);

Subtracts adjacent vector elements.
r0 := a0 - a1;

r1 := a2 - a3;

r2 := b0 - b1;

r3 := b2 - b3;

extern __m128 _mm_movehdup_ps(__m128 a);

Duplicates odd vector elements into even vector elements.
r0 := a1;

r1 := a1;

r2 := a3;

r3 := a3;

extern __m128 _mm_moveldup_ps(__m128 a);

Duplicates even vector elements into odd vector elements.
r0 := a0;

r1 := a0;

r2 := a2;

r3 := a2;

Double-precision Floating-point Vector Intrinsics

extern __m128d _mm_addsub_pd(__m128d a, __m128d b);

Adds upper vector element while subtracting lower vector element.
r0 := a0 - b0;

r1 := a1 + b1;

extern __m128d _mm_hadd_pd(__m128d a, __m128d b);

Adds adjacent vector elements.
r0 := a0 + a1;

r1 := b0 + b1;

extern __m128d _mm_hsub_pd(__m128d a, __m128d b);

Subtracts adjacent vector elements.
r0 := a0 - a1;

r1 := b0 - b1;

extern __m128d _mm_loaddup_pd(double const * dp);

Duplicates a double value into upper and lower vector elements.
r0 := *dp;

r1 := *dp;

extern __m128d _mm_movedup_pd(__m128d a);

Duplicates lower vector element into upper vector element.
r0 := a0;
r1 := a0;