// Include SIMD Header
#ifdef __INTEL_COMPILER
#  include <fvec.h>   // also contains xmmintrin.h
#else
#  include <xmmintrin.h>
#endif

// Four packed single-precision floats, accessible either as one SSE value
// or lane by lane through the overlapping float array.
typedef union {
    __m128 m;    // all four lanes as a single SSE value
    float f[4];  // per-lane view of the same storage
} vector;

// Compute the scalar (dot) product of two 4-element float vectors using
// SSE intrinsics.
//
// x, y: each must point to at least 4 readable floats; no particular
//       alignment is required.
// Returns x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3], with the four
// products added left to right.
float scalarproductIntrinsics(float x[], float y[])
{
    // Use unaligned loads: dereferencing a float* cast to __m128* requires
    // 16-byte alignment, which plain float arrays do not guarantee.
    __m128 prod = _mm_mul_ps(_mm_loadu_ps(x), _mm_loadu_ps(y));

    // Write the four lane products to memory so they can be summed as scalars.
    float lanes[4];
    _mm_storeu_ps(lanes, prod);

    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}

// Compute the scalar (dot) product of two 4-element float vectors.
//
// With the Intel compiler this uses the F32vec4 class from <fvec.h>. Other
// compilers only include <xmmintrin.h>, where F32vec4 is not declared, so the
// same computation is done directly with SSE intrinsics.
//
// x, y: each must point to at least 4 readable floats; no particular
//       alignment is required.
// Returns x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3], with the four
// products added left to right.
float scalarproductSSE(float x[], float y[])
{
#ifdef __INTEL_COMPILER
    // Build the vectors from unaligned loads: casting float* to F32vec4* and
    // dereferencing would require 16-byte aligned input.
    F32vec4 vecX(_mm_loadu_ps(x));
    F32vec4 vecY(_mm_loadu_ps(y));
    F32vec4 tmp = vecX * vecY;

    return tmp[0] + tmp[1] + tmp[2] + tmp[3];
#else
    // Fallback without F32vec4: multiply lane-wise, then sum the lanes.
    float lanes[4];
    _mm_storeu_ps(lanes, _mm_mul_ps(_mm_loadu_ps(x), _mm_loadu_ps(y)));

    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
#endif
}
