/* NOTE(review): the enclosing function header and several intermediate
 * lines are outside this excerpt. The visible steps (argument reduction by
 * quarter periods, odd polynomial form, sign restored at the end) look like
 * a packed-double sine kernel -- confirm against the full file. */
/* Save the bits of x selected by sign_mask, then clear them in x.
 * Presumably sign_mask holds only the IEEE sign bit, so x becomes |x| --
 * confirm where sign_mask is defined. */
135 v2df sign = _mm_and_pd(x, sign_mask);
138 x = _mm_andnot_pd(sign_mask, x);
/* Scale by the _pd_2OPI constant (presumably 2/pi, so the integer part of
 * the product counts quarter periods -- confirm). */
141 v2df modulo = _mm_mul_pd(x,
ARRAY2V2DF(_pd_2OPI));
/* Truncate toward zero into the two low 32-bit elements, then keep the
 * fractional remainder of each lane. */
145 v2di i = _mm_cvttpd_epi32 (modulo);
146 v2df frac = _mm_sub_pd(modulo, _mm_cvtepi32_pd(i));
/* Duplicate each integer into both 32-bit halves of its own 64-bit lane so
 * the 32-bit and 64-bit shifts below line up with the matching double. */
147 i = _mm_shuffle_epi32 (i, _MM_SHUFFLE(1, 1, 0, 0));
/* Shift bit 1 of the integer up to bit 31 of every 32-bit element. The AND
 * keeps only what falls on sign_mask's bits, and the XOR flips the saved
 * sign in those lanes. */
150 v2di gt3 = _mm_slli_epi32(i, 30);
151 v2di mask = _mm_and_si128( gt3, _mm_castpd_si128(sign_mask));
152 sign = _mm_xor_pd(sign, _mm_castsi128_pd(mask));
/* Move bit 0 of each 64-bit lane (the integer's low bit) to bit 63: this is
 * a sign-bit-only mask that is set when the integer is odd. */
155 v2di signmask2 = _mm_slli_epi64(i, 63);
/* Arithmetic shift right by 9 then logical shift right by 2 turns the
 * 0x80000000 in the upper 32-bit half into 0x3FF00000, which is the bit
 * pattern of the double 1.0; an all-zero lane stays 0.0. */
156 v2di add =_mm_srli_epi32( _mm_srai_epi32(signmask2,9),2);
/* Odd lanes: (-frac) + 1.0, i.e. 1 - frac (assuming frac >= 0).
 * Even lanes: frac + 0.0, i.e. unchanged. */
157 frac = _mm_add_pd(_mm_or_pd(frac, _mm_castsi128_pd(signmask2)), _mm_castsi128_pd(add));
/* Scale the reduced fraction by _pd_PIO2 (presumably pi/2 -- confirm). */
160 v2df calcx = _mm_mul_pd(frac,
ARRAY2V2DF(_pd_PIO2));
/* Square of the reduced argument. */
163 v2df xx = _mm_mul_pd(calcx, calcx);
/* y is defined on lines not shown in this excerpt (presumably a polynomial
 * in xx -- confirm). The two statements below form
 * calcx + calcx * xx * y. */
171 y = _mm_mul_pd(y,xx);
172 y = _mm_add_pd(_mm_mul_pd(y, calcx), calcx);
/* OR the accumulated sign bits into the result. This only sets bits, so it
 * assumes y itself is non-negative at this point. */
175 y = _mm_or_pd(sign, y);
/* NOTE(review): the enclosing function header and several intermediate
 * lines are outside this excerpt. The visible steps (argument reduction by
 * quarter periods, even polynomial form ending in "+ _pd_1") look like a
 * packed-double cosine kernel -- confirm against the full file. */
/* Clear the bits selected by sign_mask (presumably the IEEE sign bit, so x
 * becomes |x| -- confirm). Unlike a sine, the input sign is not saved. */
240 x = _mm_andnot_pd(sign_mask, x);
/* Scale by the _pd_2OPI constant (presumably 2/pi, so the integer part of
 * the product counts quarter periods -- confirm). */
243 v2df modulo = _mm_mul_pd(x,
ARRAY2V2DF(_pd_2OPI));
/* Truncate toward zero into the two low 32-bit elements, then keep the
 * fractional remainder of each lane. */
247 v2di i = _mm_cvttpd_epi32 (modulo);
248 v2df frac = _mm_sub_pd(modulo, _mm_cvtepi32_pd(i));
/* Duplicate each integer into both 32-bit halves of its own 64-bit lane so
 * the 32-bit and 64-bit shifts below line up with the matching double. */
249 i = _mm_shuffle_epi32 (i, _MM_SHUFFLE(1, 1, 0, 0));
/* Bit 0 of each 64-bit lane moved to bit 63: a sign-bit-only mask that is
 * set when the integer is odd. */
252 v2di signmask2 = _mm_slli_epi64(i, 63);
/* 0x80000000 in the upper 32-bit half becomes 0x3FF00000 (the double 1.0)
 * after the arithmetic shift by 9 and logical shift by 2; zero stays 0.0. */
253 v2di add =_mm_srli_epi32( _mm_srai_epi32(signmask2,9),2);
/* Odd lanes: (-frac) + 1.0, i.e. 1 - frac (assuming frac >= 0).
 * Even lanes: frac unchanged. */
254 frac = _mm_add_pd(_mm_or_pd(frac, _mm_castsi128_pd(signmask2)), _mm_castsi128_pd(add));
/* Add the _pd_x01_double_mask constant (presumably integer 1 in the relevant
 * 32-bit elements -- confirm), then shift bit 1 of the sum up to bit 31 of
 * every 32-bit element. The AND with sign_mask keeps only what lands on
 * sign_mask's bits, giving the result sign for each lane. */
257 v2di gt3 = _mm_slli_epi32(_mm_add_epi32(i,
ARRAY2V2DI(_pd_x01_double_mask)), 30);
258 v2df sign = _mm_and_pd( _mm_castsi128_pd(gt3), sign_mask);
/* Scale the reduced fraction by _pd_PIO2 (presumably pi/2 -- confirm). */
261 v2df calcx = _mm_mul_pd(frac,
ARRAY2V2DF(_pd_PIO2));
/* Square of the reduced argument. */
264 v2df xx = _mm_mul_pd(calcx, calcx);
/* y is defined on lines not shown in this excerpt (presumably a polynomial
 * in xx -- confirm). This step forms y * xx + _pd_1, where _pd_1 is
 * presumably the constant 1.0. */
272 y = _mm_add_pd(_mm_mul_pd(y,xx),
ARRAY2V2DF(_pd_1));
/* OR the computed sign bits into the result. This only sets bits, so it
 * assumes y itself is non-negative at this point. */
274 y = _mm_or_pd(sign, y);
/* NOTE(review): the enclosing function header and several intermediate
 * lines are outside this excerpt. The visible steps (exponent extraction,
 * (v-1)/(v+1) substitution, final e * _pd_LOG_C_2 term) look like a
 * packed-double natural-logarithm kernel -- confirm against the full file. */
/* Keep the bits of x selected by _pd_f_exp_mask (presumably the exponent
 * field -- confirm) and shift each 64-bit lane right by 52, which brings
 * bits 52..63 down to bit 0 and leaves the upper 32-bit half zero. */
312 v2di e_int = _mm_and_si128(_mm_castpd_si128(x),
ARRAY2V2DI(_pd_f_exp_mask));
313 e_int = _mm_srli_epi64(e_int,52);
/* Byte-shift the whole register right by 4 bytes and OR it back in: this
 * copies lane 1's value (32-bit element 2) into element 1, so both values
 * sit in the two low 32-bit elements that _mm_cvtepi32_pd reads. */
314 e_int = _mm_or_si128(_mm_srli_si128(e_int,4), e_int);
/* Subtract the _pd_x03FE_double_mask constant as 32-bit integers
 * (presumably the value 0x3FE used as an exponent bias -- confirm), then
 * convert the two low elements to doubles. */
315 e_int = _mm_sub_epi32(e_int,
ARRAY2V2DI(_pd_x03FE_double_mask));
316 v2df e = _mm_cvtepi32_pd( e_int);
/* NOTE(review): lines between the conversion above and the compare below are
 * not shown; x may have been rewritten there (presumably reduced to its
 * mantissa -- confirm). */
/* Per-lane all-ones mask where x < _pd_0_87. */
326 v2df mask =_mm_cmplt_pd(x,
ARRAY2V2DF(_pd_0_87));
/* ex = _pd_log_inv_1_32 in masked lanes, 0.0 elsewhere. */
327 v2df ex = _mm_and_pd(mask,
ARRAY2V2DF(_pd_log_inv_1_32));
/* Bitwise select: v = x * _pd_1_32 in masked lanes, x elsewhere. */
328 v2df mulx = _mm_mul_pd(x,
ARRAY2V2DF(_pd_1_32));
329 v2df v =_mm_or_pd( _mm_and_pd(mask, mulx), _mm_andnot_pd(mask, x));
/* NOTE(review): mask and mulx are presumably recomputed on lines not shown
 * here before this second select -- confirm. As written, masked lanes have
 * ex replaced by _pd_log_inv_1_74 and v replaced by mulx; other lanes keep
 * their previous ex and v. */
336 ex = _mm_or_pd(_mm_and_pd(mask,
ARRAY2V2DF(_pd_log_inv_1_74)), _mm_andnot_pd(mask, ex));
338 v =_mm_or_pd( _mm_and_pd(mask, mulx), _mm_andnot_pd(mask, v));
/* term = (v - ones) / (v + ones); ones is defined outside this excerpt
 * (presumably the constant 1.0 in both lanes -- confirm). */
342 v2df term = _mm_div_pd(_mm_sub_pd(v, ones), _mm_add_pd(v,ones));
345 v2df termsquared = _mm_mul_pd(term, term);
/* res is defined on lines not shown in this excerpt (presumably a polynomial
 * in termsquared -- confirm). The two statements below form
 * term + res * term * termsquared. */
353 res = _mm_mul_pd(_mm_mul_pd(res,term), termsquared);
355 res = _mm_add_pd(res, term);
/* Combine the pieces: e * _pd_LOG_C_2 (presumably ln 2 -- confirm), plus
 * (ones + ones) * res, plus the ex offset selected above. */
358 v2df r1 = _mm_mul_pd(e,
ARRAY2V2DF(_pd_LOG_C_2));
359 v2df r2 = _mm_mul_pd( _mm_add_pd(ones,ones), res);
360 return _mm_add_pd(r1, _mm_add_pd(r2, ex));