10#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
11#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
17#if EIGEN_HAS_AVX512_MATH
19#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
20 const Packet16f p16f_##NAME = pset1<Packet16f>(X)
22#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
23 const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
25#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
26 const Packet8d p8d_##NAME = pset1<Packet8d>(X)
28#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
29 const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
31#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
32 const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
34#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
35 const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
38EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
39plog<Packet16f>(
const Packet16f& _x) {
40 return plog_float(_x);
44EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
45plog<Packet8d>(
const Packet8d& _x) {
46 return plog_double(_x);
49F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
50BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
53EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
54plog2<Packet16f>(
const Packet16f& _x) {
55 return plog2_float(_x);
59EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
60plog2<Packet8d>(
const Packet8d& _x) {
61 return plog2_double(_x);
64F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
65BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
71EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
72pexp<Packet16f>(
const Packet16f& _x) {
73 _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
74 _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
75 _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
77 _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
78 _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
80 _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
82 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
83 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
84 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
85 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
86 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
87 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
90 Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
94 Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
98 _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
99 Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
100 Packet16f r2 = pmul(r, r);
101 Packet16f r3 = pmul(r2, r);
105 y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
106 y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
107 y2 = padd(r, p16f_1);
108 y = pmadd(y, r, p16f_cephes_exp_p2);
109 y1 = pmadd(y1, r, p16f_cephes_exp_p5);
110 y = pmadd(y, r3, y1);
111 y = pmadd(y, r2, y2);
114 Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
115 emm0 = _mm512_slli_epi32(emm0, 23);
118 return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
122EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
123pexp<Packet8d>(
const Packet8d& _x) {
124 return pexp_double(_x);
127F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
128BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
131EIGEN_STRONG_INLINE Packet16h pfrexp(
const Packet16h& a, Packet16h& exponent) {
133 const Packet16h out = float2half(pfrexp<Packet16f>(half2float(a), fexponent));
134 exponent = float2half(fexponent);
139EIGEN_STRONG_INLINE Packet16h pldexp(
const Packet16h& a,
const Packet16h& exponent) {
140 return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));
144EIGEN_STRONG_INLINE Packet16bf pfrexp(
const Packet16bf& a, Packet16bf& exponent) {
146 const Packet16bf out = F32ToBf16(pfrexp<Packet16f>(Bf16ToF32(a), fexponent));
147 exponent = F32ToBf16(fexponent);
152EIGEN_STRONG_INLINE Packet16bf pldexp(
const Packet16bf& a,
const Packet16bf& exponent) {
153 return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
164EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
165psqrt<Packet16f>(
const Packet16f& _x) {
166 Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
167 __mmask16 denormal_mask = _mm512_kand(
168 _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
170 _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
172 Packet16f x = _mm512_rsqrt14_ps(_x);
175 x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
178 return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
182EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
183psqrt<Packet8d>(
const Packet8d& _x) {
184 Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
185 __mmask16 denormal_mask = _mm512_kand(
186 _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
188 _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
190 Packet8d x = _mm512_rsqrt14_pd(_x);
193 x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
196 x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
198 return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
202EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(
const Packet16f& x) {
203 return _mm512_sqrt_ps(x);
207EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(
const Packet8d& x) {
208 return _mm512_sqrt_pd(x);
212F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
213BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
216#if defined(EIGEN_VECTORIZE_AVX512ER)
219EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(
const Packet16f& x) {
220 return _mm512_rsqrt28_ps(x);
225EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
226prsqrt<Packet16f>(
const Packet16f& _x) {
227 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
228 _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
229 _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
231 Packet16f neg_half = pmul(_x, p16f_minus_half);
234 __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
235 __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
236 __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;
240 Packet16f y_approx = _mm512_rsqrt14_ps(_x);
246 Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));
251 return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
256EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(
const Packet16f& x) {
257 _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
258 return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
262F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
263BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
268EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
269prsqrt<Packet8d>(
const Packet8d& _x) {
270 _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
271 _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
272 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
274 Packet8d neg_half = pmul(_x, p8d_minus_half);
277 __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
278 __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
279 __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
283#if defined(EIGEN_VECTORIZE_AVX512ER)
284 Packet8d y_approx = _mm512_rsqrt28_pd(_x);
286 Packet8d y_approx = _mm512_rsqrt14_pd(_x);
295 Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));
296#if !defined(EIGEN_VECTORIZE_AVX512ER)
297 y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));
302 return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
306EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(
const Packet8d& x) {
307 _EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
308 return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
312template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
313Packet16f plog1p<Packet16f>(
const Packet16f& _x) {
314 return generic_plog1p(_x);
317F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
318BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
320template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
321Packet16f pexpm1<Packet16f>(
const Packet16f& _x) {
322 return generic_expm1(_x);
325F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
326BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
332EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
333psin<Packet16f>(
const Packet16f& _x) {
334 return psin_float(_x);
338EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
339pcos<Packet16f>(
const Packet16f& _x) {
340 return pcos_float(_x);
344EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
345ptanh<Packet16f>(
const Packet16f& _x) {
346 return internal::generic_fast_tanh_float(_x);
349F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
350F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
351F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
353BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
354BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
355BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
Namespace containing all symbols from the Eigen library.
Definition LDLT.h:16