10#ifndef EIGEN_COMPLEX_SSE_H
11#define EIGEN_COMPLEX_SSE_H
20 EIGEN_STRONG_INLINE Packet2cf() {}
21 EIGEN_STRONG_INLINE
explicit Packet2cf(
const __m128& a) : v(a) {}
27#ifndef EIGEN_VECTORIZE_AVX
// Traits specialization that associates std::complex<float> with its SSE packet type.
// NOTE(review): the opening brace, the feature enum and the closing `};` of this
// struct are not visible in this listing.
28template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
// Packet type used for std::complex<float>.
30 typedef Packet2cf type;
// Half packet; declared as the same type as `type`.
31 typedef Packet2cf half;
// Reverse traits for Packet2cf: maps the packet type back to its scalar and related packet types.
// NOTE(review): part of the enum and the closing `};` are not visible in this listing.
54template<>
struct unpacket_traits<Packet2cf> {
// Scalar type stored in the packet.
55 typedef std::complex<float> type;
// Half packet; declared as the same type as the packet itself.
56 typedef Packet2cf half;
// Real-valued packet type associated with this complex packet.
57 typedef Packet4f as_real;
// Masked loads and masked stores are both flagged as unavailable for this packet.
62 masked_load_available=
false,
63 masked_store_available=
false
67template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_add_ps(a.v,b.v)); }
68template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_sub_ps(a.v,b.v)); }
70template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a)
72 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
73 return Packet2cf(_mm_xor_ps(a.v,mask));
75template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
77 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
78 return Packet2cf(_mm_xor_ps(a.v,mask));
81template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
83 #ifdef EIGEN_VECTORIZE_SSE3
84 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
85 _mm_mul_ps(_mm_movehdup_ps(a.v),
86 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
91 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
92 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
93 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
94 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
98template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(
const Packet2cf& a) {
return Packet2cf(ptrue(Packet4f(a.v))); }
99template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_and_ps(a.v,b.v)); }
100template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_or_ps(a.v,b.v)); }
101template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_xor_ps(a.v,b.v)); }
102template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_andnot_ps(b.v,a.v)); }
104template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
105template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
107template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
109 const float re = std::real(from);
110 const float im = std::imag(from);
111 return Packet2cf(_mm_set_ps(im, re, im, re));
114template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
116template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
117template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
120template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
Index stride)
122 return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
123 std::imag(from[0*stride]), std::real(from[0*stride])));
126template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
128 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
129 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
130 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
131 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
134template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
136template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
138 #if EIGEN_GNUC_AT_MOST(4,3)
141 EIGEN_ALIGN16 std::complex<float> res[2];
142 _mm_store_ps((
float*)res, a.v);
145 std::complex<float> res;
146 _mm_storel_pi((__m64*)&res, a.v);
151template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a) {
return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
153template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
155 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
158template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
160 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
163EIGEN_STRONG_INLINE Packet2cf pcplxflip(
const Packet2cf& x)
165 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
// Expands EIGEN_MAKE_CONJ_HELPER_CPLX_REAL for the Packet2cf / Packet4f pair.
// NOTE(review): presumably generates the conj_helper specializations for mixed
// complex/real packet products - confirm against the macro definition.
168EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
170template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
172 return pdiv_complex(a, b);
178 EIGEN_STRONG_INLINE Packet1cd() {}
179 EIGEN_STRONG_INLINE
explicit Packet1cd(
const __m128d& a) : v(a) {}
185#ifndef EIGEN_VECTORIZE_AVX
// Traits specialization that associates std::complex<double> with its SSE packet type.
// NOTE(review): the opening brace, the feature enum and the closing `};` of this
// struct are not visible in this listing.
186template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
// Packet type used for std::complex<double>.
188 typedef Packet1cd type;
// Half packet; declared as the same type as `type`.
189 typedef Packet1cd half;
// Reverse traits for Packet1cd: maps the packet type back to its scalar and related packet types.
// NOTE(review): part of the enum and the closing `};` are not visible in this listing.
211template<>
struct unpacket_traits<Packet1cd> {
// Scalar type stored in the packet.
212 typedef std::complex<double> type;
// Half packet; declared as the same type as the packet itself.
213 typedef Packet1cd half;
// Real-valued packet type associated with this complex packet.
214 typedef Packet2d as_real;
// Masked loads and masked stores are both flagged as unavailable for this packet.
219 masked_load_available=
false,
220 masked_store_available=
false
224template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_add_pd(a.v,b.v)); }
225template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_sub_pd(a.v,b.v)); }
226template<> EIGEN_STRONG_INLINE Packet1cd pnegate(
const Packet1cd& a) {
return Packet1cd(pnegate(Packet2d(a.v))); }
227template<> EIGEN_STRONG_INLINE Packet1cd pconj(
const Packet1cd& a)
229 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
230 return Packet1cd(_mm_xor_pd(a.v,mask));
233template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
235 #ifdef EIGEN_VECTORIZE_SSE3
236 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
237 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
238 vec2d_swizzle1(b.v, 1, 0))));
240 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
241 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
242 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
243 vec2d_swizzle1(b.v, 1, 0)), mask)));
247template<> EIGEN_STRONG_INLINE Packet1cd ptrue <Packet1cd>(
const Packet1cd& a) {
return Packet1cd(ptrue(Packet2d(a.v))); }
248template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_and_pd(a.v,b.v)); }
249template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_or_pd(a.v,b.v)); }
250template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_xor_pd(a.v,b.v)); }
251template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(_mm_andnot_pd(b.v,a.v)); }
254template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(
const std::complex<double>* from)
255{ EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
256template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from)
257{ EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
258template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
259{
return ploadu<Packet1cd>(&from); }
261template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
264template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, Packet2d(from.v)); }
265template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, Packet2d(from.v)); }
267template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
269template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
271 EIGEN_ALIGN16
double res[2];
272 _mm_store_pd(res, a.v);
273 return std::complex<double>(res[0],res[1]);
276template<> EIGEN_STRONG_INLINE Packet1cd preverse(
const Packet1cd& a) {
return a; }
278template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a)
283template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a)
// Expands EIGEN_MAKE_CONJ_HELPER_CPLX_REAL for the Packet1cd / Packet2d pair.
// NOTE(review): presumably generates the conj_helper specializations for mixed
// complex/real packet products - confirm against the macro definition.
288EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
290template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
292 return pdiv_complex(a, b);
295EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
297 return Packet1cd(preverse(Packet2d(x.v)));
300EIGEN_DEVICE_FUNC
inline void
301ptranspose(PacketBlock<Packet2cf,2>& kernel) {
302 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
303 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
305 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
306 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
307 kernel.packet[1].v = tmp;
310template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(
const Packet2cf& a,
const Packet2cf& b)
312 __m128 eq = _mm_cmpeq_ps(a.v, b.v);
313 return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
316template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(
const Packet1cd& a,
const Packet1cd& b)
318 __m128d eq = _mm_cmpeq_pd(a.v, b.v);
319 return Packet1cd(pand<Packet2d>(eq, vec2d_swizzle1(eq, 1, 0)));
322template<> EIGEN_STRONG_INLINE Packet2cf pblend(
const Selector<2>& ifPacket,
const Packet2cf& thenPacket,
const Packet2cf& elsePacket) {
323 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
324 return Packet2cf(_mm_castpd_ps(result));
327template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(
const Packet1cd& a) {
328 return psqrt_complex<Packet1cd>(a);
331template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(
const Packet2cf& a) {
332 return psqrt_complex<Packet2cf>(a);
// Cross-references carried over from the generated listing:
//   Aligned16 - data pointer is aligned on a 16-byte boundary (definition: Constants.h:235).
//   Eigen     - namespace containing all symbols from the Eigen library (definition: LDLT.h:16).
//   Index     - EIGEN_DEFAULT_DENSE_INDEX_TYPE; the Index type as used for the API (definition: Meta.h:74).