10#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
11#define EIGEN_SPARSELU_GEMM_KERNEL_H
24template<
typename Scalar>
26void sparselu_gemm(Index m, Index n, Index d,
const Scalar* A, Index lda,
const Scalar* B, Index ldb, Scalar* C, Index ldc)
28 using namespace Eigen::internal;
30 typedef typename packet_traits<Scalar>::type Packet;
32 NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
36 RK = NumberOfRegisters>=16 ? 4 : 2,
37 BM = 4096/
sizeof(Scalar),
42 Index
i0 = internal::first_default_aligned(A,m);
44 eigen_internal_assert(((lda%PacketSize)==0) && ((
ldc%PacketSize)==0) && (
i0==internal::first_default_aligned(C,m)));
47 for(Index i=0; i<
i0; ++i)
49 for(Index
j=0;
j<n; ++
j)
51 Scalar c = C[i+
j*
ldc];
52 for(Index k=0; k<d; ++k)
53 c += B[k+
j*
ldb] * A[i+k*lda];
67 const Scalar*
Bc0 = B+(
j+0)*
ldb;
68 const Scalar*
Bc1 = B+(
j+1)*
ldb;
86 const Scalar*
A0 = A+
ib+(k+0)*lda;
87 const Scalar*
A1 = A+
ib+(k+1)*lda;
88 const Scalar*
A2 = A+
ib+(k+2)*lda;
89 const Scalar*
A3 = A+
ib+(k+3)*lda;
107#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
109 c0 = pload<Packet>(C0+i+(I)*PacketSize); \
110 c1 = pload<Packet>(C1+i+(I)*PacketSize); \
111 KMADD(c0, a0, b00, t0) \
112 KMADD(c1, a0, b01, t1) \
113 a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
114 KMADD(c0, a1, b10, t0) \
115 KMADD(c1, a1, b11, t1) \
116 a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
117 if(RK==4) KMADD(c0, a2, b20, t0) \
118 if(RK==4) KMADD(c1, a2, b21, t1) \
119 if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
120 if(RK==4) KMADD(c0, a3, b30, t0) \
121 if(RK==4) KMADD(c1, a3, b31, t1) \
122 if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
123 pstore(C0+i+(I)*PacketSize, c0); \
124 pstore(C1+i+(I)*PacketSize, c1)
129 EIGEN_ASM_COMMENT(
"SPARSELU_GEMML_KERNEL1");
130 prefetch((
A0+i+(5)*PacketSize));
131 prefetch((
A1+i+(5)*PacketSize));
132 if(
RK==4) prefetch((
A2+i+(5)*PacketSize));
133 if(
RK==4) prefetch((
A3+i+(5)*PacketSize));
171 const Scalar*
Bc0 = B+(n-1)*
ldb;
185 const Scalar*
A0 = A+
ib+(k+0)*lda;
186 const Scalar*
A1 = A+
ib+(k+1)*lda;
187 const Scalar*
A2 = A+
ib+(k+2)*lda;
188 const Scalar*
A3 = A+
ib+(k+3)*lda;
206 c0 = pload<Packet>(C0+i+(I)*PacketSize); \
207 KMADD(c0, a0, b00, t0) \
208 a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
209 KMADD(c0, a1, b10, t0) \
210 a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
211 if(RK==4) KMADD(c0, a2, b20, t0) \
212 if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
213 if(RK==4) KMADD(c0, a3, b30, t0) \
214 if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
215 pstore(C0+i+(I)*PacketSize, c0);
220 EIGEN_ASM_COMMENT(
"SPARSELU_GEMML_KERNEL2");
253 for(Index
j=0;
j<n; ++
j)
256 Alignment = PacketSize>1 ?
Aligned : 0
Pseudo expression representing a solving operation.
Definition Solve.h:63
@ Aligned
Definition Constants.h:235
Definition GenericPacketMath.h:90