Medial Code Documentation
Loading...
Searching...
No Matches
Redux.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19// * implement other kind of vectorization
20// * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Derived>
28{
29public:
30 enum {
32 InnerMaxSize = int(Derived::IsRowMajor)
33 ? Derived::MaxColsAtCompileTime
34 : Derived::MaxRowsAtCompileTime
35 };
36
37 enum {
38 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
40 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42 };
43
44public:
45 enum {
46 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48 : int(DefaultTraversal)
49 };
50
51public:
52 enum {
53 Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
54 : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
55 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
56 };
57
58public:
59 enum {
60 Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
61 };
62
63#ifdef EIGEN_DEBUG_ASSIGN
64 static void debug()
65 {
66 std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
67 std::cerr.setf(std::ios::hex, std::ios::basefield);
68 EIGEN_DEBUG_VAR(Derived::Flags)
69 std::cerr.unsetf(std::ios::hex);
70 EIGEN_DEBUG_VAR(InnerMaxSize)
71 EIGEN_DEBUG_VAR(PacketSize)
72 EIGEN_DEBUG_VAR(MightVectorize)
73 EIGEN_DEBUG_VAR(MayLinearVectorize)
74 EIGEN_DEBUG_VAR(MaySliceVectorize)
75 EIGEN_DEBUG_VAR(Traversal)
76 EIGEN_DEBUG_VAR(UnrollingLimit)
77 EIGEN_DEBUG_VAR(Unrolling)
78 std::cerr << std::endl;
79 }
80#endif
81};
82
83/***************************************************************************
84* Part 2 : unrollers
85***************************************************************************/
86
87/*** no vectorization ***/
88
89template<typename Func, typename Derived, int Start, int Length>
91{
92 enum {
93 HalfLength = Length/2
94 };
95
96 typedef typename Derived::Scalar Scalar;
97
99 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
100 {
103 }
104};
105
106template<typename Func, typename Derived, int Start>
107struct redux_novec_unroller<Func, Derived, Start, 1>
108{
109 enum {
110 outer = Start / Derived::InnerSizeAtCompileTime,
111 inner = Start % Derived::InnerSizeAtCompileTime
112 };
113
114 typedef typename Derived::Scalar Scalar;
115
117 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
118 {
119 return mat.coeffByOuterInner(outer, inner);
120 }
121};
122
123// This is actually dead code and will never be called. It is required
124// to prevent false warnings regarding failed inlining though
125// for 0 length run() will never be called at all.
126template<typename Func, typename Derived, int Start>
127struct redux_novec_unroller<Func, Derived, Start, 0>
128{
129 typedef typename Derived::Scalar Scalar;
131 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
132};
133
134/*** vectorization ***/
135
136template<typename Func, typename Derived, int Start, int Length>
138{
139 enum {
141 HalfLength = Length/2
142 };
143
144 typedef typename Derived::Scalar Scalar;
145 typedef typename packet_traits<Scalar>::type PacketScalar;
146
147 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
148 {
149 return func.packetOp(
152 }
153};
154
155template<typename Func, typename Derived, int Start>
156struct redux_vec_unroller<Func, Derived, Start, 1>
157{
158 enum {
160 outer = index / int(Derived::InnerSizeAtCompileTime),
161 inner = index % int(Derived::InnerSizeAtCompileTime),
162 alignment = Derived::Alignment
163 };
164
165 typedef typename Derived::Scalar Scalar;
166 typedef typename packet_traits<Scalar>::type PacketScalar;
167
168 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
169 {
170 return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
171 }
172};
173
174/***************************************************************************
175* Part 3 : implementation of all cases
176***************************************************************************/
177
178template<typename Func, typename Derived,
181>
183
184template<typename Func, typename Derived>
185struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
186{
187 typedef typename Derived::Scalar Scalar;
189 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
190 {
191 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
192 Scalar res;
193 res = mat.coeffByOuterInner(0, 0);
194 for(Index i = 1; i < mat.innerSize(); ++i)
195 res = func(res, mat.coeffByOuterInner(0, i));
196 for(Index i = 1; i < mat.outerSize(); ++i)
197 for(Index j = 0; j < mat.innerSize(); ++j)
198 res = func(res, mat.coeffByOuterInner(i, j));
199 return res;
200 }
201};
202
203template<typename Func, typename Derived>
204struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
205 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
206{};
207
208template<typename Func, typename Derived>
209struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
210{
211 typedef typename Derived::Scalar Scalar;
212 typedef typename packet_traits<Scalar>::type PacketScalar;
213
214 static Scalar run(const Derived &mat, const Func& func)
215 {
216 const Index size = mat.size();
217
220 enum {
221 alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
222 alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
223 };
224 const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
225 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
226 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
227 const Index alignedEnd2 = alignedStart + alignedSize2;
228 const Index alignedEnd = alignedStart + alignedSize;
229 Scalar res;
230 if(alignedSize)
231 {
233 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
234 {
236 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
237 {
238 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
239 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
240 }
241
242 packet_res0 = func.packetOp(packet_res0,packet_res1);
245 }
246 res = func.predux(packet_res0);
247
248 for(Index index = 0; index < alignedStart; ++index)
249 res = func(res,mat.coeff(index));
250
251 for(Index index = alignedEnd; index < size; ++index)
252 res = func(res,mat.coeff(index));
253 }
254 else // too small to vectorize anything.
255 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
256 {
257 res = mat.coeff(0);
258 for(Index index = 1; index < size; ++index)
259 res = func(res,mat.coeff(index));
260 }
261
262 return res;
263 }
264};
265
266// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
267template<typename Func, typename Derived, int Unrolling>
268struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
269{
270 typedef typename Derived::Scalar Scalar;
271 typedef typename packet_traits<Scalar>::type PacketType;
272
273 EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
274 {
275 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
276 const Index innerSize = mat.innerSize();
277 const Index outerSize = mat.outerSize();
278 enum {
280 };
281 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
282 Scalar res;
284 {
285 PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
286 for(Index j=0; j<outerSize; ++j)
287 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
289
290 res = func.predux(packet_res);
291 for(Index j=0; j<outerSize; ++j)
292 for(Index i=packetedInnerSize; i<innerSize; ++i)
293 res = func(res, mat.coeffByOuterInner(j,i));
294 }
295 else // too small to vectorize anything.
296 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
297 {
299 }
300
301 return res;
302 }
303};
304
305template<typename Func, typename Derived>
306struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
307{
308 typedef typename Derived::Scalar Scalar;
309 typedef typename packet_traits<Scalar>::type PacketScalar;
310 enum {
311 PacketSize = packet_traits<Scalar>::size,
312 Size = Derived::SizeAtCompileTime,
313 VectorizedSize = (Size / PacketSize) * PacketSize
314 };
315 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
316 {
317 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
318 if (VectorizedSize > 0) {
319 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
320 if (VectorizedSize != Size)
321 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
322 return res;
323 }
324 else {
325 return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
326 }
327 }
328};
329
330// evaluator adaptor
331template<typename _XprType>
333{
334public:
335 typedef _XprType XprType;
336 EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
337
338 typedef typename XprType::Scalar Scalar;
339 typedef typename XprType::CoeffReturnType CoeffReturnType;
340 typedef typename XprType::PacketScalar PacketScalar;
341 typedef typename XprType::PacketReturnType PacketReturnType;
342
343 enum {
344 MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
345 MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
346 // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
347 Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
348 IsRowMajor = XprType::IsRowMajor,
349 SizeAtCompileTime = XprType::SizeAtCompileTime,
350 InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
351 CoeffReadCost = evaluator<XprType>::CoeffReadCost,
353 };
354
355 EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
356 EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
357 EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
358 EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
359 EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
360
362 CoeffReturnType coeff(Index row, Index col) const
363 { return m_evaluator.coeff(row, col); }
364
366 CoeffReturnType coeff(Index index) const
367 { return m_evaluator.coeff(index); }
368
369 template<int LoadMode, typename PacketType>
370 PacketReturnType packet(Index row, Index col) const
371 { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
372
373 template<int LoadMode, typename PacketType>
374 PacketReturnType packet(Index index) const
375 { return m_evaluator.template packet<LoadMode,PacketType>(index); }
376
378 CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
379 { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
380
381 template<int LoadMode, typename PacketType>
382 PacketReturnType packetByOuterInner(Index outer, Index inner) const
383 { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
384
385 const XprType & nestedExpression() const { return m_xpr; }
386
387protected:
389 const XprType &m_xpr;
390};
391
392} // end namespace internal
393
394/***************************************************************************
395* Part 4 : public API
396***************************************************************************/
397
398
406template<typename Derived>
407template<typename Func>
409DenseBase<Derived>::redux(const Func& func) const
410{
411 eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412
414 ThisEvaluator thisEval(derived());
415
417}
418
422template<typename Derived>
423EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
425{
426 return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
427}
428
432template<typename Derived>
433EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
435{
436 return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
437}
438
443template<typename Derived>
444EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
446{
447 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
448 return Scalar(0);
449 return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
456template<typename Derived>
457EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
459{
460 return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
461}
462
470template<typename Derived>
471EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
473{
474 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
475 return Scalar(1);
476 return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
477}
478
485template<typename Derived>
486EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
488{
489 return derived().diagonal().sum();
490}
491
492} // end namespace Eigen
493
494#endif // EIGEN_REDUX_H
Base class for all dense matrices, vectors, and arrays.
Definition DenseBase.h:49
internal::traits< ArrayWrapper< ExpressionType > >::Scalar Scalar
The numeric type of the expression's coefficients, e.g.
Definition DenseBase.h:68
Base class for all dense matrices, vectors, and expressions.
Definition MatrixBase.h:50
EIGEN_DEVICE_FUNC DiagonalReturnType diagonal()
Definition Diagonal.h:188
Pseudo expression representing a solving operation.
Definition Solve.h:63
Definition Redux.h:333
@ Unaligned
Data pointer has no specific alignment.
Definition Constants.h:228
const unsigned int LinearAccessBit
Short version: means the expression can be seen as 1D vector.
Definition Constants.h:124
const unsigned int DirectAccessBit
Means that the underlying array of coefficients can be directly accessed as a plain strided array.
Definition Constants.h:149
Definition CoreEvaluators.h:82
Definition XprHelper.h:107
Definition GenericPacketMath.h:90
Definition Redux.h:182
Definition Redux.h:28
Definition BinaryFunctors.h:139
Definition BinaryFunctors.h:116
Definition BinaryFunctors.h:59
Definition BinaryFunctors.h:24
Definition ForwardDeclarations.h:17
Definition XprHelper.h:119