Medial Code Documentation
Loading...
Searching...
No Matches
PartialReduxEvaluator.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_PARTIALREDUX_H
11#define EIGEN_PARTIALREDUX_H
12
13namespace Eigen {
14
15namespace internal {
16
17
18/***************************************************************************
19*
20* This file provides evaluators for partial reductions.
21* There are two modes:
22*
23* - scalar path: simply calls the respective function on the column or row.
24* -> nothing special here, all the tricky part is handled by the return
25* types of VectorwiseOp's members. They embed the functor calling the
26* respective DenseBase's member function.
27*
28* - vectorized path: implements a packet-wise reduction followed by
29* some (optional) processing of the outcome, e.g., division by n for mean.
30*
31* For the vectorized path let's observe that the packet-size and outer-unrolling
32* are both decided by the assignment logic. So all we have to do is to decide
33* on the inner unrolling.
34*
35* For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
36* but we need to be careful to specify the correct increment.
37*
38***************************************************************************/
39
40
41/* logic deciding a strategy for unrolling of vectorized paths */
42template<typename Func, typename Evaluator>
44{
45 enum {
46 OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
47 Cost = OuterSize == Dynamic ? HugeCost
48 : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,
49 Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
50 };
51
52};
53
54/* Value returned by a packet-wise reduction when size==0. Default case: the functor argument is ignored and the packet is built with pset1 from a 0 of the packet's scalar type (unpacket_traits<PacketType>::type). */
55template<typename PacketType,typename Func>
56EIGEN_DEVICE_FUNC
57PacketType packetwise_redux_empty_value(const Func& ) {
58 const typename unpacket_traits<PacketType>::type zero(0);
59 return pset1<PacketType>(zero);
60}
61
62/* Overload selected when the reduction functor is scalar_product_op<Scalar,Scalar>: for products the empty-reduction value is 1, so the packet is built with pset1 from Scalar(1). */
63template<typename PacketType,typename Scalar>
64EIGEN_DEVICE_FUNC
65PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) {
66 return pset1<PacketType>(Scalar(1));
67}
68
69/* Perform the actual reduction */
70template<typename Func, typename Evaluator,
71 int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling
72>
74
75/* Perform the actual reduction with unrolling */
76template<typename Func, typename Evaluator>
77struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
78{
80 typedef typename Evaluator::Scalar Scalar;
81
82 template<typename PacketType>
83 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
84 PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)
85 {
87 }
88};
89
90/* Add a specialization of redux_vec_unroller for size==0 at compiletime.
91 * This specialization is not required for general reductions, which is
92 * why it is defined here.
93 */
94template<typename Func, typename Evaluator, int Start>
95struct redux_vec_unroller<Func, Evaluator, Start, 0>
96{
97 template<typename PacketType>
98 EIGEN_DEVICE_FUNC
99 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)
100 {
102 }
103};
104
105/* Perform the actual reduction for dynamic sizes */
106template<typename Func, typename Evaluator>
107struct packetwise_redux_impl<Func, Evaluator, NoUnrolling>
108{
109 typedef typename Evaluator::Scalar Scalar;
110 typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
111
112 template<typename PacketType>
113 EIGEN_DEVICE_FUNC
114 static PacketType run(const Evaluator &eval, const Func& func, Index size)
115 {
116 if(size==0)
118
119 const Index size4 = (size-1)&(~3);
120 PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);
121 Index i = 1;
122 // This loop is optimized for instruction pipelining:
123 // - each iteration generates two independent instructions
124 // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
125 for(; i<size4; i+=4)
126 p = func.packetOp(p,
127 func.packetOp(
130 for(; i<size; ++i)
131 p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));
132 return p;
133 }
134};
135
136template< typename ArgType, typename MemberOp, int Direction>
137struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
138 : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
139{
142 typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;
143 typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
144 typedef typename ArgType::Scalar InputScalar;
145 typedef typename XprType::Scalar Scalar;
146 enum {
147 TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
148 };
149 typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
150 enum {
151 CoeffReadCost = TraversalSize==Dynamic ? HugeCost
152 : TraversalSize==0 ? 1
153 : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
154
155 _ArgFlags = evaluator<ArgType>::Flags,
156
157 _Vectorizable = bool(int(_ArgFlags)&PacketAccessBit)
158 && bool(MemberOp::Vectorizable)
159 && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
160 && (TraversalSize!=0),
161
163 | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
164 | (_Vectorizable ? PacketAccessBit : 0)
166
167 Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
168 };
169
170 EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
171 : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
172 {
173 EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
174 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
175 }
176
177 typedef typename XprType::CoeffReturnType CoeffReturnType;
178
179 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
180 const Scalar coeff(Index i, Index j) const
181 {
182 return coeff(Direction==Vertical ? j : i);
183 }
184
185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
186 const Scalar coeff(Index index) const
187 {
188 return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
189 }
190
191 template<int LoadMode,typename PacketType>
192 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
193 PacketType packet(Index i, Index j) const
194 {
195 return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);
196 }
197
198 template<int LoadMode,typename PacketType>
199 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
200 PacketType packet(Index idx) const
201 {
203 typedef Block<const ArgTypeNestedCleaned,
204 Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
205 Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),
206 true /* InnerPanel */> PanelType;
207
208 PanelType panel(m_arg,
209 Direction==Vertical ? 0 : idx,
210 Direction==Vertical ? idx : 0,
211 Direction==Vertical ? m_arg.rows() : Index(PacketSize),
212 Direction==Vertical ? Index(PacketSize) : m_arg.cols());
213
214 // FIXME
215 // See bug 1612: currently, if PacketSize==1 (i.e. complex<double> with 128-bit registers), the storage order of panel gets reversed
216 // and methods like packetByOuterInner do not make sense anymore in this context.
217 // So let's just bypass "vectorization" in this case:
218 if(PacketSize==1)
219 return internal::pset1<PacketType>(coeff(idx));
220
223 typedef typename MemberOp::BinaryOp BinaryOp;
225 return p;
226 }
227
228protected:
229 ConstArgTypeNested m_arg;
230 const MemberOp m_functor;
231};
232
233} // end namespace internal
234
235} // end namespace Eigen
236
237#endif // EIGEN_PARTIALREDUX_H
Expression of a fixed-size or dynamic-size block.
Definition Block.h:105
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const
Definition DenseBase.h:223
Base class for all dense matrices, vectors, and expressions.
Definition MatrixBase.h:50
Generic expression of a partially reduxed matrix.
Definition VectorwiseOp.h:58
Definition Redux.h:358
DirectionType
Enum containing possible values for the Direction parameter of Reverse, PartialReduxExpr and Vectorwi...
Definition Constants.h:261
@ Vertical
For Reverse, all columns are reversed; for PartialReduxExpr and VectorwiseOp, act on columns.
Definition Constants.h:264
const unsigned int PacketAccessBit
Short version: means the expression might be vectorized.
Definition Constants.h:94
const unsigned int LinearAccessBit
Short version: means the expression can be seen as 1D vector.
Definition Constants.h:130
const unsigned int RowMajorBit
for a matrix, this means that the storage order is row-major.
Definition Constants.h:66
Namespace containing all symbols from the Eigen library.
Definition LDLT.h:16
const int HugeCost
This value means that the cost to evaluate an expression coefficient is either very expensive or cann...
Definition Constants.h:44
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
const int Dynamic
This value means that a positive quantity (e.g., a size) is not known at compile-time,...
Definition Constants.h:22
Definition XprHelper.h:332
Definition CoreEvaluators.h:111
Definition CoreEvaluators.h:91
Definition XprHelper.h:176
Definition PartialReduxEvaluator.h:73
Definition PartialReduxEvaluator.h:44
Definition ForwardDeclarations.h:17
Definition Meta.h:96
Definition GenericPacketMath.h:133
Definition PacketMath.h:47