Memory.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


/*****************************************************************************
*** Platform checks for aligned malloc functions                          ***
*****************************************************************************/

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically if malloc is already aligned.

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question of how to detect 64-bit. According to this document,
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so it is indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))     \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                      \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif

namespace Eigen {

namespace internal {

EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
    #if defined(EIGEN_HIPCC)
    //
    // calls to "::operator new" are to be treated as opaque function calls (i.e., no inlining),
    // and as a consequence the code in the #else block triggers the hipcc warning:
    // "no overloaded function has restriction specifiers that are compatible with the ambient context"
    //
    // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
    // the same on "operator new".
    // Reverting the code back to the old version in this #if block for the hipcc compiler.
    //
    new int[huge];
    #else
      void* unused = ::operator new(huge);
      EIGEN_UNUSED_VARIABLE(unused);
    #endif
  #endif
}

/*****************************************************************************
*** Implementation of handmade aligned functions                          ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
  eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");

  EIGEN_USING_STD(malloc)
  void *original = malloc(size+alignment);

  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
  if (ptr) {
    EIGEN_USING_STD(free)
    free(*(reinterpret_cast<void**>(ptr) - 1));
  }
}
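
// Illustrative sketch (not part of Eigen): how the bookkeeping above pairs up.
// handmade_aligned_malloc over-allocates by `alignment` bytes, rounds the raw
// pointer up to the next aligned address, and stashes the raw pointer in the
// word just below the returned address, where handmade_aligned_free finds it:
//
//   void* p = Eigen::internal::handmade_aligned_malloc(100, 32);
//   assert(reinterpret_cast<std::size_t>(p) % 32 == 0);  // aligned as requested
//   void* raw = *(reinterpret_cast<void**>(p) - 1);      // pointer malloc returned
//   Eigen::internal::handmade_aligned_free(p);           // frees `raw`, not `p`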

inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
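
// Illustrative note (not part of Eigen): std::realloc may move the block to an
// address with a different misalignment, so the payload's offset from the raw
// pointer can change; the memmove above slides the data to the new aligned
// offset before the bookkeeping word is rewritten. Hypothetical addresses with
// a 16-byte default alignment:
//
//   old raw ends in 0x8  ->  old aligned offset = (raw & ~15) + 16 - raw = 8
//   new raw ends in 0x4  ->  new aligned offset = 12
//   => the `size` payload bytes must shift by 4 bytes inside the new block.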

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc    ***
*****************************************************************************/

#ifdef EIGEN_NO_MALLOC
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update)
    value = new_value;
  return value;
}
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
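
// Illustrative sketch (not part of Eigen): with EIGEN_RUNTIME_NO_MALLOC defined,
// a test can assert that a code path performs no heap allocation:
//
//   Eigen::internal::set_is_malloc_allowed(false);
//   some_preallocated_computation();  // hypothetical; any Eigen heap
//                                     // allocation inside would now assert
//   Eigen::internal::set_is_malloc_allowed(true);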

EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
{
  check_that_malloc_is_allowed();

  void *result;
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    EIGEN_USING_STD(malloc)
    result = malloc(size);

    #if EIGEN_DEFAULT_ALIGN_BYTES==16
    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fall back to the handmade aligned memory allocator.");
    #endif
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    EIGEN_USING_STD(free)
    free(ptr);

  #else
    handmade_aligned_free(ptr);
  #endif
}

inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size)

  void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}
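
// Illustrative sketch (not part of Eigen): the portable trio above behaves like
// malloc/realloc/free but guarantees EIGEN_DEFAULT_ALIGN_BYTES alignment and
// reports failure via throw_std_bad_alloc():
//
//   float* buf = static_cast<float*>(Eigen::internal::aligned_malloc(64 * sizeof(float)));
//   buf = static_cast<float*>(Eigen::internal::aligned_realloc(buf, 128 * sizeof(float), 64 * sizeof(float)));
//   Eigen::internal::aligned_free(buf);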

/*****************************************************************************
*** Implementation of conditionally aligned functions                     ***
*****************************************************************************/

template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
{
  return aligned_malloc(size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
{
  check_that_malloc_is_allowed();

  EIGEN_USING_STD(malloc)
  void *result = malloc(size);

  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
  EIGEN_USING_STD(free)
  free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
{
  return std::realloc(ptr, new_size);
}

/*****************************************************************************
*** Construction/destruction of array elements                            ***
*****************************************************************************/

template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

template<typename T> EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T *ptr, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}

template<typename T> EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T *ptr, const T* src, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}

template<typename T> EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T *ptr, T* src, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
#if EIGEN_HAS_RVALUE_REFERENCES
    for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
#else
    for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
#endif
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}

/*****************************************************************************
*** Implementation of aligned new/delete-like functions                   ***
*****************************************************************************/

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
{
  if(size > std::size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}
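
// Illustrative note (not part of Eigen): the guard above rejects any element
// count whose byte size would wrap around std::size_t. E.g. on a 64-bit
// platform with sizeof(T)==8, any size > (2^64-1)/8 would make sizeof(T)*size
// overflow, so the callers below throw before ever performing the
// multiplication.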

template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(aligned_malloc(sizeof(T)*size));
  EIGEN_TRY
  {
    return default_construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    aligned_free(result);
    EIGEN_THROW;
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return default_construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
  return result;
}

template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  Eigen::internal::aligned_free(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);

  // If elements need to be explicitly initialized, we cannot simply realloc
  // (or memcpy) the memory block - each element needs to be reconstructed.
  // Otherwise, objects that contain internal pointers like mpfr or
  // AnnoyingScalar can be pointing to the wrong thing.
  T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*new_size));
  EIGEN_TRY
  {
    // Move-construct initial elements.
    std::size_t copy_size = (std::min)(old_size, new_size);
    move_construct_elements_of_array(result, pts, copy_size);

    // Default-construct remaining elements.
    if (new_size > old_size) {
      default_construct_elements_of_array(result + copy_size, new_size - old_size);
    }

    // Delete old elements.
    conditional_aligned_delete<T, Align>(pts, old_size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }

  return result;
}


template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
{
  if(size==0)
    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      default_construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
{
  if (NumTraits<T>::RequireInitialization) {
    return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
  }

  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  return static_cast<T*>(conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

/****************************************************************************/
489
507template<int Alignment, typename Scalar, typename Index>
508EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
509{
510 const Index ScalarSize = sizeof(Scalar);
511 const Index AlignmentSize = Alignment / ScalarSize;
512 const Index AlignmentMask = AlignmentSize-1;
513
514 if(AlignmentSize<=1)
515 {
516 // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
517 // so that all elements of the array have the same alignment.
518 return 0;
519 }
520 else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
521 {
522 // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
523 // Consequently, no element of the array is well aligned.
524 return size;
525 }
526 else
527 {
528 Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
529 return (first < size) ? first : size;
530 }
531}
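
// Illustrative worked example (not part of Eigen): for Scalar=float (4 bytes),
// Alignment=16, and an array starting at hypothetical address 0x1004:
//   AlignmentSize = 16/4 = 4, AlignmentMask = 3
//   scalar index of the base address = 0x1004/4 = 0x401, and 0x401 & 3 = 1
//   first = (4 - 1) & 3 = 3, i.e. element 3 lives at 0x1010, the first
//   16-byte-aligned element of the array.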

template<typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}

template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  return ((size+base-1)/base)*base;
}
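
// Illustrative example (not part of Eigen): first_multiple rounds up to the
// next multiple of `base`, e.g. first_multiple(1001, 16) == ((1001+15)/16)*16
// == 63*16 == 1008.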

// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}
559template<typename T> struct smart_copy_helper<T,true> {
560 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
561 {
562 IntPtr size = IntPtr(end)-IntPtr(start);
563 if(size==0) return;
564 eigen_internal_assert(start!=0 && end!=0 && target!=0);
565 EIGEN_USING_STD(memcpy)
566 memcpy(target, start, size);
567 }
568};
569
570template<typename T> struct smart_copy_helper<T,false> {
571 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
572 { std::copy(start, end, target); }
573};

// Intelligent memmove: falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;

template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_memmove_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  {
    IntPtr size = IntPtr(end)-IntPtr(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    std::memmove(target, start, size);
  }
};

template<typename T> struct smart_memmove_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  {
    if (UIntPtr(target) < UIntPtr(start))
    {
      std::copy(start, end, target);
    }
    else
    {
      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
      std::copy_backward(start, end, target + count);
    }
  }
};

#if EIGEN_HAS_RVALUE_REFERENCES
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  return std::move(start, end, target);
}
#else
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  return std::copy(start, end, target);
}
#endif

/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)   ***
*****************************************************************************/

// You can override Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function.
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
// the compiler still emits bad code because stack allocation checks use "<=".
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
// is fixed.
#if defined(__clang__) && defined(__thumb__)
  #undef EIGEN_ALLOCA
#endif

// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leaks in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
{
  public:
    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
     * Note that \a ptr can be 0 regardless of the other parameters.
     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
     * In this case, the buffer elements will also be destructed when this handler is destructed.
     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
     **/
    EIGEN_DEVICE_FUNC
    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr!=0)
        Eigen::internal::default_construct_elements_of_array(m_ptr, size);
    }
    EIGEN_DEVICE_FUNC
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr!=0)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    std::size_t m_size;
    bool m_deallocate;
};

#ifdef EIGEN_ALLOCA

template<typename Xpr, int NbEvaluations,
         bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
         >
struct local_nested_eval_wrapper
{
  static const bool NeedExternalBuffer = false;
  typedef typename Xpr::Scalar Scalar;
  typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
  {
    EIGEN_UNUSED_VARIABLE(ptr);
    eigen_internal_assert(ptr==0);
  }
};

template<typename Xpr, int NbEvaluations>
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
{
  static const bool NeedExternalBuffer = true;
  typedef typename Xpr::Scalar Scalar;
  typedef typename plain_object_eval<Xpr>::type PlainObject;
  typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
    : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
      m_deallocate(ptr==0)
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
    object = xpr;
  }

  EIGEN_DEVICE_FUNC
  ~local_nested_eval_wrapper()
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::destruct_elements_of_array(object.data(), object.size());
    if(m_deallocate)
      Eigen::internal::aligned_free(object.data());
  }

private:
  bool m_deallocate;
};

#endif // EIGEN_ALLOCA

template<typename T> class scoped_array : noncopyable
{
  T* m_ptr;
public:
  explicit scoped_array(std::ptrdiff_t size)
  {
    m_ptr = new T[size];
  }
  ~scoped_array()
  {
    delete[] m_ptr;
  }
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
  T* &ptr() { return m_ptr; }
  const T* ptr() const { return m_ptr; }
  operator const T*() const { return m_ptr; }
};

template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
  std::swap(a.ptr(),b.ptr());
}

} // end namespace internal

#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA.
    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)


  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
    Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
      ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
        ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
    typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)


  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)

#endif
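
// Illustrative sketch (not part of Eigen): typical use of the macro above
// inside Eigen's own kernels. It allocates n doubles on the stack when
// n*sizeof(double) fits under EIGEN_STACK_ALLOCATION_LIMIT, falls back to
// aligned_malloc otherwise, and declares a scope guard that destructs and
// frees as needed:
//
//   void foo(Eigen::Index n)   // hypothetical function
//   {
//     ei_declare_aligned_stack_constructed_variable(double, tmp, n, 0);
//     // ... use tmp[0..n-1] ...
//   }  // tmp is cleaned up here by the generated _stack_memory_destructor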


/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]               ***
*****************************************************************************/

#if EIGEN_HAS_CXX17_OVERALIGN

// C++17 -> no need to bother about alignment anymore :)

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)

#else

// HIP does not support new/delete on device.
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void *operator new(std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void *operator new[](std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      EIGEN_DEVICE_FUNC \
      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
      EIGEN_DEVICE_FUNC \
      static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
     ((Size)!=Eigen::Dynamic) && \
     (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES  )==0)) || \
      ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
      ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))

#endif
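
// Illustrative sketch (not part of Eigen's source, but its documented use):
// pre-C++17, a class holding a fixed-size vectorizable Eigen member must opt
// into aligned operator new so that heap-allocated instances are aligned:
//
//   struct Foo {                 // hypothetical user type
//     Eigen::Vector4f v;         // 16-byte-aligned fixed-size member
//     EIGEN_MAKE_ALIGNED_OPERATOR_NEW
//   };
//   Foo* foo = new Foo;          // now guaranteed properly aligned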

/****************************************************************************/

/** \brief STL compatible allocator to use with types requiring a non-standard alignment.
  */
template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  template<class U>
  struct rebind
  {
    typedef aligned_allocator<U> other;
  };

  aligned_allocator() : std::allocator<T>() {}

  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

  template<class U>
  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

  ~aligned_allocator() {}

  #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
  // In gcc std::allocator::max_size() is bugged, making gcc trigger a warning:
  // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
  size_type max_size() const {
    return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
  }
  #endif

  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }

  void deallocate(pointer p, size_type /*num*/)
  {
    internal::aligned_free(p);
  }
};
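
// Illustrative sketch (not part of Eigen's source, but its documented use):
// aligned_allocator is the STL-compatible way to put fixed-size vectorizable
// Eigen types in standard containers:
//
//   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > vecs;
//   vecs.push_back(Eigen::Vector4f::Zero());  // storage is 16-byte aligned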

//---------- Cache sizes ----------

#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // Case for x86 with PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
       // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
#    else
       // Case for x86_64 or x86 w/o PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
#    endif
#  elif EIGEN_COMP_MSVC
#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;        // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
      int sets        = (abcd[2]);                    // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;     // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;    // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;    // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;    // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;    // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;    // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;    // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;    // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;     // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;    // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;    // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;    // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break;  // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;     // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break;  // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break;  // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break;  // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break;  // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else if(max_std_funcs>=2)
    queryCacheSizes_intel_codes(l1,l2,l3);
  else
    l1 = l2 = l3 = 0;
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;

  // First query the max supported function.
  EIGEN_CPUID(abcd,0x80000000,0);
  if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
  {
    EIGEN_CPUID(abcd,0x80000005,0);
    l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x80000006,0);
    l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
    l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in units of 512 KB
  }
  else
  {
    l1 = l2 = l3 = 0;
  }
}
#endif

inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA  VIA  VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}

inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}
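
// Illustrative sketch (not part of Eigen): the queries above return sizes in
// bytes, or -1 when EIGEN_CPUID is unavailable on the target:
//
//   int l1, l2, l3;
//   Eigen::internal::queryCacheSizes(l1, l2, l3);
//   // e.g. l1 == 32768 on a CPU with a 32 KB L1 data cache
//   int top = Eigen::internal::queryTopLevelCacheSize();  // max(l2, l3)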

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H