Medial Code Documentation
Memory.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


/*****************************************************************************
*** Platform checks for aligned malloc functions ***
*****************************************************************************/

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically if malloc is already aligned.

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question of how to detect 64-bit. According to this document,
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so it is indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                    \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif

#ifndef EIGEN_HAS_POSIX_MEMALIGN
  // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
  // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
  // Currently, let's include it only on unix systems:
  #if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
    #include <unistd.h>
    #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
      #define EIGEN_HAS_POSIX_MEMALIGN 1
    #endif
  #endif

  #ifndef EIGEN_HAS_POSIX_MEMALIGN
    #define EIGEN_HAS_POSIX_MEMALIGN 0
  #endif
#endif

#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX
  #define EIGEN_HAS_MM_MALLOC 1
#else
  #define EIGEN_HAS_MM_MALLOC 0
#endif

namespace Eigen {

namespace internal {

EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
    new int[huge];
  #endif
}

/*****************************************************************************
*** Implementation of handmade aligned functions ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

inline void* handmade_aligned_malloc(std::size_t size)
{
  void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

inline void handmade_aligned_free(void *ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
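
// Example (illustrative, with an assumed EIGEN_DEFAULT_ALIGN_BYTES of 16 and an assumed malloc result):
// the handmade routines over-allocate by EIGEN_DEFAULT_ALIGN_BYTES, round up to the next aligned
// boundary, and stash the pointer returned by std::malloc in the word just below the aligned address
// so that handmade_aligned_free/handmade_aligned_realloc can recover it.
//
//   void* p = handmade_aligned_malloc(100);   // suppose std::malloc(116) returns 0x1008
//   // aligned address: (0x1008 & ~0xF) + 16 == 0x1010, and ((void**)p)[-1] holds 0x1008
//   handmade_aligned_free(p);                 // reads 0x1008 back and hands it to std::free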

/*****************************************************************************
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/

EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size);
EIGEN_DEVICE_FUNC void aligned_free(void *ptr);

inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
{
  if (ptr==0)
    return aligned_malloc(size);

  if (size==0)
  {
    aligned_free(ptr);
    return 0;
  }

  void* newptr = aligned_malloc(size);
  if (newptr == 0)
  {
    #ifdef EIGEN_HAS_ERRNO
    errno = ENOMEM; // according to the standard
    #endif
    return 0;
  }

  if (ptr != 0)
  {
    std::memcpy(newptr, ptr, (std::min)(size,old_size));
    aligned_free(ptr);
  }

  return newptr;
}

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc ***
*****************************************************************************/

#ifdef EIGEN_NO_MALLOC
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update == 1)
    value = new_value;
  return value;
}
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif

EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
{
  check_that_malloc_is_allowed();

  void *result;
  #if EIGEN_DEFAULT_ALIGN_BYTES==0
    result = std::malloc(size);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::malloc(size);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0;
  #elif EIGEN_HAS_MM_MALLOC
    result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
  #elif EIGEN_OS_WIN_STRICT
    result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
  #if EIGEN_DEFAULT_ALIGN_BYTES==0
    std::free(ptr);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    std::free(ptr);
  #elif EIGEN_HAS_MM_MALLOC
    _mm_free(ptr);
  #elif EIGEN_OS_WIN_STRICT
    _aligned_free(ptr);
  #else
    handmade_aligned_free(ptr);
  #endif
}
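
// Usage sketch (illustrative): whichever backend was selected above, aligned_malloc/aligned_free
// behave like std::malloc/std::free but guarantee EIGEN_DEFAULT_ALIGN_BYTES alignment.
//
//   float* buf = static_cast<float*>(Eigen::internal::aligned_malloc(256 * sizeof(float)));
//   // ... fill buf and use it with aligned packet loads/stores ...
//   Eigen::internal::aligned_free(buf);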

inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size);

  void *result;
#if EIGEN_DEFAULT_ALIGN_BYTES==0
  result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
#elif EIGEN_HAS_POSIX_MEMALIGN
  result = generic_aligned_realloc(ptr,new_size,old_size);
#elif EIGEN_HAS_MM_MALLOC
  // The defined(_mm_free) is just here to verify that this MSVC version
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
  // functions. This may not always be the case and we just try to be safe.
  #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
    result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
  #else
    result = generic_aligned_realloc(ptr,new_size,old_size);
  #endif
#elif EIGEN_OS_WIN_STRICT
  result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}

/*****************************************************************************
*** Implementation of conditionally aligned functions ***
*****************************************************************************/

template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
{
  return aligned_malloc(size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
{
  check_that_malloc_is_allowed();

  void *result = std::malloc(size);
  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
  std::free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
{
  return std::realloc(ptr, new_size);
}

/*****************************************************************************
*** Construction/destruction of array elements ***
*****************************************************************************/

template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
{
  size_t i;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
    return ptr;
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
}

/*****************************************************************************
*** Implementation of aligned new/delete-like functions ***
*****************************************************************************/

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
  if(size > size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}

template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    aligned_free(result);
    EIGEN_THROW;
  }
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
}

template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  aligned_free(ptr);
}
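
// Usage sketch (illustrative; MyType is a hypothetical element type): aligned_new/aligned_delete
// are the typed counterparts of aligned_malloc/aligned_free and also construct/destruct the elements.
//
//   MyType* objs = Eigen::internal::aligned_new<MyType>(10);
//   // ... use objs[0] .. objs[9] ...
//   Eigen::internal::aligned_delete(objs, 10);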

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(new_size > old_size)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}


template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
{
  if(size==0)
    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

/****************************************************************************/

template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size)
{
  static const Index ScalarSize = sizeof(Scalar);
  static const Index AlignmentSize = Alignment / ScalarSize;
  static const Index AlignmentMask = AlignmentSize-1;

  if(AlignmentSize<=1)
  {
    // Either the requested alignment is smaller than a scalar, or it exactly matches a single scalar,
    // so that all elements of the array have the same alignment.
    return 0;
  }
  else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
  {
    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
  }
}
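
// Worked example (illustrative, with an assumed array address): for Alignment==16 and Scalar==float,
// AlignmentSize==4 and AlignmentMask==3. If 'array' sits at 0x1008 (float-aligned but not 16-byte
// aligned), then std::size_t(array)/sizeof(float) == 0x402 and
//   (4 - (0x402 & 3)) & 3 == 2
// so element 2 (address 0x1010) is the first 16-byte-aligned element.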

template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}

template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  return ((size+base-1)/base)*base;
}
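
// Worked example (illustrative): first_multiple<Index>(1000, 16) == ((1000+15)/16)*16 == 1008,
// the smallest multiple of 16 greater than or equal to 1000.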

// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_copy_helper<T,true> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};

template<typename T> struct smart_copy_helper<T,false> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  { std::copy(start, end, target); }
};
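
// Usage sketch (illustrative): for a trivial scalar such as float, NumTraits<float>::RequireInitialization
// is false, so smart_copy dispatches to the memcpy specialization; for scalar types with a real
// constructor it falls back to element-wise std::copy.
//
//   float src[8] = {0,1,2,3,4,5,6,7};
//   float dst[8];
//   Eigen::internal::smart_copy(src, src + 8, dst);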

// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;

template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_memmove_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};

template<typename T> struct smart_memmove_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  {
    if (uintptr_t(target) < uintptr_t(start))
    {
      std::copy(start, end, target);
    }
    else
    {
      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
      std::copy_backward(start, end, target + count);
    }
  }
};


/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc) ***
*****************************************************************************/

// You can override Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function.
#ifndef EIGEN_ALLOCA
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leaks in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
{
  public:
    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
     * Note that \a ptr can be 0 regardless of the other parameters.
     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
     * In this case, the buffer elements will also be destructed when this handler is destructed.
     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
     **/
    aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::construct_elements_of_array(m_ptr, size);
    }
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    size_t m_size;
    bool m_deallocate;
};

template<typename T> class scoped_array : noncopyable
{
  T* m_ptr;
public:
  explicit scoped_array(std::ptrdiff_t size)
  {
    m_ptr = new T[size];
  }
  ~scoped_array()
  {
    delete[] m_ptr;
  }
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
  T* &ptr() { return m_ptr; }
  const T* ptr() const { return m_ptr; }
  operator const T*() const { return m_ptr; }
};

template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
  std::swap(a.ptr(),b.ptr());
}

} // end namespace internal

#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA.
    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<std::size_t>(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                     (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                   : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)

#endif
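
// Usage sketch (illustrative; foo is a hypothetical function): the macro declares a pointer NAME to
// SIZE elements of TYPE, taken from BUFFER when it is non-null, from alloca when the byte size is
// below EIGEN_STACK_ALLOCATION_LIMIT, and from aligned_malloc otherwise; the companion
// aligned_stack_memory_handler constructs/destructs the elements as needed and frees heap memory at
// scope exit.
//
//   void foo(int n)
//   {
//     ei_declare_aligned_stack_constructed_variable(float, tmp, n, 0);
//     // ... use tmp[0] .. tmp[n-1] as scratch space ...
//   } // tmp is cleaned up automatically here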


/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/

#if EIGEN_MAX_ALIGN_BYTES!=0
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
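
// Usage sketch (illustrative; Camera is a hypothetical user type): a class holding a fixed-size
// vectorizable Eigen member exposes the aligned operator new/delete so that heap instances respect
// the member's alignment (effective when EIGEN_MAX_ALIGN_BYTES!=0).
//
//   struct Camera
//   {
//     Eigen::Matrix4f projection;   // fixed-size member with alignment requirements
//     EIGEN_MAKE_ALIGNED_OPERATOR_NEW
//   };
//   Camera* cam = new Camera;       // allocated through conditional_aligned_malloc<true>
//   delete cam;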

/****************************************************************************/

/** \brief STL compatible allocator to use with 16 byte aligned types. */
template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
  typedef size_t          size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  template<class U>
  struct rebind
  {
    typedef aligned_allocator<U> other;
  };

  aligned_allocator() : std::allocator<T>() {}

  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

  template<class U>
  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

  ~aligned_allocator() {}

  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }

  void deallocate(pointer p, size_type /*num*/)
  {
    internal::aligned_free(p);
  }
};
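
// Usage sketch (illustrative): aligned_allocator is typically passed to STL containers holding
// fixed-size vectorizable Eigen types.
//
//   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > points;
//   points.push_back(Eigen::Vector4f::Zero());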

//---------- Cache sizes ----------

#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
     // Case for x86 with PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
     // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
     // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
#    else
     // Case for x86_64 or x86 w/o PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
#    endif
#  elif EIGEN_COMP_MSVC
#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;         // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22;  // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12;  // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >> 0;   // B[11:0]
      int sets        = (abcd[2]);                     // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;     // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;    // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;    // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;    // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;    // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;    // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;    // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;    // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;     // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;    // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;    // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;    // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break;  // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;     // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break;  // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break;  // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break;  // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break;  // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else
    queryCacheSizes_intel_codes(l1,l2,l3);
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000005,0);
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000006,0);
  l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in 512 KB units
}
#endif

inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[1];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}

inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}
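
// Usage sketch (illustrative): the query helpers report cache sizes in bytes, or -1 when EIGEN_CPUID
// is not available on the target.
//
//   int l1, l2, l3;
//   Eigen::internal::queryCacheSizes(l1, l2, l3);
//   int top_level = Eigen::internal::queryTopLevelCacheSize();   // max(l2, l3)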

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H