Medial Code Documentation
linalg.h
1/**
2 * Copyright 2021-2023 by XGBoost Contributors
3 * \file linalg.h
4 * \brief Linear algebra related utilities.
5 */
6#ifndef XGBOOST_LINALG_H_
7#define XGBOOST_LINALG_H_
8
9#include <dmlc/endian.h>
10#include <xgboost/base.h>
11#include <xgboost/context.h>
12#include <xgboost/host_device_vector.h>
13#include <xgboost/json.h>
14#include <xgboost/span.h>
15
16#include <algorithm>
17#include <cassert>
18#include <cinttypes> // for int32_t
19#include <cstddef> // for size_t
20#include <limits>
21#include <string>
22#include <tuple> // for make_tuple
23#include <type_traits>
24#include <utility>
25#include <vector>
26
27#if defined(_MSC_VER)
28#include <intrin.h>
29#endif // defined(_MSC_VER)
30
31// decouple it from xgboost.
32#ifndef LINALG_HD
33#if defined(__CUDA__) || defined(__NVCC__)
34#define LINALG_HD __host__ __device__
35#else
36#define LINALG_HD
37#endif // defined (__CUDA__) || defined(__NVCC__)
38#endif // LINALG_HD
39
40namespace xgboost::linalg {
41namespace detail {
42
43struct ArrayInterfaceHandler {
44 template <typename T>
45 static constexpr char TypeChar() {
46 return (std::is_floating_point<T>::value
47 ? 'f'
48 : (std::is_integral<T>::value ? (std::is_signed<T>::value ? 'i' : 'u') : '\0'));
49 }
50};
51
52template <size_t dim, typename S, typename Head, size_t D>
53constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
54 static_assert(dim < D);
55 return n + head * strides[dim];
56}
57
58template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
59constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
60 Head head, Tail &&...rest) {
61 static_assert(dim < D);
62 return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
63}
64
65template <int32_t D, bool f_array = false>
66constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
67 if (f_array) {
68 stride[0] = 1;
69 for (int32_t s = 1; s < D; ++s) {
70 stride[s] = shape[s - 1] * stride[s - 1];
71 }
72 } else {
73 stride[D - 1] = 1;
74 for (int32_t s = D - 2; s >= 0; --s) {
75 stride[s] = shape[s + 1] * stride[s + 1];
76 }
77 }
78}
79
80struct AllTag {};
81
82struct IntTag {};
83
84template <typename I>
85struct RangeTag {
86 I beg;
87 I end;
88 [[nodiscard]] constexpr size_t Size() const { return end - beg; }
89};
90
94template <typename T>
95constexpr int32_t CalcSliceDim() {
96 return std::is_same<T, IntTag>::value ? 0 : 1;
97}
98
99template <typename T, typename... S>
100constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
101 return CalcSliceDim<T>() + CalcSliceDim<S...>();
102}
103
104template <int32_t D>
105constexpr size_t CalcSize(size_t (&shape)[D]) {
106 size_t size = 1;
107 for (auto d : shape) {
108 size *= d;
109 }
110 return size;
111}
112
113template <typename S>
114using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;
115
116template <typename S>
117using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value, IntTag, S>;
118
119template <int32_t n, typename Fn>
120LINALG_HD constexpr auto UnrollLoop(Fn fn) {
121#if defined __CUDA_ARCH__
122#pragma unroll n
123#endif // defined __CUDA_ARCH__
124 for (int32_t i = 0; i < n; ++i) {
125 fn(i);
126 }
127}
128
129template <typename T>
130int32_t NativePopc(T v) {
131 int c = 0;
132 for (; v != 0; v &= v - 1) c++;
133 return c;
134}
135
136inline LINALG_HD int Popc(uint32_t v) {
137#if defined(__CUDA_ARCH__)
138 return __popc(v);
139#elif defined(__GNUC__) || defined(__clang__)
140 return __builtin_popcount(v);
141#elif defined(_MSC_VER)
142 return __popcnt(v);
143#else
144 return NativePopc(v);
145#endif // compiler
146}
147
148inline LINALG_HD int Popc(uint64_t v) {
149#if defined(__CUDA_ARCH__)
150 return __popcll(v);
151#elif defined(__GNUC__) || defined(__clang__)
152 return __builtin_popcountll(v);
153#elif defined(_MSC_VER) && defined(_M_X64)
154 return __popcnt64(v);
155#else
156 return NativePopc(v);
157#endif // compiler
158}
159
160template <std::size_t D, typename Head>
161LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
162 static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
163 arr[D - 1] = head;
164}
165
169template <std::size_t D, typename Head, typename... Rest>
170LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
171 static_assert(sizeof...(Rest) < D, "Index overflow.");
172 static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
173 arr[D - sizeof...(Rest) - 1] = head;
174 IndexToArr(arr, std::forward<Rest>(index)...);
175}
176
177template <class T, std::size_t N, std::size_t... Idx>
178constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
179 return std::make_tuple(arr[Idx]...);
180}
181
185template <class T, std::size_t N>
186constexpr auto ArrToTuple(T (&arr)[N]) {
187 return ArrToTuple(arr, std::make_index_sequence<N>{});
188}
189
190// uint division optimization inspired by the CIndexer in cupy. Division operation is
191// slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
192// bit when the index is smaller, then try to avoid division when it's exp of 2.
193template <typename I, int32_t D>
194LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
195 size_t index[D]{0};
196 static_assert(std::is_signed<decltype(D)>::value,
197 "Don't change the type without changing the for loop.");
198 for (int32_t dim = D; --dim > 0;) {
199 auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);
200 if (s & (s - 1)) {
201 auto t = idx / s;
202 index[dim] = idx - t * s;
203 idx = t;
204 } else { // exp of 2
205 index[dim] = idx & (s - 1);
206 idx >>= Popc(s - 1);
207 }
208 }
209 index[0] = idx;
210 return ArrToTuple(index);
211}
212
213template <size_t dim, typename I, int32_t D>
214void ReshapeImpl(size_t (&out_shape)[D], I s) {
215 static_assert(dim < D);
216 out_shape[dim] = s;
217}
218
219template <size_t dim, int32_t D, typename... S, typename I,
220 std::enable_if_t<sizeof...(S) != 0> * = nullptr>
221void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
222 static_assert(dim < D);
223 out_shape[dim] = s;
224 ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
225}
226
227template <typename Fn, typename Tup, size_t... I>
228LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence<I...>) {
229 return f(std::get<I>(t)...);
230}
231
238template <typename Fn, typename Tup>
239LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
240 constexpr auto kSize = std::tuple_size<Tup>::value;
241 return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
242}
243
247template <class...>
248struct Conjunction : std::true_type {};
249template <class B1>
250struct Conjunction<B1> : B1 {};
251template <class B1, class... Bn>
252struct Conjunction<B1, Bn...>
253 : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};
254
255template <typename... Index>
256using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;
257
258template <typename... Index>
259using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
260} // namespace detail
261
265constexpr detail::AllTag All() { return {}; }
269template <typename I>
270constexpr detail::RangeTag<I> Range(I beg, I end) {
271 return {beg, end};
272}
273
274enum Order : std::uint8_t {
275 kC, // Row major
276 kF, // Col major
277};
278
292template <typename T, int32_t kDim>
293class TensorView {
294 public:
295 using ShapeT = size_t[kDim];
296 using StrideT = ShapeT;
297
298 private:
299 StrideT stride_{1};
300 ShapeT shape_{0};
301 common::Span<T> data_;
302 T *ptr_{nullptr}; // pointer of data_ to avoid bound check.
303
304 size_t size_{0};
305 int32_t device_{-1};
306
307 // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.
308 LINALG_HD void CalcSize() {
309 if (data_.empty()) {
310 size_ = 0;
311 } else {
312 size_ = detail::CalcSize(shape_);
313 }
314 }
315
316 template <size_t old_dim, size_t new_dim, int32_t D, typename I>
317 LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
318 detail::RangeTag<I> &&range) const {
319 static_assert(new_dim < D);
320 static_assert(old_dim < kDim);
321 new_stride[new_dim] = stride_[old_dim];
322 new_shape[new_dim] = range.Size();
323 assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
324
325 auto offset = stride_[old_dim] * range.beg;
326 return offset;
327 }
331 template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
332 LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
333 detail::RangeTag<I> &&range, S &&...slices) const {
334 static_assert(new_dim < D);
335 static_assert(old_dim < kDim);
336 new_stride[new_dim] = stride_[old_dim];
337 new_shape[new_dim] = range.Size();
338 assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);
339
340 auto offset = stride_[old_dim] * range.beg;
341 return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
342 std::forward<S>(slices)...) +
343 offset;
344 }
345
346 template <size_t old_dim, size_t new_dim, int32_t D>
347 LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
348 static_assert(new_dim < D);
349 static_assert(old_dim < kDim);
350 new_stride[new_dim] = stride_[old_dim];
351 new_shape[new_dim] = shape_[old_dim];
352 return 0;
353 }
357 template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
358 LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
359 S &&...slices) const {
360 static_assert(new_dim < D);
361 static_assert(old_dim < kDim);
362 new_stride[new_dim] = stride_[old_dim];
363 new_shape[new_dim] = shape_[old_dim];
364 return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
365 std::forward<S>(slices)...);
366 }
367
368 template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
369 LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
370 DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
371 static_assert(old_dim < kDim);
372 return stride_[old_dim] * i;
373 }
377 template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
378 LINALG_HD std::enable_if_t<std::is_integral<Index>::value, size_t> MakeSliceDim(
379 size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
380 static_assert(old_dim < kDim);
381 auto offset = stride_[old_dim] * i;
382 auto res =
383 MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
384 return res + offset;
385 }
386
387 public:
388 size_t constexpr static kValueSize = sizeof(T);
389 size_t constexpr static kDimension = kDim;
390
391 public:
403 template <typename I, int32_t D>
404 LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
405 : TensorView{data, shape, device, Order::kC} {}
406
407 template <typename I, int32_t D>
408 LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
409 : data_{data}, ptr_{data_.data()}, device_{device} {
410 static_assert(D > 0 && D <= kDim, "Invalid shape.");
411 // shape
412 detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
413 for (auto i = D; i < kDim; ++i) {
414 shape_[i] = 1;
415 }
416 // stride
417 switch (order) {
418 case Order::kC: {
419 detail::CalcStride(shape_, stride_);
420 break;
421 }
422 case Order::kF: {
423 detail::CalcStride<kDim, true>(shape_, stride_);
424 break;
425 }
426 default: {
427 SPAN_CHECK(false);
428 }
429 }
430 // size
431 this->CalcSize();
432 }
433
438 template <typename I, std::int32_t D>
439 LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
440 std::int32_t device)
441 : data_{data}, ptr_{data_.data()}, device_{device} {
442 static_assert(D == kDim, "Invalid shape & stride.");
443 detail::UnrollLoop<D>([&](auto i) {
444 shape_[i] = shape[i];
445 stride_[i] = stride[i];
446 });
447 this->CalcSize();
448 }
449
450 template <
451 typename U,
452 std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
453 LINALG_HD TensorView(TensorView<U, kDim> const &that) // NOLINT
454 : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.DeviceIdx()} {
455 detail::UnrollLoop<kDim>([&](auto i) {
456 stride_[i] = that.Stride(i);
457 shape_[i] = that.Shape(i);
458 });
459 }
460
474 template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
475 LINALG_HD T &operator()(Index &&...index) {
476 static_assert(sizeof...(index) <= kDim, "Invalid index.");
477 size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
478 assert(offset < data_.size() && "Out of bound access.");
479 return ptr_[offset];
480 }
484 template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
485 LINALG_HD T const &operator()(Index &&...index) const {
486 static_assert(sizeof...(index) <= kDim, "Invalid index.");
487 size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
488 assert(offset < data_.size() && "Out of bound access.");
489 return ptr_[offset];
490 }
491
505 template <typename... S>
506 LINALG_HD auto Slice(S &&...slices) const {
507 static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
508 int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
509 size_t new_shape[kNewDim];
510 size_t new_stride[kNewDim];
511 auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
512 // ret is a different type due to changed dimension, so we can not access its private
513 // fields.
514 TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
515 device_};
516 return ret;
517 }
518
519 LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
523 LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
524 LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
528 LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
529
533 [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
537 [[nodiscard]] LINALG_HD bool Contiguous() const {
538 return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
539 }
543 [[nodiscard]] LINALG_HD bool CContiguous() const {
544 StrideT stride;
545 static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
546 // It's contiguous if the stride can be calculated from shape.
547 detail::CalcStride(shape_, stride);
548 return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
549 }
553 [[nodiscard]] LINALG_HD bool FContiguous() const {
554 StrideT stride;
555 static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
556 // It's contiguous if the stride can be calculated from shape.
557 detail::CalcStride<kDim, true>(shape_, stride);
558 return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
559 }
563 LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
567 LINALG_HD auto DeviceIdx() const { return device_; }
568};
569
573template <typename Container, typename... S,
574 std::enable_if_t<!common::detail::IsSpan<Container>::value &&
575 !std::is_pointer_v<Container>> * = nullptr>
576auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
577 using T = std::conditional_t<std::is_const_v<Container>,
578 std::add_const_t<typename Container::value_type>,
579 typename Container::value_type>;
580 std::size_t in_shape[sizeof...(S)];
581 detail::IndexToArr(in_shape, std::forward<S>(shape)...);
582 return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
583}
584
585template <typename T, typename... S>
586LINALG_HD auto MakeTensorView(std::int32_t device, common::Span<T> data, S &&...shape) {
587 std::size_t in_shape[sizeof...(S)];
588 detail::IndexToArr(in_shape, std::forward<S>(shape)...);
589 return TensorView<T, sizeof...(S)>{data, in_shape, device};
590}
591
592template <typename T, typename... S>
593auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
594 return MakeTensorView(ctx->gpu_id, data, std::forward<S>(shape)...);
595}
596
597template <typename T, typename... S>
598auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
599 auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
600 return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
601}
602
603template <typename T, typename... S>
604auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
605 auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
606 return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
607}
608
612template <size_t D>
613LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
614 if (idx > std::numeric_limits<uint32_t>::max()) {
615 return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
616 } else {
617 return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
618 }
619}
620
621template <size_t D>
622LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
623 return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));
624}
625
626template <typename... S>
627LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
628 std::size_t s[sizeof...(S)];
629 detail::IndexToArr(s, shape...);
630 return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
631}
632
638template <typename T>
639using VectorView = TensorView<T, 1>;
640
648template <typename T>
649auto MakeVec(T *ptr, size_t s, int32_t device = -1) {
650 return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
651}
652
653template <typename T>
654auto MakeVec(HostDeviceVector<T> *data) {
655 return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
656 data->Size(), data->DeviceIdx());
657}
658
659template <typename T>
660auto MakeVec(HostDeviceVector<T> const *data) {
661 return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
662 data->Size(), data->DeviceIdx());
663}
664
670template <typename T>
671using MatrixView = TensorView<T, 2>;
672
679template <typename T, int32_t D>
680Json ArrayInterface(TensorView<T const, D> const &t) {
681 Json array_interface{Object{}};
682 array_interface["data"] = std::vector<Json>(2);
683 array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
684 array_interface["data"][1] = Boolean{true};
685 if (t.DeviceIdx() >= 0) {
686 // Change this once we have different CUDA stream.
687 array_interface["stream"] = Null{};
688 }
689 std::vector<Json> shape(t.Shape().size());
690 std::vector<Json> stride(t.Stride().size());
691 for (size_t i = 0; i < t.Shape().size(); ++i) {
692 shape[i] = Integer(t.Shape(i));
693 stride[i] = Integer(t.Stride(i) * sizeof(T));
694 }
695 array_interface["shape"] = Array{shape};
696 array_interface["strides"] = Array{stride};
697 array_interface["version"] = 3;
698
699 char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
700 static_assert(kT != '\0');
701 if (DMLC_LITTLE_ENDIAN) {
702 array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
703 } else {
704 array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
705 }
706 return array_interface;
707}
708
712template <typename T, int32_t D>
713Json ArrayInterface(TensorView<T, D> const &t) {
714 TensorView<T const, D> const &as_const = t;
715 auto res = ArrayInterface(as_const);
716 res["data"][1] = Boolean{false};
717 return res;
718}
719
723template <typename T, int32_t D>
724auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
725 std::string str;
726 Json::Dump(ArrayInterface(t), &str);
727 return str;
728}
729
730template <typename T, int32_t D>
731auto ArrayInterfaceStr(TensorView<T, D> const &t) {
732 std::string str;
733 Json::Dump(ArrayInterface(t), &str);
734 return str;
735}
736
741template <typename T, int32_t kDim = 5>
742class Tensor {
743 public:
744 using ShapeT = size_t[kDim];
745 using StrideT = ShapeT;
746
747 private:
748 HostDeviceVector<T> data_;
749 ShapeT shape_{0};
750 Order order_{Order::kC};
751
752 template <typename I, std::int32_t D>
753 void Initialize(I const (&shape)[D], std::int32_t device) {
754 static_assert(D <= kDim, "Invalid shape.");
755 std::copy(shape, shape + D, shape_);
756 for (auto i = D; i < kDim; ++i) {
757 shape_[i] = 1;
758 }
759 if (device >= 0) {
760 data_.SetDevice(device);
761 data_.ConstDevicePointer(); // Pull to device;
762 }
763 CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
764 }
765
766 public:
767 Tensor() = default;
768
775 template <typename I, int32_t D>
776 explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
777 : Tensor{common::Span<I const, D>{shape}, device, order} {}
778
779 template <typename I, size_t D>
780 explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
781 : order_{order} {
782 // No device unroll as this is a host only function.
783 std::copy(shape.data(), shape.data() + D, shape_);
784 for (auto i = D; i < kDim; ++i) {
785 shape_[i] = 1;
786 }
787 auto size = detail::CalcSize(shape_);
788 if (device >= 0) {
789 data_.SetDevice(device);
790 }
791 data_.Resize(size);
792 if (device >= 0) {
793 data_.DevicePointer(); // Pull to device
794 }
795 }
799 template <typename It, typename I, int32_t D>
800 explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
801 : order_{order} {
802 auto &h_vec = data_.HostVector();
803 h_vec.insert(h_vec.begin(), begin, end);
804 // shape
805 this->Initialize(shape, device);
806 }
807
808 template <typename I, int32_t D>
809 explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
810 Order order = kC)
811 : order_{order} {
812 auto &h_vec = data_.HostVector();
813 h_vec = data;
814 // shape
815 this->Initialize(shape, device);
816 }
821 template <typename... Index>
822 T &operator()(Index &&...idx) {
823 return this->HostView()(std::forward<Index>(idx)...);
824 }
829 template <typename... Index>
830 T const &operator()(Index &&...idx) const {
831 return this->HostView()(std::forward<Index>(idx)...);
832 }
833
837 TensorView<T, kDim> View(int32_t device) {
838 if (device >= 0) {
839 data_.SetDevice(device);
840 auto span = data_.DeviceSpan();
841 return {span, shape_, device, order_};
842 } else {
843 auto span = data_.HostSpan();
844 return {span, shape_, device, order_};
845 }
846 }
847 TensorView<T const, kDim> View(int32_t device) const {
848 if (device >= 0) {
849 data_.SetDevice(device);
850 auto span = data_.ConstDeviceSpan();
851 return {span, shape_, device, order_};
852 } else {
853 auto span = data_.ConstHostSpan();
854 return {span, shape_, device, order_};
855 }
856 }
857
858 auto HostView() const { return this->View(-1); }
859 auto HostView() { return this->View(-1); }
860
861 [[nodiscard]] size_t Size() const { return data_.Size(); }
862 auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
863 auto Shape(size_t i) const { return shape_[i]; }
864
865 HostDeviceVector<T> *Data() { return &data_; }
866 HostDeviceVector<T> const *Data() const { return &data_; }
867
874 template <typename Fn>
875 void ModifyInplace(Fn &&fn) {
876 fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
877 CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
878 << "Inconsistent size after modification.";
879 }
880
886 template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
887 void Reshape(S &&...s) {
888 static_assert(sizeof...(S) <= kDim, "Invalid shape.");
889 detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
890 auto constexpr kEnd = sizeof...(S);
891 static_assert(kEnd <= kDim, "Invalid shape.");
892 std::fill(shape_ + kEnd, shape_ + kDim, 1);
893 auto n = detail::CalcSize(shape_);
894 data_.Resize(n);
895 }
896
902 template <size_t D>
903 void Reshape(common::Span<size_t const, D> shape) {
904 static_assert(D <= kDim, "Invalid shape.");
905 std::copy(shape.data(), shape.data() + D, this->shape_);
906 std::fill(shape_ + D, shape_ + kDim, 1);
907 auto n = detail::CalcSize(shape_);
908 data_.Resize(n);
909 }
910
911 template <size_t D>
912 void Reshape(size_t (&shape)[D]) {
913 this->Reshape(common::Span<size_t const, D>{shape});
914 }
918 template <typename... S>
919 auto Slice(S &&...slices) const {
920 return this->HostView().Slice(std::forward<S>(slices)...);
921 }
925 template <typename... S>
926 auto Slice(S &&...slices) {
927 return this->HostView().Slice(std::forward<S>(slices)...);
928 }
929
933 void SetDevice(int32_t device) const { data_.SetDevice(device); }
934 [[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
935};
936
937template <typename T>
938using Matrix = Tensor<T, 2>;
939
940template <typename T>
941using Vector = Tensor<T, 1>;
942
946template <typename T, typename... Index>
947auto Empty(Context const *ctx, Index &&...index) {
948 Tensor<T, sizeof...(Index)> t;
949 t.SetDevice(ctx->gpu_id);
950 t.Reshape(index...);
951 return t;
952}
953
957template <typename T, typename... Index>
958auto Constant(Context const *ctx, T v, Index &&...index) {
959 Tensor<T, sizeof...(Index)> t;
960 t.SetDevice(ctx->gpu_id);
961 t.Reshape(index...);
962 t.Data()->Fill(std::move(v));
963 return t;
964}
965
969template <typename T, typename... Index>
970auto Zeros(Context const *ctx, Index &&...index) {
971 return Constant(ctx, static_cast<T>(0), index...);
972}
973
974// Only first axis is supported for now.
975template <typename T, int32_t D>
976void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
977 if (r.DeviceIdx() >= 0) {
978 l->SetDevice(r.DeviceIdx());
979 }
980 l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
981 for (size_t i = 1; i < D; ++i) {
982 if (shape[i] == 0) {
983 shape[i] = r.Shape(i);
984 } else {
985 CHECK_EQ(shape[i], r.Shape(i));
986 }
987 }
988 data->Extend(*r.Data());
989 shape[0] = l->Shape(0) + r.Shape(0);
990 });
991}
992} // namespace xgboost::linalg
993
994#if defined(LINALG_HD)
995#undef LINALG_HD
996#endif // defined(LINALG_HD)
997#endif // XGBOOST_LINALG_H_
A type-erased view over the array_interface protocol defined by numpy.
Definition array_interface.h:388
Describes both true and false.
Definition json.h:312
Data structure representing JSON format.
Definition json.h:357
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object.
Definition json.cc:669
span class implementation, based on the ISO C++20 std::span<T>. The interface should be the same.
Definition span.h:424
XGBOOST_DEVICE auto subspan() const -> Span< element_type, detail::ExtentValue< Extent, Offset, Count >::value >
Definition span.h:596
A tensor view with static type and dimension.
Definition linalg.h:293
LINALG_HD auto DeviceIdx() const
Obtain the CUDA device ordinal.
Definition linalg.h:567
LINALG_HD std::size_t Size() const
Number of items in the tensor.
Definition linalg.h:533
LINALG_HD bool CContiguous() const
Whether it's a c-contiguous array.
Definition linalg.h:543
LINALG_HD auto Stride(size_t i) const
Get the stride of the i^th dimension; the stride is specified as a number of items, not bytes.
Definition linalg.h:528
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor.
Definition linalg.h:506
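
A brief usage sketch (not part of the header, assuming <xgboost/linalg.h> and <vector> are included): an integer index drops a dimension while All() and Range() each keep one, so slicing a 3-dim view with (int, All, Range) yields a 2-dim view.
void SliceSketch() {
  std::vector<float> storage(2 * 3 * 4, 0.0f);
  auto t = xgboost::linalg::MakeTensorView(
      -1, xgboost::common::Span<float>{storage.data(), storage.size()}, 2, 3, 4);
  // Fix the first axis, keep the whole second axis, take elements [1, 3) of the third.
  auto s = t.Slice(1, xgboost::linalg::All(), xgboost::linalg::Range(1, 3));
  static_assert(decltype(s)::kDimension == 2);  // the result is a 3 x 2 view
}
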
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition linalg.h:563
LINALG_HD bool Contiguous() const
Whether this is a contiguous array; returns true for both C-contiguous and F-contiguous layouts.
Definition linalg.h:537
LINALG_HD T & operator()(Index &&...index)
Index the tensor to obtain a scalar value.
Definition linalg.h:475
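
A hypothetical indexing example: with the default row-major (kC) layout, the element at (1, 2) of a 2 x 3 view sits at offset 1 * 3 + 2 in the underlying buffer.
std::vector<double> buf{1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
auto m = xgboost::linalg::MakeTensorView(
    -1, xgboost::common::Span<double>{buf.data(), buf.size()}, 2, 3);
double v = m(1, 2);  // reads buf[5], i.e. 6.0
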
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], std::int32_t device)
Create a tensor with data and shape.
Definition linalg.h:404
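
A minimal construction sketch, assuming host data and the convention that device ordinal -1 means CPU; the shape is passed as a braced array and strides default to row-major.
float data[6] = {0, 1, 2, 3, 4, 5};
xgboost::linalg::TensorView<float, 2> t{xgboost::common::Span<float>{data, 6}, {2, 3}, -1};
// t.Shape(0) == 2, t.Shape(1) == 3, t.Stride(0) == 3, t.Stride(1) == 1
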
LINALG_HD bool FContiguous() const
Whether it's a f-contiguous array.
Definition linalg.h:553
LINALG_HD auto Shape(size_t i) const
Get the size of the i^th dimension.
Definition linalg.h:523
LINALG_HD TensorView(common::Span< T > data, I const (&shape)[D], I const (&stride)[D], std::int32_t device)
Create a tensor with data, shape and strides.
Definition linalg.h:439
LINALG_HD T const & operator()(Index &&...index) const
Index the tensor to obtain a scalar value.
Definition linalg.h:485
A tensor storage.
Definition linalg.h:742
auto Slice(S &&...slices)
Get a host view on the slice.
Definition linalg.h:926
auto Slice(S &&...slices) const
Get a host view on the slice.
Definition linalg.h:919
void SetDevice(int32_t device) const
Set device ordinal for this tensor.
Definition linalg.h:933
Tensor(I const (&shape)[D], std::int32_t device, Order order=kC)
Create a tensor with shape and device ordinal.
Definition linalg.h:776
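
A small sketch of creating owning storage on the host (device ordinal -1 is assumed to mean CPU):
xgboost::linalg::Tensor<float, 2> t{{3, 2}, -1};  // 3 x 2, elements value-initialized by Resize
auto h = t.HostView();
h(0, 1) = 42.0f;
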
T & operator()(Index &&...idx)
Index operator.
Definition linalg.h:822
void ModifyInplace(Fn &&fn)
Visitor function for modification that changes shape and data.
Definition linalg.h:875
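
A sketch of the visitor this enables, assuming a 2-dim host tensor and appending one row; the size check after the callback guards against shape/data mismatches.
xgboost::linalg::Tensor<float, 2> t{{2, 4}, -1};
t.ModifyInplace([](xgboost::HostDeviceVector<float> *data,
                   xgboost::common::Span<std::size_t, 2> shape) {
  data->Resize(data->Size() + 4);  // one extra row of 4 values
  shape[0] += 1;                   // keep the recorded shape consistent
});
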
T const & operator()(Index &&...idx) const
Index operator.
Definition linalg.h:830
void Reshape(common::Span< size_t const, D > shape)
Reshape the tensor.
Definition linalg.h:903
void Reshape(S &&...s)
Reshape the tensor.
Definition linalg.h:887
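
An illustrative example (not from the header): dimensions that are not specified are reset to 1 and the underlying buffer is resized to the new total number of elements.
xgboost::linalg::Tensor<double, 3> t{{4, 3, 2}, -1};
t.Reshape(6, 4);  // shape becomes {6, 4, 1}; still 24 elements
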
TensorView< T, kDim > View(int32_t device)
Get a TensorView for this tensor.
Definition linalg.h:837
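
A short sketch contrasting host and device views; a non-negative ordinal is assumed to migrate the data to that CUDA device before the view is taken.
xgboost::linalg::Tensor<float, 2> t{{2, 2}, -1};
auto host_view = t.View(-1);   // wraps data_.HostSpan()
// auto dev_view = t.View(0);  // would call SetDevice(0) and wrap data_.DeviceSpan()
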
Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order=kC)
Initialize from 2 host iterators.
Definition linalg.h:800
Copyright 2014-2023, XGBoost Contributors.
#define DMLC_ATTRIBUTE_UNUSED
helper macro to suppress unused warnings
Definition base.h:170
Endian testing; requires C++11.
A device-and-host vector abstraction layer.
Copyright 2015-2023 by XGBoost Contributors.
constexpr int32_t CalcSliceDim()
Calculate the dimension of the sliced tensor.
Definition linalg.h:95
detail namespace with internal helper functions
Definition json.hpp:249
xgboost::linalg
Copyright 2021-2023 by XGBoost Contributors.
Definition linalg.h:40
auto MakeVec(T *ptr, size_t s, int32_t device=-1)
Create a vector view from contiguous memory.
Definition linalg.h:649
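
A minimal sketch, assuming a host buffer (the device ordinal defaults to -1):
std::vector<float> grad(16, 0.0f);
auto v = xgboost::linalg::MakeVec(grad.data(), grad.size());  // TensorView<float, 1>
v(3) = 1.5f;
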
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape)
Constructor for automatic type deduction.
Definition linalg.h:576
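
A sketch of the deduction helper; the default-constructed Context below is an assumption standing in for a fully configured one and is treated as CPU.
xgboost::Context ctx;
std::vector<float> values(12, 0.0f);
auto view = xgboost::linalg::MakeTensorView(&ctx, values, 3, 4);  // TensorView<float, 2>
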
auto ArrayInterfaceStr(TensorView< T const, D > const &t)
Return string representation of array interface.
Definition linalg.h:724
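
A quick sketch of producing the JSON string; the fields follow the array interface built by ArrayInterface above.
xgboost::linalg::Tensor<float, 2> t{{2, 2}, -1};
std::string jstr = xgboost::linalg::ArrayInterfaceStr(t.HostView());
// jstr carries "data", "shape", "strides", "typestr" and "version" entries
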
LINALG_HD auto UnravelIndex(size_t idx, common::Span< size_t const, D > shape)
Turn a linear index into a multi-dimensional index.
Definition linalg.h:613
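
A worked example (illustrative): unravelling the flat index 7 in a row-major 2 x 3 x 4 shape.
auto [i, j, k] = xgboost::linalg::UnravelIndex(7, 2, 3, 4);
// i == 0, j == 1, k == 3, because 7 == (0 * 3 + 1) * 4 + 3
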
auto Constant(Context const *ctx, T v, Index &&...index)
Create an array with value v.
Definition linalg.h:958
auto Zeros(Context const *ctx, Index &&...index)
Like np.zeros, return a new array of given shape and type, filled with zeros.
Definition linalg.h:970
constexpr detail::RangeTag< I > Range(I beg, I end)
Specify a range of elements in the axis for slicing.
Definition linalg.h:270
auto Empty(Context const *ctx, Index &&...index)
Create an array without initialization.
Definition linalg.h:947
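
A sketch of the three factory helpers together, again assuming a default (CPU) Context:
xgboost::Context ctx;
auto zeros = xgboost::linalg::Zeros<float>(&ctx, 3, 4);    // Tensor<float, 2> filled with 0
auto ones  = xgboost::linalg::Constant(&ctx, 1.0f, 3, 4);  // Tensor<float, 2> filled with 1
auto buf   = xgboost::linalg::Empty<double>(&ctx, 8);      // Tensor<double, 1>, contents not filled
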
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition linalg.h:265
Runtime context for XGBoost.
Definition context.h:84
bool IsCPU() const
Is XGBoost running on CPU?
Definition context.h:133
xgboost::linalg::detail::AllTag
Definition linalg.h:80
xgboost::linalg::detail::Conjunction
C++ 17 conjunction.
Definition linalg.h:248
xgboost::linalg::detail::IntTag
Definition linalg.h:82
xgboost::linalg::detail::RangeTag
Definition linalg.h:85