Medial Code Documentation
Loading...
Searching...
No Matches
compressed_iterator.h
Go to the documentation of this file.
1
5#pragma once
6#include <xgboost/base.h>
7
8#include <algorithm>
9#include <cmath>
10#include <cstddef> // for size_t
11
12#include "common.h"
13
14#ifdef __CUDACC__
15#include "device_helpers.cuh"
16#endif // __CUDACC__
17
18namespace xgboost {
19namespace common {
20
21using CompressedByteT = unsigned char;
22
23namespace detail {
24inline void SetBit(CompressedByteT *byte, int bit_idx) {
25 *byte |= 1 << bit_idx;
26}
27template <typename T>
28inline T CheckBit(const T &byte, int bit_idx) {
29 return byte & (1 << bit_idx);
30}
31inline void ClearBit(CompressedByteT *byte, int bit_idx) {
32 *byte &= ~(1 << bit_idx);
33}
34static const int kPadding = 4; // Assign padding so we can read slightly off
35 // the beginning of the array
36
37// The number of bits required to represent a given unsigned range
38inline XGBOOST_DEVICE size_t SymbolBits(size_t num_symbols) {
39 auto bits = std::ceil(log2(static_cast<double>(num_symbols)));
40 return common::Max(static_cast<size_t>(bits), static_cast<std::size_t>(1));
41}
42} // namespace detail
43
56 size_t symbol_bits_;
57
58 public:
59 XGBOOST_DEVICE explicit CompressedBufferWriter(size_t num_symbols)
60 : symbol_bits_(detail::SymbolBits(num_symbols)) {}
61
77 static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols) {
78 constexpr int kBitsPerByte = 8;
79 size_t compressed_size = static_cast<size_t>(std::ceil(
80 static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
81 kBitsPerByte));
82 // Handle atomicOr where input must be unsigned int, hence 4 bytes aligned.
83 size_t ret =
84 std::ceil(static_cast<double>(compressed_size + detail::kPadding) /
85 static_cast<double>(sizeof(unsigned int))) *
86 sizeof(unsigned int);
87 return ret;
88 }
89
90 template <typename T>
91 void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {
92 const int bits_per_byte = 8;
93
94 for (size_t i = 0; i < symbol_bits_; i++) {
95 size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
96 byte_idx += detail::kPadding;
97 size_t bit_idx =
98 ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
99
100 if (detail::CheckBit(symbol, i)) {
101 detail::SetBit(&buffer[byte_idx], bit_idx);
102 } else {
103 detail::ClearBit(&buffer[byte_idx], bit_idx);
104 }
105 }
106 }
107
108#ifdef __CUDACC__
109 __device__ void AtomicWriteSymbol
110 (CompressedByteT* buffer, uint64_t symbol, size_t offset) {
111 size_t ibit_start = offset * symbol_bits_;
112 size_t ibit_end = (offset + 1) * symbol_bits_ - 1;
113 size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;
114
115 symbol <<= 7 - ibit_end % 8;
116 for (ptrdiff_t ibyte = ibyte_end; ibyte >= static_cast<ptrdiff_t>(ibyte_start); --ibyte) {
117 dh::AtomicOrByte(reinterpret_cast<unsigned int*>(buffer + detail::kPadding),
118 ibyte, symbol & 0xff);
119 symbol >>= 8;
120 }
121 }
122#endif // __CUDACC__
123
124 template <typename IterT>
125 void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
126 uint64_t tmp = 0;
127 size_t stored_bits = 0;
128 const size_t max_stored_bits = 64 - symbol_bits_;
129 size_t buffer_position = detail::kPadding;
130 const size_t num_symbols = input_end - input_begin;
131 for (size_t i = 0; i < num_symbols; i++) {
132 typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
133 if (stored_bits > max_stored_bits) {
134 // Eject only full bytes
135 size_t tmp_bytes = stored_bits / 8;
136 for (size_t j = 0; j < tmp_bytes; j++) {
137 buffer[buffer_position] = static_cast<CompressedByteT>(
138 tmp >> (stored_bits - (j + 1) * 8));
139 buffer_position++;
140 }
141 stored_bits -= tmp_bytes * 8;
142 tmp &= (1 << stored_bits) - 1;
143 }
144 // Store symbol
145 tmp <<= symbol_bits_;
146 tmp |= symbol;
147 stored_bits += symbol_bits_;
148 }
149
150 // Eject all bytes
151 int tmp_bytes =
152 static_cast<int>(std::ceil(static_cast<float>(stored_bits) / 8));
153 for (int j = 0; j < tmp_bytes; j++) {
154 int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8;
155 if (shift_bits >= 0) {
156 buffer[buffer_position] =
157 static_cast<CompressedByteT>(tmp >> shift_bits);
158 } else {
159 buffer[buffer_position] =
160 static_cast<CompressedByteT>(tmp << std::abs(shift_bits));
161 }
162 buffer_position++;
163 }
164 }
165};
166
175template <typename T>
177 public:
178 // Type definitions for thrust
179 typedef CompressedIterator<T> self_type; // NOLINT
180 typedef ptrdiff_t difference_type; // NOLINT
181 typedef T value_type; // NOLINT
182 typedef value_type *pointer; // NOLINT
183 typedef value_type reference; // NOLINT
184
185 private:
186 const CompressedByteT *buffer_ {nullptr};
187 size_t symbol_bits_ {0};
188 size_t offset_ {0};
189
190 public:
191 CompressedIterator() = default;
192 CompressedIterator(const CompressedByteT *buffer, size_t num_symbols)
193 : buffer_(buffer) {
194 symbol_bits_ = detail::SymbolBits(num_symbols);
195 }
196
197 XGBOOST_DEVICE reference operator*() const {
198 const int bits_per_byte = 8;
199 size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
200 size_t start_byte_idx = start_bit_idx / bits_per_byte;
201 start_byte_idx += detail::kPadding;
202
203 // Read 5 bytes - the maximum we will need
204 uint64_t tmp = static_cast<uint64_t>(buffer_[start_byte_idx - 4]) << 32 |
205 static_cast<uint64_t>(buffer_[start_byte_idx - 3]) << 24 |
206 static_cast<uint64_t>(buffer_[start_byte_idx - 2]) << 16 |
207 static_cast<uint64_t>(buffer_[start_byte_idx - 1]) << 8 |
208 buffer_[start_byte_idx];
209 int bit_shift =
210 (bits_per_byte - ((offset_ + 1) * symbol_bits_)) % bits_per_byte;
211 tmp >>= bit_shift;
212 // Mask off unneeded bits
213 uint64_t mask = (static_cast<uint64_t>(1) << symbol_bits_) - 1;
214 return static_cast<T>(tmp & mask);
215 }
216
217 XGBOOST_DEVICE reference operator[](size_t idx) const {
218 self_type offset = (*this);
219 offset.offset_ += idx;
220 return *offset;
221 }
222};
223} // namespace common
224} // namespace xgboost
Writes bit compressed symbols to a memory buffer.
Definition compressed_iterator.h:55
static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols)
Calculates number of bytes required for a given number of elements and a symbol range.
Definition compressed_iterator.h:77
Read symbols from a bit compressed memory buffer.
Definition compressed_iterator.h:176
Copyright 2015-2023 by XGBoost Contributors.
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition base.h:64
detail namespace with internal helper functions
Definition json.hpp:249
namespace of xgboost
Definition base.h:90
Copyright 2015-2023 by XGBoost Contributors.