1#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
2#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
4#include <LightGBM/bin.h>
17 : bin_data_(bin_data), min_bin_(
static_cast<uint8_t
>(min_bin)),
18 max_bin_(
static_cast<uint8_t
>(max_bin)),
19 default_bin_(
static_cast<uint8_t
>(default_bin)) {
20 if (default_bin_ == 0) {
42 int len = (num_data_ + 1) / 2;
43 data_ = std::vector<uint8_t>(len,
static_cast<uint8_t
>(0));
44 buf_ = std::vector<uint8_t>(len,
static_cast<uint8_t
>(0));
51 const int i1 = idx >> 1;
52 const int i2 = (idx & 1) << 2;
53 const uint8_t val =
static_cast<uint8_t
>(value) << i2;
64 const int len = (num_data_ + 1) / 2;
69 inline BinIterator*
GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin)
const override;
76 for (; i <
num_data - rest; i += 4) {
78 const auto bin0 = (data_[idx0 >> 1] >> ((idx0 & 1) << 2)) & 0xf;
81 const auto bin1 = (data_[idx1 >> 1] >> ((idx1 & 1) << 2)) & 0xf;
84 const auto bin2 = (data_[idx2 >> 1] >> ((idx2 & 1) << 2)) & 0xf;
87 const auto bin3 = (data_[idx3 >> 1] >> ((idx3 & 1) << 2)) & 0xf;
89 out[bin0].sum_gradients += ordered_gradients[i];
90 out[bin1].sum_gradients += ordered_gradients[i + 1];
91 out[bin2].sum_gradients += ordered_gradients[i + 2];
92 out[bin3].sum_gradients += ordered_gradients[i + 3];
94 out[bin0].sum_hessians += ordered_hessians[i];
95 out[bin1].sum_hessians += ordered_hessians[i + 1];
96 out[bin2].sum_hessians += ordered_hessians[i + 2];
97 out[bin3].sum_hessians += ordered_hessians[i + 3];
107 const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
108 out[bin].sum_gradients += ordered_gradients[i];
109 out[bin].sum_hessians += ordered_hessians[i];
115 const score_t* ordered_gradients,
const score_t* ordered_hessians,
119 for (; i <
num_data - rest; i += 4) {
120 const auto bin0 = (data_[i >> 1]) & 0xf;
121 const auto bin1 = (data_[i >> 1] >> 4) & 0xf;
122 const auto bin2 = (data_[(i >> 1) + 1]) & 0xf;
123 const auto bin3 = (data_[(i >> 1) + 1] >> 4) & 0xf;
125 out[bin0].sum_gradients += ordered_gradients[i];
126 out[bin1].sum_gradients += ordered_gradients[i + 1];
127 out[bin2].sum_gradients += ordered_gradients[i + 2];
128 out[bin3].sum_gradients += ordered_gradients[i + 3];
130 out[bin0].sum_hessians += ordered_hessians[i];
131 out[bin1].sum_hessians += ordered_hessians[i + 1];
132 out[bin2].sum_hessians += ordered_hessians[i + 2];
133 out[bin3].sum_hessians += ordered_hessians[i + 3];
141 const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
142 out[bin].sum_gradients += ordered_gradients[i];
143 out[bin].sum_hessians += ordered_hessians[i];
149 const score_t* ordered_gradients,
153 for (; i <
num_data - rest; i += 4) {
155 const auto bin0 = (data_[idx0 >> 1] >> ((idx0 & 1) << 2)) & 0xf;
158 const auto bin1 = (data_[idx1 >> 1] >> ((idx1 & 1) << 2)) & 0xf;
161 const auto bin2 = (data_[idx2 >> 1] >> ((idx2 & 1) << 2)) & 0xf;
164 const auto bin3 = (data_[idx3 >> 1] >> ((idx3 & 1) << 2)) & 0xf;
166 out[bin0].sum_gradients += ordered_gradients[i];
167 out[bin1].sum_gradients += ordered_gradients[i + 1];
168 out[bin2].sum_gradients += ordered_gradients[i + 2];
169 out[bin3].sum_gradients += ordered_gradients[i + 3];
179 const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
180 out[bin].sum_gradients += ordered_gradients[i];
186 const score_t* ordered_gradients,
190 for (; i <
num_data - rest; i += 4) {
191 const auto bin0 = (data_[i >> 1]) & 0xf;
192 const auto bin1 = (data_[i >> 1] >> 4) & 0xf;
193 const auto bin2 = (data_[(i >> 1) + 1]) & 0xf;
194 const auto bin3 = (data_[(i >> 1) + 1] >> 4) & 0xf;
196 out[bin0].sum_gradients += ordered_gradients[i];
197 out[bin1].sum_gradients += ordered_gradients[i + 1];
198 out[bin2].sum_gradients += ordered_gradients[i + 2];
199 out[bin3].sum_gradients += ordered_gradients[i + 3];
207 const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
208 out[bin].sum_gradients += ordered_gradients[i];
214 uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type,
bool default_left,
218 uint8_t th =
static_cast<uint8_t
>(threshold + min_bin);
219 const uint8_t minb =
static_cast<uint8_t
>(min_bin);
220 const uint8_t maxb =
static_cast<uint8_t
>(max_bin);
221 uint8_t t_default_bin =
static_cast<uint8_t
>(min_bin + default_bin);
222 if (default_bin == 0) {
230 if (missing_type == MissingType::NaN) {
231 if (default_bin <= threshold) {
232 default_indices = lte_indices;
233 default_count = &lte_count;
238 missing_default_indices = lte_indices;
239 missing_default_count = &lte_count;
243 const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
244 if (bin < minb || bin > maxb || t_default_bin == bin) {
245 default_indices[(*default_count)++] = idx;
246 }
else if (bin == maxb) {
247 missing_default_indices[(*missing_default_count)++] = idx;
248 }
else if (bin > th) {
249 gt_indices[gt_count++] = idx;
251 lte_indices[lte_count++] = idx;
255 if ((default_left && missing_type == MissingType::Zero) || (default_bin <= threshold && missing_type != MissingType::Zero)) {
256 default_indices = lte_indices;
257 default_count = &lte_count;
261 const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
262 if (bin < minb || bin > maxb || t_default_bin == bin) {
263 default_indices[(*default_count)++] = idx;
264 }
else if (bin > th) {
265 gt_indices[gt_count++] = idx;
267 lte_indices[lte_count++] = idx;
275 uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
283 if (Common::FindInBitset(threshold, num_threahold, default_bin)) {
284 default_indices = lte_indices;
285 default_count = &lte_count;
289 const uint32_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
290 if (bin < min_bin || bin > max_bin) {
291 default_indices[(*default_count)++] = idx;
292 }
else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
293 lte_indices[lte_count++] = idx;
295 gt_indices[gt_count++] = idx;
307 if (buf_.empty()) {
return; }
308 int len = (num_data_ + 1) / 2;
309 for (
int i = 0; i < len; ++i) {
315 void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices)
override {
316 const uint8_t* mem_data =
reinterpret_cast<const uint8_t*
>(memory);
317 if (!local_used_indices.empty()) {
319 for (
int i = 0; i < num_data_ - rest; i += 2) {
322 const auto bin1 =
static_cast<uint8_t
>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
323 idx = local_used_indices[i + 1];
324 const auto bin2 =
static_cast<uint8_t
>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
326 const int i1 = i >> 1;
327 data_[i1] = (bin1 | (bin2 << 4));
330 data_size_t idx = local_used_indices[num_data_ - 1];
331 data_[num_data_ >> 1] = (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
334 for (
size_t i = 0; i < data_.size(); ++i) {
335 data_[i] = mem_data[i];
341 auto other_bin =
dynamic_cast<const Dense4bitsBin*
>(full_bin);
343 for (
int i = 0; i < num_used_indices - rest; i += 2) {
345 const auto bin1 =
static_cast<uint8_t
>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
346 idx = used_indices[i + 1];
347 const auto bin2 =
static_cast<uint8_t
>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
348 const int i1 = i >> 1;
349 data_[i1] = (bin1 | (bin2 << 4));
352 data_size_t idx = used_indices[num_used_indices - 1];
353 data_[num_used_indices >> 1] = (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
358 writer->
Write(data_.data(),
sizeof(uint8_t) * data_.size());
362 return sizeof(uint8_t) * data_.size();
367 std::vector<uint8_t> data_;
368 std::vector<uint8_t> buf_;
372 const auto bin = (bin_data_->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
373 if (bin >= min_bin_ && bin <= max_bin_) {
374 return bin - min_bin_ + bias_;
380uint32_t Dense4bitsBinIterator::RawGet(
data_size_t idx) {
381 return (bin_data_->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
Iterator for one bin column.
Definition bin.h:267
Interface for bin data. This class will store bin data for one feature. Unlike OrderedBin,...
Definition bin.h:286
Definition dense_nbits_bin.hpp:14
uint32_t Get(data_size_t idx) override
Get bin data on specific row index.
Definition dense_nbits_bin.hpp:371
Definition dense_nbits_bin.hpp:37
data_size_t num_data() const override
Number of all data.
Definition dense_nbits_bin.hpp:301
virtual data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t threshold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const override
Split data according to threshold: if bin <= threshold, the row is put into left (lte_indices),...
Definition dense_nbits_bin.hpp:213
void Push(int, data_size_t idx, uint32_t value) override
Push one record. \param tid Thread id.
Definition dense_nbits_bin.hpp:50
virtual data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, const uint32_t *threshold, int num_threahold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const override
Split data according to a categorical threshold bitset: if the bin is found in the bitset, the row is put into left (lte_indices),...
Definition dense_nbits_bin.hpp:274
void ConstructHistogram(const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, HistogramBinEntry *out) const override
Construct histogram of this feature. Note: we use ordered_gradients and ordered_hessians to improve c...
Definition dense_nbits_bin.hpp:148
size_t SizesInByte() const override
Get sizes in byte of this object.
Definition dense_nbits_bin.hpp:361
void FinishLoad() override
After all feature data has been pushed, calling this can produce a better layout for the bin data.
Definition dense_nbits_bin.hpp:306
void ConstructHistogram(const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, const score_t *ordered_hessians, HistogramBinEntry *out) const override
Construct histogram of this feature. Note: we use ordered_gradients and ordered_hessians to improve c...
Definition dense_nbits_bin.hpp:71
BinIterator * GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override
Get bin iterator of this bin for specific feature.
Definition dense_nbits_bin.hpp:384
OrderedBin * CreateOrderedBin() const override
No ordered bin for dense features.
Definition dense_nbits_bin.hpp:304
void LoadFromMemory(const void *memory, const std::vector< data_size_t > &local_used_indices) override
Load from memory.
Definition dense_nbits_bin.hpp:315
void SaveBinaryToFile(const VirtualFileWriter *writer) const override
Save binary data to file.
Definition dense_nbits_bin.hpp:357
Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins. There ar...
Definition bin.h:219
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of scores and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size; it is better to use a signed type.
Definition meta.h:14
Store data for one histogram bin.
Definition bin.h:29
An interface for writing files from buffers.
Definition file_io.h:15
virtual size_t Write(const void *data, size_t bytes) const =0
Append buffer to file.