1#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
2#define LIGHTGBM_IO_DENSE_BIN_HPP_
4#include <LightGBM/bin.h>
12template <
typename VAL_T>
15template <
typename VAL_T>
19 : bin_data_(bin_data), min_bin_(
static_cast<VAL_T
>(min_bin)),
20 max_bin_(
static_cast<VAL_T
>(max_bin)),
21 default_bin_(
static_cast<VAL_T
>(default_bin)) {
22 if (default_bin_ == 0) {
42template <
typename VAL_T>
47 : num_data_(
num_data), data_(num_data_,
static_cast<VAL_T
>(0)) {
54 data_[idx] =
static_cast<VAL_T
>(value);
60 data_.resize(num_data_);
64 BinIterator*
GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin)
const override;
71 for (; i <
num_data - rest; i += 4) {
72 const VAL_T bin0 = data_[data_indices[i]];
73 const VAL_T bin1 = data_[data_indices[i + 1]];
74 const VAL_T bin2 = data_[data_indices[i + 2]];
75 const VAL_T bin3 = data_[data_indices[i + 3]];
77 out[bin0].sum_gradients += ordered_gradients[i];
78 out[bin1].sum_gradients += ordered_gradients[i + 1];
79 out[bin2].sum_gradients += ordered_gradients[i + 2];
80 out[bin3].sum_gradients += ordered_gradients[i + 3];
82 out[bin0].sum_hessians += ordered_hessians[i];
83 out[bin1].sum_hessians += ordered_hessians[i + 1];
84 out[bin2].sum_hessians += ordered_hessians[i + 2];
85 out[bin3].sum_hessians += ordered_hessians[i + 3];
93 const VAL_T bin = data_[data_indices[i]];
94 out[bin].sum_gradients += ordered_gradients[i];
95 out[bin].sum_hessians += ordered_hessians[i];
101 const score_t* ordered_gradients,
const score_t* ordered_hessians,
105 for (; i <
num_data - rest; i += 4) {
106 const VAL_T bin0 = data_[i];
107 const VAL_T bin1 = data_[i + 1];
108 const VAL_T bin2 = data_[i + 2];
109 const VAL_T bin3 = data_[i + 3];
111 out[bin0].sum_gradients += ordered_gradients[i];
112 out[bin1].sum_gradients += ordered_gradients[i + 1];
113 out[bin2].sum_gradients += ordered_gradients[i + 2];
114 out[bin3].sum_gradients += ordered_gradients[i + 3];
116 out[bin0].sum_hessians += ordered_hessians[i];
117 out[bin1].sum_hessians += ordered_hessians[i + 1];
118 out[bin2].sum_hessians += ordered_hessians[i + 2];
119 out[bin3].sum_hessians += ordered_hessians[i + 3];
127 const VAL_T bin = data_[i];
128 out[bin].sum_gradients += ordered_gradients[i];
129 out[bin].sum_hessians += ordered_hessians[i];
135 const score_t* ordered_gradients,
139 for (; i <
num_data - rest; i += 4) {
140 const VAL_T bin0 = data_[data_indices[i]];
141 const VAL_T bin1 = data_[data_indices[i + 1]];
142 const VAL_T bin2 = data_[data_indices[i + 2]];
143 const VAL_T bin3 = data_[data_indices[i + 3]];
145 out[bin0].sum_gradients += ordered_gradients[i];
146 out[bin1].sum_gradients += ordered_gradients[i + 1];
147 out[bin2].sum_gradients += ordered_gradients[i + 2];
148 out[bin3].sum_gradients += ordered_gradients[i + 3];
156 const VAL_T bin = data_[data_indices[i]];
157 out[bin].sum_gradients += ordered_gradients[i];
163 const score_t* ordered_gradients,
167 for (; i <
num_data - rest; i += 4) {
168 const VAL_T bin0 = data_[i];
169 const VAL_T bin1 = data_[i + 1];
170 const VAL_T bin2 = data_[i + 2];
171 const VAL_T bin3 = data_[i + 3];
173 out[bin0].sum_gradients += ordered_gradients[i];
174 out[bin1].sum_gradients += ordered_gradients[i + 1];
175 out[bin2].sum_gradients += ordered_gradients[i + 2];
176 out[bin3].sum_gradients += ordered_gradients[i + 3];
184 const VAL_T bin = data_[i];
185 out[bin].sum_gradients += ordered_gradients[i];
191 uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type,
bool default_left,
195 VAL_T th =
static_cast<VAL_T
>(threshold + min_bin);
196 const VAL_T minb =
static_cast<VAL_T
>(min_bin);
197 const VAL_T maxb =
static_cast<VAL_T
>(max_bin);
198 VAL_T t_default_bin =
static_cast<VAL_T
>(min_bin + default_bin);
199 if (default_bin == 0) {
207 if (missing_type == MissingType::NaN) {
208 if (default_bin <= threshold) {
209 default_indices = lte_indices;
210 default_count = &lte_count;
215 missing_default_indices = lte_indices;
216 missing_default_count = &lte_count;
220 const VAL_T bin = data_[idx];
221 if (bin < minb || bin > maxb || t_default_bin == bin) {
222 default_indices[(*default_count)++] = idx;
223 }
else if (bin == maxb) {
224 missing_default_indices[(*missing_default_count)++] = idx;
225 }
else if (bin > th) {
226 gt_indices[gt_count++] = idx;
228 lte_indices[lte_count++] = idx;
232 if ((default_left && missing_type == MissingType::Zero) || (default_bin <= threshold && missing_type != MissingType::Zero)) {
233 default_indices = lte_indices;
234 default_count = &lte_count;
238 const VAL_T bin = data_[idx];
239 if (bin < minb || bin > maxb || t_default_bin == bin) {
240 default_indices[(*default_count)++] = idx;
241 }
else if (bin > th) {
242 gt_indices[gt_count++] = idx;
244 lte_indices[lte_count++] = idx;
252 uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
260 if (Common::FindInBitset(threshold, num_threahold, default_bin)) {
261 default_indices = lte_indices;
262 default_count = &lte_count;
266 const uint32_t bin = data_[idx];
267 if (bin < min_bin || bin > max_bin) {
268 default_indices[(*default_count)++] = idx;
269 }
else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
270 lte_indices[lte_count++] = idx;
272 gt_indices[gt_count++] = idx;
285 void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices)
override {
286 const VAL_T* mem_data =
reinterpret_cast<const VAL_T*
>(memory);
287 if (!local_used_indices.empty()) {
288 for (
int i = 0; i < num_data_; ++i) {
289 data_[i] = mem_data[local_used_indices[i]];
292 for (
int i = 0; i < num_data_; ++i) {
293 data_[i] = mem_data[i];
300 for (
int i = 0; i < num_used_indices; ++i) {
301 data_[i] = other_bin->data_[used_indices[i]];
306 writer->
Write(data_.data(),
sizeof(VAL_T) * num_data_);
310 return sizeof(VAL_T) * num_data_;
315 std::vector<VAL_T> data_;
318template <
typename VAL_T>
320 auto ret = bin_data_->data_[idx];
321 if (ret >= min_bin_ && ret <= max_bin_) {
322 return ret - min_bin_ + bias_;
328template <
typename VAL_T>
330 return bin_data_->data_[idx];
333template <
typename VAL_T>
Iterator for one bin column.
Definition bin.h:267
Interface for bin data. This class will store bin data for one feature. Unlike OrderedBin,...
Definition bin.h:286
Definition dense_bin.hpp:16
uint32_t Get(data_size_t idx) override
Get bin data on specific row index.
Definition dense_bin.hpp:319
Used to store bins for a dense feature. Uses a template to reduce memory cost.
Definition dense_bin.hpp:43
BinIterator * GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override
Get bin iterator of this bin for specific feature.
Definition dense_bin.hpp:334
virtual data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, const uint32_t *threshold, int num_threahold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const override
Split data according to threshold: if bin <= threshold, the row is put into the left side (lte_indices),...
Definition dense_bin.hpp:251
void Push(int, data_size_t idx, uint32_t value) override
Push one record. \param tid Thread id.
Definition dense_bin.hpp:53
void LoadFromMemory(const void *memory, const std::vector< data_size_t > &local_used_indices) override
Load from memory.
Definition dense_bin.hpp:285
void ConstructHistogram(const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, const score_t *ordered_hessians, HistogramBinEntry *out) const override
Construct histogram of this feature. Note: we use ordered_gradients and ordered_hessians to improve c...
Definition dense_bin.hpp:66
OrderedBin * CreateOrderedBin() const override
not ordered bin for dense feature
Definition dense_bin.hpp:281
data_size_t num_data() const override
Number of all data.
Definition dense_bin.hpp:278
void SaveBinaryToFile(const VirtualFileWriter *writer) const override
Save binary data to file.
Definition dense_bin.hpp:305
void FinishLoad() override
After all feature data has been pushed, call this to allow a better refactoring of the bin data.
Definition dense_bin.hpp:283
size_t SizesInByte() const override
Get sizes in byte of this object.
Definition dense_bin.hpp:309
void ConstructHistogram(const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, HistogramBinEntry *out) const override
Construct histogram of this feature. Note: we use ordered_gradients and ordered_hessians to improve c...
Definition dense_bin.hpp:134
virtual data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t threshold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const override
Split data according to threshold: if bin <= threshold, the row is put into the left side (lte_indices),...
Definition dense_bin.hpp:190
Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins. There ar...
Definition bin.h:219
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of score, and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size; it is better to use a signed type.
Definition meta.h:14
Store data for one histogram bin.
Definition bin.h:29
An interface for writing files from buffers.
Definition file_io.h:15
virtual size_t Write(const void *data, size_t bytes) const =0
Append buffer to file.