Medial Code Documentation
Loading...
Searching...
No Matches
ordered_sparse_bin.hpp
1#ifndef LIGHTGBM_IO_ORDERED_SPARSE_BIN_HPP_
2#define LIGHTGBM_IO_ORDERED_SPARSE_BIN_HPP_
3
4#include <LightGBM/bin.h>
5
6#include <cstring>
7#include <cstdint>
8
9#include <vector>
10#include <mutex>
11#include <algorithm>
12
13#include "sparse_bin.hpp"
14
15namespace LightGBM {
16
25template <typename VAL_T>
27public:
29 struct SparsePair {
30 data_size_t ridx; // data(row) index
31 VAL_T bin; // bin for this data
32 SparsePair() : ridx(0), bin(0) {}
33 };
34
35 OrderedSparseBin(const SparseBin<VAL_T>* bin_data)
36 :bin_data_(bin_data) {
37 data_size_t cur_pos = 0;
38 data_size_t i_delta = -1;
39 int non_zero_cnt = 0;
40 while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
41 ++non_zero_cnt;
42 }
43 ordered_pair_.resize(non_zero_cnt);
44 leaf_cnt_.push_back(non_zero_cnt);
45 }
46
47 ~OrderedSparseBin() {
48 }
49
50 void Init(const char* used_idices, int num_leaves) override {
51 // initialize the leaf information
52 leaf_start_ = std::vector<data_size_t>(num_leaves, 0);
53 leaf_cnt_ = std::vector<data_size_t>(num_leaves, 0);
54 if (used_idices == nullptr) {
55 // if using all data, copy all non-zero pair
56 data_size_t j = 0;
57 data_size_t cur_pos = 0;
58 data_size_t i_delta = -1;
59 while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
60 ordered_pair_[j].ridx = cur_pos;
61 ordered_pair_[j].bin = bin_data_->vals_[i_delta];
62 ++j;
63 }
64 leaf_cnt_[0] = static_cast<data_size_t>(j);
65 } else {
66 // if using part of data(bagging)
67 data_size_t j = 0;
68 data_size_t cur_pos = 0;
69 data_size_t i_delta = -1;
70 while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
71 if (used_idices[cur_pos]) {
72 ordered_pair_[j].ridx = cur_pos;
73 ordered_pair_[j].bin = bin_data_->vals_[i_delta];
74 ++j;
75 }
76 }
77 leaf_cnt_[0] = j;
78 }
79 }
80
81 void ConstructHistogram(int leaf, const score_t* gradient, const score_t* hessian,
82 HistogramBinEntry* out) const override {
83 // get current leaf boundary
84 const data_size_t start = leaf_start_[leaf];
85 const data_size_t end = start + leaf_cnt_[leaf];
86 const int rest = (end - start) % 4;
87 data_size_t i = start;
88 // use data on current leaf to construct histogram
89 for (; i < end - rest; i += 4) {
90 const VAL_T bin0 = ordered_pair_[i].bin;
91 const VAL_T bin1 = ordered_pair_[i + 1].bin;
92 const VAL_T bin2 = ordered_pair_[i + 2].bin;
93 const VAL_T bin3 = ordered_pair_[i + 3].bin;
94
95 const auto g0 = gradient[ordered_pair_[i].ridx];
96 const auto h0 = hessian[ordered_pair_[i].ridx];
97 const auto g1 = gradient[ordered_pair_[i + 1].ridx];
98 const auto h1 = hessian[ordered_pair_[i + 1].ridx];
99 const auto g2 = gradient[ordered_pair_[i + 2].ridx];
100 const auto h2 = hessian[ordered_pair_[i + 2].ridx];
101 const auto g3 = gradient[ordered_pair_[i + 3].ridx];
102 const auto h3 = hessian[ordered_pair_[i + 3].ridx];
103
104 out[bin0].sum_gradients += g0;
105 out[bin1].sum_gradients += g1;
106 out[bin2].sum_gradients += g2;
107 out[bin3].sum_gradients += g3;
108
109 out[bin0].sum_hessians += h0;
110 out[bin1].sum_hessians += h1;
111 out[bin2].sum_hessians += h2;
112 out[bin3].sum_hessians += h3;
113
114 ++out[bin0].cnt;
115 ++out[bin1].cnt;
116 ++out[bin2].cnt;
117 ++out[bin3].cnt;
118 }
119
120 for (; i < end; ++i) {
121 const VAL_T bin0 = ordered_pair_[i].bin;
122
123 const auto g0 = gradient[ordered_pair_[i].ridx];
124 const auto h0 = hessian[ordered_pair_[i].ridx];
125
126 out[bin0].sum_gradients += g0;
127 out[bin0].sum_hessians += h0;
128 ++out[bin0].cnt;
129 }
130 }
131
132 void ConstructHistogram(int leaf, const score_t* gradient,
133 HistogramBinEntry* out) const override {
134 // get current leaf boundary
135 const data_size_t start = leaf_start_[leaf];
136 const data_size_t end = start + leaf_cnt_[leaf];
137 const int rest = (end - start) % 4;
138 data_size_t i = start;
139 // use data on current leaf to construct histogram
140 for (; i < end - rest; i += 4) {
141 const VAL_T bin0 = ordered_pair_[i].bin;
142 const VAL_T bin1 = ordered_pair_[i + 1].bin;
143 const VAL_T bin2 = ordered_pair_[i + 2].bin;
144 const VAL_T bin3 = ordered_pair_[i + 3].bin;
145
146 const auto g0 = gradient[ordered_pair_[i].ridx];
147 const auto g1 = gradient[ordered_pair_[i + 1].ridx];
148 const auto g2 = gradient[ordered_pair_[i + 2].ridx];
149 const auto g3 = gradient[ordered_pair_[i + 3].ridx];
150
151 out[bin0].sum_gradients += g0;
152 out[bin1].sum_gradients += g1;
153 out[bin2].sum_gradients += g2;
154 out[bin3].sum_gradients += g3;
155
156 ++out[bin0].cnt;
157 ++out[bin1].cnt;
158 ++out[bin2].cnt;
159 ++out[bin3].cnt;
160 }
161 for (; i < end; ++i) {
162 const VAL_T bin0 = ordered_pair_[i].bin;
163 const auto g0 = gradient[ordered_pair_[i].ridx];
164 out[bin0].sum_gradients += g0;
165 ++out[bin0].cnt;
166 }
167 }
168
169 void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) override {
170 // get current leaf boundary
171 const data_size_t l_start = leaf_start_[leaf];
172 const data_size_t l_end = l_start + leaf_cnt_[leaf];
173 // new left leaf end after split
174 data_size_t new_left_end = l_start;
175
176 for (data_size_t i = l_start; i < l_end; ++i) {
177 if (is_in_leaf[ordered_pair_[i].ridx] == mark) {
178 std::swap(ordered_pair_[new_left_end], ordered_pair_[i]);
179 ++new_left_end;
180 }
181 }
182
183 leaf_start_[right_leaf] = new_left_end;
184 leaf_cnt_[leaf] = new_left_end - l_start;
185 leaf_cnt_[right_leaf] = l_end - new_left_end;
186 }
187 data_size_t NonZeroCount(int leaf) const override {
188 return static_cast<data_size_t>(leaf_cnt_[leaf]);
189 }
194
195private:
196 const SparseBin<VAL_T>* bin_data_;
198 std::vector<SparsePair> ordered_pair_;
200 std::vector<data_size_t> leaf_start_;
202 std::vector<data_size_t> leaf_cnt_;
203};
204
205template <typename VAL_T>
209
210} // namespace LightGBM
211#endif // LightGBM_IO_ORDERED_SPARSE_BIN_HPP_
Interface for ordered bin data. efficient for construct histogram, especially for sparse bin There ar...
Definition bin.h:219
Interface for ordered bin data. efficient for construct histogram, especially for sparse bin There ar...
Definition ordered_sparse_bin.hpp:26
void Split(int leaf, int right_leaf, const char *is_in_leaf, char mark) override
Split current bin, and perform re-order by leaf.
Definition ordered_sparse_bin.hpp:169
void ConstructHistogram(int leaf, const score_t *gradient, const score_t *hessian, HistogramBinEntry *out) const override
Construct histogram by using this bin Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ...
Definition ordered_sparse_bin.hpp:81
void ConstructHistogram(int leaf, const score_t *gradient, HistogramBinEntry *out) const override
Construct histogram by using this bin Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ...
Definition ordered_sparse_bin.hpp:132
OrderedSparseBin< VAL_T > & operator=(const OrderedSparseBin< VAL_T > &)=delete
Disable copy.
Definition sparse_bin.hpp:69
OrderedBin * CreateOrderedBin() const override
Create the ordered bin for this bin.
Definition ordered_sparse_bin.hpp:206
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of score, and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size, it is better to use signed type.
Definition meta.h:14
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name, cert-dcl58-cpp) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
Definition json.hpp:24418
Store data for one histogram bin.
Definition bin.h:29
Pair to store one bin entry.
Definition ordered_sparse_bin.hpp:29