Medial Code Documentation
Loading...
Searching...
No Matches
gpu_tree_learner.h
1#ifndef LIGHTGBM_TREELEARNER_GPU_TREE_LEARNER_H_
2#define LIGHTGBM_TREELEARNER_GPU_TREE_LEARNER_H_
3
4#include <LightGBM/utils/random.h>
5#include <LightGBM/utils/array_args.h>
6#include <LightGBM/dataset.h>
7#include <LightGBM/tree.h>
8#include <LightGBM/feature_group.h>
9#include "feature_histogram.hpp"
10#include "serial_tree_learner.h"
11#include "data_partition.hpp"
12#include "split_info.hpp"
13#include "leaf_splits.hpp"
14
15#include <cstdio>
16#include <vector>
17#include <random>
18#include <cmath>
19#include <memory>
20
21#ifdef USE_GPU
22
23#define BOOST_COMPUTE_THREAD_SAFE
24#define BOOST_COMPUTE_HAVE_THREAD_LOCAL
25// Use Boost.Compute on-disk kernel cache
26#define BOOST_COMPUTE_USE_OFFLINE_CACHE
27#include <boost/compute/core.hpp>
28#include <boost/compute/container/vector.hpp>
29#include <boost/align/aligned_allocator.hpp>
30
31using namespace json11;
32
33namespace LightGBM {
34
// Tree learner that derives from SerialTreeLearner and overrides its training
// hooks with GPU-backed versions built on Boost.Compute. Only declarations are
// visible in this header; this variant is compiled only when USE_GPU is defined.
38class GPUTreeLearner: public SerialTreeLearner {
39public:
 // Builds the learner from a tree configuration (definition not in this header).
40 explicit GPUTreeLearner(const Config* tree_config);
41 ~GPUTreeLearner();
 // Overrides of the SerialTreeLearner entry points with identical signatures.
42 void Init(const Dataset* train_data, bool is_constant_hessian) override;
43 void ResetTrainingData(const Dataset* train_data) override;
44 Tree* Train(const score_t* gradients, const score_t *hessians,
45 bool is_constant_hessian, Json& forced_split_json) override;
46
 // Forwards to the base implementation, then records whether bagging is in
 // effect: use_bagging_ becomes true only when used_indices is non-null AND
 // num_data differs from num_data_; in every other case it is reset to false.
47 void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override {
48 SerialTreeLearner::SetBaggingData(used_indices, num_data);
49 // determine if we are using bagging before we construct the data partition
50 // thus we can start data movement to GPU earlier
51 if (used_indices != nullptr) {
52 if (num_data != num_data_) {
53 use_bagging_ = true;
54 return;
55 }
56 }
57 use_bagging_ = false;
58 }
59
60protected:
 // Overrides of the base-class training hooks. The parameter of Split() is
 // spelled best_leaf to match the base declaration.
61 void BeforeTrain() override;
62 bool BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) override;
63 void FindBestSplits() override;
64 void Split(Tree* tree, int best_leaf, int* left_leaf, int* right_leaf) override;
65 void ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
66
67private:
 // Four 8-bit values in one struct; element type of device_features_.
69 struct Feature4 {
70 uint8_t s[4];
71 };
72
 // One histogram bin: gradient sum, hessian sum and an unsigned 32-bit count.
 // NOTE(review): presumably mirrors the layout used by the .cl kernels - confirm.
74 struct GPUHistogramBinEntry {
75 score_t sum_gradients;
76 score_t sum_hessians;
77 uint32_t cnt;
78 };
79
 // Workgroup sizing for a leaf holding leaf_num_data rows.
 // NOTE(review): whether the result is a count or a log2 exponent is not
 // visible in this header - confirm against the definition.
85 int GetNumWorkgroupsPerFeature(data_size_t leaf_num_data);
86
 // Selects and initializes the compute device.
 // NOTE(review): platform_id / device_id are presumably OpenCL indices - confirm.
93 void InitGPU(int platform_id, int device_id);
94
 // Set-up steps for device memory, kernels and kernel arguments (definitions
 // not shown; the required call order is not visible here).
98 void AllocateGPUMemory();
99
103 void BuildGPUKernels();
104
 // Returns a build log as a string for the given option string.
110 std::string GetBuildLog(const std::string &opts);
111
115 void SetupKernelArguments();
116
 // Starts histogram work for a leaf with leaf_num_data rows.
 // NOTE(review): use_all_features presumably selects the all-features kernels - confirm.
123 void GPUHistogram(data_size_t leaf_num_data, bool use_all_features);
124
 // HistType does not appear in the parameter list, so it cannot be deduced
 // and every caller must name it explicitly.
129 template <typename HistType>
130 void WaitAndGetHistograms(HistogramBinEntry* histograms);
131
 // Asynchronous histogram construction over the given rows and gradients.
 // NOTE(review): the meaning of the bool result is not visible here - confirm.
147 bool ConstructGPUHistogramsAsync(
148 const std::vector<int8_t>& is_feature_used,
149 const data_size_t* data_indices, data_size_t num_data,
150 const score_t* gradients, const score_t* hessians,
151 score_t* ordered_gradients, score_t* ordered_hessians);
152
153
 // Cap expressed as a base-2 logarithm (10 -> 2^10); the default of
 // preallocd_max_num_wg_ below, 1024, is that same 2^10.
155 const int kMaxLogWorkgroupsPerFeature = 10; // 2^10
158 int preallocd_max_num_wg_ = 1024;
159
 // Written by SetBaggingData(); defaulted to false so it is never read while
 // indeterminate.
161 bool use_bagging_ = false;
162
 // Boost.Compute device, context and command queue used by this learner.
164 boost::compute::device dev_;
166 boost::compute::context ctx_;
168 boost::compute::command_queue queue_;
 // Kernel source text pulled in at compile time. Each included .cl file must
 // expand to a string-literal expression for these initializers to compile.
170 const char *kernel256_src_ =
171 #include "ocl/histogram256.cl"
172 ;
174 const char *kernel64_src_ =
175 #include "ocl/histogram64.cl"
176 ;
178 const char *kernel16_src_ =
179 #include "ocl/histogram16.cl"
180 ;
 // NOTE(review): presumably the source/name chosen among the three variants
 // above - confirm in the definition.
182 std::string kernel_source_;
184 std::string kernel_name_;
185
 // Three families of compiled kernels, one vector each.
188 std::vector<boost::compute::kernel> histogram_kernels_;
191 std::vector<boost::compute::kernel> histogram_allfeats_kernels_;
194 std::vector<boost::compute::kernel> histogram_fulldata_kernels_;
 // Feature-group counts and bin sizing. None has a default initializer, so
 // they must be assigned before use.
196 int num_feature_groups_;
198 int num_dense_feature_groups_;
203 int dword_features_;
206 int num_dense_feature4_;
209 int max_num_bin_;
211 int device_bin_size_;
213 size_t hist_bin_entry_sz_;
 // Index maps for dense / sparse feature groups, and per-group bin multipliers.
215 std::vector<int> dense_feature_group_map_;
217 std::vector<int> sparse_feature_group_map_;
219 std::vector<int> device_bin_mults_;
 // Device-side vector of Feature4 elements.
221 std::unique_ptr<boost::compute::vector<Feature4>> device_features_;
 // Gradients: a device buffer, a second ("pinned") buffer and a raw pointer
 // that starts as nullptr. The same trio repeats for hessians and feature masks.
 // NOTE(review): the pointers presumably hold host mappings of the pinned buffers - confirm.
223 boost::compute::buffer device_gradients_;
225 boost::compute::buffer pinned_gradients_;
227 void * ptr_pinned_gradients_ = nullptr;
229 boost::compute::buffer device_hessians_;
231 boost::compute::buffer pinned_hessians_;
233 void * ptr_pinned_hessians_ = nullptr;
 // Host-side mask bytes, allocated with 4096-byte alignment.
235 std::vector<char, boost::alignment::aligned_allocator<char, 4096>> feature_masks_;
237 boost::compute::buffer device_feature_masks_;
239 boost::compute::buffer pinned_feature_masks_;
241 void * ptr_pinned_feature_masks_ = nullptr;
 // Device-side vectors for row indices, integer counters and raw sub-histogram bytes.
243 std::unique_ptr<boost::compute::vector<data_size_t>> device_data_indices_;
245 std::unique_ptr<boost::compute::vector<int>> sync_counters_;
247 std::unique_ptr<boost::compute::vector<char>> device_subhistograms_;
249 boost::compute::buffer device_histogram_outputs_;
 // Defaulted to nullptr for consistency with the ptr_pinned_* pointers above.
251 void * host_histogram_outputs_ = nullptr;
 // Wait lists and completion handles. Despite the _future_ suffix, the last
 // two are boost::compute::event objects, not futures.
253 boost::compute::wait_list kernel_wait_obj_;
255 boost::compute::wait_list histograms_wait_obj_;
257 boost::compute::future<void> indices_future_;
259 boost::compute::event gradients_future_;
261 boost::compute::event hessians_future_;
262};
263
264} // namespace LightGBM
265#else
266
267// When GPU support is not compiled in, quit with an error message
268
269namespace LightGBM {
270
// Stand-in compiled when USE_GPU is not defined. The class head on listing
// line 271 is restored here; without it `public:` and the closing `};` would
// sit at namespace scope.
271class GPUTreeLearner: public SerialTreeLearner {
272public:
 // `#pragma warning` is MSVC syntax; 4702 is MSVC's "unreachable code" warning.
273 #pragma warning(disable : 4702)
 // Forwards tree_config to the base class, then reports through Log::Fatal
 // that GPU support is missing from this build.
 // NOTE(review): Log::Fatal presumably does not return - confirm.
274 explicit GPUTreeLearner(const Config* tree_config) : SerialTreeLearner(tree_config) {
275 Log::Fatal("GPU Tree Learner was not enabled in this build.\n"
276 "Please recompile with CMake option -DUSE_GPU=1");
277 }
278};
279
280} // namespace LightGBM
281
282#endif // USE_GPU
283
284#endif // LIGHTGBM_TREELEARNER_GPU_TREE_LEARNER_H_
285
Definition gpu_tree_learner.h:271
Used for learning a tree on a single machine.
Definition serial_tree_learner.h:34
data_size_t num_data_
number of data
Definition serial_tree_learner.h:116
void Init(const Dataset *train_data, bool is_constant_hessian) override
Initialize tree learner with training dataset.
Definition serial_tree_learner.cpp:44
Tree * Train(const score_t *gradients, const score_t *hessians, bool is_constant_hessian, Json &forced_split_json) override
Train a tree model on the dataset.
Definition serial_tree_learner.cpp:157
void SetBaggingData(const data_size_t *used_indices, data_size_t num_data) override
Set bagging data.
Definition serial_tree_learner.h:54
virtual void Split(Tree *tree, int best_leaf, int *left_leaf, int *right_leaf)
Partition the tree and data according to the best split.
Definition serial_tree_learner.cpp:703
virtual void BeforeTrain()
Some initial work before training.
Definition serial_tree_learner.cpp:255
virtual bool BeforeFindBestSplit(const Tree *tree, int left_leaf, int right_leaf)
Some initial work before FindBestSplit.
Definition serial_tree_learner.cpp:348
Definition json11.hpp:79
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of scores and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size; it is better to use a signed type.
Definition meta.h:14
Definition config.h:27