|
virtual | ~Bin () |
| virtual destructor
|
|
virtual void | Push (int tid, data_size_t idx, uint32_t value)=0 |
| Push one record. Parameter tid: thread id.
|
|
virtual void | CopySubset (const Bin *full_bin, const data_size_t *used_indices, data_size_t num_used_indices)=0 |
|
virtual BinIterator * | GetIterator (uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const =0 |
| Get bin iterator of this bin for specific feature.
|
|
virtual void | SaveBinaryToFile (const VirtualFileWriter *writer) const =0 |
| Save binary data to file.
|
|
virtual void | LoadFromMemory (const void *memory, const std::vector< data_size_t > &local_used_indices)=0 |
| Load from memory.
|
|
virtual size_t | SizesInByte () const =0 |
| Get the size in bytes of this object.
|
|
virtual data_size_t | num_data () const =0 |
| Number of all data.
|
|
virtual void | ReSize (data_size_t num_data)=0 |
|
virtual void | ConstructHistogram (const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, const score_t *ordered_hessians, HistogramBinEntry *out) const =0 |
| Construct the histogram of this feature. Note: ordered_gradients and ordered_hessians are used to improve the cache hit rate. The naive solution is to read gradients[data_indices[i]] for each data_indices[i], which is not cache friendly because the memory access is not contiguous. ordered_gradients and ordered_hessians are preprocessed and re-ordered by data_indices, so ordered_gradients[i] is aligned with the gradient of data_indices[i] (and likewise for ordered_hessians).
|
|
virtual void | ConstructHistogram (data_size_t num_data, const score_t *ordered_gradients, const score_t *ordered_hessians, HistogramBinEntry *out) const =0 |
|
virtual void | ConstructHistogram (const data_size_t *data_indices, data_size_t num_data, const score_t *ordered_gradients, HistogramBinEntry *out) const =0 |
| Construct the histogram of this feature. Note: ordered_gradients and ordered_hessians are used to improve the cache hit rate. The naive solution is to read gradients[data_indices[i]] for each data_indices[i], which is not cache friendly because the memory access is not contiguous. ordered_gradients and ordered_hessians are preprocessed and re-ordered by data_indices, so ordered_gradients[i] is aligned with the gradient of data_indices[i] (and likewise for ordered_hessians). Note that this overload takes only ordered_gradients; it has no ordered_hessians parameter.
|
|
virtual void | ConstructHistogram (data_size_t num_data, const score_t *ordered_gradients, HistogramBinEntry *out) const =0 |
|
virtual data_size_t | Split (uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t threshold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const =0 |
| Split data according to the threshold: if bin <= threshold, the index is put into the left side (lte_indices); otherwise it is put into the right side (gt_indices).
|
|
virtual data_size_t | SplitCategorical (uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, const uint32_t *threshold, int num_threshold, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const =0 |
| Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
|
|
virtual OrderedBin * | CreateOrderedBin () const =0 |
| Create the ordered bin for this bin.
|
|
virtual void | FinishLoad ()=0 |
| Call this after all feature data has been pushed; it may reorganize the bin data into a better form.
|
|
|
static Bin * | CreateBin (data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, double sparse_threshold, bool *is_sparse) |
| Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse".
|
|
static Bin * | CreateDenseBin (data_size_t num_data, int num_bin) |
| Create object for bin data of one feature, used for dense feature.
|
|
static Bin * | CreateSparseBin (data_size_t num_data, int num_bin) |
| Create object for bin data of one feature, used for sparse feature.
|
|
Interface for bin data. This class stores the bin data for one feature. Unlike OrderedBin, this class stores data in its original order. Note that this may cause cache misses when constructing the histogram, but it does not need a re-ordering operation, so it will be faster than OrderedBin for dense features.
Construct the histogram of this feature. Note: ordered_gradients and ordered_hessians are used to improve the cache hit rate. The naive solution is to read gradients[data_indices[i]] for each data_indices[i], which is not cache friendly because the memory access is not contiguous. ordered_gradients and ordered_hessians are preprocessed and re-ordered by data_indices, so ordered_gradients[i] is aligned with the gradient of data_indices[i] (and likewise for ordered_hessians).
- Parameters
-
data_indices | Used data indices in current leaf |
num_data | Number of used data |
ordered_gradients | Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i] |
ordered_hessians | Pointer to hessians, the data_indices[i]-th data's hessian is ordered_hessians[i] |
out | Output Result |
Implemented in LightGBM::SparseBin< VAL_T >, LightGBM::DenseBin< VAL_T >, and LightGBM::Dense4bitsBin.
Construct the histogram of this feature. Note: ordered_gradients and ordered_hessians are used to improve the cache hit rate. The naive solution is to read gradients[data_indices[i]] for each data_indices[i], which is not cache friendly because the memory access is not contiguous. ordered_gradients and ordered_hessians are preprocessed and re-ordered by data_indices, so ordered_gradients[i] is aligned with the gradient of data_indices[i] (and likewise for ordered_hessians). Note that this overload takes only ordered_gradients; it has no ordered_hessians parameter.
- Parameters
-
data_indices | Used data indices in current leaf |
num_data | Number of used data |
ordered_gradients | Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i] |
out | Output Result |
Implemented in LightGBM::SparseBin< VAL_T >, LightGBM::DenseBin< VAL_T >, and LightGBM::Dense4bitsBin.