|
Medial Code Documentation
|
The main class for a data set, which is used for training or validation. More...
#include <dataset.h>
Public Member Functions | |
| LIGHTGBM_EXPORT | Dataset (data_size_t num_data) |
| void | Construct (std::vector< std::unique_ptr< BinMapper > > &bin_mappers, int **sample_non_zero_indices, const int *num_per_col, size_t total_sample_cnt, const Config &io_config) |
| LIGHTGBM_EXPORT | ~Dataset () |
| Destructor. | |
| LIGHTGBM_EXPORT bool | CheckAlign (const Dataset &other) const |
| void | PushOneRow (int tid, data_size_t row_idx, const std::vector< double > &feature_values) |
| void | PushOneRow (int tid, data_size_t row_idx, const std::vector< std::pair< int, double > > &feature_values) |
| void | PushOneData (int tid, data_size_t row_idx, int group, int sub_feature, double value) |
| int | RealFeatureIndex (int fidx) const |
| int | InnerFeatureIndex (int col_idx) const |
| int | Feature2Group (int feature_idx) const |
| int | Feture2SubFeature (int feature_idx) const |
| uint64_t | GroupBinBoundary (int group_idx) const |
| uint64_t | NumTotalBin () const |
| std::vector< int > | ValidFeatureIndices () const |
| void | ReSize (data_size_t num_data) |
| void | CopySubset (const Dataset *fullset, const data_size_t *used_indices, data_size_t num_used_indices, bool need_meta_data) |
| LIGHTGBM_EXPORT void | FinishLoad () |
| LIGHTGBM_EXPORT bool | SetFloatField (const char *field_name, const float *field_data, data_size_t num_element) |
| LIGHTGBM_EXPORT bool | SetDoubleField (const char *field_name, const double *field_data, data_size_t num_element) |
| LIGHTGBM_EXPORT bool | SetIntField (const char *field_name, const int *field_data, data_size_t num_element) |
| LIGHTGBM_EXPORT bool | GetFloatField (const char *field_name, data_size_t *out_len, const float **out_ptr) |
| LIGHTGBM_EXPORT bool | GetDoubleField (const char *field_name, data_size_t *out_len, const double **out_ptr) |
| LIGHTGBM_EXPORT bool | GetIntField (const char *field_name, data_size_t *out_len, const int **out_ptr) |
| LIGHTGBM_EXPORT void | SaveBinaryFile (const char *bin_filename) |
| Save the current dataset to a binary file; it will be saved to "filename.bin". | |
| LIGHTGBM_EXPORT void | CopyFeatureMapperFrom (const Dataset *dataset) |
| LIGHTGBM_EXPORT void | CreateValid (const Dataset *dataset) |
| void | ConstructHistograms (const std::vector< int8_t > &is_feature_used, const data_size_t *data_indices, data_size_t num_data, int leaf_idx, std::vector< std::unique_ptr< OrderedBin > > &ordered_bins, const score_t *gradients, const score_t *hessians, score_t *ordered_gradients, score_t *ordered_hessians, bool is_constant_hessian, HistogramBinEntry *histogram_data) const |
| void | FixHistogram (int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data, HistogramBinEntry *data) const |
| data_size_t | Split (int feature, const uint32_t *threshold, int num_threshold, bool default_left, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const |
| int | SubFeatureBinOffset (int i) const |
| int | FeatureNumBin (int i) const |
| int8_t | FeatureMonotone (int i) const |
| double | FeaturePenalte (int i) const |
| bool | HasMonotone () const |
| int | FeatureGroupNumBin (int group) const |
| const BinMapper * | FeatureBinMapper (int i) const |
| const Bin * | FeatureBin (int i) const |
| const Bin * | FeatureGroupBin (int group) const |
| bool | FeatureGroupIsSparse (int group) const |
| BinIterator * | FeatureIterator (int i) const |
| BinIterator * | FeatureGroupIterator (int group) const |
| double | RealThreshold (int i, uint32_t threshold) const |
| uint32_t | BinThreshold (int i, double threshold_double) const |
| void | CreateOrderedBins (std::vector< std::unique_ptr< OrderedBin > > *ordered_bins) const |
| const Metadata & | metadata () const |
| Get a const reference to the meta data. | |
| int | num_features () const |
| Get the number of used features. | |
| int | num_feature_groups () const |
| Get the number of feature groups. | |
| int | num_total_features () const |
| Get the total number of features. | |
| int | label_idx () const |
| Get the index of the label column. | |
| const std::vector< std::string > & | feature_names () const |
| Get the feature names of the current data set. | |
| void | set_feature_names (const std::vector< std::string > &feature_names) |
| std::vector< std::string > | feature_infos () const |
| void | ResetConfig (const char *parameters) |
| data_size_t | num_data () const |
| Get the number of data entries. | |
| Dataset & | operator= (const Dataset &)=delete |
| Disable copy. | |
| Dataset (const Dataset &)=delete | |
| Disable copy. | |
Data Fields | |
| friend | DatasetLoader |
The main class for a data set, which is used for training or validation.
|
inline |
Get a const reference to the meta data.