Medial Code Documentation
|
The main class for a data set, which is used for training or validation. More...
#include <dataset.h>
Public Member Functions | |
LIGHTGBM_EXPORT | Dataset (data_size_t num_data) |
void | Construct (std::vector< std::unique_ptr< BinMapper > > &bin_mappers, int **sample_non_zero_indices, const int *num_per_col, size_t total_sample_cnt, const Config &io_config) |
LIGHTGBM_EXPORT | ~Dataset () |
Destructor. | |
LIGHTGBM_EXPORT bool | CheckAlign (const Dataset &other) const |
void | PushOneRow (int tid, data_size_t row_idx, const std::vector< double > &feature_values) |
void | PushOneRow (int tid, data_size_t row_idx, const std::vector< std::pair< int, double > > &feature_values) |
void | PushOneData (int tid, data_size_t row_idx, int group, int sub_feature, double value) |
int | RealFeatureIndex (int fidx) const |
int | InnerFeatureIndex (int col_idx) const |
int | Feature2Group (int feature_idx) const |
int | Feture2SubFeature (int feature_idx) const |
uint64_t | GroupBinBoundary (int group_idx) const |
uint64_t | NumTotalBin () const |
std::vector< int > | ValidFeatureIndices () const |
void | ReSize (data_size_t num_data) |
void | CopySubset (const Dataset *fullset, const data_size_t *used_indices, data_size_t num_used_indices, bool need_meta_data) |
LIGHTGBM_EXPORT void | FinishLoad () |
LIGHTGBM_EXPORT bool | SetFloatField (const char *field_name, const float *field_data, data_size_t num_element) |
LIGHTGBM_EXPORT bool | SetDoubleField (const char *field_name, const double *field_data, data_size_t num_element) |
LIGHTGBM_EXPORT bool | SetIntField (const char *field_name, const int *field_data, data_size_t num_element) |
LIGHTGBM_EXPORT bool | GetFloatField (const char *field_name, data_size_t *out_len, const float **out_ptr) |
LIGHTGBM_EXPORT bool | GetDoubleField (const char *field_name, data_size_t *out_len, const double **out_ptr) |
LIGHTGBM_EXPORT bool | GetIntField (const char *field_name, data_size_t *out_len, const int **out_ptr) |
LIGHTGBM_EXPORT void | SaveBinaryFile (const char *bin_filename) |
Save the current dataset into a binary file; the output is written to "filename.bin". | |
LIGHTGBM_EXPORT void | CopyFeatureMapperFrom (const Dataset *dataset) |
LIGHTGBM_EXPORT void | CreateValid (const Dataset *dataset) |
void | ConstructHistograms (const std::vector< int8_t > &is_feature_used, const data_size_t *data_indices, data_size_t num_data, int leaf_idx, std::vector< std::unique_ptr< OrderedBin > > &ordered_bins, const score_t *gradients, const score_t *hessians, score_t *ordered_gradients, score_t *ordered_hessians, bool is_constant_hessian, HistogramBinEntry *histogram_data) const |
void | FixHistogram (int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data, HistogramBinEntry *data) const |
data_size_t | Split (int feature, const uint32_t *threshold, int num_threshold, bool default_left, data_size_t *data_indices, data_size_t num_data, data_size_t *lte_indices, data_size_t *gt_indices) const |
int | SubFeatureBinOffset (int i) const |
int | FeatureNumBin (int i) const |
int8_t | FeatureMonotone (int i) const |
double | FeaturePenalte (int i) const |
bool | HasMonotone () const |
int | FeatureGroupNumBin (int group) const |
const BinMapper * | FeatureBinMapper (int i) const |
const Bin * | FeatureBin (int i) const |
const Bin * | FeatureGroupBin (int group) const |
bool | FeatureGroupIsSparse (int group) const |
BinIterator * | FeatureIterator (int i) const |
BinIterator * | FeatureGroupIterator (int group) const |
double | RealThreshold (int i, uint32_t threshold) const |
uint32_t | BinThreshold (int i, double threshold_double) const |
void | CreateOrderedBins (std::vector< std::unique_ptr< OrderedBin > > *ordered_bins) const |
const Metadata & | metadata () const |
Get a const reference to the meta data. | |
int | num_features () const |
Get Number of used features. | |
int | num_feature_groups () const |
Get Number of feature groups. | |
int | num_total_features () const |
Get Number of total features. | |
int | label_idx () const |
Get the index of label column. | |
const std::vector< std::string > & | feature_names () const |
Get the feature names of the current data set. | |
void | set_feature_names (const std::vector< std::string > &feature_names) |
std::vector< std::string > | feature_infos () const |
void | ResetConfig (const char *parameters) |
data_size_t | num_data () const |
Get Number of data. | |
Dataset & | operator= (const Dataset &)=delete |
Disable copy. | |
Dataset (const Dataset &)=delete | |
Disable copy. | |
Data Fields | |
friend | DatasetLoader |
The main class for a data set, which is used for training or validation.
|
inline |
Get a const reference to the meta data.