Medial Code Documentation
Loading...
Searching...
No Matches
dataset_loader.h
1#ifndef LIGHTGBM_DATASET_LOADER_H_
2#define LIGHTGBM_DATASET_LOADER_H_
3
4#include <LightGBM/dataset.h>
5
6namespace LightGBM {
7
9public:
10 LIGHTGBM_EXPORT DatasetLoader(const Config& io_config, const PredictFunction& predict_fun, int num_class, const char* filename);
11
12 LIGHTGBM_EXPORT ~DatasetLoader();
13
14 LIGHTGBM_EXPORT Dataset* LoadFromFile(const char* filename, const char* initscore_file, int rank, int num_machines);
15
// Convenience overload: forwards to the four-argument LoadFromFile, passing 0 for rank and 1 for num_machines.
16 LIGHTGBM_EXPORT Dataset* LoadFromFile(const char* filename, const char* initscore_file) {
17 return LoadFromFile(filename, initscore_file, 0, 1);
18 }
19
20 LIGHTGBM_EXPORT Dataset* LoadFromFileAlignWithOtherDataset(const char* filename, const char* initscore_file, const Dataset* train_data);
21
22 LIGHTGBM_EXPORT Dataset* CostructFromSampleData(double** sample_values,
23 int** sample_indices, int num_col, const int* num_per_col,
24 size_t total_sample_size, data_size_t num_data);
25
29 DatasetLoader(const DatasetLoader&) = delete;
30
31private:
32 Dataset* LoadFromBinFile(const char* data_filename, const char* bin_filename, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices);
33
34 void SetHeader(const char* filename);
35
36 void CheckDataset(const Dataset* dataset);
37
38 std::vector<std::string> LoadTextDataToMemory(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices);
39
40 std::vector<std::string> SampleTextDataFromMemory(const std::vector<std::string>& data);
41
42 std::vector<std::string> SampleTextDataFromFile(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices);
43
44 void ConstructBinMappersFromTextData(int rank, int num_machines, const std::vector<std::string>& sample_data, const Parser* parser, Dataset* dataset);
45
47 void ExtractFeaturesFromMemory(std::vector<std::string>& text_data, const Parser* parser, Dataset* dataset);
48
50 void ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset);
51
53 std::string CheckCanLoadFromBin(const char* filename);
54
55 const Config& config_;
57 Random random_;
59 const PredictFunction& predict_fun_;
61 int num_class_;
63 int label_idx_;
65 int weight_idx_;
67 int group_idx_;
69 std::unordered_set<int> ignore_features_;
71 std::vector<std::string> feature_names_;
73 std::unordered_set<int> categorical_features_;
74};
75
76}
77
78#endif // LIGHTGBM_DATASET_LOADER_H_
Definition dataset_loader.h:8
DatasetLoader & operator=(const DatasetLoader &)=delete
Disable copy.
DatasetLoader(const DatasetLoader &)=delete
Disable copy.
The main class for a data set, which is used for training or validation.
Definition dataset.h:278
This class is used to store some meta (non-feature) data for training data, e.g. labels,...
Definition dataset.h:36
Interface for Parser.
Definition dataset.h:249
A wrapper for random generator.
Definition random.h:15
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
int32_t data_size_t
Type used for data sizes; a signed type is preferred.
Definition meta.h:14
Definition config.h:27