Medial Code Documentation
Loading...
Searching...
No Matches
BinSplitOptimizer.h
1#ifndef __BIN_SPLITTER_H__
2#define __BIN_SPLITTER_H__
3#include <map>
4#include <vector>
5#include <limits.h>
7
8using namespace std;
9
14public:
// Declaration only; the definition is not part of this listing.
// Takes `vec` and `y` by const reference plus two size_t settings, and returns a vector<int>.
// histAvg, partitionValues and has_error are non-const references, so the callee can write to them.
// NOTE(review): presumably `vec` holds feature values, `y` the matching outcomes, and the result is a per-sample bin assignment — confirm against the .cpp.
15 vector<int> SplitToBins(const vector<float> &vec, const vector<float> &y, size_t kBins,
16 size_t min_samples, vector<float> &histAvg, vector<float> &partitionValues, bool &has_error);
17private:
// Private helper, declared only. Takes a vector of indexes and writes a score through `totScore`.
// NOTE(review): the meaning of the bool return value is not visible in this listing — confirm.
18 bool evaluate(const vector<int> &indexes, float &totScore);
19
// Private state of the optimizer.
// NOTE(review): _sortedArr is a raw pointer; who allocates and frees it is not visible in this listing — confirm ownership.
20 float *_sortedArr;
21 int _minSamples;
// Two maps keyed by float. NOTE(review): presumably per-value sample counts and per-value averages — confirm.
22 map<float, int> _histElements;
23 map<float, float> _histAvg;
24
25};
26
// Enumerates the available bin-splitting strategies; BinSettings::split_method holds one of these.
// NOTE(review): what each strategy does is implemented outside this listing; any per-value note below is inferred from the name only.
27enum BinSplitMethod {
28 SameValueWidth, // presumably equal-width bins over the value range — TODO confirm
29 PartitaionMover, // NOTE(review): "Partitaion" looks like a misspelling of "Partition"; renaming would change a public identifier
30 IterativeMerge,
31 DynamicSplit,
32};
33
38public:
41 int binCnt; // the bin count for splitting; 0 means it is not used
44 bool weighted; // if true, fixed-width binning averages the values in each bin
45
46 BinSplitMethod split_method; // the split method to apply; one of the BinSplitMethod values
47
// Declaration only. Takes a string-to-string map by non-const reference and returns an int.
// NOTE(review): presumably fills the settings from key/value pairs; the meaning of the return code is not visible here — confirm.
// NOTE(review): the parameter is named `map`, the same name as the std::map template it is declared with — easy to misread.
48 int init(map<string, string>& map);
49
// Static lookup table, declared only (defined elsewhere).
// NOTE(review): it is keyed by int and maps to string, which reads as method -> name despite the identifier name_to_method — confirm.
50 static const unordered_map<int, string> name_to_method;
// Takes a method name as a string and returns a BinSplitMethod.
// NOTE(review): handling of an unknown name is not visible in this listing — confirm.
51 static BinSplitMethod bin_method_name_to_type(const string& bin_method);
52
// Default constructor: assigns a default to each of the six settings below.
// NOTE(review): split_method is not assigned here and its declaration in this listing has no initializer,
// so it appears to hold an indeterminate value until set elsewhere (e.g. by init()) — confirm.
53 BinSettings() {
54 min_bin_count = 1;
55 min_res_value = 0;
56 binCnt = 10;
57 min_value_cutoff = (float)INT_MIN; // widest lower bound expressible via INT_MIN; presumably no practical trimming — confirm
58 max_value_cutoff = (float)INT_MAX; // widest upper bound expressible via INT_MAX; presumably no practical trimming — confirm
59 weighted = false;
60 }
61
62 ADD_CLASS_NAME(BinSettings)
64 split_method, weighted)
65};
66
// Project namespace holding free functions; `process` groups the feature-binning helpers declared below.
71namespace medial {
75 namespace process {
// Splits `feature` into bins using `setting`. `feature` and `y` are taken by non-const reference.
// NOTE(review): presumably `sel_indexes` selects which samples take part and `feature` is rewritten in place — confirm against the .cpp.
86 void split_feature_to_bins(const BinSettings &setting, vector<float> &feature,
87 const vector<int> &sel_indexes, vector<float> &y);
// Normalizes `feature` to be between [0-1] and also changes the distribution of its values to be uniform.
92 void normalize_feature_to_uniform(const BinSettings &setting, vector<float> &feature);
93 }
94}
95
96#endif
An abstract class that can be serialized and written to or read from a file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Specific settings for binning a feature.
Definition BinSplitOptimizer.h:37
int init(map< string, string > &map)
the split method; please refer to BinSplitMethod
Definition BinSplitOptimizer.cpp:543
double min_res_value
minimal distance between feature values in different bins; if 0, it is not used
Definition BinSplitOptimizer.h:40
float min_value_cutoff
a minimal value trim cutoff
Definition BinSplitOptimizer.h:42
int min_bin_count
minimal count of cases + controls required to create a bin for the feature
Definition BinSplitOptimizer.h:39
int binCnt
the bin count for splitting; if 0, it is not used
Definition BinSplitOptimizer.h:41
bool weighted
if true, fixed-width binning will average the values in each bin
Definition BinSplitOptimizer.h:44
float max_value_cutoff
a maximal value trim cutoff
Definition BinSplitOptimizer.h:43
a bin splitter that uses an optimizer
Definition BinSplitOptimizer.h:13
Definition SerializableObject.h:32
void split_feature_to_bins(const BinSettings &setting, vector< float > &feature, const vector< int > &sel_indexes, vector< float > &y)
splits a feature into bins using the given setting
Definition BinSplitOptimizer.cpp:471
void normalize_feature_to_uniform(const BinSettings &setting, vector< float > &feature)
normalizes the feature to be in the range [0, 1] and also changes the distribution of its values to be uniform
Definition BinSplitOptimizer.cpp:583
medial namespace for functions
Definition InfraMed.h:667
Definition StdDeque.h:58