Medial Code Documentation
Loading...
Searching...
No Matches
MedUtils_imp.h
1//
2// templated code for MedUtils class, included after class definition
3//
4#ifndef __MED_UTILS_IMP__H_
5#define __MED_UTILS_IMP__H_
6#include <math.h>
7
8// Discretization of values
9template <class S> int discretize(vector<S>& x, vector<int>& binned_x, int& nbins, int max_bins) {
10 return discretize(x, binned_x, nbins, max_bins, MED_MAT_MISSING_VALUE, BIN_EQUISIZE);
11}
12
13template <class S> int discretize(vector<S>& x, vector<int>& binned_x, int& nbins, int max_bins, float missing_value) {
14 return discretize(x, binned_x, nbins, max_bins, missing_value, BIN_EQUISIZE);
15}
16
17template <class S> int discretize(vector<S>& x, vector<int>& binned_x, int& nbins, int max_bins, MedBinningType binning) {
18 return discretize(x, binned_x, nbins, max_bins, MED_MAT_MISSING_VALUE, binning);
19}
20
21template <class S> int discretize(vector<S>& x, vector<int>& binned_x, int& nbins, int max_bins, float missing_value, MedBinningType binning) {
22 if (max_bins <= 0)
23 HMTHROW_AND_ERR("Error max_bins should be > 0\n");
24
25 binned_x.clear();
26 binned_x.reserve(x.size());
27
28 if (binning >= BIN_LAST) {
29 MEDLOG(LOG_MED_UTILS, MAX_LOG_LEVEL, "Unknown binning type %d\n", binning);
30 return -1;
31 }
32
33 map<float, int> x_values;
34 for (unsigned int i = 0; i < x.size(); i++) {
35 if (x[i] != missing_value)
36 x_values[(float)x[i]]++;
37 }
38
39 nbins = (int)x_values.size();
40 map<float, int> x_index;
41
42 if (nbins <= max_bins) { // Leave as is
43 int idx = 0;
44 for (auto it = x_values.begin(); it != x_values.end(); it++)
45 x_index[it->first] = idx++;
46 assert(idx == nbins);
47 }
48 else { // Need to combine values into bins
49 if (binning == BIN_EQUIDIST) {
50 float min_val = x_values.begin()->first;
51 float max_val = x_values.rbegin()->first;
52 float bin_size = (max_val - min_val) / max_bins;
53 if (bin_size > 0)
54 for (auto it = x_values.begin(); it != x_values.end(); it++)
55 x_index[it->first] = (int)((it->first - min_val) / bin_size);
56 }
57 else if (binning == BIN_EQUISIZE) {
58 int tot = 0;
59 for (auto it = x_values.begin(); it != x_values.end(); it++)
60 tot += it->second;
61 int bin_size = tot / max_bins;
62
63 if (bin_size > 0) {
64 tot = 0;
65 for (auto it = x_values.begin(); it != x_values.end(); it++) {
66 x_index[it->first] = tot / bin_size;
67 tot += it->second;
68 }
69 }
70 }
71
72 nbins = max_bins;
73 }
74
75
76 for (unsigned int i = 0; i < x.size(); i++) {
77 if (x[i] == missing_value)
78 binned_x.push_back(-1);
79 else
80 binned_x.push_back(x_index[x[i]]);
81 }
82
83 return 0;
84}
85#endif
#define MEDLOG(Section, Level, fmt,...)
LOG() - all print options : section and level.
Definition Logger.h:141
Copyright 2015-2023 by XGBoost Contributors.