Medial Code Documentation
Loading...
Searching...
No Matches
MedCleaner.h
1#ifndef _MED_CLEANER_H_
2#define _MED_CLEANER_H_
3
4#include <stdlib.h>
5#include <stdarg.h>
6#include <stdio.h>
7
8#include <assert.h>
9#include <math.h>
10
11#include <vector>
12#include <map>
13#include <string>
14#include <algorithm>
16
17//======================================================================================
18// MedCleaner - class to handle cleaning/normalization of data
19//======================================================================================
20
21#define MED_CLEANER_MAX_Z 15 // default std_mult argument of get_cleaning_range() and get_limits_iteratively()
22#define MED_CLEANER_EPSILON 0.0001 // small tolerance constant; not referenced in this header — NOTE(review): confirm its use in MedCleaner.cpp
23
24// Cleaner class : Normalizing and cleaning of outliers
26public:
27 // NOTE(review): `string` and `vector` are used unqualified below; the visible includes do not bring them into the global namespace, so a using-declaration must come from elsewhere — confirm.
28 float missing_value; // presumably the sentinel marking a missing entry — TODO confirm in MedCleaner.cpp
29 float min_trim; // NOTE(review): purpose not visible in this header — confirm
30
31 int n, nvals, most_common_count, nzeros; // counters; presumably filled by calculate() — TODO confirm
32 float median, q1, q3, iqr, mean, sdv, skew, min, max; // summary statistics; how they are computed is not visible in this header
33 float most_common_value; // presumably the value counted by most_common_count — TODO confirm
34
35 bool trim_flag, remove_flag, normalize_flag, replace_missing_to_mean_flag; // switches; presumably select which cleaning stages are applied — TODO confirm
36 float trim_min, trim_max; // presumably the bounds used when trimming — TODO confirm
37 float remove_min, remove_max; // presumably the bounds used when removing values — TODO confirm
38
39 float sk; // NOTE(review): meaning not visible here (skew-related by name) — confirm
40 int skew_sign; // NOTE(review): meaning not visible here — confirm
41
42 MedCleaner(); // default constructor; initial member values are set in the implementation file, not here
43
44 void print(const string& prefix); // takes a caller-supplied prefix string; output format is defined in the implementation
45 void print_short(const string& prefix); // shorter variant of print() by name — TODO confirm what is omitted
46 void calculate(vector<float> &values); // values is a non-const reference — NOTE(review): confirm whether it is modified
47 void get_mean_and_sdv(vector<float> &values, bool take_missing_into_account = false); // take_missing_into_account defaults to false
48 void get_cleaning_range(vector<float>& values, float& min_val, float& max_val, float std_mult = MED_CLEANER_MAX_Z); // min_val/max_val are reference parameters, presumably outputs — TODO confirm; std_mult defaults to MED_CLEANER_MAX_Z
49 void get_limits_iteratively(vector<float> values, float std_mult = MED_CLEANER_MAX_Z); // values is taken by value, so the caller's vector is not modified; std_mult defaults to MED_CLEANER_MAX_Z
50 void get_cleaning_params(vector<float> values); // values is taken by value, so the caller's vector is not modified
51 int clear(vector<float>& values); // NOTE(review): meaning of the int return value is not visible here — confirm
52 int clean(vector<float>& values) { return clear(values); }; // alias of clear(): forwards values and returns clear()'s result unchanged
53 void remove_trim_replace(vector<float> &values); // vector counterpart of single_remove_trim_replace() by name — TODO confirm
54
55 void normalize(vector<float>& values); // vector counterpart of single_normalize() by name — TODO confirm
56
57 bool is_valid(float value); // NOTE(review): validity criterion is defined in the implementation — confirm
58 float get_trimmed(float value); // takes the value by copy and returns a float; the argument itself is not modified
59 float get_value(float value); // takes the value by copy and returns a float; the argument itself is not modified
60 int trim(float& value); // value is a non-const reference; NOTE(review): meaning of the int return is not visible here
61 void single_remove_trim_replace(float &val); // operates on a single value through a non-const reference
62 void single_normalize(float &val); // operates on a single value through a non-const reference
63
64 ADD_CLASS_NAME(MedCleaner) // macro defined outside this header; expansion not visible here
65 size_t get_size(); // per the generated docs for this class: gets the byte size for serialization
66 size_t serialize(unsigned char *buffer); // per the generated docs: serializes the object into buffer and returns the number of bytes written
67 size_t deserialize(unsigned char *buffer); // per the generated docs: deserializes the object from buffer and returns the number of bytes read
68 string object_json() const; // const member returning a string; JSON content by name — TODO confirm format
69};
70
72
73
74#endif
An Abstract class that can be serialized and written/read from file.
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition MedCleaner.h:25
size_t get_size()
Gets the size in bytes needed for serialization.
Definition MedCleaner.cpp:343
size_t deserialize(unsigned char *buffer)
Deserializes a blob into the object. Returns the number of bytes read.
Definition MedCleaner.cpp:380
size_t serialize(unsigned char *buffer)
Serializes the object to blob memory. Returns the number of bytes written to memory.
Definition MedCleaner.cpp:348
Definition SerializableObject.h:32
Copyright 2015-2023 by XGBoost Contributors.