Medial Code Documentation
Loading...
Searching...
No Matches
InputTesters.h
1#pragma once
2#include <string>
3#include <MedProcessTools/MedProcessTools/SampleFilter.h>
6
7using namespace std;
8
9typedef enum { // kinds of input tester; a tester stores one of these (cast to int) in its `type` member
10 INPUT_TESTER_TYPE_UNDEFINED = 0, // default value of `type` in the base tester
11 INPUT_TESTER_TYPE_SIMPLE = 1, // set by the simple tester (see the InputTesterSimple description)
12 INPUT_TESTER_TYPE_ATTR = 2, // set by the attribute tester (see the InputTesterAttr description)
13 INPUT_TESTER_TYPE_FEATURE_JSON = 3 // set by the tester that holds json_model_path / feature_name
14} InputTesterType;
15
16typedef enum { // when a tester is applied relative to the model; stored (cast to int) in the tester's `stage` member
17 TESTER_STAGE_UNDEFINED = 0, // default value of `stage` in the base tester
18 TESTER_STAGE_BEFORE_MODEL = 1, // tester is applied before running the model
19 TESTER_STAGE_AFTER_MODEL = 2 // tester is applied after running the model
20} TesterStage;
21
22//==============================================================================================================
23// InputTester : holds a single tester - this is the base class
24//==============================================================================================================
26
27public:
28 // the type of the tester: an InputTesterType value stored as int
29 int type = (int)INPUT_TESTER_TYPE_UNDEFINED;
30
31 int stage = (int)TESTER_STAGE_UNDEFINED; // a TesterStage value stored as int
32
33 // the tester can be defined as a warning only (0 by default)
34 int is_warning = 0;
35
36 // return codes and messages to return in case of not passing the test
37 int externl_rc = 0; // rcs -1 and 0 are reserved. NOTE(review): spelled "externl_rc" here but "external_rc" in the result fields later in this file
38 int internal_rc = 0; // rcs -1 and 0 are reserved
39 string err_msg = "";
40 string cant_evel_msg = ""; // message for the case the test could not be evaluated (result < 0)
41
42 int max_outliers_flag = 0; // whether or not this tester is used to accumulate outliers counts
43
45
46 string tester_params; // params for the internal tester
47
48 // initialize from string; the base implementation ignores in_str and does nothing
49 virtual void input_from_string(const string &in_str) { return; };
50
51 // testing the tester on a given rep for a certain pid,timestamp
52 // returns: 1: passes the test , 0: did not pass , -1: could not test
53 // also returns: nvals (if relevant): number of tests in the window time defined in the test
54 // noutliers (if relevant) : number of outliers found
54 // the base implementation sets nvals and noutliers to 0 and returns -1 (could not test)
55 virtual int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, int &nvals, int &noutliers) { nvals = 0; noutliers = 0; return -1; }
56
57 virtual int test_if_ok(MedSample &sample) { return -1; }; // 1: good to go 0: did not pass -1: could not test (the base implementation always returns -1)
58
59 virtual int test_if_ok(int pid, long long timestamp,
60 const unordered_map<string, unordered_set<string>> &dict_unknown) { return -1; } // base implementation ignores its arguments and always returns -1
61
62 // convenience overload that forwards to the 5-argument version and drops the counts. 1: good to go 0: did not pass -1: could not test
63 int test_if_ok(MedPidRepository &rep, int pid, long long timestamp) {
64 int nvals, noutliers; // scratch outputs for the forwarded call; not returned to the caller
65 return test_if_ok(rep, pid, timestamp, nvals, noutliers);
66 }
67
68 void print(); // declaration only; the definition is not part of this listing
69
70 // get a new InputTester; it_type is presumably an InputTesterType value - TODO confirm against the definition
71 static InputTester *make_input_tester(int it_type);
72 static int name_to_input_tester_type(const string &name); // returns an int type code for a tester name; mapping is defined elsewhere
73
74 virtual ~InputTester() {}; // virtual: testers are deleted through InputTester pointers elsewhere in this file
75};
76//==============================================================================================================
77
78//==============================================================================================================
79// InputTesterSimple : an implementation that is able to run one of the following tests:
80// (1) test that the signal actually exists by name (in the signals list in the repository)
81// (2) within a given window: minimal number of tests
82// (3) within a given window: maximal number of outliers
83// (4) count outliers within a given window
84// (5) within a given window: maximal number of tests
85//
86// Does this using the object SanitySimpleFilter defined in MedProcessTools/SampleFilter.h
87//==============================================================================================================
89
90public:
92 string err_message_template;
93
95 type = (int)INPUT_TESTER_TYPE_SIMPLE; // presumably the constructor body (its header line is missing from this listing): tags the tester as SIMPLE
96 stage = (int)TESTER_STAGE_BEFORE_MODEL; // this tester is marked to run before the model
97 }
98
99 void input_from_string(const string &in_str); // same signature as the base-class initializer; defined elsewhere
100 int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, int &nvals, int &noutliers); // 1: good to go 0: did not pass -1: could not test
101
102 int test_if_ok(int pid, long long timestamp, const unordered_map<string, unordered_set<string>> &dict_unknown); // declaration only; result codes presumably as above - TODO confirm
103};
104//==============================================================================================================
105// InputTesterAttr : an implementation that is able to test the attributes created in the samples file of a model
106// (1) test that the attribute exists (it should be there or an error will be reported)
107// (2) test that its value does not exceed some bound (<=)
108//
109// Does this by directly testing the given sample
110//==============================================================================================================
112
113public:
114 string attr_name; // name of the sample attribute to test
115 float attr_max_val; // upper bound for the attribute value. NOTE(review): no in-class initializer and the visible constructor body does not set it - confirm it is always set before use
116
118 type = (int)INPUT_TESTER_TYPE_ATTR; // presumably the constructor body (its header line is missing from this listing): tags the tester as ATTR
119 stage = (int)TESTER_STAGE_AFTER_MODEL; // this tester is marked to run after the model
120 }
121
122 void input_from_string(const string &in_str);
123 int init(map<string, string>& mapper); // init the object from parsed fields
124 int test_if_ok(MedSample &sample); // 1: good to go 0: did not pass -1: could not test
125
126 int test_if_ok(int pid, long long timestamp, const unordered_map<string, unordered_set<string>> &dict_unknown) { return 1; } // ignores its arguments and always returns 1
127};
128
137private:
138 MedModel feature_generator; // NOTE(review): presumably the model used to generate the tested feature - confirm in the .cpp
139 bool _learned = false;
140 string resolved_feat_name = "";
141 vector<string> req_signals;
142public:
143 bool is_binary_model = false; // if true it is a trained model
144 string json_model_path = ""; // relative path to am config, in same folder
145 string feature_name = ""; // feature name to look for
146 float feat_min_val = MED_MAT_MISSING_VALUE; // when left as the missing value: no lower limit
147 float feat_max_val = MED_MAT_MISSING_VALUE; // when left as the missing value: no upper limit
148 bool verbose_learn = true; // can control output to screen the first time
149 bool verbose_apply = false; // can control output to screen on apply
150 unordered_set<string> allow_missing_signals; // list of signals that are allowed to be missing
151
153 type = (int)INPUT_TESTER_TYPE_FEATURE_JSON; // presumably the constructor body (its header line is missing from this listing)
154 stage = (int)TESTER_STAGE_BEFORE_MODEL; // this tester is marked to run before the model
155 }
156
157 void input_from_string(const string &in_str);
158 int init(map<string, string>& mapper); // init the object from parsed fields
160 int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, int &nvals, int &noutliers); // 1: good to go 0: did not pass -1: could not test
161
162 int test_if_ok(int pid, long long timestamp, const unordered_map<string, unordered_set<string>> &dict_unknown) { return 1; } // ignores its arguments and always returns 1
163};
164//==============================================================================================================
165
167 int external_rc = 0; // NOTE(review): presumably mirrors the tester's externl_rc (note the different spelling) - confirm where results are filled
168 int internal_rc = 0; // NOTE(review): presumably mirrors the tester's internal_rc - confirm
169 string err_msg = ""; // message attached to this result; empty by default
170};
171//==============================================================================================================
172// InputSanityTester : able to read a config file containing several tests and test them.
173// Format of config file:
174// # comment lines start with #
175// NAME <name of tester : for debug prints, etc>
176// # each filter defined using:
177// FILTER <filter type>|<filter params>|warning_or_error|use_for_max_outliers_flag|external_rc|internal_rc|err_msg
178// warning_or_error: values are WARNING or ERROR
179// use_for_max_outliers_flag: ACC=yes or ACC=no
180// # max_overall_outliers config
181// MAX_OVERALL_OUTLIERS <number>
182//==============================================================================================================
184
185public:
186 vector<InputTester *> testers; // owned raw pointers: clear() deletes every non-NULL entry
187 int max_overall_outliers = (int)1e9; // default 1e9; see MAX_OVERALL_OUTLIERS in the config format description
188 string name = ""; // see NAME in the config format description
189
190
191 ~InputSanityTester() { clear(); } // releases the owned testers. NOTE(review): no copy/move control is visible in this listing; copying an instance would presumably double-delete - confirm
192
193 int read_config(const string &f_conf); // reads the config file described in the comment block of this class; defined elsewhere
194
195 int test_if_ok(int pid, long long timestamp, const unordered_map<string, unordered_set<string>> &dict_unknown, vector<InputSanityTesterResult> &res);
196
197 // tests all simple testers (Before running model)
198 int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, int &nvals, int &noutliers, vector<InputSanityTesterResult> &res); // tests and stops at first cardinal failed test
199
200 // convenience overload: forwards to the version with nvals/noutliers and drops the counts; tests and stops at first cardinal failed test
201 int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, vector<InputSanityTesterResult> &res) {
202 int nvals, noutliers; // scratch outputs for the forwarded call; not returned to the caller
203 return test_if_ok(rep, pid, timestamp, nvals, noutliers, res);
204 }
205
206 // tests all attr testers for a single given sample (After running model)
206 // NOTE(review): res is presumably filled with one entry per failed tester - confirm in the definition
207 int test_if_ok(MedSample &sample, vector<InputSanityTesterResult> &res);
208
209 void clear() { // deletes all held testers and restores the members to their default values
210 for (auto &p_it : testers)
211 if (p_it != NULL) delete p_it; // deleting a null pointer is a no-op, so this check is only defensive
212 testers.clear(); // drop the now-dangling pointers
213 max_overall_outliers = (int)1e9; // same value as the in-class default
214 name = "";
215 }
216};
217//==============================================================================================================
An Abstract class that can be serialized and written/read from file.
Definition InputTesters.h:183
Definition InputTesters.h:111
int init(map< string, string > &mapper)
Virtual to init object from parsed fields.
Definition AlgoMarkerInternal.cpp:61
=====================================================================================================...
Definition InputTesters.h:136
bool verbose_learn
can control output to screen the first time
Definition InputTesters.h:148
int init(map< string, string > &mapper)
Virtual to init object from parsed fields.
Definition AlgoMarkerInternal.cpp:238
string feature_name
feature name to look for
Definition InputTesters.h:145
float feat_max_val
when missing value, no limit
Definition InputTesters.h:147
bool verbose_apply
can control output to screen on apply
Definition InputTesters.h:149
int test_if_ok(MedPidRepository &rep, int pid, long long timestamp, int &nvals, int &noutliers)
1: good to go 0: did not pass -1: could not test
Definition AlgoMarkerInternal.cpp:140
unordered_set< string > allow_missing_signals
list of signals that are allowed to be missing
Definition InputTesters.h:150
string json_model_path
relative path to am config, in same folder
Definition InputTesters.h:144
float feat_min_val
when missing value, no limit
Definition InputTesters.h:146
bool is_binary_model
if true it is trained model
Definition InputTesters.h:143
Definition InputTesters.h:88
Definition InputTesters.h:25
bool stop_processing_more_errors
if true will stop process more errors
Definition InputTesters.h:44
string cant_evel_msg
message when the test can't be evaluated (result < 0)
Definition InputTesters.h:40
A model = repCleaner + featureGenerator + featureProcessor + MedPredictor.
Definition MedModel.h:56
Definition MedPidRepository.h:87
MedSample represents a single sample: id + time (date). Additional (optional) entries: outcome,...
Definition MedSamples.h:20
SanitySimpleFilter helps make sanity tests on input data. The basic optional tests are: (1) te...
Definition SampleFilter.h:412
Definition SerializableObject.h:32
Definition StdDeque.h:58
Definition InputTesters.h:166