Medial Code Documentation
Loading...
Searching...
No Matches
SampleFilter.h
1#ifndef _SAMPLE_FILTER_H_
2#define _SAMPLE_FILTER_H_
3
4#include "InfraMed/InfraMed/InfraMed.h"
5#include "MedProcessTools/MedProcessTools/MedSamples.h"
7#include "InfraMed/InfraMed/MedPidRepository.h"
9#include <MedMat/MedMat/MedMat.h>
10
11#define DEFAULT_SMPL_FLTR_NTHREADS 8
12
13//.......................................................................................
25//.......................................................................................
26
27//.......................................................................................
30//.......................................................................................
31typedef enum {
32 SMPL_FILTER_TRN,
33 SMPL_FILTER_TST,
34 SMPL_FILTER_OUTLIERS,
35 SMPL_FILTER_MATCH,
36 SMPL_FILTER_REQ_SIGNAL,
37 SMPL_FILTER_BASIC,
38 SMPL_FILTER_LAST
39} SampleFilterTypes;
40
42public:
43
44 // Type
45 SampleFilterTypes filter_type;
46
47 // create a new sample filter
49 static SampleFilter *make_filter(string name);
51 static SampleFilter *make_filter(SampleFilterTypes type);
53 static SampleFilter *make_filter(string name, string params);
55 static SampleFilter *make_filter(SampleFilterTypes type, string params);
56
57 // Initialization
59 virtual int init(void *params) { return 0; };
61 virtual int init(map<string, string>& mapper) { return 0; };
63 virtual void init_defaults() {};
64
65 // Learning : Actually learn
67 virtual int _learn(MedRepository& rep, MedSamples& samples) { return _learn(samples); }
69 virtual int _learn(MedSamples& samples) { return 0; }
70
71 // Learning : Envelopes (Here because of problems with overload + inheritance)
73 virtual int learn(MedRepository& rep, MedSamples& samples) { return _learn(rep,samples); }
75 virtual int learn(MedSamples& samples) { return _learn(samples); }
76
77 // Filtering
79 virtual int _filter(MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples) {return _filter(inSamples,outSamples) ;}
81 virtual int _filter(MedSamples& inSamples, MedSamples& outSamples) = 0;
82
83 // Filtering : Envelopes (Here because of problems with overload + inheritance)
85 virtual int filter(MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples) { return _filter(rep, inSamples, outSamples); }
87 int filter(MedRepository& rep, MedSamples& samples);
89 virtual int filter(MedSamples& inSamples, MedSamples& outSamples) { return _filter(inSamples, outSamples); }
91 int filter(MedSamples& samples);
92
94 virtual void get_required_signals(vector<string>& req_sigs) {return; }
95
96 // Serialization (including type)
97 ADD_CLASS_NAME(SampleFilter)
99 virtual void *new_polymorphic(string derived_class_name);
100
102 size_t get_filter_size();
104 size_t filter_serialize(unsigned char *blob);
105};
106
107// Utilities
109SampleFilterTypes sample_filter_name_to_type(const string& filter_name);
110
111//.......................................................................................
115//.......................................................................................
117public:
118
120 BasicTrainFilter() { filter_type = SMPL_FILTER_TRN; };
121
123 int _filter(MedSamples& inSamples, MedSamples& outSamples);
124 ADD_CLASS_NAME(BasicTrainFilter)
126};
127
128//.......................................................................................
132//.......................................................................................
134public:
135
137 BasicTestFilter() { filter_type = SMPL_FILTER_TST; };
138 ~BasicTestFilter() {};
139
141 int _filter(MedSamples& inSamples, MedSamples& outSamples);
142
143 ADD_CLASS_NAME(BasicTestFilter)
145};
146
147#define SMPL_FLTR_TRIMMING_SD_NUM 7
148#define SMPL_FLTR_REMOVING_SD_NUM 7
149//.......................................................................................
154//.......................................................................................
156public:
157
160
162 int _learn(MedSamples& samples);
164 int iterativeLearn(MedSamples& samples);
166 int quantileLearn(MedSamples& samples);
168 void get_values(MedSamples& samples, vector<float>& values);
169
171 int _filter(MedSamples& inSamples, MedSamples& outSamples);
172
174 int init(map<string, string>& mapper) { return MedValueCleaner::init(mapper); }
177 filter_type = SMPL_FILTER_OUTLIERS;
178 params.trimming_sd_num = SMPL_FLTR_TRIMMING_SD_NUM; params.removing_sd_num = SMPL_FLTR_REMOVING_SD_NUM;
179 params.take_log = 0;
180 params.doTrim = false;
181 params.doRemove = true;
183 params.missing_value = MED_MAT_MISSING_VALUE;
184 };
185
186 // Serialization
187 ADD_CLASS_NAME(OutlierSampleFilter)
189};
190
191//.......................................................................................
194//.......................................................................................
195typedef enum {
196 SMPL_MATCH_SIGNAL,
197 SMPL_MATCH_AGE,
198 SMPL_MATCH_TIME,
199 SMPL_MATCH_FTR,
200 SMPL_MATCH_LAST
201} SampleMatchingType;
202
203//.......................................................................................
206//.......................................................................................
208public:
209
210 SampleMatchingType match_type;
211
212 // Matching details
213 string signalName;
214 string featureName,resolvedFeatureName;
215 int timeWindow, windowTimeUnit ;
217 float resolution ;
218
223
224 // Serialization
225 // Serialization
226 ADD_CLASS_NAME(matchingParams)
228};
229
230//.......................................................................................
243//.......................................................................................
245public:
246
247 vector<matchingParams> matchingStrata;
248
252 int verbose = 0;
253 float match_to_prior = -1;
254
255 // helpers
258
261
262 // Initialization
264 int init(map<string, string>& mapper);
266 void init_defaults() { filter_type = SMPL_FILTER_MATCH; };
267
269 int addMatchingStrata(string& init_string);
270
271 // Utilities
273 bool isRepRequired();
275 bool isAgeRequired();
278 int getSampleSignature(MedSample& sample, MedFeatures& features, int i, MedRepository& rep, string& signature);
281 int addToSampleSignature(MedSample& sample, matchingParams& stratum, MedFeatures& features, int i, MedRepository& rep, string& signature);
284 int initHelpers(MedSamples& inSamples, MedFeatures& features, MedRepository& rep);
286 void get_required_signals(vector<string>& req_sigs);
287
289 int _filter(MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples);
292 int _filter(MedSamples& inSamples, MedSamples& outSamples);
293
295 int _filter(MedFeatures& features, MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples);
298 int _filter(MedFeatures& features, MedSamples& inSamples, MedSamples& outSamples);
299
300 // Serialization
301 ADD_CLASS_NAME(MatchingSampleFilter)
303};
304
305//.......................................................................................
310//.......................................................................................
312public:
313
314 string signalName;
315 int timeWindow, windowTimeUnit;
316
319
321 int init(map<string, string>& mapper);
323 void init_defaults();
324
325 // Filter
327 int _filter(MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples);
330 int _filter(MedSamples& inSamples, MedSamples& outSamples);
331
332 // Serialization
333 ADD_CLASS_NAME(RequiredSignalFilter)
335
336};
337
338//.......................................................................................
341//.......................................................................................
343 string sig_name;
344 int win_from = 0;
345 int win_to = (int)(1<<30);
346 float min_val = -1e10;
347 float max_val = 1e10;
348 int min_Nvals = 1; ;
349 int time_channel = 0;
350 int val_channel = 0;
351
353 int init_from_string(const string &init_str);
356 int test_filter(MedSample &sample, MedRepository &rep, int win_time_unit);
357
358 ADD_CLASS_NAME(BasicFilteringParams)
360
361private:
362 int sig_id = -1;
363 int use_byear= 0;
364};
365
366//...........................................................................................................
372//...........................................................................................................
374public:
375
376 // filtering parameters
378 int max_sample_time = (int)(1<<30);
379 vector<BasicFilteringParams> bfilters;
380 int min_bfilter = -1;
382
383 // next is initialized with init string
384 vector<string> req_sigs;
385
387 int init(map<string, string>& mapper);
389 void get_required_signals(vector<string>& req_sigs);
390
391 // Filter
393 int _filter(MedRepository& rep, MedSamples& inSamples, MedSamples& outSamples);
396 int _filter(MedSamples& inSamples, MedSamples& outSamples);
397
398 ADD_CLASS_NAME(BasicSampleFilter)
400};
401
402//...........................................................................................................
411//...........................................................................................................
413public:
414 string sig_name;
415 int win_from = 0;
416 int win_to = (int)(1 << 30);
417 float min_val = -1e10;
418 float max_val = 1e10;
419 int min_Nvals = -1; ;
420 int max_Nvals = -1; ;
421 int time_channel = 0;
422 int val_channel = 0;
423 int max_outliers = -1;
426 unordered_set<float> allowed_values;
428 int min_left = -1;
429
430
431
433 int init_from_string(const string &init_str);
437 int nvals, noutliers; return test_filter(sample, rep, nvals, noutliers);
438 }
441 int test_filter(MedSample &sample, MedRepository &rep, int &nvals, int &noutliers);
442
443 // test_filter return codes
444 const static int Passed = 0;
445 const static int Failed = 1; // General fail due to reasons different than the following
446 const static int Signal_Not_Valid = 2;
447 const static int Failed_Min_Nvals = 3;
448 const static int Failed_Max_Nvals = 4;
449 const static int Failed_Outliers = 5;
450 const static int Failed_Age = 6;
451 const static int Failed_Age_No_Byear = 7;
452 const static int Failed_Allowed_Values = 8;
453 const static int Failed_Dictionary_Test = 9;
454 const static int Failed_Not_Enough_Non_Outliers_Left = 10;
455
456 ADD_CLASS_NAME(SanitySimpleFilter)
458
459private:
460 int sig_id = -1;
461 int section_id = -1;
462 int bdate_id = -1;
463 bool used_byear = false;
464};
465
466//=======================================
467// Joining the MedSerialize wagon
468//=======================================
479
480#endif
A parent class for single-value cleaners.
@ VAL_CLNR_ITERATIVE
"iterative"
Definition MedValueCleaner.h:12
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
BasicSampleFilter is a general filter to allow the following basics: (1) min and max time of outcom...
Definition SampleFilter.h:373
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:867
vector< BasicFilteringParams > bfilters
vector of filters to apply
Definition SampleFilter.h:379
int min_bfilter
-1: force each bfilter to pass , other n : at least n bfilters must pass
Definition SampleFilter.h:380
int max_sample_time
maximal allowed time (should always be given in the samples' time-unit)
Definition SampleFilter.h:378
int min_sample_time
minimal allowed time (should always be given in the samples' time-unit)
Definition SampleFilter.h:377
int winsTimeUnit
time unit to be used
Definition SampleFilter.h:381
vector< string > req_sigs
useful to load the repository needed for this filter
Definition SampleFilter.h:384
void get_required_signals(vector< string > &req_sigs)
Get all signals required for filtering.
Definition SampleFilter.cpp:849
int init(map< string, string > &mapper)
initialize from a map : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.cpp:819
Test set filter.
Definition SampleFilter.h:133
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:177
BasicTestFilter()
constructor
Definition SampleFilter.h:137
Training set filter: takes all control samples (outcome=0) and all cases before outcomeTime.
Definition SampleFilter.h:116
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:149
BasicTrainFilter()
constructor
Definition SampleFilter.h:120
Matching filter
Definition SampleFilter.h:244
int samplesTimeUnit
Time unit of samples.
Definition SampleFilter.h:256
MatchingSampleFilter()
Constructor.
Definition SampleFilter.h:260
void init_defaults()
init to defaults
Definition SampleFilter.h:266
float match_to_prior
If given (0-1) will ignore price ratio and will match to this prior.
Definition SampleFilter.h:253
int verbose
control level of debug printing
Definition SampleFilter.h:252
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:369
int bdateId
signal-id for byear
Definition SampleFilter.h:257
bool isAgeRequired()
Check if age is needed for matching.
Definition SampleFilter.cpp:471
int addMatchingStrata(string &init_string)
Add a matching stratum defined by a string.
Definition SampleFilter.cpp:296
vector< matchingParams > matchingStrata
Matching parameters.
Definition SampleFilter.h:247
int min_group_size
minimal group size to take - smaller than that, will drop
Definition SampleFilter.h:250
float eventToControlPriceRatio
Cost of removing case relative to removing control.
Definition SampleFilter.h:249
float maxControlToEventRatio
maximal allowed control/case ratio
Definition SampleFilter.h:251
int addToSampleSignature(MedSample &sample, matchingParams &stratum, MedFeatures &features, int i, MedRepository &rep, string &signature)
add indexing of a single sample according to a single stratum to sample's index
Definition SampleFilter.cpp:543
void get_required_signals(vector< string > &req_sigs)
Get all signals required for matching.
Definition SampleFilter.cpp:608
bool isRepRequired()
Check if repository is needed for matching (e.g. strata includes signal/age)
Definition SampleFilter.cpp:459
int initHelpers(MedSamples &inSamples, MedFeatures &features, MedRepository &rep)
initialize values of required helpers
Definition SampleFilter.cpp:484
int getSampleSignature(MedSample &sample, MedFeatures &features, int i, MedRepository &rep, string &signature)
Indexing of a single sample according to strata.
Definition SampleFilter.cpp:530
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.cpp:265
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Definition InfraMed.h:303
MedSample represents a single sample: id + time (date). Additional (optional) entries: outcome,...
Definition MedSamples.h:20
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
static const int Days
days since 1900/01/01
Definition MedTime.h:28
static const int Date
dates are in full regular format YYYYMMDD
Definition MedTime.h:25
Definition MedValueCleaner.h:61
float removeMax
Thresholds for removing.
Definition MedValueCleaner.h:71
ValueCleanerParams params
Learning parameters.
Definition MedValueCleaner.h:65
Outliers filter.
Definition SampleFilter.h:155
int quantileLearn(MedSamples &samples)
Learning : learn outliers using MedValueCleaner's quantile approximation of moments.
Definition SampleFilter.cpp:240
int iterativeLearn(MedSamples &samples)
Learning : learn outliers using MedValueCleaner's iterative approximation of moments.
Definition SampleFilter.cpp:229
OutlierSampleFilter()
constructor
Definition SampleFilter.h:159
void init_defaults()
init to defaults
Definition SampleFilter.h:176
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:192
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.h:174
void get_values(MedSamples &samples, vector< float > &values)
Helper for learning - extract all outcomes from samples.
Definition SampleFilter.cpp:251
int _learn(MedSamples &samples)
Learning : check outlier-detection method and call appropriate learner (iterative/quantile)
Definition SampleFilter.cpp:215
Required Signal Filter
Definition SampleFilter.h:311
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:658
int windowTimeUnit
Time before sample-time (and time-unit)
Definition SampleFilter.h:315
void init_defaults()
init to defaults
Definition SampleFilter.cpp:650
string signalName
Required signal.
Definition SampleFilter.h:314
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.cpp:631
RequiredSignalFilter()
Constructor.
Definition SampleFilter.h:318
Definition SampleFilter.h:41
virtual int init(void *params)
initialize from a params object : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:59
virtual int _learn(MedSamples &samples)
learn without repository : Should be implemented for inheriting classes that learn parameters
Definition SampleFilter.h:69
virtual int filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
filter with repository
Definition SampleFilter.h:85
SampleFilterTypes filter_type
The type of the filter.
Definition SampleFilter.h:45
virtual int _filter(MedSamples &inSamples, MedSamples &outSamples)=0
_filter without repository : Should be implemented for all inheriting classes
virtual int learn(MedRepository &rep, MedSamples &samples)
learn with repository
Definition SampleFilter.h:73
size_t filter_serialize(unsigned char *blob)
serialize filter + filter_type
Definition SampleFilter.cpp:107
static SampleFilter * make_filter(string name)
create a new sample filter from name
Definition SampleFilter.cpp:39
size_t get_filter_size()
get size of filter + filter_type
Definition SampleFilter.cpp:102
virtual int _learn(MedRepository &rep, MedSamples &samples)
learn with repository : Should be implemented for inheriting classes that learn parameters using Repo...
Definition SampleFilter.h:67
virtual int learn(MedSamples &samples)
learn without repository
Definition SampleFilter.h:75
virtual int filter(MedSamples &inSamples, MedSamples &outSamples)
filter without repository : Should be implemented for all inheriting classes
Definition SampleFilter.h:89
virtual int init(map< string, string > &mapper)
initialize from a map : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:61
virtual void init_defaults()
initialize to default values : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:63
virtual void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition SampleFilter.cpp:52
virtual void get_required_signals(vector< string > &req_sigs)
Get all signals required for filtering : Should be implemented for inheriting classes that filter usi...
Definition SampleFilter.h:94
virtual int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
filter with repository : Should be implemented for inheriting classes that filter using Repository in...
Definition SampleFilter.h:79
SanitySimpleFilter helps make sanity tests on input data. The basic optional tests are: (1) te...
Definition SampleFilter.h:412
int win_from
Time window for deciding on filtering - start.
Definition SampleFilter.h:415
float min_val
Allowed values range for signal - minimum.
Definition SampleFilter.h:417
int val_channel
signal value channel to consider
Definition SampleFilter.h:422
int test_filter(MedSample &sample, MedRepository &rep)
Test filtering criteria.
Definition SampleFilter.h:436
int max_outliers
maximal allowed number of outliers. -1 means don't do the max_outliers test
Definition SampleFilter.h:423
int win_time_unit
time unit to be used
Definition SampleFilter.h:424
int values_in_dictionary
flag: if 1: make sure all given values are valid - that is, are in the signal dictionary.
Definition SampleFilter.h:427
int max_Nvals
Maximal allowed number of instances of signal within time window.
Definition SampleFilter.h:420
int samples_time_unit
time unit to be used
Definition SampleFilter.h:425
float max_val
Allowed values range for signal - maximum.
Definition SampleFilter.h:418
int min_left
test the min number of instances left that are not outliers
Definition SampleFilter.h:428
int init_from_string(const string &init_str)
Initialization from string.
Definition SampleFilter.cpp:925
string sig_name
Name of signal to filter by.
Definition SampleFilter.h:414
int win_to
Time window for deciding on filtering - end.
Definition SampleFilter.h:416
int time_channel
signal time channel to consider
Definition SampleFilter.h:421
Definition SerializableObject.h:32
MatchingParams defines a specific matching criterion.
Definition SampleFilter.h:207
float resolution
binning resolution
Definition SampleFilter.h:217
int matchingTimeUnit
time-unit for matching by time
Definition SampleFilter.h:216
int signalId
Helpers (for matching by signal)
Definition SampleFilter.h:220
SampleMatchingType match_type
matching criterion
Definition SampleFilter.h:210
int windowTimeUnit
time-window info For matching by signal
Definition SampleFilter.h:215
int signalTimeUnit
matching signal time-unit
Definition SampleFilter.h:222
string resolvedFeatureName
feature name for matching by feature
Definition SampleFilter.h:214
string signalName
signal name for matching by signal
Definition SampleFilter.h:213
bool isTimeDependent
flag: is the signal time-dependent (e.g. hemoglobin) or not (e.g. byear)
Definition SampleFilter.h:221
BasicFilteringParams defines filtering parameters for BasicFilter with helpers.
Definition SampleFilter.h:342
int val_channel
signal value channel to consider
Definition SampleFilter.h:350
float max_val
Allowed values range for signal - maximum.
Definition SampleFilter.h:347
int time_channel
signal time channel to consider
Definition SampleFilter.h:349
int init_from_string(const string &init_str)
Initialization from string.
Definition SampleFilter.cpp:719
int win_to
Time window for deciding on filtering - end.
Definition SampleFilter.h:345
float min_val
Allowed values range for signal - minimum.
Definition SampleFilter.h:346
int test_filter(MedSample &sample, MedRepository &rep, int win_time_unit)
Test filtering criteria.
Definition SampleFilter.cpp:743
string sig_name
Name of signal to filter by.
Definition SampleFilter.h:343
int win_from
Time window for deciding on filtering - start.
Definition SampleFilter.h:344