1#ifndef _SAMPLE_FILTER_H_
2#define _SAMPLE_FILTER_H_
4#include "InfraMed/InfraMed/InfraMed.h"
5#include "MedProcessTools/MedProcessTools/MedSamples.h"
7#include "InfraMed/InfraMed/MedPidRepository.h"
9#include <MedMat/MedMat/MedMat.h>
11#define DEFAULT_SMPL_FLTR_NTHREADS 8
36 SMPL_FILTER_REQ_SIGNAL,
/// Initialize from a params object. This base implementation ignores `params` and returns 0;
/// inheriting classes that have parameters should provide their own implementation.
59 virtual int init(
void *params) {
return 0; };
/// Initialize from a name-to-value map. This base implementation ignores `mapper` and returns 0;
/// inheriting classes that have parameters should provide their own implementation.
61 virtual int init(map<string, string>& mapper) {
return 0; };
109SampleFilterTypes sample_filter_name_to_type(
const string& filter_name);
147#define SMPL_FLTR_TRIMMING_SD_NUM 7
148#define SMPL_FLTR_REMOVING_SD_NUM 7
/// Initialize from a map: forwards `mapper` to MedValueCleaner::init and returns its result.
174 int init(map<string, string>& mapper) {
return MedValueCleaner::init(mapper); }
178 params.trimming_sd_num = SMPL_FLTR_TRIMMING_SD_NUM;
params.removing_sd_num = SMPL_FLTR_REMOVING_SD_NUM;
183 params.missing_value = MED_MAT_MISSING_VALUE;
264 int init(map<string, string>& mapper);
321 int init(map<string, string>& mapper);
387 int init(map<string, string>& mapper);
419 int min_Nvals = -1; ;
426 unordered_set<float> allowed_values;
437 int nvals, noutliers;
return test_filter(sample, rep, nvals, noutliers);
444 const static int Passed = 0;
445 const static int Failed = 1;
446 const static int Signal_Not_Valid = 2;
447 const static int Failed_Min_Nvals = 3;
448 const static int Failed_Max_Nvals = 4;
449 const static int Failed_Outliers = 5;
450 const static int Failed_Age = 6;
451 const static int Failed_Age_No_Byear = 7;
452 const static int Failed_Allowed_Values = 8;
453 const static int Failed_Dictionary_Test = 9;
454 const static int Failed_Not_Enough_Non_Outliers_Left = 10;
457 ADD_SERIALIZATION_FUNCS(
sig_name,
time_channel,
val_channel,
win_from,
win_to,
min_val,
max_val, min_Nvals,
max_Nvals, allowed_values,
values_in_dictionary,
max_outliers,
win_time_unit)
463 bool used_byear =
false;
A parent class for single-value cleaners.
@ VAL_CLNR_ITERATIVE
"iterative"
Definition MedValueCleaner.h:12
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
BasicSimpleFilter is a general filter to allow the following basics: (1) min and max time of outcom...
Definition SampleFilter.h:373
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:867
vector< BasicFilteringParams > bfilters
vector of filters to apply
Definition SampleFilter.h:379
int min_bfilter
-1: force every bfilter to pass; any other value n: at least n bfilters must pass
Definition SampleFilter.h:380
int max_sample_time
maximal allowed time (should always be given in the samples' time-unit)
Definition SampleFilter.h:378
int min_sample_time
minimal allowed time (should always be given in the samples' time-unit)
Definition SampleFilter.h:377
int winsTimeUnit
time unit to be used
Definition SampleFilter.h:381
vector< string > req_sigs
useful to load the repository needed for this filter
Definition SampleFilter.h:384
void get_required_signals(vector< string > &req_sigs)
Get all signals required for filtering.
Definition SampleFilter.cpp:849
int init(map< string, string > &mapper)
initialize from a map : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.cpp:819
Test set filter.
Definition SampleFilter.h:133
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:177
BasicTestFilter()
constructor
Definition SampleFilter.h:137
Training set filter: takes all control samples (outcome=0) and all case samples before outcomeTime.
Definition SampleFilter.h:116
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:149
BasicTrainFilter()
constructor
Definition SampleFilter.h:120
Matching filter
Definition SampleFilter.h:244
int samplesTimeUnit
Time unit of samples.
Definition SampleFilter.h:256
MatchingSampleFilter()
Constructor.
Definition SampleFilter.h:260
void init_defaults()
init to defaults
Definition SampleFilter.h:266
float match_to_prior
If given (0-1) will ignore price ratio and will match to this prior.
Definition SampleFilter.h:253
int verbose
control level of debug printing
Definition SampleFilter.h:252
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:369
int bdateId
signal-id for byear
Definition SampleFilter.h:257
bool isAgeRequired()
Check if age is needed for matching.
Definition SampleFilter.cpp:471
int addMatchingStrata(string &init_string)
Add a matching stratum defined by a string.
Definition SampleFilter.cpp:296
vector< matchingParams > matchingStrata
Matching parameters.
Definition SampleFilter.h:247
int min_group_size
minimal group size to keep - groups smaller than that are dropped
Definition SampleFilter.h:250
float eventToControlPriceRatio
Cost of removing case relative to removing control.
Definition SampleFilter.h:249
float maxControlToEventRatio
maximal allowed control/case ratio
Definition SampleFilter.h:251
int addToSampleSignature(MedSample &sample, matchingParams &stratum, MedFeatures &features, int i, MedRepository &rep, string &signature)
add indexing of a single sample according to a single stratum to sample's index
Definition SampleFilter.cpp:543
void get_required_signals(vector< string > &req_sigs)
Get all signals required for matching.
Definition SampleFilter.cpp:608
bool isRepRequired()
Check if repository is needed for matching (e.g. strata includes signal/age)
Definition SampleFilter.cpp:459
int initHelpers(MedSamples &inSamples, MedFeatures &features, MedRepository &rep)
initialize values of required helpers
Definition SampleFilter.cpp:484
int getSampleSignature(MedSample &sample, MedFeatures &features, int i, MedRepository &rep, string &signature)
Indexing of a single sample according to strata.
Definition SampleFilter.cpp:530
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.cpp:265
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Definition InfraMed.h:303
MedSample represents a single sample: id + time (date). Additional (optional) entries: outcome,...
Definition MedSamples.h:20
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
static const int Days
days since 1900/01/01
Definition MedTime.h:28
static const int Date
dates are in full regular format YYYYMMDD
Definition MedTime.h:25
Definition MedValueCleaner.h:61
float removeMax
Thresholds for removing.
Definition MedValueCleaner.h:71
ValueCleanerParams params
Learning parameters.
Definition MedValueCleaner.h:65
Outliers filter.
Definition SampleFilter.h:155
int quantileLearn(MedSamples &samples)
Learning : learn outliers using MedValueCleaner's quantile approximation of moments.
Definition SampleFilter.cpp:240
int iterativeLearn(MedSamples &samples)
Learning : learn outliers using MedValueCleaner's iterative approximation of moments.
Definition SampleFilter.cpp:229
OutlierSampleFilter()
constructor
Definition SampleFilter.h:159
void init_defaults()
init to defaults
Definition SampleFilter.h:176
int _filter(MedSamples &inSamples, MedSamples &outSamples)
Filter without repository.
Definition SampleFilter.cpp:192
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.h:174
void get_values(MedSamples &samples, vector< float > &values)
Helper for learning - extract all outcomes from samples.
Definition SampleFilter.cpp:251
int _learn(MedSamples &samples)
Learning : check outlier-detection method and call appropriate learner (iterative/quantile)
Definition SampleFilter.cpp:215
Required Signal Filter
Definition SampleFilter.h:311
int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
Filter with repository.
Definition SampleFilter.cpp:658
int windowTimeUnit
Time before sample-time (and time-unit)
Definition SampleFilter.h:315
void init_defaults()
init to defaults
Definition SampleFilter.cpp:650
string signalName
Required signal.
Definition SampleFilter.h:314
int init(map< string, string > &mapper)
init from map
Definition SampleFilter.cpp:631
RequiredSignalFilter()
Constructor.
Definition SampleFilter.h:318
Definition SampleFilter.h:41
virtual int init(void *params)
initialize from a params object : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:59
virtual int _learn(MedSamples &samples)
learn without repository : Should be implemented for inheriting classes that learn parameters
Definition SampleFilter.h:69
virtual int filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
filter with repository
Definition SampleFilter.h:85
SampleFilterTypes filter_type
The type of the filter.
Definition SampleFilter.h:45
virtual int _filter(MedSamples &inSamples, MedSamples &outSamples)=0
_filter without repository : Should be implemented for all inheriting classes
virtual int learn(MedRepository &rep, MedSamples &samples)
learn with repository
Definition SampleFilter.h:73
size_t filter_serialize(unsigned char *blob)
serialize filter + filter_type
Definition SampleFilter.cpp:107
static SampleFilter * make_filter(string name)
create a new sample filter from name
Definition SampleFilter.cpp:39
size_t get_filter_size()
get size of filter + filter_type
Definition SampleFilter.cpp:102
virtual int _learn(MedRepository &rep, MedSamples &samples)
learn with repository : Should be implemented for inheriting classes that learn parameters using Repo...
Definition SampleFilter.h:67
virtual int learn(MedSamples &samples)
learn without repository
Definition SampleFilter.h:75
virtual int filter(MedSamples &inSamples, MedSamples &outSamples)
filter without repository : Should be implemented for all inheriting classes
Definition SampleFilter.h:89
virtual int init(map< string, string > &mapper)
initialize from a map : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:61
virtual void init_defaults()
initialize to default values : Should be implemented for inheriting classes that have parameters
Definition SampleFilter.h:63
virtual void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition SampleFilter.cpp:52
virtual void get_required_signals(vector< string > &req_sigs)
Get all signals required for filtering : Should be implemented for inheriting classes that filter usi...
Definition SampleFilter.h:94
virtual int _filter(MedRepository &rep, MedSamples &inSamples, MedSamples &outSamples)
filter with repository : Should be implemented for inheriting classes that filter using Repository in...
Definition SampleFilter.h:79
SanitySimpleFilter helps perform sanity tests on input data. The optional basic tests are: (1) te...
Definition SampleFilter.h:412
int win_from
Time window for deciding on filtering - start.
Definition SampleFilter.h:415
float min_val
Allowed values range for signal - minimum.
Definition SampleFilter.h:417
int val_channel
signal value channel to consider
Definition SampleFilter.h:422
int test_filter(MedSample &sample, MedRepository &rep)
Test filtering criteria.
Definition SampleFilter.h:436
int max_outliers
maximal allowed number of outliers. -1 means don't do the max_outliers test
Definition SampleFilter.h:423
int win_time_unit
time unit to be used
Definition SampleFilter.h:424
int values_in_dictionary
flag: if 1, make sure all given values are valid - that is, that they are in the signal dictionary.
Definition SampleFilter.h:427
int max_Nvals
Maximal allowed number of instances of signal within time window.
Definition SampleFilter.h:420
int samples_time_unit
time unit to be used
Definition SampleFilter.h:425
float max_val
Allowed values range for signal - maximum.
Definition SampleFilter.h:418
int min_left
minimal number of instances that must be left after excluding outliers
Definition SampleFilter.h:428
int init_from_string(const string &init_str)
Initialization from string.
Definition SampleFilter.cpp:925
string sig_name
Name of signal to filter by.
Definition SampleFilter.h:414
int win_to
Time window for deciding on filtering - end.
Definition SampleFilter.h:416
int time_channel
signal time channel to consider
Definition SampleFilter.h:421
Definition SerializableObject.h:32
MatchingParams defines a specific matching criterion.
Definition SampleFilter.h:207
float resolution
binning resolution
Definition SampleFilter.h:217
int matchingTimeUnit
time-unit for matching by time
Definition SampleFilter.h:216
int signalId
Helpers (for matching by signal)
Definition SampleFilter.h:220
SampleMatchingType match_type
matching criterion
Definition SampleFilter.h:210
int windowTimeUnit
time-window info For matching by signal
Definition SampleFilter.h:215
int signalTimeUnit
matching signal time-unit
Definition SampleFilter.h:222
string resolvedFeatureName
feature name for matching by feature
Definition SampleFilter.h:214
string signalName
signal name for matching by signal
Definition SampleFilter.h:213
bool isTimeDependent
flag: is the signal time-dependent (e.g. hemoglobin) or not (e.g. byear)
Definition SampleFilter.h:221
BasicFilteringParams defines filtering parameters for BasicFilter with helpers.
Definition SampleFilter.h:342
int val_channel
signal value channel to consider
Definition SampleFilter.h:350
float max_val
Allowed values range for signal - maximum.
Definition SampleFilter.h:347
int time_channel
signal time channel to consider
Definition SampleFilter.h:349
int init_from_string(const string &init_str)
Initialization from string.
Definition SampleFilter.cpp:719
int win_to
Time window for deciding on filtering - end.
Definition SampleFilter.h:345
float min_val
Allowed values range for signal - minimum.
Definition SampleFilter.h:346
int test_filter(MedSample &sample, MedRepository &rep, int win_time_unit)
Test filtering criteria.
Definition SampleFilter.cpp:743
string sig_name
Name of signal to filter by.
Definition SampleFilter.h:343
int win_from
Time window for deciding on filtering - start.
Definition SampleFilter.h:344