Medial Code Documentation
|
A cleaner that is based on rules that describe relations of signal values to each other. More...
#include <RepProcess.h>
Public Member Functions | |
void | init_defaults () |
initialize to default values : Should be implemented for inheriting classes that have parameters | |
void | parse_rules_signals (const string &path) |
void | parse_sig_channels (const string &path) |
int | init (void *processor_params) |
initialize from a params object : Should be implemented for inheriting classes that have parameters | |
int | init (map< string, string > &mapper) |
The parsed fields from init command. | |
void | init_attributes () |
Init attributes information : Should be implemented for inheriting classes that have attributes. | |
void | set_signal_ids (MedSignals &sigs) |
set signals | |
void | init_tables (MedDictionarySections &dict, MedSignals &sigs) |
Init required tables : Should be implemented for inheriting classes that have such tables. | |
int | _learn (MedPidRepository &rep, MedSamples &samples, vector< RepProcessor * > &prev_processors) |
In this class there's never learning - we init tables and return 0 immediately. | |
int | _apply (PidDynamicRec &rec, vector< int > &time_points, vector< vector< float > > &attributes_mat) |
Apply cleaning model. | |
void | fit_for_repository (MedPidRepository &rep) |
make changes to RepProcessor according to available signals in Repository | |
void | init_lists () |
void | make_summary () |
void | dprint (const string &pref, int rp_flag) |
used for debug prints, each inheriting class can overload this one to get a more precise debug print. | |
void | select_rules_to_apply () |
select which rules to apply according to consideredRules | |
![]() | |
virtual void | clear () |
virtual void | set_signal (const string &_signalName) |
set signal-name : Should be implemented for inheriting classes that have signalName | |
virtual void | get_required_signal_names (unordered_set< string > &signalNames) |
Append required signal names to set : parent function just uses req_signals. | |
virtual void | get_required_signal_names (unordered_set< string > &signalNames, unordered_set< string > preReqSignals) |
Append required signal names to set only if processor is actually required to produce any of preReqSignals : parent function just uses req_signals. | |
virtual void | set_required_signal_ids (MedDictionarySections &dict) |
Fill req_signal_ids : parent function just fills from req_signals. | |
virtual void | add_virtual_signals (map< string, int > &_virtual_signals, map< string, string > &_virtual_signals_generic) const |
rep processors CREATING virtual signals need to implement this: adding their signals to the pile | |
virtual void | get_required_signal_ids (unordered_set< int > &signalIds) |
Append required signal names to set : parent function just uses req_signals. | |
virtual void | get_required_signal_ids (unordered_set< int > &signalIds, unordered_set< int > preReqSignals) |
Append required signal names to set only if processor is actually required to produce any of preReqSignals : parent function just uses req_signals. | |
virtual void | set_affected_signal_ids (MedDictionarySections &dict) |
Fill aff_signal_ids : parent function just fills from aff_signals. | |
bool | is_signal_affected (int signalId) |
Check if a signal is affected by processor.
| |
bool | is_signal_affected (string &signalName) |
virtual void | register_virtual_section_name_id (MedDictionarySections &dict) |
Register section id to section name of new virtual signals. | |
virtual bool | filter (unordered_set< string > &reqSignals) |
Check if processor (and 'sub'-processors within) should be applied according to set of required signals.
| |
virtual int | _conditional_learn (MedPidRepository &rep, MedSamples &samples, vector< RepProcessor * > &prev_processors, unordered_set< int > &neededSignalIds) |
learn processing model on a subset of samples only if required. Apply set of preceding processors on DynamicPidRec before learning : | |
int | learn (MedPidRepository &rep, MedSamples &samples, vector< RepProcessor * > &prev_processors) |
learn processing model on a subset of ids. Apply set of preceding processors on DynamicPidRec before learning | |
int | learn (MedPidRepository &rep) |
learn on all pids in repository, using fake samples - works only for repProcessors that ignore sample dates | |
int | learn (MedPidRepository &rep, MedSamples &samples) |
learn on subset of samples without preceding processors | |
virtual int | conditional_learn (MedPidRepository &rep, MedSamples &samples, vector< RepProcessor * > &prev_processors, unordered_set< int > &neededSignalIds) |
learn processing model on a subset of samples only if required. Apply set of preceding processors on DynamicPidRec before learning : | |
int | conditional_learn (MedPidRepository &rep, MedSamples &samples, unordered_set< int > &neededSignalIds) |
learn processing model on a subset of ids only if required without preceding processors | |
virtual int | _conditional_apply (PidDynamicRec &rec, vector< int > &time_points, unordered_set< int > &neededSignalIds, vector< vector< float > > &attributes_vals) |
apply processing on a single PidDynamicRec at a set of time-points only if required : May be implemented for inheriting classes | |
virtual int | _apply_simple (PidDynamicRec &rec, vector< int > &time_points) |
int | apply (PidDynamicRec &rec, vector< int > &time_points, vector< vector< float > > &attributes_vals) |
apply processing on a single PidDynamicRec at a set of time-points | |
int | conditional_apply (PidDynamicRec &rec, vector< int > &time_points, unordered_set< int > &neededSignalIds, vector< vector< float > > &attributes_vals) |
apply processing on a single PidDynamicRec at a set of time-points only if required : if any of the signals in neededSignalIds is actually affected by processor | |
int | apply (PidDynamicRec &rec, MedIdSamples &samples) |
apply processing on a single PidDynamicRec at a set of time-points given by samples | |
int | conditional_apply (PidDynamicRec &rec, MedIdSamples &samples, unordered_set< int > &neededSignalIds) |
apply processing on a single PidDynamicRec at a set of time-points given by samples only if required | |
int | conditional_apply_without_attributes (PidDynamicRec &rec, const MedIdSamples &samples, unordered_set< int > &neededSignalIds) |
apply processing on a single PidDynamicRec at a set of time-points given by samples only if required, not affecting attributes | |
virtual void | get_required_signal_categories (unordered_map< string, vector< string > > &signal_categories_in_use) const |
returns, for each used signal, its used categories | |
void * | new_polymorphic (string derived_class_name) |
for polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class: one needs to implement this function to return a pointer to a new object of the derived class given its type (as in my_type) | |
size_t | get_processor_size () |
get size of processor + processor_type | |
size_t | processor_serialize (unsigned char *blob) |
serialize processor + processor_type | |
virtual void | print () |
optional printing of processor | |
![]() | |
virtual int | version () const |
Relevant for serializations. | |
virtual string | my_class_name () const |
For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method. | |
virtual void | serialized_fields_name (vector< string > &field_names) const |
The names of the serialized fields. | |
virtual void | pre_serialization () |
virtual void | post_deserialization () |
virtual size_t | get_size () |
Gets bytes sizes for serializations. | |
virtual size_t | serialize (unsigned char *blob) |
Serializing object to blob memory. Returns number of bytes written to memory. | |
virtual size_t | deserialize (unsigned char *blob) |
Deserializing blob to object. Returns number of bytes read. | |
size_t | serialize_vec (vector< unsigned char > &blob) |
size_t | deserialize_vec (vector< unsigned char > &blob) |
virtual size_t | serialize (vector< unsigned char > &blob) |
virtual size_t | deserialize (vector< unsigned char > &blob) |
virtual int | read_from_file (const string &fname) |
read and deserialize model | |
virtual int | write_to_file (const string &fname) |
serialize model and write to file | |
virtual int | read_from_file_unsafe (const string &fname) |
read and deserialize model without checking version number - unsafe read | |
int | init_from_string (string init_string) |
Init from string. | |
int | init_params_from_file (string init_file) |
int | init_param_from_file (string file_str, string &param) |
int | update_from_string (const string &init_string) |
virtual int | update (map< string, string > &map) |
Virtual to update object from parsed fields. | |
virtual string | object_json () const |
Data Fields | |
vector< int > | signalIds |
Signals to clean. | |
vector< int > | consideredRules |
only rules in this list will be considered in this cleaner (read list from JSON). Rule number 0 means apply all rules. | |
string | nRem_attr = "" |
Attribute name (in sample) for number of removed. not recorded if empty. | |
string | nRem_attr_suffix = "" |
Attribute suffix (name in sample is signalName_suffix) for number of removed. Not recorded if empty. | |
float | tolerance = 0.1F |
int | time_window = 0 |
the size of time window to search for signals | |
string | verbose_file |
cleaning output_file for debugging | |
float | calc_res = 0 |
signal resolution calc, 0 no resolution | |
bool | print_summary = false |
If true will always print clean summary. | |
float | print_summary_critical_cleaned = (float)0.05 |
beyond this value will print summary | |
map< int, vector< string > > | rules2Signals |
static map from rule to participating signals | |
map< int, string > | rules2RemoveSignal |
which signal to remove if contradiction found. If not exists default to remove all | |
vector< int > | rulesToApply |
unordered_map< string, pair< int, int > > | signal_channels |
signal channels (if exists). first is time, second is for val | |
unordered_map< int, pair< int, int > > | signal_id_channels |
signal channels (if exists). first is time, second is for val | |
set< int > | reqSignalIds |
Helpers. | |
set< int > | affSignalIds |
unordered_map< int, vector< int > > | rules_sids |
unordered_map< int, vector< bool > > | affected_by_rules |
ADD_SERIALIZATION_FUNCS(processor_type, time_window, calc_res, rules2Signals, rulesToApply, rules2RemoveSignal, signal_channels, consideredRules, tolerance, req_signals, aff_signals, nRem_attr, nRem_attr_suffix, verbose_file, print_summary, print_summary_critical_cleaned) private unordered_map< int, string > | affected_ids_to_name |
Serialization. | |
ofstream | log_file |
unordered_map< string, remove_stats > | _rmv_stats |
![]() | |
RepProcessorTypes | processor_type = REP_PROCESS_LAST |
type of repository processor | |
unordered_set< string > | req_signals |
names of signals required for processing | |
unordered_set< int > | req_signal_ids |
ids of signals required for processing | |
unordered_set< string > | aff_signals |
names of signals affected by processing | |
unordered_set< int > | aff_signal_ids |
ids of signals affected by processing | |
bool | unconditional = false |
indicates that processor should ALWAYS be applied | |
vector< string > | attributes |
attributes generated by the processor (optional) | |
vector< pair< string, int > > | virtual_signals |
virtual signals are created only in rep processors but can be used by any rep processor that comes after or any feature generator as a regular signal. | |
vector< pair< string, string > > | virtual_signals_generic |
Additional Inherited Members | |
![]() | |
static RepProcessor * | make_processor (string name) |
create a new repository processor from name | |
static RepProcessor * | make_processor (string type, string params) |
create a new repository processor from name and a parameters string | |
static RepProcessor * | make_processor (RepProcessorTypes type) |
create a new repository processor from type | |
static RepProcessor * | make_processor (RepProcessorTypes type, string params) |
create a new repository processor from type and a parameters string | |
static RepProcessor * | create_processor (string &params) |
create a new repository processor from parameters string which contains rp_type | |
A cleaner that is based on rules that describe relations of signal values to each other.
This is a static cleaner (no learning involved).
Rules:
Rule1: BMI = Weight / Height ^ 2 * 1e4
Rule2:MCH = (Hemoglobin / RBC) * 10(units)
Rule3:MCV = (Hematocrit / RBC) * 10(units)
Rule4:MCHC - M = (MCH / MCV) * 100 (units)
Rule5:Eosinophils# + Monocytes# + Basophils# + Lymphocytes# + Neutrophils# <= WBC
Rule6:MPV = Platelets_Hematocrit / Platelets
Rule7:UrineAlbumin <= UrineTotalProtein
Rule8:UrineAlbumin_over_Creatinine = UrineAlbumin / UrineCreatinine
Rule9:LDL + HDL <= Cholesterol
Rule10:NonHDLCholesterol + HDL = Cholesterol
Rule11:HDL_over_nonHDL = HDL / NonHDLCholesterol
Rule12:HDL_over_Cholesterol = HDL / Cholesterol
Rule13:HDL_over_LDL = HDL / LDL
Rule14:HDL_over_LDL = 1 / LDL_over_HDL
Rule15:Cholesterol_over_HDL = Cholesterol / HDL
Rule16:------------------—
Rule17:Cholesterol_over_HDL = 1 / HDL_over_Cholesterol
Rule18:LDL_over_HDL = LDL / HDL
Rule19:Albumin <= Protein_Total
Rule20:FreeT4 <= T4
Rule21:NRBC <= RBC
Rule22:CHADS2_VASC >= CHADS2
Rule23: BP.Systolic(channel 0) >= BP.Diastolic(channel 1)
|
virtual |
Apply cleaning model.
Reimplemented from RepProcessor.
|
inlinevirtual |
In this class there's never learning - we init tables and return 0 immediately.
Reimplemented from RepProcessor.
|
virtual |
used for debug prints, each inheriting class can overload this one to get a more precise debug print.
rp_flag can be used to transfer verbosity levels. The default print just prints the basic type, etc. Summary: prints summary of rep_processor job. Optional, called after apply. For example, prints how many values were cleaned.
Reimplemented from RepProcessor.
|
virtual |
make changes to RepProcessor according to available signals in Repository
Reimplemented from RepProcessor.
|
virtual |
The parsed fields from init command.
[RepRuleBasedOutlierCleaner::init]
[RepRuleBasedOutlierCleaner::init]
Reimplemented from RepProcessor.
|
inlinevirtual |
initialize from a params object : Should be implemented for inheriting classes that have parameters
Reimplemented from RepProcessor.
|
virtual |
Init attributes information : Should be implemented for inheriting classes that have attributes.
Reimplemented from RepProcessor.
|
virtual |
initialize to default values : Should be implemented for inheriting classes that have parameters
Reimplemented from RepProcessor.
|
virtual |
Init required tables : Should be implemented for inheriting classes that have such tables.
Reimplemented from RepProcessor.
|
virtual |
Reimplemented from RepProcessor.
|
virtual |
set signals
Reimplemented from RepProcessor.
ADD_SERIALIZATION_FUNCS (processor_type, time_window, calc_res, rules2Signals, rulesToApply, rules2RemoveSignal, signal_channels, consideredRules, tolerance, req_signals, aff_signals, nRem_attr, nRem_attr_suffix, verbose_file, print_summary, print_summary_critical_cleaned) private unordered_map<int, string> RepRuleBasedOutlierCleaner::affected_ids_to_name |
Serialization.
ruleUsvs hold the signals in the order they appear in the rule in the rules2Signals above. Apply the rule and return true if data is consistent with the rule.
vector<int> RepRuleBasedOutlierCleaner::consideredRules |
only rules in this list will be considered in this cleaner (read list from JSON). Rule number 0 means apply all rules.
Empty vector: do nothing in this cleaner.
map<int, vector<string> > RepRuleBasedOutlierCleaner::rules2Signals |
static map from rule to participating signals