Medial Code Documentation
|
MedSamples represents a collection of samples per different id.
The data is contained in a vector of MedIdSamples.
More...
#include <MedSamples.h>
Public Member Functions | |
MedSamples () | |
Constructor. init time_unit according to default. | |
void | clear () |
Clear data and init time_unit according to default. | |
int | insert_preds (MedFeatures &featuresData) |
Extract predictions from MedFeatures and insert to corresponding samples. Samples in MedFeatures are assumed to be of the same size and order as in MedSamples. | |
int | copy_attributes (const vector< MedSample > &samples) |
Copy attributes from MedSample vector. | |
void | get_ids (vector< int > &ids) const |
Get all patient ids. | |
void | append (MedSamples &newSamples) |
Append new MedIdSamples at the end of current ones. | |
int | read_from_bin_file (const string &file_name) |
Read from bin file. | |
int | write_to_bin_file (const string &file_name) |
Write to bin file. | |
int | read_from_file (const string &file_name, bool sort_rows=true) |
Read from text file. | |
int | write_to_file (const string &fname, int pred_precision=-1, bool print_attributes=true) |
Write to text file in new format. | |
void | write_to_file (ofstream &of, int pred_precision, bool print_attributes, bool print_header) |
void | get_preds (vector< float > &preds) const |
Extract a single vector of concatenated predictions. | |
void | get_preds_channel (vector< float > &preds, int channel) |
void | get_y (vector< float > &y) const |
Extract a vector of all outcomes. | |
void | get_categs (vector< float > &categs) const |
Get a list of all categories (different values) appearing in the outcome. | |
void | get_attr_values (const string &attr_name, vector< float > &values) const |
get a vector corresponding to given attr (name should include attr_) | |
void | export_to_sample_vec (vector< MedSample > &vec_samples) const |
Get all MedSamples as a single vector. | |
void | import_from_sample_vec (const vector< MedSample > &vec_samples, bool allow_split_inconsistency=false) |
Set MedSamples from a single vector. | |
void | sort_by_id_date () |
Sort by id and then date. | |
void | normalize () |
Make sure that : (1) every pid has one idSample at most and (2) everything is sorted. | |
bool | same_as (MedSamples &other, int mode) |
Comparison function : mode 0 requires equal id/time, mode 1 requires equal outcome info, mode 2 also compares split and prediction. | |
int | nSamples () const |
Return number of samples. | |
int | nSplits () |
Return number of splits, also check mismatches between idSample and internal MedSamples and set idSamples.split if missing. | |
int | get_predictions_size (int &nPreds) |
Get predictions vector size. Return -1 if not-consistent. | |
int | get_all_attributes (vector< string > &attributes, vector< string > &str_attributes) const |
Get all attributes. Return -1 if not-consistent. | |
void | dilute (float prob) |
given a probability dilution prob, dilute current samples | |
void | binary_dilute (float p0, float p1) |
will dilute 0 labeled samples (traditionally controls) with p0, and all the rest with p1 | |
void | subtract (MedSamples &_dont_include) |
removing all ids that appear in _dont_include from the current samples | |
void | split_train_test (MedSamples &train, MedSamples &test, float p_test) |
gets p_test and splits by id , p_test of the ids into test, and the rest into train | |
void | split_by_split (MedSamples &in_split, MedSamples &off_split, int split) |
gets a split number and splits samples to lists in/off the split | |
void | add_splits_from_file (string f_splits) |
adding splits to the samples given in an external file | |
void | init_all_jrecs () |
initializing all jrecs to contain pid and time | |
void | flatten (vector< MedSample > &flat) const |
void | insertRec (int pid, int time, float outcome, int outcomeTime) |
API's for online insertions : main use case is a single time point for prediction per pid. | |
void | insertRec (int pid, int time, float outcome, int outcomeTime, float pred) |
void | insertRec (int pid, int time) |
![]() | |
virtual int | version () const |
Relevant for serializations. | |
virtual string | my_class_name () const |
For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method. | |
virtual void | serialized_fields_name (vector< string > &field_names) const |
The names of the serialized fields. | |
virtual void * | new_polymorphic (string derived_name) |
For polymorphic classes that want to be able to serialize/deserialize a pointer to a derived class given its type: implement this function to return a new instance of the derived class given its type name (as in my_type). | |
virtual void | pre_serialization () |
virtual void | post_deserialization () |
virtual size_t | get_size () |
Gets bytes sizes for serializations. | |
virtual size_t | serialize (unsigned char *blob) |
Serializing object to blob memory. Returns number of bytes written to memory. | |
virtual size_t | deserialize (unsigned char *blob) |
Deserializing blob to object. Returns number of bytes read. | |
size_t | serialize_vec (vector< unsigned char > &blob) |
size_t | deserialize_vec (vector< unsigned char > &blob) |
virtual size_t | serialize (vector< unsigned char > &blob) |
virtual size_t | deserialize (vector< unsigned char > &blob) |
virtual int | read_from_file (const string &fname) |
read and deserialize model | |
virtual int | write_to_file (const string &fname) |
serialize model and write to file | |
virtual int | read_from_file_unsafe (const string &fname) |
read and deserialize model without checking version number - unsafe read | |
int | init_from_string (string init_string) |
Init from string. | |
int | init_params_from_file (string init_file) |
int | init_param_from_file (string file_str, string &param) |
virtual int | init (map< string, string > &map) |
Virtual to init object from parsed fields. | |
int | update_from_string (const string &init_string) |
virtual int | update (map< string, string > &map) |
Virtual to update object from parsed fields. | |
virtual string | object_json () const |
Data Fields | |
int | time_unit = MedTime::Date |
The time unit in which the samples are given. Default: Date. | |
vector< MedIdSamples > | idSamples |
The vector of MedIdSamples. | |
int | raw_format = 0 |
MedSamples represents a collection of samples per different id.
The data is contained in a vector of MedIdSamples.
int MedSamples::copy_attributes | ( | const vector< MedSample > & | samples | ) |
Copy attributes from MedSample vector.
This function is mainly used to extract post-processor results from MedFeatures
and insert them into the corresponding samples.
Samples in MedFeatures are assumed to be of the same size and order as in MedSamples.
int MedSamples::insert_preds | ( | MedFeatures & | featuresData | ) |
Extract predictions from MedFeatures and insert to corresponding samples.
Samples in MedFeatures are assumed to be of the same size and order as in MedSamples.
|
inline |
Read from bin file.
int MedSamples::read_from_file | ( | const string & | file_name, |
bool | sort_rows = true |
||
) |
Read from text file.
If a line starting with EVENT_FIELDS (followed by tab-delimited field names : id,date,outcome,outcome_date,split,pred) appears before the data lines, it is used to determine fields positions, otherwise - old or new formats are used.
bool MedSamples::same_as | ( | MedSamples & | other, |
int | mode | ||
) |
Comparison function : mode 0 requires equal id/time, mode 1 requires equal outcome info, mode 2 also compares split and prediction.
|
inline |
Write to bin file.
int MedSamples::write_to_file | ( | const string & | fname, |
int | pred_precision = -1 , |
||
bool | print_attributes = true |
||
) |
Write to text file in new format.