Medial Code Documentation
|
A class for holding features data as a virtual matrix
More...
#include <MedFeatures.h>
Public Member Functions | |
MedFeatures (int _time_unit) | |
Constructor Given time-unit. | |
void | clear () |
Clear all vectors. | |
void | set_time_unit (int _time_unit) |
set time unit | |
void | get_feature_names (vector< string > &names) const |
Get a vector of feature names. | |
void | get_as_matrix (MedMat< float > &mat) const |
Get data (+attributes) as a MedMat. | |
void | get_as_matrix (MedMat< float > &mat, vector< string > &names) const |
Get subset of data (+attributes) as a MedMat: Only features in 'names'. | |
void | get_as_matrix (MedMat< float > &mat, const vector< string > &names, vector< int > &idx) const |
Get subset of data (+attributes) as a MedMat: Only features in 'names' and rows in 'idx'. | |
void | set_as_matrix (const MedMat< float > &mat) |
Set data (+attributes) from MedMat. | |
void | append_samples (MedIdSamples &in_samples) |
Append samples at end of samples vector (used for generating samples set before generating features) | |
void | insert_samples (MedIdSamples &in_samples, int index) |
Insert samples at position index, assuming samples vector is properly allocated (used for generating samples set before generating features) | |
void | init_all_samples (vector< MedIdSamples > &in_samples) |
Fill samples vector and initialize pid_pos_len according to input vector of MedIdSamples. | |
void | init_pid_pos_len () |
initialize pid_pos_len vector according to samples | |
int | get_pid_pos (int pid) const |
Return the first row in the virtual matrix for an id (-1 if none) | |
int | get_pid_len (int pid) const |
Return the number of rows in the virtual matrix for an id (-1 if none) | |
unsigned int | get_crc () |
Calculate a crc for the data (used for debugging mainly) | |
void | print_csv () const |
MLOG data in csv format. | |
void | get_samples (MedSamples &outSamples) const |
Get the corresponding MedSamples object. Assumes the samples vector in features is ordered (all of an id's samples are consecutive). | |
int | get_max_serial_id_cnt () const |
Return the max serial_id_cnt. | |
int | write_as_csv_mat (const string &csv_fname, bool write_attributes=false) const |
Write features (samples + weights + data) as csv with a header line. | |
int | add_to_csv_mat (const string &csv_fname, bool write_attributes, int start_idx) const |
void | write_csv_data (ofstream &out_f, bool write_attributes, vector< string > &col_names, int start_idx) const |
int | read_from_csv_mat (const string &csv_fname, bool read_time_raw=true) |
Read features (samples + weights + data) from a csv file with a header line. | |
int | filter (unordered_set< string > &selectedFeatures) |
Filter data (and attributes) to include only selected features. | |
int | prep_selected_list (vector< string > &search_str, unordered_set< string > &selected) |
Prepare a list of all features that contain one of the given search strings as a substring; matches are added to 'selected' (it is not cleared on start). | |
int | init_masks () |
int | get_masks_as_mat (MedMat< unsigned char > &masks_mat) |
int | mark_imputed_in_masks (float _missing_val) |
int | mark_imputed_in_masks () |
void | round_data (float r) |
void | noise_data (float r) |
void | samples_sort () |
Sort by id and time. | |
string | resolve_name (string &substr) const |
Get feature name that matches a substring. | |
![]() | |
virtual int | version () const |
Relevant for serializations. | |
virtual string | my_class_name () const |
For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method. | |
virtual void | serialized_fields_name (vector< string > &field_names) const |
The names of the serialized fields. | |
virtual void * | new_polymorphic (string derived_name) |
For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type: implement this function to return a new pointer to the derived class given its type (as in my_type). | |
virtual void | pre_serialization () |
virtual void | post_deserialization () |
virtual size_t | get_size () |
Gets bytes sizes for serializations. | |
virtual size_t | serialize (unsigned char *blob) |
Serializing object to blob memory. Returns number of bytes written to memory. | |
virtual size_t | deserialize (unsigned char *blob) |
Deserializing blob to object. Returns number of bytes read. | |
size_t | serialize_vec (vector< unsigned char > &blob) |
size_t | deserialize_vec (vector< unsigned char > &blob) |
virtual size_t | serialize (vector< unsigned char > &blob) |
virtual size_t | deserialize (vector< unsigned char > &blob) |
virtual int | read_from_file (const string &fname) |
read and deserialize model | |
virtual int | write_to_file (const string &fname) |
serialize model and write to file | |
virtual int | read_from_file_unsafe (const string &fname) |
read and deserialize model without checking version number - unsafe read | |
int | init_from_string (string init_string) |
Init from string. | |
int | init_params_from_file (string init_file) |
int | init_param_from_file (string file_str, string &param) |
virtual int | init (map< string, string > &map) |
Virtual to init object from parsed fields. | |
int | update_from_string (const string &init_string) |
virtual int | update (map< string, string > &map) |
Virtual to update object from parsed fields. | |
virtual string | object_json () const |
Data Fields | |
map< string, vector< float > > | data |
the actual matrix of values per sample | |
vector< float > | weights |
a vector of weight per sample | |
vector< MedSample > | samples |
The samples representing the lines. | |
map< int, pair< int, int > > | pid_pos_len |
feature generation assumes that all "rows" for a specific pid are adjacent. | |
map< string, FeatureAttr > | attributes |
a FeatureAttr per feature | |
map< string, unordered_set< string > > | tags |
a set of tags per feature | |
map< string, vector< unsigned char > > | masks |
float | medf_missing_value = (float)MED_MAT_MISSING_VALUE |
int | time_unit |
the time unit of the samples | |
A class for holding features data as a virtual matrix
Helpers :
|
inline |
Constructor Given time-unit.
Constructor setting time-unit to undef.
int MedFeatures::filter | ( | unordered_set< string > & | selectedFeatures | ) |
Filter data (and attributes) to include only selected features.
Returns -1 if any of the selected features is not present, 0 upon success.
int MedFeatures::read_from_csv_mat | ( | const string & | csv_fname, |
bool | read_time_raw = true |
||
) |
Read features (samples + weights + data) from a csv file with a header line.
int MedFeatures::write_as_csv_mat | ( | const string & | csv_fname, |
bool | write_attributes = false |
||
) | const |
Write features (samples + weights + data) as csv with a header line.
map<int, pair<int, int> > MedFeatures::pid_pos_len |
feature generation assumes that all "rows" for a specific pid are adjacent.
pid_pos_len[pid].first holds the first position, pid_pos_len[pid].second holds its length