/// Kinds of embedded code, numbered consecutively from 0.
/// The numeric values are unchanged from the original single-line definition
/// (implicit increments from ECTYPE_CATEGORIAL = 0), so any stored/serialized
/// integer value of this enum keeps its meaning.
/// NOTE(review): the per-enumerator descriptions below are read off the names
/// only — confirm against the implementation in MedEmbed.cpp.
enum EmbeddedCodeType {
	ECTYPE_CATEGORIAL = 0,  ///< presumably categorical values (identifier spelling kept as-is for callers)
	ECTYPE_CONTINUOUS = 1,  ///< presumably continuous (float) values
	ECTYPE_AGE = 2,         ///< presumably an age-derived code
	ECTYPE_DUMMY = 3,       ///< presumably a placeholder code
	ECTYPE_MODEL = 4,       ///< presumably codes produced by a model
	ECTYPE_UNDEFINED = 5    ///< last enumerator; by its name, the "no valid type" value
};
/// Configuration fields. The enclosing class header is outside this view.

/// Kind of code this object embeds; defaults to ECTYPE_CATEGORIAL.
28 EmbeddedCodeType type = ECTYPE_CATEGORIAL;
/// Integer flag, 1 by default.
/// NOTE(review): presumably enables adding hierarchy/set members for a value — confirm in MedEmbed.cpp.
29 int add_hierarchy = 1;
/// List of category strings to embed; empty by default.
/// NOTE(review): the meaning of an empty list is not visible here.
35 vector<string> categories_to_embed;
/// Regex filter string; empty by default.
/// NOTE(review): what the regex is matched against is not visible here.
36 string regex_filter =
"";
/// A list of float vectors; empty by default.
/// NOTE(review): presumably value ranges used for non-categorical types — confirm.
43 vector<vector<float>> ranges;
/// Time unit of the signal; initialized from global_default_time_unit.
49 int sig_time_unit = global_default_time_unit;
/// Time unit of the window; initialized from global_default_windows_time_unit.
50 int win_time_unit = global_default_windows_time_unit;
/// Model-related fields.
/// NOTE(review): presumably only relevant when type == ECTYPE_MODEL — confirm.

/// Model file name/path; empty by default.
58 string model_file =
"";
/// Names of signals associated with the model.
/// NOTE(review): by its name, the signals the model requires — confirm.
59 vector<string> model_req_sigs;
/// Names of the model's features.
60 vector<string> model_features_names;
/// Raw pointers to float data.
/// NOTE(review): ownership and lifetime of the pointed-to buffers are not visible here — confirm who allocates and frees them.
61 vector<float *> feat_ptrs;
/// Maps a pair of ints to an int.
/// NOTE(review): by its name, (pid, time) -> index — confirm.
62 map<pair<int, int>,
int> pidtime2idx;
/// Lookup tables. clear_tables() empties all of them except categ_convert.

/// Maps an int key to a list of ints.
/// NOTE(review): by its name, signal member value -> ids of the sets containing it — confirm.
66 unordered_map<int, vector<int>> sig_members2sets;
/// Same shape as sig_members2sets.
/// NOTE(review): by its name, restricted to sets that are "in range" — confirm what range is meant.
69 unordered_map<int, vector<int>> sig_members2sets_in_range;
/// Maps a name string to an int id.
72 map<string, int> Name2Id;
/// Vector of ints.
/// NOTE(review): presumably a dense conversion table between category codes; it is not cleared by clear_tables() and not listed in the serialization macro — confirm this is intentional.
75 vector<int> categ_convert;
/// Maps an int to an int.
/// NOTE(review): by its name, original value -> code — confirm.
76 map<int, int> Orig2Code;
/// Maps an int to a string.
/// NOTE(review): by its name, original value -> name — confirm.
77 map<int, string> Orig2Name;
/// Maps an int to an int.
/// NOTE(review): by its name, original value -> code after shrinking — confirm.
80 map<int, int> Orig2ShrunkCode;
82 void clear_tables() { sig_members2sets.clear(); sig_members2sets_in_range.clear(); Name2Id.clear(); Orig2Code.clear(); Orig2Name.clear(); Orig2ShrunkCode.clear(); }
/// Code lookup helpers. All are const member functions returning int; their
/// definitions are not in this view.
/// NOTE(review): the meaning of the int return value (status vs. code/count)
/// must be confirmed in MedEmbed.cpp.

/// Categorical lookup for val; codes is a non-const reference, presumably
/// the output list of codes.
87 int get_categ_orig(
int val, vector<int> &codes)
const;
/// Categorical lookup for val with a use_shrink flag (defaults to 1).
/// NOTE(review): presumably selects between the get_categ_shrunk_codes and get_categ_orig variants — confirm.
90 int get_categ_codes(
int val, vector<int> &codes,
int use_shrink = 1)
const;
/// Categorical lookup for val, "shrunk" variant; codes is presumably the output.
93 int get_categ_shrunk_codes(
int val, vector<int> &codes)
const;
/// Continuous (float) lookup for val.
96 int get_continuous_orig(
float val)
const;
/// Continuous lookup for val with a use_shrink flag (defaults to 1).
99 int get_continuous_codes(
float val,
int use_shrink = 1)
const;
/// Continuous lookup for val, "shrunk" variant.
102 int get_continuous_shrunk_codes(
float val)
const;
/// Initializes the object from parsed fields given as a string -> string map.
/// NOTE(review): accepted keys and the return convention are defined in MedEmbed.cpp, not visible here.
108 int init(map<string, string>& _map);
/// Initialization step for the continuous case (identifier spelling kept as-is).
/// curr_code is taken by non-const reference.
/// NOTE(review): presumably a running code counter that is advanced — confirm.
123 int init_continous(
int &curr_code);
/// Helpers that turn a signal vector into codes / sparse output lines.
/// All are const member functions returning int; definitions are not in this view.
/// NOTE(review): return-value convention and the exact meaning of the map
/// keys/values must be confirmed in MedEmbed.cpp.

/// Takes a signal vector, pid, time and use_shrink flag; out_lines is a
/// non-const nested map (int -> (int -> float)), presumably the output.
130 int add_sig_to_lines(UniversalSigVec &usv,
int pid,
int time,
int use_shrink, map<
int, map<int, float>> &out_lines)
const;
/// Takes a signal vector, pid, time and use_shrink flag; codes is a
/// non-const vector, presumably the output list of codes.
131 int get_codes(UniversalSigVec &usv,
int pid,
int time,
int use_shrink, vector<int> &codes)
const;
/// Takes a list of codes and a single int -> float line, presumably adding the codes to it.
132 int add_codes_to_line(vector<int> &codes, map<int, float> &out_line)
const;
/// Same inputs as get_codes, but targeting a single int -> float line.
/// NOTE(review): presumably get_codes followed by add_codes_to_line — confirm.
133 int add_to_line(UniversalSigVec &usv,
int pid,
int time,
int use_shrink, map<int, float> &out_line)
const;
/// Converts a type name string to an EmbeddedCodeType value.
/// NOTE(review): the accepted names and the result for an unknown name are not visible here.
139 EmbeddedCodeType type_name_to_code(
string name);
/// Returns a textual description of this object; verbosity is an int level.
/// NOTE(review): the effect of each verbosity level is not visible here.
141 string print_to_string(
int verbosity);
/// Serialization hook: passes the listed members to the ADD_SERIALIZATION_FUNCS
/// macro (defined elsewhere).
/// Several listed names (sig, do_shrink, time_chan, val_chan, win_from, win_to,
/// model) are declared outside this view. Visible members that are NOT in the
/// list include regex_filter, model_file, categ_convert and both
/// sig_members2sets maps.
/// NOTE(review): confirm those omissions are intentional.
148 ADD_SERIALIZATION_FUNCS(sig, type, add_hierarchy, do_shrink, ranges, time_chan, val_chan, win_from, win_to, categories_to_embed, Name2Id, Orig2Code, Orig2Name, Orig2ShrunkCode, model)
/// List of signal names.
/// NOTE(review): by its name, the signals that must be loaded from the repository — confirm.
158 vector<string> sigs_to_load;
/// The per-signal embedding objects. init_tables(), minimize() and
/// prep_models_batches() each iterate over this vector.
164 vector<EmbeddingSig> embed_sigs;
/// Takes a dynamic record, a version index, a time and a use_shrink flag;
/// out_line is a non-const int -> float map, presumably the output line.
/// NOTE(review): return convention not visible here.
176 int get_pid_out_line(
PidDynamicRec &pdr,
int ver,
int time,
int use_shrink, map<int, float> &out_line);
/// Takes a sparse matrix and two float bounds, min_p and max_p.
/// NOTE(review): presumably builds the shrunk dictionary from columns whose frequency lies within [min_p, max_p] — confirm.
193 int get_shrinked_dictionary(
MedSparseMat &smat,
float min_p,
float max_p);
/// Initializes the object from parsed fields given as a string -> string map.
200 int init(map<string, string>& _map);
206 void init_tables(
MedDictionarySections &dict) {
for (
auto &es : embed_sigs) es.init_categorial_tables(dict); }
/// Writes a dictionary to the file named fname; only_shrink is an int flag.
/// NOTE(review): presumably restricts output to the shrunk dictionary when non-zero — confirm. File format not visible here.
210 int write_dict_to_file(
string fname,
int only_shrink);
/// Returns a textual description of this object; verbosity is an int level.
214 string print_to_string(
int verbosity);
218 int minimize() {
for (
auto &es : embed_sigs) es.minimize();
return 0; };
221 void prep_models_batches(
MedPidRepository &rep,
MedSamples &samples) {
for (
auto &es : embed_sigs) es.prep_model_batch(rep, samples); }
/// Generation parameters. The enclosing class header is outside this view.

/// Integer flag, 1 by default.
/// NOTE(review): presumably makes both generated matrices share one dictionary — confirm.
253 int use_same_dictionaries = 1;
/// Lower time bound, default 20060101.
/// NOTE(review): looks like a YYYYMMDD date — confirm.
257 int min_time = 20060101;
/// Upper time bound, default 20160101.
/// NOTE(review): looks like a YYYYMMDD date — confirm.
258 int max_time = 20160101;
/// Number of points per pid, default 1.
261 int npoints_per_pid = 1;
/// Lower float bound, default 0.001.
/// NOTE(review): presumably the min_p passed to get_shrinked_dictionary — confirm.
262 float min_p = (float)0.001;
/// Upper float bound, default 0.95.
/// NOTE(review): presumably the max_p passed to get_shrinked_dictionary — confirm.
263 float max_p = (float)0.95;
/// List of ints; empty by default.
/// NOTE(review): units and how it interacts with time_dist_points are not visible here.
264 vector<int> time_dist_range;
/// List of ints, default { -365, 0, 365 }.
/// NOTE(review): presumably time offsets in days — confirm.
265 vector<int> time_dist_points ={ -365, 0, 365 };
/// String prefix, default "smat".
/// NOTE(review): presumably the prefix of generated output file names — confirm.
271 string prefix =
"smat";
/// Float fraction, default 0.8.
/// NOTE(review): by its name, the share of data assigned to training — confirm.
274 float p_train = (float)0.8;
/// Generation and I/O entry points. All take file names by value and return
/// int; definitions are not in this view.
/// NOTE(review): return-value convention and file formats must be confirmed in MedEmbed.cpp.

/// Takes an xy file name, a repository file name and an output prefix.
279 int generate_from_xy_file(
string xy_fname,
string rep_fname,
string out_prefix);
/// Takes an xy file name, a pids file name and a repository file name.
/// NOTE(review): presumably writes the xy list to xy_fname — confirm.
282 int generate_xy_list(
string xy_fname,
string pids_fname,
string rep_fname);
/// Reads records from xy_fname; xy is a non-const reference, presumably the output.
285 int read_xy_records(
string xy_fname, vector<EmbedXYRecord> &xy);
/// Writes the records in xy to xy_fname.
286 int write_xy_records(
string xy_fname, vector<EmbedXYRecord> &xy);
/// Initializes the object from parsed fields given as a string -> string map.
289 int init(map<string, string>& _map);
int init(map< string, string > &_map)
Virtual to init object from parsed fields.
Definition MedEmbed.cpp:490
int init(map< string, string > &_map)
Virtual to init object from parsed fields.
Definition MedEmbed.cpp:880
int init(map< string, string > &_map)
Virtual to init object from parsed fields.
Definition MedEmbed.cpp:27