// Offset added to the zero-based request index when convert_reqfile_to_data()
// writes the id column. Declared without an initializer here; its definition
// is not visible in this excerpt.
27 static const int base_pid;
// Lookup from id to its MedIdSamples record. load_samples_from_dates_to_score()
// fills it with the addresses of elements of samples.idSamples, so the stored
// pointers are only usable while those elements stay where they are.
34 map<int, MedIdSamples* > pid2samples;
// Result of the last get_sig_dict_cached() call:
// signal name -> one map per value channel -> (id -> name).
35 map<string, vector<map<int, string> > > sig_dict_cached;
// Declaration only; the definition is not part of this excerpt.
// Parameters: rep_fname, model_fname, samples_fname (defaults to ""),
// read_signals (defaults to true).
// NOTE(review): presumably these are the repository, model and samples file
// names and a switch for reading signal data - confirm against the definition.
37 void load(
const string& rep_fname,
const string& model_fname,
const string& samples_fname =
"",
bool read_signals =
true);
// Builds the signal dictionary via get_sig_dict() with the same arguments and
// stores the result in the member sig_dict_cached (overwriting any previous value).
// Both parameters are forwarded unchanged; see get_sig_dict() for their effect.
// NOTE(review): the closing brace of this function is not visible in this excerpt.
39 void get_sig_dict_cached(
const string& cat_prefix =
"",
bool force_cat_prefix =
false) {
40 sig_dict_cached = get_sig_dict(cat_prefix, force_cat_prefix);
// Builds, for every signal in `sigs`, one id -> name map per value channel.
//
// cat_prefix       - preferred name prefix. An id whose first name starts with it
//                    keeps that name; otherwise the names of the sets the id
//                    belongs to are searched for one that starts with the prefix.
// force_cat_prefix - when true, ids for which no prefixed name was found are
//                    left out of the channel map.
//
// Non-categorical channels get an empty map, so the outer vector always has one
// entry per value channel.
// NOTE(review): several original lines (closing braces, the likely `else`
// between original lines 57 and 60, and the return statement) are not visible
// in this excerpt; comments below that rely on them are marked as assumptions.
43 map<string, vector<map<int, string> > > get_sig_dict(
const string& cat_prefix =
"",
bool force_cat_prefix =
false) {
44 map<string, vector<map<int, string> > > sig_dict;
45 for (
auto& sig : sigs) {
46 vector<map<int, string > > chan_dict;
47 int section_id = rep.dict.section_id(sig);
// NOTE(review): unlike get_sig_reverse_dict(), there is no Name2Sid.count(sig)
// check before this lookup - confirm that every entry of `sigs` is known here.
48 int sid = rep.sigs.Name2Sid[sig];
49 int n_vchan = rep.sigs.Sid2Info[sid].n_val_channels;
50 for (
int vchan = 0; vchan < n_vchan; ++vchan) {
51 if (rep.sigs.is_categorical_channel(sig, vchan)) {
52 map<int, string> new_dict;
53 const auto& Id2Names = rep.dict.dict(section_id)->Id2Names;
54 const auto& Member2Sets = rep.dict.dict(section_id)->Member2Sets;
55 for (
const auto& entry : Id2Names) {
// Case 1: the id's first name already carries the prefix - use it as is.
56 if (boost::starts_with(entry.second[0], cat_prefix)) {
57 new_dict[entry.first] = entry.second[0];
// Case 2 (presumably the else-branch; original lines 58-59 are not visible):
// start from the first name and look for a better one among the id's sets.
60 string new_ent = entry.second[0];
61 if (Member2Sets.count(entry.first) != 0)
62 for (
const auto& setid : Member2Sets.at(entry.first)) {
// Only sets that have a name entry whose first name starts with the prefix qualify.
63 if (Id2Names.count(setid) != 0 && boost::starts_with(Id2Names.at(setid)[0], cat_prefix)) {
// Take the set name if nothing prefixed was chosen yet, or if it is shorter
// than the currently chosen name.
64 if (!boost::starts_with(new_ent, cat_prefix) || new_ent.length() > Id2Names.at(setid)[0].length())
65 new_ent = Id2Names.at(setid)[0];
// With force_cat_prefix set, an id that ended up without a prefixed name is dropped.
68 if (!force_cat_prefix || boost::starts_with(new_ent, cat_prefix))
69 new_dict[entry.first] = new_ent;
72 chan_dict.push_back(new_dict);
// Non-categorical channel: keep the slot so channel indices line up.
74 else chan_dict.push_back(map<int, string>());
76 sig_dict[sig] = chan_dict;
// Builds, for every signal in `sigs`, one entry per value channel that points at
// the Name2Id map of the signal's dictionary section (categorical channels) or
// is nullptr (all other channels).
// The returned pointers refer to maps held inside rep.dict; nothing is copied.
// NOTE(review): original lines 89-90 (what happens after the MERR), 98-99
// (presumably an `else`), the closing braces and the return statement are not
// visible in this excerpt.
81 map<string, vector<map<string, int>* > > get_sig_reverse_dict() {
82 map<string, vector<map<string, int >* > > sig_dict;
83 MLOG(
"(II) Preparing signal reverse dictionary for signals\n");
84 for (
auto& sig : sigs) {
86 vector<map<string, int >* > chan_dict;
// Report signals that have no Name2Sid entry before looking them up.
87 if (rep.sigs.Name2Sid.count(sig) == 0) {
88 MERR(
"no Name2Sid entry for signal '%s'\n", sig.c_str());
91 int section_id = rep.dict.section_id(sig);
92 int sid = rep.sigs.Name2Sid[sig];
93 int n_vchan = rep.sigs.Sid2Info[sid].n_val_channels;
94 for (
int vchan = 0; vchan < n_vchan; ++vchan) {
// Categorical channel: expose the section's name -> id map by pointer.
95 if (rep.sigs.is_categorical_channel(sig, vchan))
97 chan_dict.push_back(&(rep.dict.dict(section_id)->Name2Id));
// Presumably the else-branch: no dictionary for this channel.
100 chan_dict.push_back(
nullptr);
103 sig_dict[sig] = chan_dict;
// Writes the records of every (pid, signal) pair in `pids` x `sigs` to `fname`
// as tab-separated text, one line per record.
//
// fname            - output file, opened in binary mode.
// cat_prefix,
// force_cat_prefix - forwarded to get_sig_dict() to build the id -> name maps
//                    used for categorical channels.
//
// Value channels whose dictionary is empty are written as numbers with
// setprecision(10); channels with a dictionary are written as the mapped name
// when the value (truncated to int) is present in that dictionary.
// NOTE(review): the declarations of `usv` and `outss`, original line 125, the
// handling of values missing from the dictionary (original lines 136-142, likely
// where `ignore_line` is used) and all closing braces are not visible in this
// excerpt, so the exact column layout cannot be confirmed from here.
108 void export_required_data(
const string& fname,
const string& cat_prefix,
bool force_cat_prefix) {
109 ofstream outfile(fname, ios::binary | ios::out);
// NOTE(review): the format string below has no conversion specifier, yet
// fname.c_str() is passed as an argument; "dictinaries" is also misspelled.
111 MLOG(
"(II) Preparing dictinaries to export\n", fname.c_str());
113 auto sig_dict = get_sig_dict(cat_prefix, force_cat_prefix);
115 MLOG(
"(II) Exporting required data to %s\n", fname.c_str());
119 for (
int pid : pids) {
120 for (
auto &sig : sigs) {
// Fetch this pid's records for the signal into usv.
121 rep.uget(pid, sig, usv);
122 for (
int i = 0; i < usv.len; ++i) {
// Each record starts with the pid followed by a tab.
124 outss << pid <<
'\t';
// One tab-prefixed column per time channel.
126 for (
int tchan = 0, n_tchan = usv.n_time_channels(); tchan < n_tchan; ++tchan) {
127 outss <<
'\t' << usv.Time(i, tchan);
129 bool ignore_line =
false;
130 for (
int vchan = 0, n_vchan = usv.n_val_channels(); vchan < n_vchan; ++vchan) {
// Empty dictionary means a non-categorical channel: write the raw value.
131 if (sig_dict.at(sig)[vchan].size() == 0)
132 outss <<
'\t' << setprecision(10) << usv.Val(i, vchan);
// Categorical channel: the value is truncated to int and used as dictionary key.
134 if (sig_dict.at(sig)[vchan].count((
int)(usv.Val(i, vchan))) != 0) {
135 outss <<
'\t' << sig_dict.at(sig)[vchan].at((
int)(usv.Val(i, vchan)));
// Flush the assembled text to the output file followed by a newline.
143 outfile << outss.str() <<
'\n';
// Converts a JSON request file into tab-separated text.
//
// input_json_fname  - JSON input; each top-level element is one request that
//                     holds its signal list either under ["body"]["signals"]
//                     or directly under ["signals"].
// output_data_fname - output text file, opened in binary mode.
//
// Throws runtime_error when a request has neither "body" nor "signals".
// For every entry of a signal's "data" array, the output gets the request index
// plus base_pid, then the "timestamp" entries and the "value" entries, each
// prefixed with a tab. Values of a signal whose code equals "GENDER"
// (case-insensitive) are written as "1" for "MALE" (case-insensitive) and "2"
// for anything else.
// NOTE(review): original lines 155-158 (presumably where `j` is parsed from
// `infile`), 162 (declaration of j_req_signals), 174, 181 (presumably an `else`)
// and 183+ (line termination, closing braces) are not visible in this excerpt.
150 static void convert_reqfile_to_data(
const string& input_json_fname,
const string& output_data_fname) {
151 ofstream outfile(output_data_fname, ios::binary | ios::out);
152 ifstream infile(input_json_fname, ios::binary | ios::in);
154 MLOG(
"(II) Exporting required data to %s\n", output_data_fname.c_str());
// NOTE(review): %d is paired with j.size(), which is presumably an unsigned
// size type - confirm the format specifier matches.
159 MLOG(
"(II) num of requests = %d\n", j.size());
// `pid` here is the zero-based position of the request in `j`, not a stored id.
// NOTE(review): a signed int is compared against j.size().
161 for (
int pid = 0; pid < j.size(); ++pid) {
// Accept both request layouts: wrapped in "body" or with "signals" at top level.
163 if (j[pid].count(
"body") != 0)
164 j_req_signals = j[pid][
"body"][
"signals"];
165 else if (j[pid].count(
"signals") != 0)
166 j_req_signals = j[pid][
"signals"];
// NOTE(review): "fromat" in the message below is a typo for "format".
167 else throw runtime_error(
"Unrecognized JSON fromat");
169 for (
const auto& j_sig : j_req_signals)
171 string sig = j_sig[
"code"];
172 for (
const auto& j_data : j_sig[
"data"]) {
// The id column is the request index shifted by base_pid.
173 outfile << pid + base_pid <<
'\t';
175 for (
const auto& j_time : j_data[
"timestamp"]) {
176 outfile <<
'\t' << j_time;
178 for (
const auto& j_val : j_data[
"value"]) {
// GENDER values are mapped to a numeric code: MALE -> 1, anything else -> 2.
179 if (boost::to_upper_copy(sig) ==
"GENDER")
180 outfile <<
'\t' << (boost::to_upper_copy(j_val.get<
string>()) ==
"MALE" ?
"1" :
"2");
// Presumably the else-branch: write the value as its string content.
182 outfile <<
'\t' << j_val.get<
string>();
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): presumably reads back a file in the format written by
// export_required_data() - confirm against the definition.
195 void import_required_data(
const string& fname);
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): presumably loads request data from a JSON file named by
// `fname` - confirm against the definition.
197 void import_json_request_data(
const string& fname);
// Reads a space/tab-delimited text file and registers one sample record per row.
//
// fname - input file; the first two columns of every row are parsed with stoi
//         and passed to samples.insertRec().
//
// After loading, pid2samples is filled with one entry per element of
// samples.idSamples, keyed by the element's `id` and pointing at that element.
// NOTE(review): the braces, the statements following the MERR (original lines
// 205-206) and the return statements are not visible in this excerpt, so the
// returned values cannot be documented from here.
199 int load_samples_from_dates_to_score(
const string& fname)
202 vector<vector<string>> raw_scores;
// A negative result from read_text_file_cols is treated as a read failure.
203 if (read_text_file_cols(fname,
" \t", raw_scores) < 0) {
204 MERR(
"Could not read scores file %s\n", fname.c_str());
// NOTE(review): %d is paired with raw_scores.size(), which is a size_t.
207 MLOG(
"(II) Read %d lines from scores file %s\n", raw_scores.size(), fname.c_str());
// NOTE(review): v[0] and v[1] are read without checking that the row has two
// columns, and stoi throws on non-numeric text - confirm the input is validated.
210 for (
auto &v : raw_scores)
212 samples.insertRec(stoi(v[0]), stoi(v[1]));
215 MLOG(
"(II) Prepared MedSamples\n");
// Index the records by id. The stored pointers refer to elements of
// samples.idSamples and stay usable only while those elements are not moved.
216 for (
auto &
id : samples.idSamples)
217 pid2samples[
id.
id] = &
id;
// Declaration only; the definition is not part of this excerpt.
// Parameters: am (AlgoMarker handle), pid, max_date, force_add_data,
// ignore_sig (taken by value), json_out (non-const reference, so the callee
// may modify it).
// NOTE(review): presumably feeds the data of `pid` up to `max_date` into the
// AlgoMarker while skipping the signals in `ignore_sig` - confirm against the
// definition.
221 void am_add_data(
AlgoMarker *am,
int pid,
int max_date,
bool force_add_data, vector<string> ignore_sig, json& json_out);