// Offset added to a request's index to form the pid written by convert_reqfile_to_data().
26 static const int base_pid;
// Maps id.id of each element of samples.idSamples to that element's address;
// filled by load_samples_from_dates_to_score().
33 map<int, MedIdSamples* > pid2samples;
// Result of the most recent get_sig_dict_cached() call:
// signal name -> one (id -> name) dictionary per value channel.
34 map<string, vector<map<int, string> > > sig_dict_cached;
// Declaration only; the definition is not part of this section.
// samples_fname defaults to "" and read_signals defaults to true.
// NOTE(review): judging by the parameter names alone, this takes the repository, model and
// (optional) samples file names, plus a flag controlling whether signals are read -- confirm
// against the definition.
36 void load(
const string& rep_fname,
const string& model_fname,
const string& samples_fname =
"",
bool read_signals =
true);
38 void get_sig_dict_cached(
const string& cat_prefix =
"",
bool force_cat_prefix =
false) {
39 sig_dict_cached = get_sig_dict(cat_prefix, force_cat_prefix);
42 map<string, vector<map<int, string> > > get_sig_dict(
const string& cat_prefix =
"",
bool force_cat_prefix =
false) {
43 map<string, vector<map<int, string> > > sig_dict;
44 for (
auto& sig : sigs) {
45 vector<map<int, string > > chan_dict;
46 int section_id = rep.dict.section_id(sig);
47 int sid = rep.sigs.Name2Sid[sig];
48 int n_vchan = rep.sigs.Sid2Info[sid].n_val_channels;
49 for (
int vchan = 0; vchan < n_vchan; ++vchan) {
50 if (rep.sigs.is_categorical_channel(sig, vchan)) {
51 map<int, string> new_dict;
52 const auto& Id2Names = rep.dict.dict(section_id)->Id2Names;
53 const auto& Member2Sets = rep.dict.dict(section_id)->Member2Sets;
54 for (
const auto& entry : Id2Names) {
55 if (boost::starts_with(entry.second[0], cat_prefix)) {
56 new_dict[entry.first] = entry.second[0];
59 string new_ent = entry.second[0];
60 if (Member2Sets.count(entry.first) != 0)
61 for (
const auto& setid : Member2Sets.at(entry.first)) {
62 if (Id2Names.count(setid) != 0 && boost::starts_with(Id2Names.at(setid)[0], cat_prefix)) {
63 if (!boost::starts_with(new_ent, cat_prefix) || new_ent.length() > Id2Names.at(setid)[0].length())
64 new_ent = Id2Names.at(setid)[0];
67 if (!force_cat_prefix || boost::starts_with(new_ent, cat_prefix))
68 new_dict[entry.first] = new_ent;
71 chan_dict.push_back(new_dict);
73 else chan_dict.push_back(map<int, string>());
75 sig_dict[sig] = chan_dict;
// Builds, for every signal in `sigs`, a vector with one pointer per value channel and
// stores it in the returned map under the signal name.
// For a categorical channel the pointer is the address of the Name2Id map of the object
// returned by rep.dict.dict(section_id); it is not a copy, so it is only usable while
// that object is alive.
// NOTE(review): a nullptr is also pushed -- presumably in the `else` branch for
// non-categorical channels; confirm against the full function body.
80 map<string, vector<map<string, int>* > > get_sig_reverse_dict() {
81 map<string, vector<map<string, int >* > > sig_dict;
82 MLOG(
"(II) Preparing signal reverse dictionary for signals\n");
83 for (
auto& sig : sigs) {
85 vector<map<string, int >* > chan_dict;
// A signal with no Name2Sid entry is reported through MERR.
// NOTE(review): what follows the report (throw / return / skip) is not visible here -- confirm.
86 if (rep.sigs.Name2Sid.count(sig) == 0) {
87 MERR(
"no Name2Sid entry for signal '%s'\n", sig.c_str());
90 int section_id = rep.dict.section_id(sig);
91 int sid = rep.sigs.Name2Sid[sig];
92 int n_vchan = rep.sigs.Sid2Info[sid].n_val_channels;
93 for (
int vchan = 0; vchan < n_vchan; ++vchan) {
94 if (rep.sigs.is_categorical_channel(sig, vchan))
96 chan_dict.push_back(&(rep.dict.dict(section_id)->Name2Id));
99 chan_dict.push_back(
nullptr);
// The vector of pointers is copied into the result under the signal name.
102 sig_dict[sig] = chan_dict;
// Exports the repository records of every pid in `pids` and every signal in `sigs` to the
// file `fname` as tab-separated text.
// Records are fetched with rep.uget(pid, sig, usv); text is assembled in `outss` and written
// to the file followed by '\n'.
// Each record's text starts with the pid and a tab, then every time channel preceded by a tab.
// Value channels whose dictionary (from get_sig_dict(cat_prefix, force_cat_prefix)) is empty
// are written as numbers with setprecision(10); otherwise the value is cast to int and, if
// that id is present in the dictionary, its name is written.
// NOTE(review): the declarations of `usv` and `outss`, and the handling of `ignore_line`
// (including what happens when the id is not in the dictionary), are not visible here.
107 void export_required_data(
const string& fname,
const string& cat_prefix,
bool force_cat_prefix) {
108 ofstream outfile(fname, ios::binary | ios::out);
// NOTE(review): this format string has no conversion specifier, so the fname.c_str()
// argument is unused; the message also misspells "dictionaries".
110 MLOG(
"(II) Preparing dictinaries to export\n", fname.c_str());
112 auto sig_dict = get_sig_dict(cat_prefix, force_cat_prefix);
114 MLOG(
"(II) Exporting required data to %s\n", fname.c_str());
118 for (
int pid : pids) {
119 for (
auto &sig : sigs) {
120 rep.uget(pid, sig, usv);
121 for (
int i = 0; i < usv.len; ++i) {
123 outss << pid <<
'\t';
125 for (
int tchan = 0, n_tchan = usv.n_time_channels(); tchan < n_tchan; ++tchan) {
126 outss <<
'\t' << usv.Time(i, tchan);
128 bool ignore_line =
false;
129 for (
int vchan = 0, n_vchan = usv.n_val_channels(); vchan < n_vchan; ++vchan) {
// An empty dictionary for this channel means the raw numeric value is written.
130 if (sig_dict.at(sig)[vchan].size() == 0)
131 outss <<
'\t' << setprecision(10) << usv.Val(i, vchan);
// Otherwise the value is truncated to int and looked up as a dictionary id.
133 if (sig_dict.at(sig)[vchan].count((
int)(usv.Val(i, vchan))) != 0) {
134 outss <<
'\t' << sig_dict.at(sig)[vchan].at((
int)(usv.Val(i, vchan)));
142 outfile << outss.str() <<
'\n';
// Converts a JSON request file (`input_json_fname`) into tab-separated text written to
// `output_data_fname`.
// For each element j[pid] the signal list is taken from j[pid]["body"]["signals"] when a
// "body" key exists, else from j[pid]["signals"]; if neither key exists a runtime_error is thrown.
// For every item of a signal's "data" the output gets `pid + base_pid` and a tab, then each
// "timestamp" item preceded by a tab, then each "value" item preceded by a tab.
// When the signal code is "GENDER" (compared in upper case) a value equal to "MALE"
// (compared in upper case) is written as "1" and any other value as "2".
// NOTE(review): `j` and `j_req_signals` are declared in lines not visible here; `j` is
// presumably parsed from `infile` -- confirm.
// NOTE(review): the plain j_val.get<string>() write is presumably the `else` branch of the
// GENDER test -- confirm.
149 static void convert_reqfile_to_data(
const string& input_json_fname,
const string& output_data_fname) {
150 ofstream outfile(output_data_fname, ios::binary | ios::out);
151 ifstream infile(input_json_fname, ios::binary | ios::in);
153 MLOG(
"(II) Exporting required data to %s\n", output_data_fname.c_str());
// NOTE(review): "%d" is paired with j.size(); if that returns size_t the specifier does not
// match the argument type -- confirm and cast or use the matching specifier.
158 MLOG(
"(II) num of requests = %d\n", j.size());
// The loop index doubles as the request's pid (before base_pid is added).
// NOTE(review): `int pid` is compared against j.size(); mixed signedness if size() is unsigned.
160 for (
int pid = 0; pid < j.size(); ++pid) {
162 if (j[pid].count(
"body") != 0)
163 j_req_signals = j[pid][
"body"][
"signals"];
164 else if (j[pid].count(
"signals") != 0)
165 j_req_signals = j[pid][
"signals"];
// NOTE(review): the exception text misspells "format".
166 else throw runtime_error(
"Unrecognized JSON fromat");
168 for (
const auto& j_sig : j_req_signals)
170 string sig = j_sig[
"code"];
171 for (
const auto& j_data : j_sig[
"data"]) {
172 outfile << pid + base_pid <<
'\t';
174 for (
const auto& j_time : j_data[
"timestamp"]) {
175 outfile <<
'\t' << j_time;
177 for (
const auto& j_val : j_data[
"value"]) {
178 if (boost::to_upper_copy(sig) ==
"GENDER")
179 outfile <<
'\t' << (boost::to_upper_copy(j_val.get<
string>()) ==
"MALE" ?
"1" :
"2");
181 outfile <<
'\t' << j_val.get<
string>();
// Declaration only; the definition is not part of this section.
// NOTE(review): by its name this presumably reads back a file produced by
// export_required_data() -- confirm against the definition.
194 void import_required_data(
const string& fname);
// Declaration only; the definition is not part of this section.
// NOTE(review): by its name this presumably loads request data from the JSON file `fname`
// -- confirm against the definition.
196 void import_json_request_data(
const string& fname);
// Reads the text file `fname` into rows of columns (delimiters: space and tab) and fills
// `samples` from it: each row is passed to samples.insertRec(stoi(v[0]), stoi(v[1])).
// Afterwards pid2samples maps id.id of every element of samples.idSamples to that
// element's address.
// A negative result from read_text_file_cols() is reported through MERR.
// NOTE(review): the return statements (including the error path) are not visible here.
// NOTE(review): pid2samples stores addresses of elements of samples.idSamples; they are
// only valid while those elements are not moved or destroyed -- confirm nothing modifies
// samples.idSamples afterwards.
198 int load_samples_from_dates_to_score(
const string& fname)
201 vector<vector<string>> raw_scores;
202 if (read_text_file_cols(fname,
" \t", raw_scores) < 0) {
203 MERR(
"Could not read scores file %s\n", fname.c_str());
// NOTE(review): raw_scores.size() is a size_t but is printed with "%d".
206 MLOG(
"(II) Read %d lines from scores file %s\n", raw_scores.size(), fname.c_str());
// NOTE(review): stoi throws on non-numeric text and v[1] needs at least two columns;
// no validation of either is visible here -- confirm.
209 for (
auto &v : raw_scores)
211 samples.insertRec(stoi(v[0]), stoi(v[1]));
214 MLOG(
"(II) Prepared MedSamples\n");
// Index the per-id sample groups by their id for later lookup.
215 for (
auto &
id : samples.idSamples)
216 pid2samples[
id.
id] = &
id;
// Declaration only; the definition is not part of this section.
// `ignore_sig` is taken by value and `json_out` by non-const reference.
// NOTE(review): by the names, this presumably feeds patient `pid`'s data up to `max_date`
// into the AlgoMarker `am`, skipping the signals in `ignore_sig` and reporting through
// `json_out` -- confirm against the definition.
220 void am_add_data(
AlgoMarker *am,
int pid,
int max_date,
bool force_add_data, vector<string> ignore_sig, json& json_out);