Medial Code Documentation
Loading...
Searching...
No Matches
MedCohort.h
1#pragma once
2#ifndef __MED_COHORT_H__
3#define __MED_COHORT_H__
4
5//===================================================================================
19//===================================================================================
20
21
22#include <vector>
24#include <InfraMed/InfraMed/InfraMed.h>
25#include <MedProcessTools/MedProcessTools/MedSamples.h>
26
27using namespace std;
28
29
30//===================================================================================
37//===================================================================================
39 int pid = -1;
40 int from = 0;
41 int to = 0;
42 int outcome_date = 0;
43 float outcome = -1;
44 string comments = "";
45
49 CohortRec(int _pid, int _from, int _to, int _outcome_date, float _outcome) {
50 pid = _pid; from = _from; to = _to; outcome_date = _outcome_date; outcome = _outcome;
51 }
52
54 int init(map<string, string>& map);
55
57 void get_string(string &to_str);
60 int from_string(string &from_str);
61};
62
63//===================================================================================
66//===================================================================================
69 float max_control_years = 10;
70 float min_case_years = 0;
71 float max_case_years = 1;
72 int is_continous = 1;
74 int jump_days = 180;
75 int min_year = 1900;
76 int max_year = 2100;
77 int gender_mask = 0x3;
78 int train_mask = 0x7;
79 int min_age = 0;
80 int max_age = 200;
81 string rep_fname;
82
84 vector<string> stick_to_sigs;
85 int take_closest = 0;
86 int take_all = 0;
87 int max_samples_per_id = (int)1e9;
88 string max_samples_per_id_method = "last";
89
91 int init(map<string, string>& map);
92};
93
94//===================================================================================
97//===================================================================================
99 int from_year = 2007;
100 int to_year = 2013;
101 int start_date = 101;
102 int from_age = 30;
103 int to_age = 90;
104 int age_bin = 5;
106 int incidence_days_win = -1;
108 int gender_mask = 0x3;
109 int train_mask = 0x7;
110 string rep_fname;
111
112
114 int init(map<string, string>& map);
115};
116
117//===================================================================================
120//===================================================================================
122
123 public:
124 vector<CohortRec> recs;
125
129 void insert(int pid, int from, int to, int outcome_date, float outcome) { recs.push_back(CohortRec(pid, from, to, outcome_date, outcome)); }
130
133 int read_from_file(string fname);
136 int write_to_file(string fname);
137
140 int read_from_bin_file(string fname) { return SerializableObject::read_from_file(fname); }
143 int write_to_bin_file(string fname) { return SerializableObject::write_to_file(fname); }
144
146 void get_pids(vector<int> &pids);
147
148 //int print_general_stats();
149
156 int create_incidence_file(IncidenceParams &i_params, string out_file, const string &debug_file = "");
157
164 int create_sampling_file(SamplingParams &s_params, string out_sample_file);
165 int create_samples(MedRepository& rep, SamplingParams &s_params, MedSamples& samples);
166
173 int create_sampling_file_sticked(SamplingParams &s_params, string out_sample_file);
174 int create_samples_sticked(MedRepository& rep, SamplingParams &s_params, MedSamples& samples);
175};
176
177
178//===================================================================================
179// A few more MedSamples Helpers
180
181// Scanner ::
182// Given a MedSamples file , allows defining a sub-sample of it (can be all),
183// And define a list of tests and a list of base_tests
184//
185// The Scanner then allows the following:
186// (1) Count for every test / base_test how many had at least N tests in a window W.
187// This helps in considering only variables that HAVE data.
188// (2) Train a model M for each of:
189// - base_tests only
190// - base_tests + single test
191// -> for all train/test group
192// -> only for the subgroup of points that HAS no missing values (and compare to the base just on those)
193//
194
195
196#endif
An Abstract class that can be serialized and written/read from file.
MedCohort - a vector of CohortRec's.
Definition MedCohort.h:121
void get_pids(vector< int > &pids)
Get all pids.
Definition MedCohort.cpp:206
int read_from_bin_file(string fname)
Read from binary file.
Definition MedCohort.h:140
int write_to_file(string fname)
Write to tab-delimited file.
Definition MedCohort.cpp:183
int read_from_file(string fname)
Read from tab-delimited file.
Definition MedCohort.cpp:148
int write_to_bin_file(string fname)
Write to binary file.
Definition MedCohort.h:143
int create_sampling_file_sticked(SamplingParams &s_params, string out_sample_file)
Generate a samples file from cohort + sampling-params. Generate samples within cohort times that fit...
Definition MedCohort.cpp:657
void insert(int pid, int from, int to, int outcome_date, float outcome)
Add a record.
Definition MedCohort.h:129
int create_sampling_file(SamplingParams &s_params, string out_sample_file)
Generate a samples file from cohort + sampling-params. Generate samples within cohort times that fit...
Definition MedCohort.cpp:516
int create_incidence_file(IncidenceParams &i_params, string out_file, const string &debug_file="")
Generate an incidence file from cohort + incidence-params. Check all patient-years within cohort tha...
Definition MedCohort.cpp:217
vector< CohortRec > recs
Cohort information.
Definition MedCohort.h:124
Definition InfraMed.h:303
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
Definition SerializableObject.h:32
virtual int write_to_file(const string &fname)
serialize model and write to file
Definition SerializableObject.cpp:92
virtual int read_from_file(const string &fname)
read and deserialize model
Definition SerializableObject.cpp:86
Definition StdDeque.h:58
CohortRec : a single entry within a cohort; includes:
Definition MedCohort.h:38
int from_string(string &from_str)
Get a cohort rec from a tab-delimited string.
Definition MedCohort.cpp:57
string comments
additional option for comments
Definition MedCohort.h:44
void get_string(string &to_str)
Represent a cohort as a tab-delimited string.
Definition MedCohort.cpp:49
int to
Followup end.
Definition MedCohort.h:41
CohortRec(int _pid, int _from, int _to, int _outcome_date, float _outcome)
Constructor with initialization.
Definition MedCohort.h:49
float outcome
Outcome.
Definition MedCohort.h:43
int from
Followup start.
Definition MedCohort.h:40
int pid
Patient Id.
Definition MedCohort.h:39
int outcome_date
Date(Time) at which outcome is given.
Definition MedCohort.h:42
CohortRec()
empty constructor
Definition MedCohort.h:47
int init(map< string, string > &map)
Initialize from a map.
Definition MedCohort.cpp:31
IncidenceParams: Parameters for calculating incidence from repository + cohort.
Definition MedCohort.h:98
int to_year
last year to consider in calculating incidence
Definition MedCohort.h:100
int to_age
maximal age to consider
Definition MedCohort.h:103
string rep_fname
Repository configuration file.
Definition MedCohort.h:110
int age_bin
binning of ages
Definition MedCohort.h:104
int init(map< string, string > &map)
Initialize from a map.
Definition MedCohort.cpp:120
int from_age
minimal age to consider (the generated text here, "the date in each year to start looking from (default is 0101), format is MMDD", describes start_date, not from_age)
Definition MedCohort.h:102
int min_samples_in_bin
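if -1: using incidence_years_window (this note appears to describe incidence_days_win, which defaults to -1, rather than min_samples_in_bin)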
Definition MedCohort.h:107
int from_year
first year to consider in calculating incidence
Definition MedCohort.h:99
int gender_mask
mask for gender specification (rightmost bit on for male, second for female)
Definition MedCohort.h:108
int train_mask
mask for TRAIN-value specification (three rightmost bits for TRAIN = 1,2,3)
Definition MedCohort.h:109
int incidence_years_window
how many years ahead do we consider an outcome?
Definition MedCohort.h:105
SamplingParams : Parameters for sampling from repository + cohort.
Definition MedCohort.h:67
int max_year
last year for sampling
Definition MedCohort.h:76
int train_mask
mask for TRAIN-value specification (three rightmost bits for TRAIN = 1,2,3)
Definition MedCohort.h:78
int min_year
first year for sampling
Definition MedCohort.h:75
int take_all
flag: take all samples with the required signal within each sampling period
Definition MedCohort.h:86
int gender_mask
mask for gender specification (rightmost bit on for male, second for female)
Definition MedCohort.h:77
int init(map< string, string > &map)
Initialize from a map.
Definition MedCohort.cpp:83
string rep_fname
Repository configuration file.
Definition MedCohort.h:81
string max_samples_per_id_method
determine how to pick samples - 'last' or 'rand'
Definition MedCohort.h:88
float max_control_years
maximal number of years before outcome for controls
Definition MedCohort.h:69
int max_samples_per_id
maximum samples per ID
Definition MedCohort.h:87
int take_closest
flag: take the sample with required signals that is closest to each target sampling-date
Definition MedCohort.h:85
float min_control_years
minimal number of years before outcome for controls
Definition MedCohort.h:68
int max_age
maximum age for sampling
Definition MedCohort.h:80
float min_case_years
minimal number of years before outcome for cases
Definition MedCohort.h:70
int min_age
minimum age for sampling
Definition MedCohort.h:79
int min_days_from_outcome
minimal number of days before outcome
Definition MedCohort.h:73
int is_continous
continuous mode of sampling vs. stick to (0 = stick)
Definition MedCohort.h:72
int jump_days
days to jump between sampling periods
Definition MedCohort.h:74
float max_case_years
maximal number of years before outcome for cases
Definition MedCohort.h:71
vector< string > stick_to_sigs
sticking related. if none of take_closest/take_all is on, a random sample with required-signal withi...
Definition MedCohort.h:84