Medial Code Documentation
Loading...
Searching...
No Matches
MedPidRepository.h
1//
2// MedPidRepository :
3// Extending the MedRepository data structure to support creating and using a repository
4// organized as patient by patient rather than signal by signal.
5// This allows for (Very) fast retrieval of ALL the signals for a certain pid
6// Also - this allows for memory efficient use of a repository in cases where a sweep over all (or part of) patients is needed
7// This helps when creating feature matrices for a patient.
8//
9// Tools for handling thread safe reading of data using a predefined amount of memory to use are also available here.
10// The general idea is holding a pre allocated cyclic buffer for the data (enough such that there's always free space for the next ones needed by the next threads.)
11//
12
13#ifndef __MED_PID_REPOSITORY_
14#define __MED_PID_REPOSITORY_
15
16#include "InfraMed.h"
17#include <InfraMed/InfraMed/MedSparseVec.h>
18
19#define MAX_PID_DATA_SIZE 10000000
21
// PosLen describes where one signal (or one version of a signal) sits inside a
// record's data buffer: a position, a length, and a split flag.
class PosLen {
public:
	int pos;
	int len;
	int do_split;	// if 1, means that when updating this pos,len we need to move version to a new allocated memory. if 0 , it may move due to other reasons.

	// Sets both pos and len to the same integer value.
	// do_split is deliberately left untouched, exactly as before.
	PosLen& operator =(const int a) { pos = a; len = a; return *this; }

	// Two entries are equal when they have the same pos and len; do_split is
	// not part of the comparison. Const-qualified so const objects can be compared.
	bool operator==(const PosLen &a) const { return (pos == a.pos && len == a.len); }
};
30
// PidIdxRec is one entry of the pid -> file-location index: a file number,
// a position, a byte length and an index (idx == -1 in a fresh record).
class PidIdxRec {
public:
	unsigned short fnum;
	unsigned long long pos;
	unsigned int byte_len;
	int idx;

	// Sets every field to the same integer value (converted to each field's type).
	PidIdxRec& operator =(const int a) {
		fnum = static_cast<unsigned short>(a);
		pos = static_cast<unsigned long long>(a);
		byte_len = static_cast<unsigned int>(a);
		idx = a;
		return *this;
	}

	// All fields get a defined value; previously only idx was set and the
	// others were left indeterminate.
	PidIdxRec() : fnum(0), pos(0), byte_len(0), idx(-1) {}
};
42
43class PidRec {
44 public:
45 int pid; // pid num of this record
46 unsigned char *data; // pointer to actual record in memory
47 unsigned int data_len; // actual size used (always <= data_size)
48 unsigned int data_size; // max size available
49 int is_allocated; // was the space for data allocated (and hence we need to free it) or not (allocated some other place).
50 MedSparseVec<PosLen> sv; // from serial sid to a pair of pos, len
51 MedPidRepository *my_rep; // needed for the get() method in order to get access to dictionaries
52 MedRepository *my_base_rep; // needed for the get() method in order to get access to dictionaries
53 int allow_realloc; // allow reallocation of data in read if given not enough space
54
55 PidRec() { pid = -1; data = NULL; data_len = 0; data_size = 0; is_allocated = 0; my_rep = NULL; sv.clear(); allow_realloc = 1; }
56
57 // after reading the data to *data this operation is needed to build the sparse vec from (serial) sid to PosLen
58 int init_sv();
59
60 // get methods - no need for pid just by signal
61 void *get(const string &sig_name, int &len);
62 void *get(int sid, int &len);
63
64 // universal API
65 UniversalSigVec usv; // we keep a usv inside, to allow saving of the init() time
66 inline void *uget(int sid, UniversalSigVec &_usv) { _usv.init(my_base_rep->sigs.Sid2Info.at(sid)); return (_usv.data = get(sid, _usv.len)); }
67 inline void *uget(int sid) { return uget(sid, usv); }
68 inline void *uget(const string &sig_name, UniversalSigVec &_usv) { return uget(my_base_rep->sigs.sid(sig_name), _usv); }
69 inline void *uget(const string &sig_name) { return uget(sig_name, usv); }
70
71
72 int init_from_rep(MedRepository *rep, int pid, vector<int> &sids_to_use);
73
74 // memory alloc & free
75 void prealloc(unsigned int len);
76 int realloc(unsigned int len);
77 int resize_data(unsigned int len) { return realloc(len); }
78 void set_data_to_buffer() { data = &data_buffer[0]; }
79 void free();
80
81 private:
82 vector<unsigned char> data_buffer; // the actual holder of the data when using prealloc/realloc/resize_data
83 // this makes for much easier data garbage collection (no need for free)
84};
85
86
88
89 public:
90
91 MedSparseVec<PidIdxRec> pids_idx; // sparse vec from pid to an index record on files
92 vector<MedBufferedFile> in_files; // keeping open input files
93
94
95 int init(const string &conf_fname); // when using MedPidRepository, init it with this API, then use the load() APIs in MedRepository to load full signals.
96
97 // creating the "by pid" index and data files for a range of given pids with at most "jump" pids in each file
98 int create(string &rep_fname, int from_pid, int to_pid, int jump);
99
100 // get data size of a pid (0 => pid not in data)
101 unsigned int get_data_size(int pid);
102
103 // if data is NULL it will be allocated and data_size will be the allocated size
104 // if data is not NULL data_size should contain the max size allowed on input, and on output contains the actual size used
105 // general error : -1
106 // error due to insufficient data_size (non NULL data) : -2
107 int get_pid_rec(int pid, unsigned char *&data, unsigned int &data_size, PidRec &prec);
108
109 // same but simpler API, will use the data and data_len inside the prec instead
110 // it is recommended for use with pre allocation of enough space in prec.data when going to reuse the same prec for reads.
111 int get_pid_rec(int pid, PidRec &prec);
112
114
115};
116
117
118//
119// PidDynamicRec is a PidRec with additional options to have "versions" for each signal
120// These "versions" are initially all pointing to the original signal version.
121// A new version can be created out of an existing one by changing it or loading it.
122// This allows for example for complex scenarios in cleaning and feature generation in which
123// a version of the signal per time point is needed.
124// A PidDynamicRec is a PidRec and hence can be read from a repository using the get_pid_rec() API.
125// Another option to initialize a PidDynamicRec is to use init_from_rep , which will load it from repository data that's already in memory.
126//
127class PidDynamicRec : public PidRec {
128
129public:
130
131 int set_n_versions(int n_ver); // a version is always a positive (>0) number. 0 is kept as the version number of the original data.
132 // this method should always be called AFTER the original version had been read.
133 int set_n_versions(vector<int> &time_points); // works with init_from_rep version that uses time_points , see below
134
135 int get_n_versions() { return n_versions; }
136
137 // calling get without version is defined in PidRec and will return the original version
138 void *get(const string &sig_name, int version, int &len);
139 void *get(int sid, int version, int &len);
140 void *get(int sid, int &len) { return PidRec::get(sid, len); }
141
142 // universal API
143 inline void *uget(int sid, int version, UniversalSigVec &_usv) { _usv.init(my_base_rep->sigs.Sid2Info.at(sid)); return (_usv.data = get(sid, version, _usv.len)); }
144 inline void *uget(int sid, int version) { return uget(sid, version, usv); }
145 inline void *uget(const string &sig_name, int version, UniversalSigVec &_usv) { int sid = my_base_rep->sigs.sid(sig_name); if (sid < 0) HMTHROW_AND_ERR("Error: can't find signal %s in repository\n", sig_name.c_str()); return uget(sid, version, _usv); }
146 inline void *uget(const string &sig_name, int version) { return uget(sig_name, version, usv); }
147
148 // clearing
149 void clear_vers(); // deletes all versions and remains just with the original one.
150
151 // creating and changing versions
152 int set_version_data(int sid, int version, void *datap, int len);
153 int set_version_off_orig(int sid, int version); // if the version is still pointing the original area, we will make a copy of it outside
154 int point_version_to(int sid, int v_src, int v_dst); // will point version v_dst to the data of version v_src
155 int remove(int sid, int version, int idx); // removing element idx from version
156 int remove(int sid, int v_in, int idx, int v_out); // removing element idx from version v_in and putting it in v_out
157 int change(int sid, int version, int idx, void *new_elem); // changing element idx in version to hold *new_elem
158 int change(int sid, int v_in, int idx, void *new_elem, int v_out); // changing element idx in v_in to *new_elem, and putting it all in v_out
159 int update(int sid, int v_in, vector<pair<int, void *>>& changes, vector<int>& removes); // Apply changes and removals
160 int update(int sid, int v_in, int val_channel, vector<pair<int, float>>& changes, vector<int>& removes); // Apply val changes and removals, unified variant
161 int update(int sid, int v_in, vector<pair<int, vector<float>>>& changes, vector<int>& removes);
162
163 // an API to push a data vector universal signal style , given all its value and time channels
164 // when there are several channels (for time or values) , they should be placed one after the other for each member
165 // so for example a *_times may have : t_0_ch0, t_0_ch1, t_1_ch0, t_1_ch1, ...
166 int set_version_universal_data(int sid, int version, int *_times, float *_vals, int len);
167
168 // test if two versions point to the same place in memory
169 int versions_are_the_same(int sid, int v1, int v2) { return ((int)((*get_poslen(sid, v1)) == (*get_poslen(sid, v2)))); }
170 int versions_are_the_same(set<int> sids, int v1, int v2) {
171 for (int sid : sids)
172 if (!versions_are_the_same(sid, v1, v2))return(0);
173 return(1);
174 }
175 // a few debug helpers
176 int print_ver(int sid, int ver);
177 int print_all_vers(int sid);
178 int print_all(); // print all len > 0 signals in all vers
179 int print_sigs(const vector<string> &sigs); // print some sigs we need (debugging tool)
180
181 PidDynamicRec() { n_versions = 0; }
182
183 // next are options to init a PidDynamicRec from data that already resides in some part of a MedRepository that is already in memory.
184
185 // most general initialization - just stating how many versions
186 int init_from_rep(MedRepository *rep, int pid, vector<int> &sids_to_use, int n_versions);
187
188 // init with time points: given N time points (in the SAME time units of the signals), this will initialize N versions for each signal
189 // BUT will also set the len of each version such that all the data points for this signal are <= the matching time point.
190 // This is very useful, as now version(i) will be a virtual snapshot of how the data looks if looking only at times <= time_point(i).
191 int init_from_rep(MedRepository *rep, int pid, vector<int> &sids_to_use, vector<int> &time_points);
192
193
194 vector<UniversalSigVec> usvs;
195
196private:
197 int n_versions;
198 unsigned int curr_len;
199 MedSparseVec<PosLen> sv_vers;
200 PosLen *get_poslen(int sid, int version) { if (version >= n_versions) return NULL; return sv_vers.get((unsigned int)(my_base_rep->sigs.sid2serial[sid])*n_versions+version); }
201 void set_poslen(int sid, int version, PosLen pl) { sv_vers[(unsigned int)my_base_rep->sigs.sid2serial[sid]*n_versions+version] = pl; }
202};
203
204
205//
206// Dynamic Version iterator allows iteration over versions in two manners -
207// 1. For version-dependent operations - forward iterate over all versions
208// 2. For version-independent operations - backward iterate only over versions that are already different
209//
210// Methods are -
211// constructors
212// init : Get first version to work on
213// next : Get next version to work on
214// done : have we analyzed all versions ?
215//
216
218
219protected:
220 PidDynamicRec *my_rec;
221 int iVersion;
222 set<int> signalIds;
223
224public:
225 versionIterator(PidDynamicRec& _rec, int signalId) { my_rec = &_rec; signalIds = { signalId }; }
226 versionIterator(PidDynamicRec& _rec, set<int>& _signalIds) { my_rec = &_rec; signalIds = _signalIds; }
227
228 virtual int init() { return 0; };
229 virtual int next() { return 0; };
230 virtual bool done() { return true; };
231};
232
234
235public:
236 allVersionsIterator(PidDynamicRec& _rec, int signalId) : versionIterator(_rec, signalId) {}
237 allVersionsIterator(PidDynamicRec& _rec, set<int>& _signalIds) : versionIterator(_rec, _signalIds) {}
238
239 int init() { iVersion = 0; return iVersion; }
240 int next() { return ++iVersion; }
241 bool done() { return iVersion >= my_rec->get_n_versions(); }
242};
243
245
246 int jVersion;
247
248public:
249 differentVersionsIterator(PidDynamicRec& _rec, int signalId) : versionIterator(_rec, signalId) {}
250 differentVersionsIterator(PidDynamicRec& _rec, set<int>& _signalIds) : versionIterator(_rec, _signalIds) {}
251
252 int init();
253 int next();
254 bool done() { return iVersion < 0; }
255
256 inline int block_first() { return jVersion+1; }
257 inline int block_last() { return iVersion; }
258};
259
260#endif
261
Definition MedPidRepository.h:87
Definition InfraMed.h:303
Definition MedSparseVec.h:69
Definition MedPidRepository.h:127
int set_version_universal_data(int sid, int version, int *_times, float *_vals, int len)
Apply val changes to all channels + removals.
Definition MedPidRepository.cpp:639
Definition MedPidRepository.h:31
Definition MedPidRepository.h:43
Definition MedPidRepository.h:22
Definition MedPidRepository.h:233
Definition MedPidRepository.h:244
Definition MedPidRepository.h:217