Medial Code Documentation
Loading...
Searching...
No Matches
MedMat_imp.h
1//
2// templated code for MedMat class, included after class definition
3//
4
5#ifndef _CRT_SECURE_NO_WARNINGS
6#define _CRT_SECURE_NO_WARNINGS
7#endif
8
9#define MED_FLT_EPSILON 1.192092896e-07F
10
11//...........................................................................................
12template <class T> template <class S> void MedMat<T>::load(S *x, int n_rows, int n_cols)
13{
14 nrows = n_rows;
15 ncols = n_cols;
16 m.resize(nrows*ncols);
17
18 for (size_t i=0; i<nrows; i++)
19 for (size_t j=0; j<ncols; j++)
20 set(i,j) = (T)x[i*ncols + j];
21 transposed_flag = 0;
22}
23
24//...........................................................................................
25template <class T> template <class S> void MedMat<T>::load_transposed(S *x, int n_rows, int n_cols)
26{
27 nrows = n_cols;
28 ncols = n_rows;
29 m.resize(nrows*ncols);
30
31 for (size_t i=0; i<n_rows; i++)
32 for (size_t j=0; j<n_cols; j++)
33 set(j,i) = (T)x[i*n_cols + j];
34 transposed_flag = 1;
35}
36
37//...........................................................................................
38template <class T> template <class S> void MedMat<T>::load(const vector<S> &x, int n_cols)
39{
40 ncols = n_cols;
41 if (ncols == 0)
42 return;
43 nrows = x.size()/ncols;
44 m.resize(nrows*ncols);
45
46 for (size_t i=0; i<nrows; i++)
47 for (size_t j=0; j<ncols; j++)
48 set(i,j) = (T)x[i*ncols + j];
49}
50
51//...........................................................................................
52// also copies metadata
53template <class T> template <class S> void MedMat<T>::load(MedMat<S> &x)
54{
55 ncols = x.ncols;
56 nrows = x.nrows;
57 m.resize(nrows*ncols);
58
59 for (size_t i=0; i<nrows; i++)
60 for (size_t j=0; j<ncols; j++)
61 set(i,j) = (T)x(i,j);
62
63 signals.clear();
64 signals = x.signals;
65 recordsMetadata.clear();
66 recordsMetadata = x.recordsMetadata;
67
68}
69
70//...........................................................................................
71template <class T> void MedMat<T>::transpose()
72{
73 vector<T> m_orig = m;
74 size_t ncols_orig = ncols;
75
76 swap(ncols,nrows);
77
78 for (size_t i=0; i<nrows; i++)
79 for (size_t j=0; j<ncols; j++)
80 set(i,j) = m_orig[j*ncols_orig+i];
81 transposed_flag = 1 - transposed_flag;
82}
83
84//...........................................................................................
85// empty list means - take them all
86// taking rows/cols in the order they were given
87// duplicated numbers will be duplicated in rows/cols.
88// also sublists metadata
89template <class T> void MedMat<T>::get_sub_mat(vector<int> &rows_to_take, vector<int> &cols_to_take)
90{
91 size_t new_n_rows = (rows_to_take.size() == 0 ? nrows : (int) rows_to_take.size());
92 size_t new_n_cols = (cols_to_take.size() == 0 ? ncols : (int) cols_to_take.size());
93
94 vector<T> m_orig = m; // copying
95 vector<RecordData> r_orig = recordsMetadata;
96 vector<string> c_orig = signals;
97 recordsMetadata.clear(); signals.clear();
98
99 m.resize(new_n_rows*new_n_cols);
100
101 for (size_t i=0; i<new_n_rows; i++) {
102 size_t r = (rows_to_take.size() == 0 ? i : rows_to_take[i]);
103 if (r_orig.size() > 0)
104 recordsMetadata.push_back(r_orig[r]);
105 for (size_t j=0; j<new_n_cols; j++) {
106 size_t c = (cols_to_take.size() == 0 ? j : cols_to_take[j]);
107 if ((i == 0) && (c_orig.size() > 0))
108 signals.push_back(c_orig[c]);
109
110 m[i*new_n_cols + j] = m_orig[r*ncols+c];
111 }
112 }
113
114 ncols = new_n_cols;
115 nrows = new_n_rows;
116}
117
118//...........................................................................................
119template <class T> void MedMat<T>::get_sub_mat_by_flags(vector<int> &rows_to_take_flag, vector<int> &cols_to_take_flag)
120{
121 vector<int> rows_to_take;
122 vector<int> cols_to_take;
123
124 flags_to_indexes(rows_to_take_flag, rows_to_take);
125 flags_to_indexes(cols_to_take_flag, cols_to_take);
126
127 get_sub_mat(rows_to_take, cols_to_take);
128}
129
130//...........................................................................................
131template <class T> template <class S> void MedMat<T>::add_rows(MedMat<S> &m_add)
132{
133 if (recordsMetadata.size() > 0 || m_add.recordsMetadata.size() > 0)
134 throw runtime_error("concating matrices with metadata is not supported yet");
135 if (ncols != m_add.ncols)
136 throw runtime_error("can not concat matrices with different number of cols");
137
138 add_rows(&m_add.m[0], m_add.nrows);
139}
140
141//...........................................................................................
142template <class T> template <class S> void MedMat<T>::add_rows(S *m_add, int nrows_to_add)
143{
144 if (nrows_to_add <= 0)
145 return;
146
147 m.resize((nrows+(size_t)nrows_to_add)*ncols);
148 for (size_t j=0; j<nrows_to_add*ncols; j++)
149 m[ncols*nrows+j] = (T)m_add[j];
150 nrows += nrows_to_add;
151}
152
153//...........................................................................................
154template <class T> template <class S> void MedMat<T>::add_rows(vector<S> &m_add)
155{
156 if (ncols == 0 || (int)(m_add.size() % ncols) != 0)
157 return;
158
159 int nrows_to_add =(int) (m_add.size()/ncols);
160
161 add_rows(&m_add[0], nrows_to_add);
162
163}
164
165//...........................................................................................
166template <class T> template <class S> void MedMat<T>::add_cols(MedMat<S> &m_add)
167{
168 if (signals.size() > 0 || m_add.signals.size() > 0)
169 throw runtime_error("concating matrices with metadata is not supported yet");
170 if (m_add.nrows != nrows)
171 throw runtime_error("can not concat matrices with different number of rows");
172 add_cols(&m_add.m[0], m_add.ncols);
173}
174
175//...........................................................................................
176// packed as nrows x ncols_to_add
177template <class T> template <class S> void MedMat<T>::add_cols(S *m_add, int ncols_to_add)
178{
179 if (ncols_to_add == 0)
180 return;
181
182 vector<T> m_orig = m;
183
184 size_t new_ncols = ncols+ncols_to_add;
185
186 m.resize(nrows*new_ncols);
187 for (size_t i=0; i<nrows; i++) {
188 for (size_t j=0; j<ncols; j++)
189 m[i*new_ncols+j] = (T)m_orig[i*ncols+j];
190 for (size_t j=0; j<ncols_to_add; j++)
191 m[i*new_ncols+ncols+j] = (T)m_add[i*(size_t)ncols_to_add + j];
192 }
193
194 ncols = new_ncols;
195}
196
197//...........................................................................................
198template <class T> template <class S> void MedMat<T>::add_cols(vector<S> &m_add)
199{
200 if (nrows == 0 || (int)(m_add.size() % nrows) != 0)
201 return;
202
203 int ncols_to_add = (int)(m_add.size()/nrows);
204 add_cols(&m_add[0], ncols_to_add);
205}
206
207//...........................................................................................
208template <class T> void MedMat<T>::reorder_by_row(vector<int> &row_order)
209{
210 get_sub_mat(row_order, vector<int>());
211}
212
213//...........................................................................................
214template <class T> void MedMat<T>::reorder_by_col(vector<int> &col_order)
215{
216 get_sub_mat(vector<int>(), col_order);
217}
218
219//...........................................................................................
220template <class T> void MedMat<T>::get_row(int i_row, vector<T> &rowv) const
221{
222 rowv.resize(ncols);
223 if (ncols > 0 && nrows > 0)
224 memcpy(&(rowv[0]),&(m[(size_t)i_row*ncols]),ncols*sizeof(T));
225}
226
227//...........................................................................................
228template <class T> void MedMat<T>::get_col(int i_col, vector<T> &colv) const
229{
230 colv.resize(nrows);
231 if (ncols > 0 && nrows > 0) {
232 for (size_t i=0; i<nrows; i++)
233 colv[i] = m[i*ncols + (size_t)i_col];
234 }
235
236}
237
238/*
239#define MEDMAT_MAGIC_NUMBER 0x0011223344556677
240//...........................................................................................
241template <class T> size_t MedMat<T>::get_size()
242{
243 size_t size = 0;
244
245 // 2 magic nums
246 size += 2*sizeof(unsigned long long);
247
248 // sizeof(T)
249 size += sizeof(int);
250
251 // matrix nrows, ncols, normalized_flag, transposed_flag, missing value
252 size += 4*sizeof(int);
253 size += sizeof(T);
254
255 // matrix
256 size += sizeof(T)*nrows*ncols;
257
258 // avg
259 size += sizeof(int);
260 size += sizeof(T)*avg.size();
261
262 // std
263 size += sizeof(int);
264 size += sizeof(T)*std.size();
265
266 // recordsMetadata
267 size += sizeof(int);
268 size += sizeof(RecordData) * recordsMetadata.size();
269
270 // signals
271 size += sizeof(int);
272 for (int i=0; i<(int)signals.size(); i++)
273 size += signals[i].length();
274
275 return size;
276}
277
278//...........................................................................................
279template <class T> size_t MedMat<T>::serialize(unsigned char *buf)
280{
281 // start with 2 magic numbers (first for identification, second for future use/versions)
282 size_t size = 0;
283
284 *((unsigned long long *)&buf[size]) = (unsigned long long)MEDMAT_MAGIC_NUMBER; size += sizeof(unsigned long long);
285 *((unsigned long long *)&buf[size]) = (unsigned long long)MEDMAT_MAGIC_NUMBER; size += sizeof(unsigned long long);
286
287 // writing sizeof(T) for debugging
288 *((int *)&buf[size]) = (int)sizeof(T); size += sizeof(int);
289
290
291 // matrix nrows, ncols, normalized_flag, transposed_flag, missing value
292 *((int *)&buf[size]) = nrows; size += sizeof(int);
293 *((int *)&buf[size]) = ncols; size += sizeof(int);
294 *((int *)&buf[size]) = normalized_flag; size += sizeof(int);
295 *((int *)&buf[size]) = transposed_flag; size += sizeof(int);
296 *((T *)&buf[size]) = missing_value; size += sizeof(T);
297
298 // matrix itself
299 memcpy(&buf[size], &m[0], (size_t)nrows*ncols*sizeof(T));
300 size += (unsigned long long)nrows * ncols * sizeof(T);
301
302 // avg size followed by avg elements, then std size followed by std elements
303 *((int *)&buf[size]) = (int)avg.size(); size += sizeof(int);
304 memcpy(&buf[size], &avg[0], avg.size()*sizeof(T));
305 *((int *)&buf[size]) = (int)std.size(); size += sizeof(int);
306 memcpy(&buf[size], &std[0], std.size()*sizeof(T));
307
308 // recordsMetadata size followed by records
309 int r_size = (int)recordsMetadata.size();
310 *((int *)&buf[size]) = r_size; size += sizeof(int);
311 memcpy(&buf[size], &recordsMetadata[0], (size_t)r_size * sizeof(RecordData));
312 size += (unsigned long long)r_size * sizeof(RecordData);
313
314
315 // names of columns
316 int s_size = (int)signals.size();
317 *((int *)&buf[size]) = s_size; size += sizeof(int);
318 for (int i=0; i<s_size; i++) {
319 // size of string, followed by string
320 int slen = (int)signals[i].length();
321 *((int *)&buf[size]) = slen; size += sizeof(int);
322 memcpy(&buf[size], signals[i].c_str(), slen);
323 size += slen;
324 }
325
326 // Done !
327 return size;
328}
329
330//...........................................................................................
331template <class T> size_t MedMat<T>::deserialize(unsigned char *buf)
332{
333 size_t size = 0;
334 // 2 magic nums
335 unsigned long long magic1 = *((unsigned long long *)&buf[size]); size += sizeof(unsigned long long);
336 unsigned long long magic2 = *((unsigned long long *)&buf[size]); size += sizeof(unsigned long long);
337
338 if (magic1 != (unsigned long long)MEDMAT_MAGIC_NUMBER) {
339 fprintf(stderr, "MedMat deserialize error: Wrong magic number %llx instead of %llx\n", magic1, (unsigned long long)MEDMAT_MAGIC_NUMBER);
340 return (size_t)-1;
341 }
342
343 if (magic2 == (unsigned long long)MEDMAT_MAGIC_NUMBER) {
344
345 // sizeof (T)
346 int sizeT = *((int *)&buf[size]); size += sizeof(int);
347 if (sizeT != (int)sizeof(T)) {
348 fprintf(stderr, "MedMat deserialize error: sizeT not matching %d vs. %d\n", sizeT, (int)sizeof(T));
349 return (size_t)-1;
350 }
351
352 // matrix nrows, ncols, normalized_flag, transposed_flag, missing value
353 nrows = *((int *)&buf[size]); size += sizeof(int);
354 ncols = *((int *)&buf[size]); size += sizeof(int);
355 normalized_flag = *((int *)&buf[size]); size += sizeof(int);
356 transposed_flag = *((int *)&buf[size]); size += sizeof(int);
357 missing_value = *((T *)&buf[size]); size += sizeof(T);
358
359 //cerr << "desrialize: nrows " << nrows << " ncols " << ncols << " sizeT " << sizeT << "\n";
360 // matrix itself
361 m.resize((size_t)nrows*ncols);
362 memcpy(&m[0], &buf[size], (size_t)nrows*ncols*sizeof(T));
363 size += (unsigned long long)nrows * ncols * sizeof(T);
364
365 // avg size followed by avg elements, then std size followed by std elements
366 int a_size = *((int *)&buf[size]); size += sizeof(int);
367 avg.resize(a_size);
368 memcpy(&avg[0], &buf[size], avg.size()*sizeof(T));
369 int std_size = *((int *)&buf[size]); size += sizeof(int);
370 std.resize(a_size);
371 memcpy(&std[0], &buf[size], std.size()*sizeof(T));
372
373 // recordsMetadata size followed by records
374 int r_size = *((int *)&buf[size]); size += sizeof(int);
375 recordsMetadata.resize(r_size);
376 memcpy(&recordsMetadata[0], &buf[size], (size_t)r_size * sizeof(RecordData));
377 size += (unsigned long long)r_size * sizeof(RecordData);
378
379
380 // names of columns
381 int s_size = *((int *)&buf[size]); size += sizeof(int);
382 signals.resize(s_size);
383 for (int i=0; i<s_size; i++) {
384 // size of string, followed by string
385 int slen = *((int *)&buf[size]); size += sizeof(int);
386 signals[i].assign((char *)&buf[size], slen);
387 size += slen;
388 }
389
390 }
391 else {
392 fprintf(stderr, "MedMat deserialize error: unsupported mode %llx\n", magic2);
393 return (size_t)-1;
394 }
395
396 return size;
397}
398*/
399
400//...........................................................................................
401template <class T> int MedMat<T>::read_from_bin_file(const string &fname)
402{
403#if 1
404 unsigned char *data;
405 unsigned long long size;
406 if (read_binary_data_alloc(fname, data, size) < 0) {
407 fprintf(stderr, "Error reading file %s\n", fname.c_str()); fflush(stderr);
408 return -1;
409 }
410
411// cerr << "before serialize size is " << size << "\n";
412 if (deserialize(data) == (size_t)-1)
413 return -1;
414
415// cerr << "after deserialize\n";
416
417 delete[] data;
418 return 0;
419#endif
420#if 0
421 // OLDER code - kept for a while
422
423 if (!file_exists(fname)) {
424 cerr << "File " << fname << " doesn't exist\n";
425 return -1;
426 }
427 cerr << "reading binary data from " << fname << "\n";
428
429 ifstream inf;
430
431 inf.open(fname,ios::in|ios::binary);
432 if (!inf)
433 return -1;
434
435
436
437
438 // read nrows,ncols
439 inf.read((char *)(&nrows),sizeof(int));
440 inf.read((char *)(&ncols),sizeof(int));
441
442 m.resize(nrows*ncols);
443
444 char *d = (char *)&m[0];
445 inf.read(d, nrows*ncols*sizeof(T));
446
447 int r_size;
448 inf.read((char *)(&r_size), sizeof(int));
449 for (int i = 0; i < r_size; i++) {
450 RecordData r;
451 inf.read((char *)(&r.id), sizeof(r.id));
452 inf.read((char *)(&r.date), sizeof(r.date));
453 inf.read((char *)(&r.time), sizeof(r.time));
454 inf.read((char *)(&r.split), sizeof(r.split));
455 inf.read((char *)(&r.weight), sizeof(r.weight));
456 inf.read((char *)(&r.label), sizeof(r.label));
457 inf.read((char *)(&r.pred), sizeof(r.pred));
458 recordsMetadata.push_back(r);
459 }
460
461 int c_size;
462 inf.read((char *)(&c_size), sizeof(int));
463 for (int i = 0; i < c_size; i++) {
464 int len;
465 inf.read((char *)(&len), sizeof(int));
466 std::vector<char> tmp(len);
467 inf.read(tmp.data(), len); //deserialize characters of string
468 string name;
469 name.assign(tmp.data(), len);
470 signals.push_back(name);
471 }
472
473 inf.close();
474
475 return 0;
476
477#endif
478
479}
480
481//...........................................................................................
482template <class T> int MedMat<T>::write_to_bin_file(const string &fname)
483{
484 vector<unsigned char> serialized;
485 size_t size = get_size();
486 serialized.resize(size+1);
487 serialize(&serialized[0]);
488 if (write_binary_data(fname, &serialized[0], size) < 0) {
489 fprintf(stderr, "MedMat write_to_bon_file ERROR: failed writing to %s\n", fname.c_str());
490 return -1;
491 }
492
493 return 0;
494
495#if 0
496
497 ofstream of;
498 of.open(fname, ios::out|ios::binary);
499 if (!of) {
500 fprintf(stderr, "Can not write to %s\n", fname.c_str());
501 throw exception();
502 }
503
504 // OLDER code - kept for a while
505 cerr << "writing binary " << fname << " with " << nrows << "X" << ncols <<" :: elem size " << sizeof(T) << "\n";
506 of.write((char *)(&nrows),sizeof(int));
507 of.write((char *)(&ncols),sizeof(int));
508 of.write((char *)(&m[0]),sizeof(T)*nrows*ncols);
509 int r_size = (int)recordsMetadata.size();
510 of.write((char *)(&r_size), sizeof(int));
511 cerr << "writing additional data for " << r_size << " records\n";
512 for (RecordData r : recordsMetadata) {
513 of.write((char *)(&r.id), sizeof(r.id));
514 of.write((char *)(&r.date), sizeof(r.date));
515 of.write((char *)(&r.time), sizeof(r.time));
516 of.write((char *)(&r.split), sizeof(r.split));
517 of.write((char *)(&r.weight), sizeof(r.weight));
518 of.write((char *)(&r.label), sizeof(r.label));
519 of.write((char *)(&r.pred), sizeof(r.pred));
520 }
521
522 int c_size = (int)signals.size();
523 of.write((char *)(&c_size), sizeof(int));
524 cerr << "writing additional data for " << c_size << " columns\n";
525 for (string name: signals) {
526 int len = (int)name.size();
527 of.write((char *)(&len), sizeof(len));
528 of << name;
529 }
530
531 of.close();
532
533 return 0;
534#endif
535}
536
537
538//...........................................................................................
539// expected format for titles line: id, date, time, split, weight, <signals>, label, pred
540// if no titles line, then only the naked matrix is expected
541//template <class T> int MedMat<T>::read_from_csv_file(const string &fname, int titles_line_flag, vector<string>& fields_out)
542template <class T> int MedMat<T>::read_from_csv_file(const string &fname, int titles_line_flag)
543{
544 clear();
545 if (!file_exists(fname)) {
546 fprintf(stderr, "File %s doesn't exist\n",fname.c_str());
547 throw std::exception();
548 }
549 fprintf(stderr, "reading data from %s\n", fname.c_str());
550 ifstream inf;
551 inf.open(fname, ios::in);
552 if (!inf) {
553 cerr << "can not open file\n";
554 throw std::exception();
555 }
556 ncols = -1;
557 string curr_line;
558 int METADATA_COLUMNS_PREFIX = 0;
559 int METADATA_COLUMNS_SUFFIX = 0;
560 if (titles_line_flag == 1) {
561 METADATA_COLUMNS_PREFIX = 5;
562 METADATA_COLUMNS_SUFFIX = 2;
563 }
564 int METADATA_COLUMNS = METADATA_COLUMNS_PREFIX + METADATA_COLUMNS_SUFFIX;
565
566 while (getline(inf, curr_line)) {
567 boost::trim(curr_line);
568 vector<string> fields;
569 boost::split(fields, curr_line, boost::is_any_of(","));
570 if (ncols == -1) {
571 if (titles_line_flag) {
572 assert(fields[0].compare("pid") == 0);
573 assert(fields[1].compare("date") == 0);
574 assert(fields[2].compare("outcomeTime") == 0);
575 assert(fields[3].compare("split") == 0);
576 assert(fields[4].compare("weight") == 0);
577 for (int i = METADATA_COLUMNS_PREFIX; i < fields.size() - METADATA_COLUMNS_SUFFIX; i++)
578 signals.push_back(fields[i]);
579
580 assert(fields.end()[-2].compare("label") == 0);
581 assert(fields.end()[-1].compare("pred") == 0);
582 ncols = (int)fields.size() - METADATA_COLUMNS;
583 assert(ncols >= 0);
584 continue;
585 }
586 else {
587 ncols = (int)fields.size();
588 assert(ncols >= 0);
589 }
590 }
591 if (fields.size() != ncols + METADATA_COLUMNS) {
592 //char msg[200];
593 string msg = "expected " + to_string(ncols + METADATA_COLUMNS) + " fields, got " + to_string((int)fields.size()) + "fields in line: " + curr_line.c_str() + "\n";
594 //sprintf(msg, "expected %d fields, got %d fields in line: %s\n", ncols + METADATA_COLUMNS, (int)fields.size(), curr_line.c_str());
595 throw runtime_error(msg.c_str());
596 }
597 if (METADATA_COLUMNS > 0) {
598 RecordData sample(stoi(fields[0]), stoi(fields[1]), stol(fields[2]), stoi(fields[3]), stof(fields[4]),
599 stof(fields.end()[-2]), stof(fields.end()[-1]));
600 recordsMetadata.push_back(sample);
601 }
602 vector<T> new_row(ncols);
603 for (int i = 0; i < ncols; i++)
604 new_row[i] = (T)stof(fields[i + METADATA_COLUMNS_PREFIX]);
605 add_rows(new_row);
606 }
607
608 inf.close();
609 fprintf(stderr, "read %lldX%lld data\n", nrows, ncols);
610 return 0;
611}
612
613template <class T> int MedMat<T>::write_to_csv_file(const string &fname) {
614 fprintf(stderr, "writing %s with %lldX%lld data\n", fname.c_str(), nrows, ncols);
615 ofstream of;
616 of.open(fname, ios::out);
617 if (!of) {
618 fprintf(stderr, "Error: failed opening file %s\n", fname.c_str());
619 //cerr << "Error: " << strerror(errno);
620 throw std::exception();
621 }
622 bool with_signals = (signals.size() == ncols);
623 bool with_records = (recordsMetadata.size() == nrows);
624
625 if (signals.size() != ncols)
626 cerr << "ncols: " << ncols << " number of column names: " << signals.size() << ", not writing column names\n";
627 if (recordsMetadata.size() != nrows)
628 cerr << "nrows: " << nrows << " number of records metadata entries: " << recordsMetadata.size() << ", not writing record metadata\n";
629
630 if (with_records && with_signals)
631 of << "pid,date,outcomeTime,split,weight,";
632 if (with_signals)
633 for (int j = 0; j < ncols; j++) {
634 of << signals[j] << ",";
635 }
636 if (with_records && with_signals)
637 of << "label,pred\n";
638 else if (with_signals)
639 of << "\n";
640
641
642 for (int i = 0; i < nrows; i++) {
643 if (with_records)
644 of << recordsMetadata[i].id << "," << med_time_converter.convert_times_S(global_default_time_unit, MedTime::DateTimeString, recordsMetadata[i].date)
645 << "," << recordsMetadata[i].outcomeTime << "," << recordsMetadata[i].split << "," <<
646 recordsMetadata[i].weight << ",";
647 for (int j = 0; j < ncols; j++) {
648 of << get(i, j) << ",";
649 }
650 if (with_records)
651 of << recordsMetadata[i].label << "," << recordsMetadata[i].pred;
652 of << "\n";
653 }
654 of.close();
655 return 0;
656}
657
658
659// normalization
660//..............................................................................................................
// Computes the mean and the (sample, n-1) standard deviation from the
// accumulated count, sum and sum of squares.
//   num == 0 : mean and std are both set to missing_val
//   num == 1 : std is set to 1
//   variance not above MED_FLT_EPSILON (constant values) : std is set to 1
// The variance is computed with the double precision mean: rounding the mean
// to float first makes sum2 - sum*mean lose all its significant digits for
// large values.
inline void calculate_moments(int num, double sum, double sum2, float& mean, float& std, float missing_val) {

	if (num == 0) {
		mean = std = missing_val;
		return;
	}

	const double mean_d = sum/(double)num;
	mean = (float)mean_d;

	std = 1; // Dummy std for a single sample or a constant value
	if (num > 1) {
		const double var = (sum2 - sum*mean_d)/(double)(num-1);
		if (var > MED_FLT_EPSILON)
			std = (float)std::sqrt(var);
	}
}
678
679//..............................................................................................
680template <class T> void MedMat<T>::normalize(int norm_type, float *wgts) {
681
682 double sum,sum2; // square sums become large fast...need doubles for this
683 int num ;
684 float val;
685
686 if (norm_type == Normalize_Cols) { // Column-wise moments
687
688 avg.resize(ncols) ;
689 std.resize(ncols) ;
690
691 for (int j=0; j<ncols; j++) {
692
693 sum = sum2 = 0 ;
694 num = 0 ;
695
696 for (int i=0; i<nrows; i++) {
697 val = (float)get(i,j);
698 if (val != missing_value) {
699 if (wgts != NULL) val *= wgts[i];
700 num ++ ;
701 sum += val ;
702 sum2 += val*val ;
703 }
704 }
705
706 float av,sd;
707 calculate_moments(num,sum,sum2,av,sd,(float)missing_value) ;
708 avg[j] = (T)av;
709 std[j] = (T)sd;
710 }
711
712 } else { // Row-wise moments
713
714 avg.resize(nrows) ;
715 std.resize(nrows) ;
716
717 for (int i=0; i<nrows; i++) {
718 sum = sum2 = 0 ;
719 num = 0 ;
720
721 for (int j=0; j<ncols; j++) {
722 val = (float)get(i,j);
723 if (val != missing_value) {
724 if (wgts != NULL) val *= wgts[i];
725 num ++ ;
726 sum += val ;
727 sum2 += val*val ;
728 }
729 }
730
731 float av,sd;
732 calculate_moments(num,sum,sum2,av,sd,(float)missing_value) ;
733 avg[i] = (T)av;
734 std[i] = (T)sd;
735 }
736 }
737
738 // Normalize
739 normalize(avg,std,norm_type) ;
740}
741
742template <class T> void MedMat<T>::get_cols_avg_std(vector<T>& _avg, vector<T>& _std)
743{
744 _avg.resize(ncols);
745 _std.resize(ncols);
746
747 for (int j=0; j<ncols; j++) {
748
749 double _sum = 0 , _sum2 = 0;
750 int _num = 0;
751
752 for (int i=0; i<nrows; i++) {
753 float val = (float)get(i, j);
754 if (val != missing_value) {
755 _num++;
756 _sum += val;
757 }
758 }
759
760
761 if (_num > 0)
762 _avg[j] = (T)(_sum/_num);
763 else
764 _avg[j] = (T)0;
765
766 _num = 0;
767 for (int i=0; i<nrows; i++) {
768 T val = get(i, j);
769 if (val != missing_value) {
770 _num++;
771 _sum2 += (double)(val - _avg[j])*(val - _avg[j]);
772 }
773 }
774
775 if (_num > 0)
776 _std[j] = (T)sqrt((double)_sum2/_num);
777 else
778 _std[j] = (T)1;
779 //float av, sd;
780 //calculate_moments(_num, _sum, _sum2, av, sd, (float)missing_value);
781 //_avg[j] = (T)av;
782 //_std[j] = (T)sd;
783 }
784}
785
786template <class T> template <class S> void MedMat<T>::normalize (const vector<S>& external_mean, const vector<S>& external_std, int norm_type) {
787
788 normalized_flag = norm_type ;
789 vector<S> internal_std(external_std.size());// to go with the const attr
790 for (int i=0; i<external_std.size(); i++) {
791 if (external_std[i] == 0)
792 internal_std[i] = 1;
793 else
794 internal_std[i] = external_std[i];
795 }
796
797 for (size_t i=0; i<nrows; i++) {
798 for (size_t j=0; j<ncols; j++) {
799 if (normalized_flag == Normalize_Cols) {
800 if (m[i*ncols +j] == missing_value)
801 m[i*ncols +j] = 0 ;
802 else if (internal_std.size())
803 m[i*ncols + j] = (m[i*ncols + j] - external_mean[j])/internal_std[j] ;
804 else
805 m[i*ncols + j] = (m[i*ncols + j] - external_mean[j]) ;
806 } else {
807 if (m[i*ncols +j] == missing_value)
808 m[i*ncols +j] = 0 ;
809 else if (internal_std.size())
810 m[i*ncols + j] = (m[i*ncols + j] - external_mean[i])/internal_std[i] ;
811 else
812 m[i*ncols + j] = (m[i*ncols + j] - external_mean[i]) ;
813 }
814 }
815 }
816}
817
818//..............................................................................................................................
819template <class T> void MedMat<T>::print_row(FILE *fout, const string &prefix, const string &format, int i_row)
820{
821 fprintf(fout, "%s :: [%d,:] :", prefix.c_str(),i_row);
822 for (int i=0; i<ncols; i++)
823 fprintf(fout, format.c_str(), get(i_row, i));
824 fprintf(fout, "\n");
825}
826
827
828//..............................................................................................................................
829template <class T> void MedMat<T>::set_signals(vector<string> & sigs)
830{
831 signals.clear();
832 for (auto &sig : sigs)
833 {
834 signals.push_back(sig);
835 }
836}
837
838//..............................................................................................................................
839template <class T> void MedMat<T>::random_split_mat_by_ids(MedMat<T> &mat_0, MedMat<T> &mat_1, float p0, vector<int> &inds0, vector<int> &inds1)
840{
841 if (recordsMetadata.size() != nrows)
842 HMTHROW_AND_ERR("ERROR: MedMat : Can't split a matrix by id with a non matching recordsMetadata (%d records != %d rows)\n", (int)recordsMetadata.size(), (int)nrows);
843
844 // collect ids and randomize the 0 group
845 unordered_set<int> all_ids, ids_0;
846 for (int i = 0; i < nrows; i++)
847 all_ids.insert(recordsMetadata[i].id);
848 for (auto id : all_ids)
849 if (rand_1() < p0)
850 ids_0.insert(id);
851
852 // calculate sizes for matrices
853 mat_0.clear();
854 mat_0.copy_header(*this);
855 mat_1.copy_header(*this);
856 int nrows0 = 0, nrows1 = 0;
857 inds0.clear();
858 inds1.clear();
859 vector<int> assignment(nrows, 0);
860 for (int i = 0; i < nrows; i++) {
861 if (ids_0.find(recordsMetadata[i].id) != ids_0.end()) {
862 nrows0++;
863 inds0.push_back(i);
864 }
865 else {
866 nrows1++;
867 assignment[i] = 1;
868 inds1.push_back(i);
869 }
870 }
871
872 mat_0.nrows = nrows0;
873 mat_0.m.resize(mat_0.nrows*mat_0.ncols);
874 mat_0.recordsMetadata.resize(mat_0.nrows);
875 mat_0.row_ids.resize(mat_0.nrows);
876
877 mat_1.nrows = nrows1;
878 mat_1.m.resize(mat_1.nrows*mat_1.ncols);
879 mat_1.recordsMetadata.resize(mat_1.nrows);
880 mat_1.row_ids.resize(mat_1.nrows);
881
882 int i0 = 0, i1 = 0;
883 for (int i = 0; i < nrows; i++) {
884 if (assignment[i]) {
885 for (int j = 0; j < ncols; j++)
886 mat_1(i1, j) = m[i*ncols + j];
887 mat_1.recordsMetadata[i1] = recordsMetadata[i];
888 mat_1.row_ids[i1] = recordsMetadata[i].id;
889 i1++;
890 }
891 else {
892 for (int j = 0; j < ncols; j++)
893 mat_0(i0, j) = m[i*ncols + j];
894 mat_1.recordsMetadata[i0] = recordsMetadata[i];
895 mat_1.row_ids[i0] = recordsMetadata[i].id;
896 i0++;
897 }
898 }
899
900}
Definition MedMat.h:63
static const int DateTimeString
string only format "YYYYMMDDHHMI"
Definition MedTime.h:31
TODO: this class should be refactored and merged with MedSample.
Definition MedMat.h:30
float stof(const std::string &value, size_t *pos=nullptr)
A faster implementation of stof(). See documentation of std::stof() for more information....
Definition strtonum.h:467
Definition StdDeque.h:58