Medial Code Documentation
Loading...
Searching...
No Matches
medial_utilities.h
1// medial_utilities: cluster-analysis and numerical utilities
2#ifndef __MED_UTIL_H__
3#define __MED_UTIL_H__
4#pragma once
5
6#include "zlib/zlib/zlib.h"
7
8#include <stdio.h>
9#include <stdlib.h>
10#include <string.h>
11#include <math.h>
12#include <assert.h>
13
14#include <string>
15#include <sstream>
16#include <iostream>
17#include <map>
18#include <random>
19#include <algorithm>
20
// Fixed per-cell sizes used as strides by the index macros below.
#define MAX_STRING_LEN 1024
#define MAX_FIELD_SIZE 50

// Offset helpers for tables stored in one flat buffer, row after row
// (row i, column j, ncol columns per row). Every argument and every
// expansion is fully parenthesized so the macros compose safely inside
// larger expressions (e.g. `n / HIDX(k)`, `SIDX(i, j, ncol, a << b)`).
// Arguments are still evaluated more than once: avoid side effects.
//   IDX  : cell stride is MAX_STRING_LEN
//   XIDX : cell stride is 1 (plain element index)
//   HIDX : offset of entry i in a one-dimensional table, stride MAX_STRING_LEN
//   SIDX : cell stride is size+1 (presumably `size` chars plus a terminator - confirm)
//   FIDX : cell stride is MAX_FIELD_SIZE
#define IDX(i,j,ncol) ((i)*(ncol)*(MAX_STRING_LEN) + (j)*(MAX_STRING_LEN))
#define XIDX(i,j,ncol) ((i)*(ncol) + (j))
#define HIDX(i) ((i)*(MAX_STRING_LEN))
#define SIDX(i,j,ncol,size) ((i)*(ncol)*((size)+1) + (j)*((size)+1))
#define FIDX(i,j,ncol) ((i)*(ncol)*(MAX_FIELD_SIZE) + (j)*(MAX_FIELD_SIZE))
29
30// Utilities for sorting
31
// Index/value record holding a single-precision value.
// Declared under the sorting utilities; presumably the element type
// handled by val_idx_compare - confirm against the implementation.
struct val_idx
{
    int   idx;  // integer index stored alongside the value
    float val;  // single-precision value
};
36
// Index/value record holding a double-precision value.
// Declared under the sorting utilities; presumably the element type
// handled by dbl_idx_compare - confirm against the implementation.
struct dbl_idx
{
    int    idx;  // integer index stored alongside the value
    double val;  // double-precision value
};
41
42// IO
43int read_blob(const char *file_name, unsigned char **data) ;
44
45// QuickSort comparison functions
46int dbl_idx_compare (const void *el1, const void* el2) ;
47int val_idx_compare (const void *el1, const void* el2) ;
48int double_compare (const void *el1, const void* el2) ;
49int int_compare (const void *el1, const void* el2) ;
50int float_compare (const void *el1, const void* el2) ;
51
52// Utilities for vector calculations in a matrix
53// Find Average of a matrix column
54float calc_col_avg(float *table, int col, int nrow, int ncol, float missing) ;
55double calc_col_avg(double *table, int col, int nrow, int ncol, double missing) ;
56
57// Find Average of a subset of the rows in a matrix column
58float calc_col_sub_avg(float *table, int col, int *inds, int ninds, int ncol, float missing) ;
59double calc_col_sub_avg(double *table, int col, int *inds, int ninds, int ncol, double missing) ;
60
61// Find standard deviation of a matrix column
62float calc_col_std(float *table, int col, int nrow, int ncol, float missing) ;
63double calc_col_std(double *table, int col, int nrow, int ncol, double missing) ;
64float calc_col_std(float *table, int col, int nrow, int ncol, float avg, float missing) ;
65double calc_col_std(double *table, int col, int nrow, int ncol, double avg, double missing) ;
66
67// Find standard deviation of a subset of the rows in a matrix column
68float calc_col_sub_std(float *table, int col, int *inds, int ninds, int ncol, float missing) ;
69double calc_col_sub_std(double *table, int col, int *inds, int ninds, int ncol, double missing) ;
70float calc_col_sub_std(float *table, int col, int *inds, int ninds, int ncol, float avg, float missing) ;
71double calc_col_sub_std(double *table, int col, int *inds, int ninds, int ncol,double avg, double missing) ;
72
73// Find weighted-average of a matrix column
74float weighted_calc_col_avg(float *table, int col, float *weights, int nrow, int ncol, float missing) ;
75double weighted_calc_col_avg(double *table, int col, double *weights, int nrow, int ncol, double missing) ;
76
77// Find weighted-average of a subset of the rows in a matrix column
78float weighted_calc_col_sub_avg(float *table, int col, float *weights, int *inds, int ninds, int ncol, float missing) ;
79double weighted_calc_col_sub_avg(double *table, int col, double *weights, int *inds, int ninds, int ncol, double missing) ;
80
81// Find weighted-standard-deviation of matrix column
82float weighted_calc_col_std(float *table, int col, float *weights, int nrow, int ncol, float missing) ;
83double weighted_calc_col_std(double *table, int col, double *weights, int nrow, int ncol, double missing) ;
84float weighted_calc_col_std(float *table, int col, float *weights, int nrow, int ncol, float avg, float missing) ;
85double weighted_calc_col_std(double *table, int col, double *weights, int nrow, int ncol, double avg, double missing) ;
86
87// Find weighted-standard-deviation of a subset of the rows in a matrix column
88float weighted_calc_col_sub_std(float *table, int col, float *weights, int *inds, int ninds, int ncol, float missing) ;
89double weighted_calc_col_sub_std(double *table, int col, double *weights, int *inds, int ninds, int ncol, double missing) ;
90float weighted_calc_col_sub_std(float *table, int col, float *weights, int *inds, int ninds, int ncol, float avg, float missing) ;
91double weighted_calc_col_sub_std(double *table, int col, double *weights, int *inds, int ninds, int ncol, double avg, double missing) ;
92
93// Find sum of a matrix column
94float calc_col_sum(float *table, int col, int nrow, int ncol, float missing) ;
95double calc_col_sum(double *table, int col, int nrow, int ncol, double missing) ;
96
97// Shuffle a vector
98int shuffle(float *vec, int n, float **new_vec) ;
99int shuffle(double *vec, int n, double **new_vec) ;
100
101// Utilities for independent vector calculations
102// Pearson correlation of two vectors
103float pearson(float *vec1, float *vec2, int n) ;
104double pearson(double *vec1, double *vec2, int n) ;
105double pearson(double *vec1, double *vec2, int n, double missing) ;
106
107// Spearman correlation of two vectors
108float spearman(float *vec1, float *vec2, int n) ;
109double spearman(double *vec1, double *vec2, int n) ;
110
111// Get indices of vec2 elements in vec1
112int get_indices(int *vec1, int n1, int *vec2, int n2, int **indices, int *n3) ;
113
114// Get the (tied) order of a vector.
115int get_order(float *vec, int n, float **order) ;
116int get_order(double *vec, int n, double **order) ;
117
118// Create a random permutation
119int *randomize (int nrows) ;
120
121// Mathematical Utilities
122// Fast Fourier Transform on data
123void four1 (double data[], unsigned long nn, int isign) ;
124
125// Get R and Phi from FFT data
126void get_r_phi(double *fft_data,int size, int i, double *r, double *phi) ;
127
128// Gauss-Jordan elimination to solve AX = b
129void gaussj (double **a, int n, double **b, int m) ;
130
131// Clustering
132// K-Means
133int kmeans (double *x, int nrows, int ncols, int k, int *clusters, double *means) ;
134
135// Identify closest cluster
136int get_closest(double *x,int nrows, int ncols, double *mean) ;
137
138// General utilities
139// check if a file exists
140bool file_exists (const char *filename) ;
141// open files with error message in case of failure
142FILE* safe_fopen(const char* filename, const char* mode, bool exit_on_fail = true) ;
143// gzopen files with error message in case of failure
144gzFile safe_gzopen(const char* filename, const char* mode, bool print_msg = true, bool exit_on_fail = true) ;
145// read line from a gzFile (wrapper of gzgets)
146char * gzGetLine(gzFile file, std::string& str);
147// convert various path formats ("W:/path/to/file" and "/cygdrive/w/path/to/file") to the Windows format accepted by condor ("\\nas1\Work\path\to\file")
148int fix_path(const std::string& in, std::string& out) ;
149
150// time
151double get_hour(const char *time, int format = 0) ;
152void hours2time (double hours, char *time) ;
153double get_day(const char *time, int format = 0) ;
154double get_min(char *time, int format = 0) ;
155double min2day (double mins) ;
156
157int get_day(int date) ;
158int get_date(int days) ;
159
160// Vector Utils
161int get_moments (double *v, int n, double *mean, double *sdv, double missing=-1) ;
162int get_mean (double *v, int n, double *mean, double missing=-1) ;
163int get_sdv (double *v, int n, double mean, double *sdv, double missing=-1) ;
164int get_median (double *v, int n, double *median, double missing=-1) ;
165int get_quantiles (double *v, int n, double *qs, int nqs, double *vals, double missing=-1) ;
166
167#endif
Copyright 2015-2023 by XGBoost Contributors.
Definition medial_utilities.h:37
Definition medial_utilities.h:32