Medial Code Documentation
micNet.h
//==============================================================
// micNet
//==============================================================
// A simple deep learning network implementation.
//

#ifndef __micNet__H__
#define __micNet__H__

#include <vector>
#include "MedUtils/MedUtils/MedUtils.h"
#include <MedMat/MedMat/MedMat.h>

using namespace std;

//
// A micNet has several nodes inside it (micNodes).
// Some of them (typically one) are input nodes; the others have rules to create their inputs from
// previous nodes, and then apply the micNode function to get their output.
// There are also output nodes, which in the training phase also receive the y to learn from
// (typically we will use softmax for classification and least squares for regression).
//
// Training:
// (1) We choose an initial state for the network (for example normal random weights and 0 biases).
// (2) We forward the batch through the network, so that each micNode now has batch_in and batch_out.
// (3) We go from the output nodes backwards, calculating gradients for each node's weights
//     and propagating the differentials to get a gradient for the weights in each node.
// (4) We take a gradient descent step using the gradients we have and the learning rates.
// (5) We repeat with the next batch until convergence / the stop criteria are met.
// (6) Once per epoch we do a full forward run of the network on all data (could again be done in batches to save memory) and get predictions
//     On-Train and On-Test. This monitors the convergence of the network.
//
// In theory we could implement any differentiable function f: R^n -> R^k as a node (!). This gives us much freedom to develop new ideas for functions.
//
// Testing:
// (1) Working in input batches again.
// (2) Forwarding the inputs through the network - can be parallelized easily.
//

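//
// Illustrative usage sketch (added for documentation; not part of the original header). It shows one
// way the training/prediction API declared below could be driven. The parameter values and the empty
// sample-weights vector are assumptions, not values taken from micNet.cpp.
//
//   // given MedMat<float> x_train, y_train, x_test, y_test:
//   micNetParams params;                 // defaults come from init_defaults()
//   params.nfeat = 100;                  // example: 100 input features
//   params.n_categ = 2;                  // example: binary classification
//   params.n_hidden = { 64, 32 };        // example: two hidden LeakyReLU layers
//
//   micNet net;
//   net.init_net(params);
//   vector<float> weights;               // empty => presumably unweighted samples
//   net.learn(x_train, y_train, weights, x_test, y_test, /*n_epochs=*/50, /*eval_freq=*/5);
//
//   MedMat<float> preds;
//   net.predict(x_test, preds);
//
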
class micNode;
class micNet;

#define NET_MODE_TRAIN 1
#define NET_MODE_TEST 2

class InputRules {
public:
	int n_input_nodes;
	vector<int> in_node_id;
	vector<string> mode;

	void clear() { n_input_nodes = 0; in_node_id.clear(); mode.clear(); }
	InputRules() { clear(); }

	void push(int node_id, const string &_mode);
};


//
// A micNode implements a function from R^n -> R^k that has M*k optimization weights.
// n is typically the dimension of the previous layer, and k will be the dimension for the next.
// In other cases we might want to build and spread the input/output dimensions according to other, more complex rules (example: convolutional networks).
// Inputs to a micNode can be coordinates from other nodes' outputs, or from the input features.
//
class micNode : public SerializableObject {

public:

	int id;
	string type;    // "Input","LeakyReLU","SoftMax","Normalization","Regression","MaxOut","Chooser"
	string subtype; // "encoder","decoder"
	string name;    // for prints
	string loss;    // if "" this is not an output node, if "log" we use logloss (works for softmax), if "least-squares" uses that on the input.

	int n_in;  // incoming dimension
	int k_out; // outgoing dimension

	MedMat<float> wgt;         // weights. the last weight is ALWAYS the bias (in LeakyReLU and Ridge). size (n_in + 1) x (k_out + 1), last column is always 0,0,...,1 (to transfer 1 forward)
	vector<float> dropout_in;  // if dropout is used, in each batch we randomize a dropout vector, stating which of the inputs are taken, and which of the outputs.
	vector<float> dropout_out; // this one is for the outputs and must be similar to the next layer's in.
	MedMat<int> sparse_bit;    // ??

	InputRules ir;
	vector<int> forward_nodes; // for convenience, a list of the indexes of the nodes that are forward of this node (i.e. that consume its output).
	int is_terminal;           // easy way to know if this one is a terminal node (output node --> loss node)

	// training related
	MedMat<float> lr_params;   // in LeakyReLU: holds the slopes a,b for each one of the k neurons. size k_out x 2
	MedMat<float> lambda;      // in Ridge and LeakyReLU, this one holds the ridge regularization coefficient for each neuron. size k_out x 1
	MedMat<float> learn_rates; // learning rates for the weights, currently size is k_out x 1, that is a learn rate for each neuron.

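	// (Added note, not from the original header: given the (a,b) slope pair in lr_params and the
	//  defaults def_A = 1.0, def_B = 0.01 in micNetParams below, the LeakyReLU activation is
	//  presumably of the standard form f(s) = a*s for s >= 0 and f(s) = b*s for s < 0.)
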
	float momentum;         // momentum to use while learning
	float rate_factor;      // multiplies the learning rate - allows managing its decay
	float max_wgt_norm;     // maximal norm for the weights
	float min_wgt_norm;     // minimal norm for the weights
	float dropout_prob_in;  // probability for dropout of the incoming variables
	float dropout_prob_out; // probability for dropout of the outgoing variables
	float sparse_zero_prob; // probability for initial constant random sparseness imposed on the weights

	MedMat<float> batch_in;  // current batch inputs. Every batch has a column of "1" as the last: this is the bias column, so the actual size should be batch_size x (n_in + 1)
	MedMat<float> batch_out; // current batch outputs. size: batch_size x (k_out + 1) (last column always has 1's - to allow bias)

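	// (Added worked example of the bias convention, derived from the sizes documented above: with
	//  n_in = 2 and k_out = 1, a batch row of batch_in is [x1, x2, 1] and wgt is 3 x 2 with last
	//  column (0, 0, 1)^T, so the linear part batch_in * wgt gives rows [w1*x1 + w2*x2 + b, 1] --
	//  the trailing 1 is carried forward to act as the bias input of the next node.)
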
	MedMat<float> grad_w; // current gradient for w: grad(i,j) = d_f/d_Wij(x) averaged over all x in the batch, size (n_in + 1) x (k_out + 1), last col always 0
	MedMat<float> grad_s; // current gradient for s: grad(i,j) = d_f/d_Sij(x) for each sample and neuron, size (nb) x (k_out + 1), last col always 0
	MedMat<float> delta;  // current delta for s: delta(i,j) = d_L/d_Sij(x) for each sample and neuron, size (nb) x (k_out + 1), last col always 0

	MedMat<float> prev_grad_w; // needed for being able to use momentum

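	// (Added note, an assumption rather than a quote of weights_gd_step(): prev_grad_w together with
	//  the momentum factor suggests the usual momentum update, along the lines of
	//      step = momentum * prev_step - learn_rate * grad_w;   wgt += step;
	//  the exact form used by the implementation may differ.)
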
	// normalization related
	vector<float> b_mean, b_var;        // keeping the last stage mean and variance ... to be updated
	vector<float> alpha, beta;          // actual learnt alpha,beta needed for forward normalization
	float normalization_update_factor;  // in [0,1]
	vector<double> curr_mean, curr_var; // used for current batch estimators

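	// (Added note describing the assumed semantics, in the spirit of batch normalization: the running
	//  estimates are presumably blended once per batch as
	//      mean = f * mean + (1 - f) * batch_mean   (and similarly for the variance),
	//  with f = normalization_update_factor, and the forward pass then applies the learnt alpha, beta
	//  as roughly   out = alpha * (x - mean) / sqrt(var + eps) + beta.   This is a sketch, not a quote
	//  of forward_batch_normalization().)
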
	// softmax related
	int n_categ;
	int n_per_categ;
	MedMat<float> full_probs; // just a helper array used in calculations, here since needed both in forward and backprop

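	// (Added reference note: the standard SoftMax / log-loss pair this node type refers to maps scores
	//  s_1..s_C to probabilities p_c = exp(s_c) / sum_j exp(s_j), and the log loss of a sample with
	//  true class t is -log(p_t).)
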
	// autoencoder related
	int data_node;          // node id to decode to
	micNode *data_node_p;

	micNet *my_net;         // pointer to the container network

	MedMat<float> y;        // the y values for a batch. This is relevant for output nodes only, and typically will be for a SoftMax
	vector<float> sweights; // sample weights for this node

	// next is typically 1, however, we may set a micNode (= a layer) not to update weights during a learn cycle.
	// This is useful when one needs to keep a layer in the middle with no changes, while letting other layers work.
	// Delta propagations will still flow through the node, to allow lower levels to get correct gradients.
	int update_weights_flag;

	// this one forces ONLY forward passes on the node in learn time. This is relevant when we freeze the first layers, and hence have
	// no need to propagate through them.
	int only_forward_flag;


	micNode() { data_node = -1; data_node_p = NULL; update_weights_flag = 1; only_forward_flag = 0; subtype = ""; }

	// initialize weights randomly from a uniform segment
	int init_wgts_rand(float min_range, float max_range);

	// initialize weights from a normal distribution
	int init_wgts_rand_normal(float mean, float std);

	int fill_input_node(int *perm, int len, MedMat<float> &x_mat, int last_is_bias_flag);          // copies x into input nodes
	int fill_output_node(int *perm, int len, MedMat<float> &y_mat, vector<float> &sample_weights); // copies y into output nodes

	int get_input_batch(int do_grad_flag); // sets batch_in for a node that is not an input node
	int forward_batch(int do_grad_flag);

	int forward_batch_leaky_relu(int do_grad_flag);
	int forward_batch_normalization(int do_grad_flag);
	int forward_batch_softmax(int do_grad_flag);
	int forward_batch_regression(int do_grad_flag);

	void forward_batch(const vector<MedMat<float>> &nodes_outputs, MedMat<float> &out) const;
	void get_input_batch(const vector<MedMat<float>> &nodes_out, MedMat<float> &in) const;
	void forward_batch_leaky_relu(const MedMat<float> &in, MedMat<float> &out) const;
	void forward_batch_normalization(const MedMat<float> &in, MedMat<float> &out) const;
	void forward_batch_softmax(const MedMat<float> &in, MedMat<float> &out) const;
	void forward_batch_regression(const MedMat<float> &in, MedMat<float> &out) const;


	int back_propagete_from(micNode *next);
	int get_backprop_delta();

	int weights_gd_step();
	int weights_normalization_step(); // step for a normalization layer


	void print(const string &prefix, int i_state, int i_in);

	std::default_random_engine gen;


	// serializations for a single node (partial... only what's needed by predictions, and not initialized by init_params)

	ADD_CLASS_NAME(micNode)
	ADD_SERIALIZATION_FUNCS(id, wgt, alpha, beta)

};


class NetEval {

public:
	string name;
	int epoch;
	float acc_err;
	float auc_max;
	float auc_exp;
	float corr_max;
	float corr_exp;
	float log_loss;
	float lsq_loss;

	double dt; // time for epoch (for time performance measurements)

};


//
// micNet - an implementation of a multilayered NN on top of the micNode class
//
// currently - fully connected LeakyReLU layers with SoftMax/logloss at the end
//
class micNetParams {
public:
	string params_init_string;

	int batch_size;
	int predict_batch_size;
	int n_categ;
	int n_per_categ;
	int nfeat;

	vector<float> samp_ratio; // if its size is less than n_categ, we just permute the whole data each epoch;
	                          // if n_categ weights are given: each batch is chosen randomly using the given per-category probabilities
	                          // (with repetitions... but the probability for that should be close to 0 on reasonable data)
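	// (Added illustrative example, values made up: with n_categ = 2, samp_ratio = {0.5, 0.5} would ask
	//  each batch to be drawn roughly half-and-half from the two categories, regardless of their
	//  frequencies in the training data.)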

	float max_wgt_norm;
	float min_wgt_norm;
	float weights_init_std;
	float rate_decay;

	float def_A, def_B;                                // leaky ReLU defaults
	float def_learning_rate, def_lambda, def_momentum; // params for the case of constant params for all layers in the net
	vector<float> learning_rates;                      // learn rate for each layer.

	int n_norm_layers;
	float normalization_factor;
	float sparse_zero_prob;

	vector<int> n_hidden;
	vector<float> dropout_in_probs;

	string net_type; // "fc","autoencoder"
	string loss_type;

	// a few more params needed to comply with MedAlgo
	int min_epochs;
	int max_epochs;
	int n_back;
	float min_improve_n_back; // minimal relative improvement on the train set looking n_back steps back
	int n_preds_per_sample;   // can be 1 or n_categ
	int pred_class;           // the class we will put in preds in case n_preds_per_sample == 1


	// next params are needed for retraining and transfer learning.
	// the idea is that we start with a ready network, cut off some layers at the end,
	// and add new layers on top of it.
	// a classic use is to train a net as an autoencoder, and then add classification layers on top of it and train them.
	int last_layer_to_keep; // will keep layers 0 (input) up to layers <= last_layer_to_keep (for example should be 1 for the simplest one-layer autoencoder)
	                        // if < 0 then the network is restarted fresh


	// vector<NodeInfo> node_infos;

	void init_defaults() {
		batch_size = 1024; predict_batch_size = 30000; nfeat = 0; n_categ = 0; n_per_categ = 1; max_wgt_norm = 0; min_wgt_norm = 0;
		weights_init_std = (float)0.01; rate_decay = (float)0.97;
		n_hidden.clear();
		dropout_in_probs.clear();
		loss_type = "log";
		def_A = (float)1.0; def_B = (float)0.01;
		def_learning_rate = (float)0.1; def_lambda = 0; def_momentum = (float)0.9; normalization_factor = (float)0.99;
		n_norm_layers = 0;
		sparse_zero_prob = 0;
		net_type = "fc";
		pred_class = 1;
		n_preds_per_sample = 1;
		min_epochs = 10;
		max_epochs = 100;
		n_back = 10;
		min_improve_n_back = (float)0.001;
		samp_ratio.clear();
		last_layer_to_keep = -1;
		// node_infos.clear();
	}

	micNetParams() { init_defaults(); }
	int init_from_string(const string &init_str);

	int node_infos_init_finish();
};


class micNet : public SerializableObject {

public:

	int version = 0;
	vector<micNode> nodes;
	micNetParams params;

	vector<micNode> nodes_last_best;

	micNet() { nodes.clear(); }

	void copy_nodes(vector<micNode> &in_nodes); // needed in order to set up the ir pointers correctly

	// adding layers (relying on params to be already initialized)
	int add_input_layer();
	int add_fc_leaky_relu_layer(int in_node, int n_hidden, float dropout_out_p, float sparse_prob, float learn_rate);
	int add_normalization_layer(int in_node);
	int add_softmax_output_layer(int in_node);
	int add_regression_output_layer(int in_node);
	int add_autoencoder_loss(int in_node, int data_node); // in_node is the encoder node in this case
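	// (Added illustrative sketch, not taken from micNet.cpp, of how these builders appear to compose a
	//  small fully connected classifier; node ids are assumed to be assigned sequentially:
	//      add_input_layer();                                 // node 0
	//      add_fc_leaky_relu_layer(0, 64, 0.0f, 0.0f, 0.1f);  // node 1, fed by node 0
	//      add_softmax_output_layer(1);                       // output/loss node fed by node 1
	//  init_fully_connected() below presumably performs this kind of wiring from the params.)
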

	// initialization
	int init_fully_connected(micNetParams &in_params);
	int init_fully_connected(const string &init_str);
	int init_autoencoder(micNetParams &in_params);
	int init_net(const string &init_string); // auto-chooses the initialization (using the net_type param)
	int init_net(micNetParams &in_params);


	// forward and backprop
	int forward_batch(int do_grad_flag); // assumes Input nodes contain the batch in batch_out
	int back_prop_batch();               // assumes forward_batch was run before

	// learn, eval and predict
	int learn(MedMat<float> &x_train, MedMat<float> &y_train, vector<float> &weights, MedMat<float> &x_test, MedMat<float> &y_test, int n_epochs, int eval_freq, int last_is_bias_flag = 0);
	int learn_single_epoch(MedMat<float> &x_train, MedMat<float> &y_train, vector<float> &weights, int last_is_bias_flag = 0);
	int eval(const string &name, MedMat<float> &x, MedMat<float> &y, NetEval &eval, int last_is_bias_flag = 0);
	int predict(MedMat<float> &x, MedMat<float> &preds, int last_is_bias_flag = 0);
	void predict_single(const vector<float> &x, vector<float> &preds) const;

	vector<vector<int>> index_by_categ; // used when choosing a random batch by samp_ratio
	int get_batch_with_samp_ratio(MedMat<float> &y_train, int batch_len, vector<int> &chosen);

	// next is used to numerically estimate the gradient at a specific node and compare it to the existing (analytic) gradient there.
	// This is done for debugging.
	int test_grad_numerical(int i_node, int i_in, int i_out, float epsilon);
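	// (Added note on the usual recipe such a check relies on, stated as the standard central-difference
	//  formula rather than as a quote of the implementation: perturb the single weight w(i_in, i_out) of
	//  node i_node by +/- epsilon and compare
	//      (L(w + epsilon) - L(w - epsilon)) / (2 * epsilon)
	//  against the analytic grad_w entry for that weight.)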


	// API to allow use with MedAlgo
	int init_from_string(string init_str);
	int learn(MedMat<float> &x_train, MedMat<float> &y_train) { vector<float> w; return learn(x_train, y_train, w); }
	int learn(MedMat<float> &x_train, MedMat<float> &y_train, vector<float> &weights);
	int predict(MedMat<float> &x, vector<float> &preds);

	size_t get_size() { return MedSerialize::get_size(version, params.params_init_string, nodes); }
	size_t serialize(unsigned char *blob) { return MedSerialize::serialize(blob, version, params.params_init_string, nodes); }
	size_t deserialize(unsigned char *blob) {
		string init_str;
		size_t size = MedSerialize::deserialize(blob, version, init_str);
		fprintf(stderr, "micNet deserialize init with %s\n", init_str.c_str());
		init_net(init_str);
		size += MedSerialize::deserialize(&blob[size], nodes);
		for (auto &node : nodes) { node.my_net = this; }
		return size;
	}

	int n_preds_per_sample() const { return params.n_preds_per_sample; }

};

//=======================================================
// Joining the MedSerialize Wagon
//=======================================================
MEDSERIALIZE_SUPPORT(micNode)
MEDSERIALIZE_SUPPORT(micNet)

#endif