3#ifndef __SAMPLES_GENERATOR_H__
4#define __SAMPLES_GENERATOR_H__
8#include <MedStat/MedStat/GibbsSampler.h>
9#include <MedEmbed/MedEmbed/ApplyKeras.h>
10#include <MedMat/MedMat/MedMat.h>
/// Declaration only (body not in this listing): takes a GeneratorType, returns a string.
/// NOTE(review): presumably the textual name of the generator type - confirm in the implementation.
27string GeneratorType_toStr(GeneratorType type);
/// Declaration only (body not in this listing): takes a string, returns a GeneratorType.
/// NOTE(review): presumably the inverse of GeneratorType_toStr; handling of unknown names is not visible here.
29GeneratorType GeneratorType_fromStr(
const string &type);
/// learn of sample generator - overload taking only the feature-name -> values map.
/// Declaration only; how it relates to the three-argument overload is not visible in this listing.
50 void learn(
const map<
string, vector<T>> &data);
/// learn of sample generator.
/// This version has an empty body, so it does nothing unless a derived class overrides it.
/// @param data            feature name -> vector of values
/// @param learn_features  feature names (NOTE(review): presumably the subset of data to learn from - confirm)
/// @param skip_missing    NOTE(review): presumably skips missing values while learning - confirm
55 virtual void learn(
const map<
string, vector<T>> &data,
const vector<string> &learn_features,
bool skip_missing) {};
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
/// @param data         feature name -> vector of values; passed by non-const reference
/// @param params       opaque generator-specific parameters (void pointer; expected type not visible here)
/// @param mask         one flag per feature (NOTE(review): which flag value means "generate" is not visible here)
/// @param mask_values  values paired with the mask
61 virtual void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments as above plus a caller-supplied mt19937 engine; const member function.
72 virtual void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based overload: one mask vector per row, a MedMat of mask values, a sample_per_row count
/// and a caller-supplied mt19937 engine; const member function.
/// NOTE(review): the listing's tooltips call the MedMat form the "vector api".
77 virtual void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// Serialization hooks - declarations only; what they do is not visible in this listing.
81 void pre_serialization();
82 void post_deserialization();
/// NOTE(review): these members appear to belong to the generator that uses a GibbsSampler object
/// to sample from the data distribution (located by the listing's line numbers) - confirm against the header.
/// Flag; its purpose and default are not visible in this listing.
98 bool no_need_to_clear_mem;
/// learn of sample generator.
106 void learn(
const map<
string, vector<T>> &data,
const vector<string> &learn_features,
bool skip_missing);
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
108 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments plus a caller-supplied mt19937 engine; const member function.
111 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based overload with per-row masks, sample_per_row and a caller-supplied mt19937 engine; const.
112 void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// Serialization hooks - declarations only; what they do is not visible in this listing.
114 void pre_serialization();
115 void post_deserialization();
/// NOTE(review): these members appear to belong to the MaskedGAN parameters object - confirm against the header.
/// Virtual to init object from parsed fields (mapper holds string key/value pairs).
/// The meaning of the int return value is not visible in this listing.
128 int init(map<string, string> &mapper);
/// Defaults to false. NOTE(review): presumably keeps the original values of unmasked features - confirm.
130 bool keep_original_values =
false;
/// NOTE(review): these members appear to belong to the Masked GAN generator - confirm against the header.
/// NOTE(review): presumably one list of permitted values per feature (see round_to_allowed_values) - confirm.
141 vector<vector<T>> allowed_values;
/// NOTE(review): presumably per-feature means and standard deviations used for normalization - confirm.
144 vector<float> mean_feature_vals;
145 vector<float> std_feature_vals;
/// Flag; "by_by" in the name looks like a typo, but the name is also used in the serialization list below,
/// so renaming it must be done in both places. Its meaning is not visible in this listing.
146 bool norm_by_by_file;
/// Takes a value and a list of allowed values, returns a T; const member function.
/// NOTE(review): presumably snaps in_value to a member of curr_allowed_values - the rule is not visible here.
148 T round_to_allowed_values(T in_value,
const vector<T>& curr_allowed_values)
const;
/// Receives opaque parameters through a void pointer; the expected pointee type is not visible here.
149 void set_params(
void *params);
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
158 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments plus a caller-supplied mt19937 engine; const member function.
160 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// vector API for generating samples (MedMat-based, per-row masks, caller-supplied mt19937); const.
161 void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based variant that also receives a matrix Z; non-const, takes no random engine.
/// NOTE(review): presumably Z is the pre-drawn random input to the generator - confirm.
162 void get_samples_from_Z(
MedMat<T> &data,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values,
const MedMat<T> &Z);
/// Declaration only; the expected file format is not visible in this listing.
164 void read_from_text_file(
const string& file_name);
/// Serialization hooks - declarations only; what they do is not visible in this listing.
166 void pre_serialization();
167 void post_deserialization();
/// Lists the members handled by serialization. generator and mg_params are named here
/// but their declarations are not part of this listing.
170 ADD_SERIALIZATION_FUNCS(generator, allowed_values, mg_params, mean_feature_vals, std_feature_vals, norm_by_by_file)
/// NOTE(review): these members appear to belong to the simple generator that just puts the missing value
/// by mask (located by the listing's line numbers) - confirm against the header.
/// NOTE(review): presumably the feature names - confirm.
179 vector<string> names;
/// learn of sample generator.
185 void learn(
const map<
string, vector<T>> &data,
const vector<string> &learn_features,
bool skip_missing);
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
187 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments plus a caller-supplied mt19937 engine; const member function.
189 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based overload with per-row masks, sample_per_row and a caller-supplied mt19937 engine; const.
190 void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// Serialization hooks - declarations only; what they do is not visible in this listing.
192 void pre_serialization();
193 void post_deserialization();
/// NOTE(review): these members appear to belong to the generator that puts random values from a normal
/// distribution in missing values (located by the listing's line numbers) - confirm against the header.
/// NOTE(review): presumably the feature names - confirm.
206 vector<string> names;
/// learn of sample generator.
212 void learn(
const map<
string, vector<T>> &data,
const vector<string> &learn_features,
bool skip_missing);
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
214 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments plus a caller-supplied mt19937 engine; const member function.
216 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based overload with per-row masks, sample_per_row and a caller-supplied mt19937 engine; const.
217 void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// Serialization hooks - declarations only; what they do is not visible in this listing.
219 void pre_serialization();
220 void post_deserialization();
/// NOTE(review): these members appear to belong to the generator that puts values in each feature selected
/// randomly from its distribution (located by the listing's line numbers) - confirm against the header.
/// Missing-value marker; initialized to MED_MAT_MISSING_VALUE.
231 T missing_value = MED_MAT_MISSING_VALUE;
/// Virtual to init object from parsed fields (mapper holds string key/value pairs).
236 int init(map<string, string>& mapper);
/// learn of sample generator.
240 void learn(
const map<
string, vector<T>> &data,
const vector<string> &learn_features,
bool skip_missing);
/// apply of sample generator - default arguments with mask, and mask values to generate values in mask.
242 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values);
/// Same map-based arguments plus a caller-supplied mt19937 engine; const member function.
244 void get_samples(map<
string, vector<T>> &data,
void *params,
const vector<bool> &mask,
const vector<T> &mask_values, mt19937 &rnd_gen)
const;
/// MedMat-based overload with per-row masks, sample_per_row and a caller-supplied mt19937 engine; const.
245 void get_samples(
MedMat<T> &data,
int sample_per_row,
void *params,
const vector<vector<bool>> &mask,
const MedMat<T> &mask_values, mt19937 &rnd_gen)
const;
/// Serialization hooks - declarations only; what they do is not visible in this listing.
247 void pre_serialization();
248 void post_deserialization();
/// NOTE(review): presumably the values observed per feature (outer index feature, inner index value order),
/// paired with feature_val_probs - confirm against the header.
256 vector<vector<T>> feature_values;
/// Takes a value vector v, a cumulative-sum vector and a probability p, returns a T; const member function.
/// NOTE(review): presumably returns the element of v whose cumulative probability bracket contains p
/// (inverse-CDF lookup) - the tie-breaking and boundary handling are not visible in this listing.
268 T find_pos(
const vector<T> &v,
const vector<double> &cumsum,
double p)
const;
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition ApplyKeras.h:82
A gibbs sampler - has learn and create sample based on mask.
Definition GibbsSampler.h:89
Samples generator using GibbsSampler object to sample from data dist.
Definition SamplesGenerator.h:94
void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:135
void learn(const map< string, vector< T > > &data, const vector< string > &learn_features, bool skip_missing)
learn of sample generator
Definition SamplesGenerator.cpp:108
void prepare(void *params)
prepare to generate
Definition SamplesGenerator.cpp:126
MaskedGAN parameters.
Definition SamplesGenerator.h:126
int init(map< string, string > &mapper)
Virtual to init object from parsed fields.
Definition SamplesGenerator.cpp:155
Masked GAN object.
Definition SamplesGenerator.h:138
void prepare(void *params)
prepare to generate
void get_samples(MedMat< T > &data, int sample_per_row, void *params, const vector< vector< bool > > &mask, const MedMat< T > &mask_values)
vector API for generating samples
void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:173
void get_samples(MedMat< T > &data, int sample_per_row, void *params, const vector< vector< bool > > &mask, const MedMat< T > &mask_values, mt19937 &rnd_gen) const
vector API for generating samples
simple - just puts missing value by mask
Definition SamplesGenerator.h:176
void learn(const map< string, vector< T > > &data, const vector< string > &learn_features, bool skip_missing)
learn of sample generator
Definition SamplesGenerator.cpp:445
void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:406
puts random values from normal distribution in missing values
Definition SamplesGenerator.h:202
void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:482
void learn(const map< string, vector< T > > &data, const vector< string > &learn_features, bool skip_missing)
learn of sample generator
Definition SamplesGenerator.cpp:461
Abstract Random Samples generator.
Definition SamplesGenerator.h:34
virtual void prepare(void *params)
prepare to generate
Definition SamplesGenerator.h:45
bool use_vector_api
In gibbs it's faster to use map<string, float> api.
Definition SamplesGenerator.h:38
virtual void learn(const map< string, vector< T > > &data, const vector< string > &learn_features, bool skip_missing)
learn of sample generator
Definition SamplesGenerator.h:55
virtual void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:43
void * new_polymorphic(string derived_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition SamplesGenerator.cpp:112
Definition SerializableObject.h:32
puts values in each feature selected randomly from its distribution
Definition SamplesGenerator.h:229
featureSetStrata strata_settings
strata settings
Definition SamplesGenerator.h:234
vector< string > names
names for all features
Definition SamplesGenerator.h:266
void get_samples(map< string, vector< T > > &data, void *params, const vector< bool > &mask, const vector< T > &mask_values)
apply of sample generator - default arguments with mask, and mask values to generate values in mask,...
Definition SamplesGenerator.cpp:697
ADD_SERIALIZATION_FUNCS(feature_values, feature_val_probs, strata_feature_val_agg_prob, names, missing_value, strata_settings, strata_sizes, min_samples, strata_feature_val_agg_val) private vector< vector< double > > feature_val_probs
< first index is feature name, second is order index
Definition SamplesGenerator.h:251
int min_samples
minimal count of samples in strata size to use strata
Definition SamplesGenerator.h:233
vector< vector< vector< double > > > strata_feature_val_agg_prob
indexed by strata, feature_name, index of sorted value
Definition SamplesGenerator.h:260
void learn(const map< string, vector< T > > &data, const vector< string > &learn_features, bool skip_missing)
learn of sample generator
Definition SamplesGenerator.cpp:524
vector< int > strata_sizes
the strata size
Definition SamplesGenerator.h:265
vector< vector< vector< T > > > strata_feature_val_agg_val
indexed by strata, feature_name, index of sorted value
Definition SamplesGenerator.h:259
int init(map< string, string > &mapper)
Virtual to init object from parsed fields.
Definition SamplesGenerator.cpp:768
When building strata on a set of several features, we build a cartesian product of their combination...
Definition FeatureProcess.h:401