79 vector<unsigned int> counts;
80 vector<unsigned long long> is_in_bit;
83 void set_min(
int _min) { min_val= _min; }
84 void set_max(
int _max) { max_val= _max; max_set = 1; }
85 MedSparseVec() { min_val = 0; max_val = 0; max_set = 0; max_key = 0; init(); }
86 MedSparseVec(
int _min) { min_val = _min; max_val = 0; max_set = 0; max_key = 0; init(); }
87 MedSparseVec(
int _min,
int _max) { min_val = _min; max_set = 1; max_val = _max; max_key = 0; init(); }
88 void set_def(
const T val) { def_val = val; data[0] = def_val; }
90 inline T operator[] (
const unsigned int key)
const {
return (*get(key)); }
92 inline T &operator[] (
const unsigned int key) {
return (*get(key)); }
94 void reserve(
unsigned int size) {data.reserve(size);}
101 is_in_bit.resize(1, 0);
105 data.push_back(def_val);
113 is_in_bit.assign(1, 0ULL);
117 data.push_back(def_val);
118 min_val = 0; max_val = 0; max_set = 0; max_key = 0;
124 inline unsigned int get_ind(
const unsigned int key)
127 if (key < min_val || key > max_key || (max_set && key>max_val))
130 unsigned int mkey = key - min_val;
131 unsigned int i_count = mkey >> 6;
132 unsigned int i_bit = 63 - (mkey & 0x3f);
133 unsigned long long kbits = is_in_bit[i_count]>>i_bit;
138 if ((kbits & 0x1) == 0)
142 unsigned int ind = counts[i_count] + (int)_mm_popcnt_u64(kbits);
152 int insert(
const unsigned int key,
const T elem)
155 if (key<min_val || (max_set && key>max_val))
return -1;
158 unsigned int mkey = key - min_val;
159 unsigned int i_count = mkey >> 6;
160 unsigned int i_bit = 63 - (mkey & 0x3f);
161 unsigned long long i_mask = (((
unsigned long long)1)<<(i_bit));
164 if (key <= max_key) {
165 if (is_in_bit[i_count]&i_mask) {
166 unsigned int pos = counts[i_count] + (int)_mm_popcnt_u64(is_in_bit[i_count]>>i_bit);
177 unsigned int last_cnt = 0;
178 if (counts.size() > 0)
179 last_cnt = counts.back();
180 counts.resize(i_count+2, last_cnt);
181 is_in_bit.resize(i_count+1, 0);
182 is_in_bit[i_count] |= i_mask;
183 counts[i_count+1] = counts[i_count] + (int)_mm_popcnt_u64(is_in_bit[i_count]);
184 data.push_back(elem);
192 inline T *get(
unsigned int key) {
193 return &data[get_ind(key)];
200 int get_all_keys(vector<unsigned int> &keys) {
202 keys.resize(data.size()-1);
219 for (
unsigned int i=min_val; i<=max_key; i++)
233 int get_all_intersected_keys(
const vector<int> &in_keys, vector<int> &keys, vector<int> &inds) {
235 keys.resize(in_keys.size());
236 inds.resize(in_keys.size());
239 for (
int i=0; i<in_keys.size(); i++) {
240 int ind = get_ind(in_keys[i]);
242 keys[i_size] = in_keys[i];
280 size +=
sizeof(
unsigned long long);
281 size +=
sizeof(
unsigned long long);
282 size +=
sizeof(
unsigned int);
283 size +=
sizeof(
unsigned int);
285 size +=
sizeof(
unsigned int);
287 size +=
sizeof(
unsigned int);
288 size +=
sizeof(
unsigned int) * counts.size();
289 size +=
sizeof(
unsigned int);
290 size +=
sizeof(
unsigned long long) * is_in_bit.size();
291 size +=
sizeof(
unsigned int);
292 size +=
sizeof(T) * data.size();
299 size_t serialize(
unsigned char *blob) {
301 unsigned char *curr = blob;
303 curr +=
sizeof(
unsigned long long);
304 ((
unsigned long long *)curr)[0] = (
unsigned long long)MED_SPARSE_VEC_MAGIC_NUM; curr+=
sizeof(
unsigned long long);
305 ((
unsigned int *)curr)[0] = min_val; curr+=
sizeof(
unsigned int);
306 ((
unsigned int *)curr)[0] = max_val; curr+=
sizeof(
unsigned int);
307 ((
int *)curr)[0] = max_set; curr+=
sizeof(int);
308 ((
unsigned int *)curr)[0] = max_key; curr+=
sizeof(
unsigned int);
309 ((T *)curr)[0] = def_val; curr+=
sizeof(T);
310 ((
unsigned int *)curr)[0] = (
unsigned int)counts.size(); curr+=
sizeof(
unsigned int);
311 for (
int i=0; i<counts.size(); i++) {
312 ((
unsigned int *)curr)[0] = counts[i]; curr+=
sizeof(
unsigned int);
314 ((
unsigned int *)curr)[0] = (
unsigned int)is_in_bit.size(); curr+=
sizeof(
unsigned int);
315 for (
int i=0; i<is_in_bit.size(); i++) {
316 ((
unsigned long long *)curr)[0] = is_in_bit[i]; curr+=
sizeof(
unsigned long long);
318 ((
unsigned int *)curr)[0] = (
unsigned int)data.size(); curr+=
sizeof(
unsigned int);
319 for (
int i=0; i<data.size(); i++) {
320 ((T *)curr)[0] = data[i]; curr+=
sizeof(T);
322 unsigned long long len = (
unsigned long long)(curr-blob);
323 ((
unsigned long long *)blob)[0] = len;
329 size_t deserialize(
unsigned char *blob) {
331 unsigned char *curr = blob;
336 unsigned long long serialize_len = ((
unsigned long long *)curr)[0]; curr +=
sizeof(
unsigned long long);
337 unsigned long long magic_num = ((
unsigned long long *)curr)[0]; curr +=
sizeof(
unsigned long long);
338 if (magic_num != (
unsigned long long)MED_SPARSE_VEC_MAGIC_NUM) {
339 fprintf(stderr,
"ERROR: Sparse Vec Magic Num wrong : can't deserialize() (%llx)\n", magic_num);
342 min_val = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
343 max_val = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
344 max_set = ((
int *)curr)[0]; curr +=
sizeof(int);
345 max_key = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
346 def_val = ((T *)curr)[0]; curr +=
sizeof(T);
347 unsigned int len_counts = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
348 counts.resize(len_counts);
349 memcpy(&counts[0], curr, len_counts*
sizeof(
unsigned int));
350 curr +=
sizeof(
unsigned int) * len_counts;
352 unsigned int len_is_in_bit = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
353 is_in_bit.resize(len_is_in_bit);
354 memcpy(&is_in_bit[0], curr, len_is_in_bit*
sizeof(
unsigned long long));
355 curr +=
sizeof(
unsigned long long)*len_is_in_bit;
357 unsigned int len_data = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
358 data.resize(len_data);
359 memcpy(&data[0], curr, len_data*
sizeof(T));
360 curr +=
sizeof(T) * len_data;
362 size_t len = curr - blob;
363 if (len != serialize_len) {
364 fprintf(stderr,
"ERROR: Sparse Vec serialize len not matching decalred one: %zu != %llu\n", len, serialize_len);