92 vector<unsigned int> counts;
93 vector<unsigned long long> is_in_bit;
96 void set_min(
int _min) { min_val= _min; }
97 void set_max(
int _max) { max_val= _max; max_set = 1; }
98 MedSparseVec() { min_val = 0; max_val = 0; max_set = 0; max_key = 0; init(); }
99 MedSparseVec(
int _min) { min_val = _min; max_val = 0; max_set = 0; max_key = 0; init(); }
100 MedSparseVec(
int _min,
int _max) { min_val = _min; max_set = 1; max_val = _max; max_key = 0; init(); }
101 void set_def(
const T val) { def_val = val; data[0] = def_val; }
103 inline T operator[] (
const unsigned int key)
const {
return (*get(key)); }
105 inline T &operator[] (
const unsigned int key) {
return (*get(key)); }
107 void reserve(
unsigned int size) {data.reserve(size);}
114 is_in_bit.resize(1, 0);
118 data.push_back(def_val);
126 is_in_bit.assign(1, 0ULL);
130 data.push_back(def_val);
131 min_val = 0; max_val = 0; max_set = 0; max_key = 0;
137 inline unsigned int get_ind(
const unsigned int key)
140 if (key < min_val || key > max_key || (max_set && key>max_val))
143 unsigned int mkey = key - min_val;
144 unsigned int i_count = mkey >> 6;
145 unsigned int i_bit = 63 - (mkey & 0x3f);
146 unsigned long long kbits = is_in_bit[i_count]>>i_bit;
151 if ((kbits & 0x1) == 0)
155 unsigned int ind = counts[i_count] + (int)MED_POPCOUNT(kbits);
165 int insert(
const unsigned int key,
const T elem)
168 if (key<min_val || (max_set && key>max_val))
return -1;
171 unsigned int mkey = key - min_val;
172 unsigned int i_count = mkey >> 6;
173 unsigned int i_bit = 63 - (mkey & 0x3f);
174 unsigned long long i_mask = (((
unsigned long long)1)<<(i_bit));
177 if (key <= max_key) {
178 if (is_in_bit[i_count]&i_mask) {
179 unsigned int pos = counts[i_count] + (int)MED_POPCOUNT(is_in_bit[i_count]>>i_bit);
190 unsigned int last_cnt = 0;
191 if (counts.size() > 0)
192 last_cnt = counts.back();
193 counts.resize(i_count+2, last_cnt);
194 is_in_bit.resize(i_count+1, 0);
195 is_in_bit[i_count] |= i_mask;
196 counts[i_count+1] = counts[i_count] + (int)MED_POPCOUNT(is_in_bit[i_count]);
197 data.push_back(elem);
205 inline T *get(
unsigned int key) {
206 return &data[get_ind(key)];
213 int get_all_keys(vector<unsigned int> &keys) {
215 keys.resize(data.size()-1);
232 for (
unsigned int i=min_val; i<=max_key; i++)
246 int get_all_intersected_keys(
const vector<int> &in_keys, vector<int> &keys, vector<int> &inds) {
248 keys.resize(in_keys.size());
249 inds.resize(in_keys.size());
252 for (
int i=0; i<in_keys.size(); i++) {
253 int ind = get_ind(in_keys[i]);
255 keys[i_size] = in_keys[i];
293 size +=
sizeof(
unsigned long long);
294 size +=
sizeof(
unsigned long long);
295 size +=
sizeof(
unsigned int);
296 size +=
sizeof(
unsigned int);
298 size +=
sizeof(
unsigned int);
300 size +=
sizeof(
unsigned int);
301 size +=
sizeof(
unsigned int) * counts.size();
302 size +=
sizeof(
unsigned int);
303 size +=
sizeof(
unsigned long long) * is_in_bit.size();
304 size +=
sizeof(
unsigned int);
305 size +=
sizeof(T) * data.size();
312 size_t serialize(
unsigned char *blob) {
314 unsigned char *curr = blob;
316 curr +=
sizeof(
unsigned long long);
317 ((
unsigned long long *)curr)[0] = (
unsigned long long)MED_SPARSE_VEC_MAGIC_NUM; curr+=
sizeof(
unsigned long long);
318 ((
unsigned int *)curr)[0] = min_val; curr+=
sizeof(
unsigned int);
319 ((
unsigned int *)curr)[0] = max_val; curr+=
sizeof(
unsigned int);
320 ((
int *)curr)[0] = max_set; curr+=
sizeof(int);
321 ((
unsigned int *)curr)[0] = max_key; curr+=
sizeof(
unsigned int);
322 ((T *)curr)[0] = def_val; curr+=
sizeof(T);
323 ((
unsigned int *)curr)[0] = (
unsigned int)counts.size(); curr+=
sizeof(
unsigned int);
324 for (
int i=0; i<counts.size(); i++) {
325 ((
unsigned int *)curr)[0] = counts[i]; curr+=
sizeof(
unsigned int);
327 ((
unsigned int *)curr)[0] = (
unsigned int)is_in_bit.size(); curr+=
sizeof(
unsigned int);
328 for (
int i=0; i<is_in_bit.size(); i++) {
329 ((
unsigned long long *)curr)[0] = is_in_bit[i]; curr+=
sizeof(
unsigned long long);
331 ((
unsigned int *)curr)[0] = (
unsigned int)data.size(); curr+=
sizeof(
unsigned int);
332 for (
int i=0; i<data.size(); i++) {
333 ((T *)curr)[0] = data[i]; curr+=
sizeof(T);
335 unsigned long long len = (
unsigned long long)(curr-blob);
336 ((
unsigned long long *)blob)[0] = len;
342 size_t deserialize(
unsigned char *blob) {
344 unsigned char *curr = blob;
349 unsigned long long serialize_len = ((
unsigned long long *)curr)[0]; curr +=
sizeof(
unsigned long long);
350 unsigned long long magic_num = ((
unsigned long long *)curr)[0]; curr +=
sizeof(
unsigned long long);
351 if (magic_num != (
unsigned long long)MED_SPARSE_VEC_MAGIC_NUM) {
352 fprintf(stderr,
"ERROR: Sparse Vec Magic Num wrong : can't deserialize() (%llx)\n", magic_num);
355 min_val = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
356 max_val = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
357 max_set = ((
int *)curr)[0]; curr +=
sizeof(int);
358 max_key = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
359 def_val = ((T *)curr)[0]; curr +=
sizeof(T);
360 unsigned int len_counts = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
361 counts.resize(len_counts);
362 memcpy(&counts[0], curr, len_counts*
sizeof(
unsigned int));
363 curr +=
sizeof(
unsigned int) * len_counts;
365 unsigned int len_is_in_bit = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
366 is_in_bit.resize(len_is_in_bit);
367 memcpy(&is_in_bit[0], curr, len_is_in_bit*
sizeof(
unsigned long long));
368 curr +=
sizeof(
unsigned long long)*len_is_in_bit;
370 unsigned int len_data = ((
unsigned int *)curr)[0]; curr +=
sizeof(
unsigned int);
371 data.resize(len_data);
372 memcpy(&data[0], curr, len_data*
sizeof(T));
373 curr +=
sizeof(T) * len_data;
375 size_t len = curr - blob;
376 if (len != serialize_len) {
377 fprintf(stderr,
"ERROR: Sparse Vec serialize len not matching decalred one: %zu != %llu\n", len, serialize_len);