// Include guard, dependencies and block-geometry constants for the blocked Bloom filter.
// NB_BITS_BLOCK (0x800 = 2048) is the number of bits in one block, MASK_BITS_BLOCK (0x7ff)
// keeps the low 11 bits of a hash (a bit position inside a block), and NB_ELEM_BLOCK (32)
// is the number of 64-bit words per block (32 * 64 = 2048 bits).
// fast_div_ is a libdivide divider; elsewhere in this file it is built from blocks_ and is
// used to reduce hashes modulo the number of blocks without a hardware division.
// NOTE(review): this text is a scraped listing; the leading numbers are listing line numbers.
1 #ifndef BFG_BLOCKEDBLOOMFILTER_HPP 2 #define BFG_BLOCKEDBLOOMFILTER_HPP 19 #include "KmerHashTable.h" 21 #include "RepHash.hpp" 23 #include "libpopcnt.h" 25 #define NB_BITS_BLOCK (0x800ULL) 26 #define MASK_BITS_BLOCK (0x7ffULL) 27 #define NB_ELEM_BLOCK (32) 43 libdivide::divider<uint64_t> fast_div_;
// Constructs a filter sized for nb_elem elements at bits_per_elem bits each.
// All members start empty/zero and are then derived from the two arguments.
// NOTE(review): the listing skips the lines after the k_ adjustment, so the remainder of the
// constructor body (presumably the table allocation) is not visible here.
49 BlockedBloomFilter(
size_t nb_elem,
size_t bits_per_elem) : table_(NULL), size_table_(0), blocks_(0), k_(0), fast_div_() {
// Total number of bits, rounded up to a whole number of NB_BITS_BLOCK-bit blocks.
51 size_table_ = ((bits_per_elem * nb_elem + MASK_BITS_BLOCK) / NB_BITS_BLOCK) * NB_BITS_BLOCK;
// Number of blocks in the table.
52 blocks_ = size_table_ / NB_BITS_BLOCK;
// Number of hash functions: bits_per_elem * ln(2), truncated to an int ...
56 k_ = (int) (bits_per_elem * log(2));
// ... then bumped by one when k_+1 gives a false-positive estimate that is no worse.
57 if (fpp(bits_per_elem, k_) >= fpp(bits_per_elem, k_+1)) k_++;
// Returns pointers to the first word of the two candidate blocks selected by min_hash.
// The second block index comes from re-hashing min_hash with the same multiply/modulo step
// that the probe loops in this file use to derive successive bit positions.
65 inline std::pair<uint64_t*,uint64_t*> getBlock(uint64_t min_hash)
const{
// Secondary hash derived from the primary one.
67 uint64_t min_hash_2 = (min_hash * 49157) % (1610612741ULL);
// x - (x / blocks_) * blocks_ is x modulo blocks_, with the division done through fast_div_.
69 min_hash -= (min_hash / fast_div_) * blocks_;
70 min_hash_2 -= (min_hash_2 / fast_div_) * blocks_;
// Each block spans NB_ELEM_BLOCK consecutive words of table_.
72 return std::make_pair(table_ + NB_ELEM_BLOCK * min_hash, table_ + NB_ELEM_BLOCK * min_hash_2);
// Membership query: probes the first candidate block selected by min_hash for the bits
// derived from kmer_hash, then probes the second candidate block.
// NOTE(review): the listing omits the declarations of i and k, the header of the first probe
// loop, the condition guarding the second probe and the return statement; the comments below
// describe only the visible lines.
75 bool contains(uint64_t kmer_hash,
const uint64_t min_hash)
const {
// Keep an untouched copy of the hash so the second block is probed from the same start value.
81 uint64_t kmer_hash_2 = kmer_hash;
// First candidate block: index is min_hash modulo blocks_ (division through fast_div_).
83 uint64_t* table = table_ + ((min_hash - (min_hash / fast_div_) * blocks_) * NB_ELEM_BLOCK);
85 __builtin_prefetch(table, 0, 1);
// Low 11 bits of the hash give the bit position in the block: >> 6 picks the 64-bit word,
// & 0x3f picks the bit inside it. Stop at the first bit that is not set.
89 if ((table[(kmer_hash & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash & 0x3fULL))) == 0)
break;
// Derive the next bit position from the current hash.
90 kmer_hash = (kmer_hash * 49157) % (1610612741ULL);
// Second candidate block, selected by the re-hashed min_hash.
95 const uint64_t min_hash_2 = (min_hash * 49157) % (1610612741ULL);
97 table = table_ + ((min_hash_2 - (min_hash_2 / fast_div_) * blocks_) * NB_ELEM_BLOCK);
99 __builtin_prefetch(table, 0, 1);
// Same probe sequence, restarted from i = 0 on the saved hash copy.
101 for (i = 0; i < k; i++) {
103 if ((table[(kmer_hash_2 & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash_2 & 0x3fULL))) == 0)
break;
104 kmer_hash_2 = (kmer_hash_2 * 49157) % (1610612741ULL);
// Membership query against two already-resolved block pointers (first, second).
// Returns true when contains_block reports a non-zero result for kmer_hash.
111 inline bool contains(uint64_t kmer_hash,
const std::pair<uint64_t*, uint64_t*> block_ptr)
const {
113 return (contains_block(kmer_hash, block_ptr) != 0);
// Probes the two blocks in block_ptr for the bits derived from kmer_hash.
// The visible return yields 2 when every probe of the second block hits, and 0 otherwise.
// NOTE(review): the listing omits the declarations of i and k, the condition guarding the
// second probe and any other return path (presumably a distinct non-zero value for a hit in
// the first block) - confirm against the full source.
116 size_t contains_block(uint64_t kmer_hash,
const std::pair<const uint64_t* const, const uint64_t* const> block_ptr)
const {
// Untouched copy of the hash, used to probe the second block from the same start value.
118 uint64_t kmer_hash_2 = kmer_hash;
124 __builtin_prefetch(block_ptr.first, 0, 1);
// Probe the first block; leave the loop at the first unset bit.
126 for (; i != k; i++) {
// Low 11 bits -> bit position in the block; >> 6 selects the word, & 0x3f the bit.
128 if ((block_ptr.first[(kmer_hash & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash & 0x3fULL))) == 0)
break;
129 kmer_hash = (kmer_hash * 49157) % (1610612741ULL);
134 __builtin_prefetch(block_ptr.second, 0, 1);
// Probe the second block from scratch with the saved hash copy.
136 for (i = 0; i != k; i++) {
138 if ((block_ptr.second[(kmer_hash_2 & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash_2 & 0x3fULL))) == 0)
break;
139 kmer_hash_2 = (kmer_hash_2 * 49157) % (1610612741ULL);
// i == k means no probe of the second block found an unset bit.
142 return (i == k ? 2 : 0);
// Looks kmer_hash up in the two candidate blocks selected by min_hash and sets the missing
// bits in one of them. The block to write to is chosen by comparing the number of set bits
// (popcnt) of the two blocks. With multi_threaded set, bits are written with an atomic OR.
// NOTE(review): the listing omits the declarations of i, j and k, the conditions between the
// phases, the `else` of the multi_threaded test, part of the pointer swap and the return
// statement; the comments below describe only the visible lines.
148 bool search_and_insert(uint64_t kmer_hash,
const uint64_t min_hash,
const bool multi_threaded =
false) {
// Untouched copy of the hash for probing the second block.
154 uint64_t kmer_hash_2 = kmer_hash;
// First candidate block: min_hash modulo blocks_ (division through fast_div_).
156 uint64_t* table = table_ + ((min_hash - (min_hash / fast_div_) * blocks_) * NB_ELEM_BLOCK);
158 __builtin_prefetch(table, 0, 1);
// Probe the first block; i stops at the index of the first unset bit (or reaches k).
160 for (; i != k; i++) {
162 if ((table[(kmer_hash & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash & 0x3fULL))) == 0)
break;
163 kmer_hash = (kmer_hash * 49157) % (1610612741ULL);
// Second candidate block, selected by the re-hashed min_hash.
168 const uint64_t min_hash_2 = (min_hash * 49157) % (1610612741ULL);
170 uint64_t* table2 = table_ + ((min_hash_2 - (min_hash_2 / fast_div_) * blocks_) * NB_ELEM_BLOCK);
172 __builtin_prefetch(table2, 0, 1);
// Probe the second block with its own counter j.
174 for (; j != k; j++) {
176 if ((table2[(kmer_hash_2 & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash_2 & 0x3fULL))) == 0)
break;
177 kmer_hash_2 = (kmer_hash_2 * 49157) % (1610612741ULL);
// Single-threaded path: plain (non-atomic) writes.
182 if (!multi_threaded){
// True when the second block has fewer set bits than the first; popcnt takes a byte length.
184 if (popcnt(table2, NB_ELEM_BLOCK *
sizeof(uint64_t)) < popcnt(table, NB_ELEM_BLOCK *
sizeof(uint64_t))){
// Continue from the second block's hash state. NOTE(review): the neighbouring lines
// (presumably switching table and i as well) are not in the listing - verify.
188 kmer_hash = kmer_hash_2;
// Prefetch for writing (second argument 1).
191 __builtin_prefetch(table, 1, 1);
// Set the remaining bits, starting from the probe index where the lookup stopped.
193 for (; i != k; i++) {
196 table[(kmer_hash & MASK_BITS_BLOCK) >> 6] |= 1ULL << (kmer_hash & 0x3fULL);
197 kmer_hash = (kmer_hash * 49157) % (1610612741ULL);
// NOTE(review): the listing drops the lines before this point; the atomic OR below suggests
// this is the multi_threaded path - confirm against the full source.
202 if (popcnt(table2, NB_ELEM_BLOCK *
sizeof(uint64_t)) < popcnt(table, NB_ELEM_BLOCK *
sizeof(uint64_t))){
// Swap the two hash states so that kmer_hash belongs to the block that will be written.
208 uint64_t tmp_size_t = kmer_hash;
209 kmer_hash = kmer_hash_2;
210 kmer_hash_2 = tmp_size_t;
// Start of the block-pointer swap; the rest of it is not in the listing.
212 uint64_t* tmp_ptr = table;
217 __builtin_prefetch(table, 1, 1);
// Set the remaining bits with an atomic fetch-and-or on the containing word.
219 for (; i != k; i++) {
221 __sync_fetch_and_or(table + ((kmer_hash & MASK_BITS_BLOCK) >> 6), 1ULL << (kmer_hash & 0x3fULL));
223 kmer_hash = (kmer_hash * 49157) % (1610612741ULL);
// Read-only prefetch, then resume probing table2 from index j.
226 __builtin_prefetch(table2, 0, 1);
228 for (; j != k; j++) {
230 if ((table2[(kmer_hash_2 & MASK_BITS_BLOCK) >> 6] & (1ULL << (kmer_hash_2 & 0x3fULL))) == 0)
break;
231 kmer_hash_2 = (kmer_hash_2 * 49157) % (1610612741ULL);
// Inserts kmer_hash into one of the blocks selected by min_hash.
// Forwards to search_and_insert with multi_threaded = false and discards its result.
242 inline void insert(uint64_t kmer_hash,
const uint64_t min_hash){
244 search_and_insert(kmer_hash, min_hash,
false);
// Serializes the filter to fp: size_table_, blocks_ and k_, followed by the bit table.
// Each field is written as its raw in-memory bytes, so the on-disk layout follows the host's
// type sizes and byte order. Returns false as soon as any fwrite writes fewer items than asked.
// NOTE(review): the return on success is not in the listing.
247 bool WriteBloomFilter(FILE *fp) {
// Header: total bit count, block count, number of hash functions.
249 if (fwrite(&size_table_,
sizeof(size_table_), 1, fp) != 1)
return false;
250 if (fwrite(&blocks_,
sizeof(blocks_), 1, fp) != 1)
return false;
251 if (fwrite(&k_,
sizeof(k_), 1, fp) != 1)
return false;
// Payload: NB_ELEM_BLOCK 64-bit words for each block.
253 if (fwrite(table_,
sizeof(uint64_t), NB_ELEM_BLOCK * blocks_, fp) != (NB_ELEM_BLOCK * blocks_))
return false;
// Deserializes a filter from fp in the order used by WriteBloomFilter: size_table_, blocks_,
// k_, then NB_ELEM_BLOCK * blocks_ 64-bit words into table_.
// Returns false as soon as any fread delivers fewer items than requested.
// NOTE(review): the listing omits the lines before the first read and between the header and
// the table read; presumably table_ is (re)allocated for the new blocks_ there - verify,
// since the final fread writes NB_ELEM_BLOCK * blocks_ words through table_.
// The return on success is also not in the listing.
258 bool ReadBloomFilter(FILE *fp) {
// Header fields, read back as raw bytes.
262 if (fread(&size_table_,
sizeof(size_table_), 1, fp) != 1)
return false;
263 if (fread(&blocks_,
sizeof(blocks_), 1, fp) != 1)
return false;
264 if (fread(&k_,
sizeof(k_), 1, fp) != 1)
return false;
// Bit table payload.
268 if (fread(table_,
sizeof(uint64_t), NB_ELEM_BLOCK * blocks_, fp) != (NB_ELEM_BLOCK * blocks_))
return false;
291 size_table_ = bf.size_table_;
292 blocks_ = bf.blocks_;
294 fast_div_ = bf.fast_div_;
// Returns the number of blocks in the table (blocks_).
299 inline uint64_t getNbBlocks()
const {
return blocks_; }
// Returns a read-only pointer to the first word of the bit table (table_).
301 inline const uint64_t* getTable_ptr()
const {
return table_; }
// Table set-up fragment (the enclosing function's header is not in the listing).
// Builds the divider used to reduce hashes modulo blocks_.
307 fast_div_ = libdivide::divider<uint64_t>(blocks_);
// Allocates NB_ELEM_BLOCK * blocks_ words with 64-byte alignment.
// NOTE(review): the return value of posix_memalign is ignored, so an allocation failure is
// not detected before the memset below writes through table_.
309 posix_memalign((
void**)&table_, 64, NB_ELEM_BLOCK * blocks_*
sizeof(table_[0]));
// Start with every bit cleared.
310 memset(table_, 0, NB_ELEM_BLOCK * blocks_ *
sizeof(table_[0]));
// Evaluates (1 - exp(-k / bits))^k for the given bits and k, in double precision.
// The constructor calls it with bits_per_elem and candidate values of k_ to compare
// false-positive estimates.
313 inline double fpp(
size_t bits,
int k)
const {
315 return pow(1-exp(-((
double)k)/((
double)bits)),(
double)k);
319 #endif // BFG_BLOCKEDBLOOMFILTER_HPP