17#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
19#ifndef RAPTOR_HIBF_HAS_COUNT
20# define RAPTOR_HIBF_HAS_COUNT 0
86template <seqan3::data_layout data_layout_mode_ = seqan3::data_layout::uncompressed>
96#if RAPTOR_HIBF_HAS_COUNT
98 template <std::
integral value_t>
106 using ibf_t = seqan3::interleaved_bloom_filter<data_layout_mode_>;
143#if RAPTOR_HIBF_HAS_COUNT
147 template <std::
integral value_t = u
int16_t>
150 return counting_agent_type<value_t>{*
this};
162 template <seqan3::cereal_archive archive_t>
163 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
174template <seqan3::data_layout data_layout_mode>
191 size_t num_user_bins()
const noexcept
193 return user_bin_filenames.
size();
197 void set_ibf_count(
size_t const size)
199 ibf_bin_to_filename_position.
resize(size);
203 void set_user_bin_count(
size_t const size)
205 user_bin_filenames.
resize(size);
219 return ibf_bin_to_filename_position[idx];
231 std::string & filename_of_user_bin(
size_t const idx)
233 return user_bin_filenames[idx];
239 return user_bin_filenames[ibf_bin_to_filename_position[index_pair.
first][index_pair.
second]];
245 auto operator[](
size_t const ibf_idx)
const
247 return ibf_bin_to_filename_position[ibf_idx]
248 | std::views::transform(
254 return user_bin_filenames[i];
259 int64_t filename_index(
size_t const ibf_idx,
size_t const bin_idx)
const
261 return ibf_bin_to_filename_position[ibf_idx][bin_idx];
269 template <
typename stream_t>
270 void write_filenames(stream_t & out_stream)
const
274 for (
auto const & filename : user_bin_filenames)
295 template <
typename archive_t>
298 archive(user_bin_filenames);
299 archive(ibf_bin_to_filename_position);
309template <seqan3::data_layout data_layout_mode>
317 hibf_t const *
const hibf_ptr{
nullptr};
320 template <std::ranges::forward_range value_range_t>
321 void bulk_contains_impl(value_range_t && values, int64_t
const ibf_idx,
size_t const threshold)
324 auto & result = agent.bulk_count(values);
328 for (
size_t bin{}; bin < result.size(); ++bin)
332 auto const current_filename_index = hibf_ptr->
user_bins.filename_index(ibf_idx, bin);
334 if (current_filename_index < 0)
336 if (sum >= threshold)
337 bulk_contains_impl(values, hibf_ptr->
next_ibf_id[ibf_idx][bin], threshold);
340 else if (bin + 1u == result.size() ||
341 current_filename_index != hibf_ptr->
user_bins.filename_index(ibf_idx, bin + 1))
343 if (sum >= threshold)
344 result_buffer.emplace_back(current_filename_index);
370 std::vector<int64_t> result_buffer;
389 template <std::ranges::forward_range value_range_t>
390 [[nodiscard]] std::vector<int64_t>
const & bulk_contains(value_range_t && values,
size_t const threshold) &
noexcept
392 assert(hibf_ptr !=
nullptr);
394 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
395 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
396 "An individual value must be an unsigned integral.");
398 result_buffer.
clear();
400 bulk_contains_impl(values, 0, threshold);
402 std::ranges::sort(result_buffer);
404 return result_buffer;
409 template <std::ranges::range value_range_t>
410 [[nodiscard]] std::vector<int64_t>
const & bulk_contains(value_range_t && values,
411 size_t const threshold) &&
noexcept =
delete;
415#if RAPTOR_HIBF_HAS_COUNT
418template <seqan3::data_layout data_layout_mode>
419template <std::
integral value_t>
427 hibf_t
const *
const hibf_ptr{
nullptr};
430 template <std::ranges::forward_range value_range_t>
431 void bulk_count_impl(value_range_t && values, int64_t
const ibf_idx,
size_t const threshold)
434 auto & result = agent.bulk_count(values);
438 for (
size_t bin{}; bin < result.size(); ++bin)
441 auto const current_filename_index = hibf_ptr->user_bins.filename_index(ibf_idx, bin);
443 if (current_filename_index < 0)
445 if (sum >= threshold)
446 bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
449 else if (bin + 1u == result.size() ||
450 current_filename_index != hibf_ptr->user_bins.filename_index(ibf_idx, bin + 1))
452 if (sum >= threshold)
453 result_buffer[current_filename_index] = sum;
476 result_buffer(hibf_ptr->user_bins.num_user_bins())
481 seqan3::counting_vector<value_t> result_buffer;
502 template <std::ranges::forward_range value_range_t>
503 [[nodiscard]] seqan3::counting_vector<value_t>
const & bulk_count(value_range_t && values,
504 size_t const threshold = 1u) &
noexcept
506 assert(hibf_ptr !=
nullptr);
507 assert(threshold > 0u);
508 assert(result_buffer.size() == hibf_ptr->user_bins.num_user_bins());
510 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
511 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
512 "An individual value must be an unsigned integral.");
514 std::ranges::fill(result_buffer,
static_cast<value_t
>(0u));
516 bulk_count_impl(values, 0, threshold);
518 return result_buffer;
523 template <std::ranges::range value_range_t>
524 [[nodiscard]] seqan3::counting_vector<value_t>
const & bulk_count(value_range_t && values,
525 size_t const threshold = 1u) &&
noexcept =
delete;
Manages counting ranges of values for the raptor::hierarchical_interleaved_bloom_filter.
Definition hierarchical_interleaved_bloom_filter.hpp:421
Manages membership queries for the raptor::hierarchical_interleaved_bloom_filter.
Definition hierarchical_interleaved_bloom_filter.hpp:311
hierarchical_interleaved_bloom_filter< data_layout_mode > hibf_t
The type of the augmented hierarchical_interleaved_bloom_filter.
Definition hierarchical_interleaved_bloom_filter.hpp:314
Bookkeeping for user and technical bins.
Definition hierarchical_interleaved_bloom_filter.hpp:176
counting_agent_type< value_t > counting_agent() const
Returns a counting_agent_type to be used for counting.
Definition hierarchical_interleaved_bloom_filter.hpp:148
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
~hierarchical_interleaved_bloom_filter()=default
Defaulted.
membership_agent_type membership_agent() const
Returns a membership_agent_type to be used for membership queries.
Definition hierarchical_interleaved_bloom_filter.hpp:138
std::vector< ibf_t > ibf_vector
Definition hierarchical_interleaved_bloom_filter.hpp:123
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
static constexpr seqan3::data_layout data_layout_mode
Definition hierarchical_interleaved_bloom_filter.hpp:103
void serialize(archive_t &archive)
Serialisation support function.
Definition hierarchical_interleaved_bloom_filter.hpp:163
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
std::vector< std::vector< int64_t > > next_ibf_id
Definition hierarchical_interleaved_bloom_filter.hpp:132
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
seqan3::interleaved_bloom_filter< data_layout_mode_ > ibf_t
The type of an individual Bloom filter.
Definition hierarchical_interleaved_bloom_filter.hpp:106
hierarchical_interleaved_bloom_filter()=default
Defaulted.
user_bins user_bins
The underlying user bins.
Definition hierarchical_interleaved_bloom_filter.hpp:135
Must be first include.
Definition bin_size_in_bits.hpp:18