BioC++ core-0.7.0
The Modern C++ libraries for Bioinformatics.
 
Loading...
Searching...
No Matches
bitcompressed_vector.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2022 deCODE Genetics
3// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/biocpp/biocpp-core/blob/main/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
14#pragma once
15
16#include <climits>
17#include <concepts>
18#include <iterator>
19#include <ranges>
20#include <type_traits>
21
28
29namespace bio::ranges
30{
31
60template <alphabet::writable_semialphabet alphabet_type>
61 requires std::regular<alphabet_type>
63{
64private:
66 static constexpr size_t bits_per_letter = std::bit_width(alphabet::size<alphabet_type>);
67 static_assert(bits_per_letter <= 64, "alphabet must be representable in at most 64bit.");
68
70 using word_type = uint64_t;
72 static constexpr size_t word_size = sizeof(word_type) * CHAR_BIT;
74 static constexpr size_t letters_per_word = word_size / bits_per_letter;
76 static constexpr uint64_t mask = (1ull << bits_per_letter) - 1ull;
77
80
82 size_t size_ = 0;
83
85 data_type data;
86
88 static uint64_t get_rank(data_type const & vec, size_t const i) noexcept
89 {
90 assert(i / letters_per_word < vec.size());
91 uint64_t const word = vec[i / letters_per_word];
92 size_t const offset = (i % letters_per_word) * bits_per_letter;
93 return (word >> offset) & mask;
94 }
95
97 static void set_rank(data_type & vec, size_t const i, uint64_t const rank) noexcept
98 {
99 assert(i / letters_per_word < vec.size());
100 uint64_t & word = vec[i / letters_per_word];
101 size_t const offset = (i % letters_per_word) * bits_per_letter;
102 word &= ~(mask << offset);
103 word |= rank << offset;
104 }
105
107 void clear_unused_bits_in_last_word()
108 {
109 if (data.empty() || (size() % letters_per_word == 0))
110 return;
111
112 size_t const offset = (size() % letters_per_word) * bits_per_letter;
113 data.back() &= ((1ull << offset) - 1ull);
114 }
115
118 class reference_proxy_type : public alphabet::proxy_base<reference_proxy_type, alphabet_type>
119 {
120 private:
123
125 data_type * data_ptr;
127 size_t index;
128
129 public:
134 reference_proxy_type() = delete;
135 constexpr reference_proxy_type(reference_proxy_type const &) noexcept = default;
136 constexpr reference_proxy_type(reference_proxy_type &&) noexcept = default;
137 ~reference_proxy_type() noexcept = default;
138
139 // Import from base:
140 using base_t::operator=;
141
143 // NOLINTNEXTLINE(bugprone-unhandled-self-assignment)
144 constexpr reference_proxy_type & operator=(reference_proxy_type const & rhs)
145 {
146 return assign_rank(rhs.to_rank());
147 }
148
150 // NOLINTNEXTLINE(bugprone-unhandled-self-assignment)
151 constexpr reference_proxy_type const & operator=(reference_proxy_type const & rhs) const
152 {
153 return assign_rank(rhs.to_rank());
154 }
155
157 reference_proxy_type(data_type * const data_ptr_, size_t const index_) noexcept :
158 data_ptr{data_ptr_}, index{index_}
159 {}
161
163 constexpr alphabet::rank_t<alphabet_type> to_rank() const noexcept { return get_rank(*data_ptr, index); }
164
166 constexpr reference_proxy_type & assign_rank(alphabet::rank_t<alphabet_type> const r) noexcept
167 {
168 set_rank(*data_ptr, index, r);
169 return *this;
170 }
171
173 constexpr reference_proxy_type const & assign_rank(alphabet::rank_t<alphabet_type> const r) const noexcept
174 {
175 set_rank(*data_ptr, index, r);
176 return *this;
177 }
178 };
179
182 //NOTE(h-2): it is entirely unclear to me why we need this
183 template <typename t>
184 requires std::is_same_v<std::ranges::range_value_t<std::remove_cvref_t<t>>, alphabet_type>
185 static constexpr bool has_same_value_type_v = true;
187
188public:
193 using value_type = alphabet_type;
195 using reference = reference_proxy_type;
197 using const_reference = alphabet_type;
199 using iterator = detail::random_access_iterator<bitcompressed_vector>;
201 using const_iterator = detail::random_access_iterator<bitcompressed_vector const>;
203 using difference_type = std::ranges::range_difference_t<data_type>;
205 using size_type = std::ranges::range_size_t<data_type>;
207
209 // this signals to range-v3 that something is a container :|
210 using allocator_type = void;
212
217 constexpr bitcompressed_vector(bitcompressed_vector const &) = default;
218 constexpr bitcompressed_vector(bitcompressed_vector &&) noexcept = default;
219 constexpr bitcompressed_vector & operator=(bitcompressed_vector const &) = default;
220 constexpr bitcompressed_vector & operator=(bitcompressed_vector &&) noexcept = default;
221 ~bitcompressed_vector() noexcept = default;
222
236 template <meta::different_from<bitcompressed_vector> other_range_t>
237 requires(std::ranges::input_range<other_range_t> && has_same_value_type_v<other_range_t>)
238 explicit bitcompressed_vector(other_range_t && range) :
239 bitcompressed_vector{std::ranges::begin(range), std::ranges::end(range)}
240 {}
241
254 bitcompressed_vector(size_type const count, value_type const value) { insert(cend(), count, value); }
255
271 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
272 bitcompressed_vector(begin_iterator_type begin_it, end_iterator_type end_it)
273 requires(std::sentinel_for<end_iterator_type, begin_iterator_type> &&
274 std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>)
275 {
276 insert(cend(), begin_it, end_it);
277 }
278
291 bitcompressed_vector(std::begin(ilist), std::end(ilist))
292 {}
293
306 {
307 assign(std::begin(ilist), std::end(ilist));
308 return *this;
309 }
310
324 template <std::ranges::input_range other_range_t>
325 void assign(other_range_t && range)
326 requires std::common_reference_with<std::ranges::range_value_t<other_range_t>, value_type>
327 {
328 bitcompressed_vector rhs{std::forward<other_range_t>(range)};
329 swap(rhs);
330 }
331
344 void assign(size_type const count, value_type const value)
345 {
346 bitcompressed_vector rhs{count, value};
347 swap(rhs);
348 }
349
365 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
366 void assign(begin_iterator_type begin_it, end_iterator_type end_it)
367 requires(std::sentinel_for<end_iterator_type, begin_iterator_type> &&
368 std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>)
369 {
370 bitcompressed_vector rhs{begin_it, end_it};
371 swap(rhs);
372 }
373
386
388
405 iterator begin() noexcept { return iterator{*this}; }
406
408 const_iterator begin() const noexcept { return const_iterator{*this}; }
409
411 const_iterator cbegin() const noexcept { return const_iterator{*this}; }
412
426 iterator end() noexcept { return iterator{*this, size()}; }
427
429 const_iterator end() const noexcept { return const_iterator{*this, size()}; }
430
432 const_iterator cend() const noexcept { return const_iterator{*this, size()}; }
434
452 {
453 if (i >= size()) // [[unlikely]]
454 {
455 throw std::out_of_range{"Trying to access element behind the last in bitcompressed_vector."};
456 }
457 return (*this)[i];
458 }
459
462 {
463 if (i >= size()) // [[unlikely]]
464 {
465 throw std::out_of_range{"Trying to access element behind the last in bitcompressed_vector."};
466 }
467 return (*this)[i];
468 }
469
486 {
487 assert(i < size());
488 return {&data, i};
489 }
490
492 const_reference operator[](size_type const i) const noexcept
493 {
494 assert(i < size());
495 return alphabet::assign_rank_to(get_rank(data, i), alphabet_type{});
496 }
497
511 reference front() noexcept
512 {
513 assert(size() > 0);
514 return (*this)[0];
515 }
516
518 const_reference front() const noexcept
519 {
520 assert(size() > 0);
521 return (*this)[0];
522 }
523
537 reference back() noexcept
538 {
539 assert(size() > 0);
540 return (*this)[size() - 1];
541 }
542
544 const_reference back() const noexcept
545 {
546 assert(size() > 0);
547 return (*this)[size() - 1];
548 }
549
557 constexpr data_type & raw_data() noexcept { return data; }
558
560 constexpr data_type const & raw_data() const noexcept { return data; }
562
577 bool empty() const noexcept { return size() == 0; }
578
590 size_type size() const noexcept { return size_; }
591
606 size_type max_size() const noexcept
607 {
608 // this protects against underflow in the multiplication
609 return std::max<size_type>(data.max_size(), data.max_size() * letters_per_word);
610 }
611
627 size_type capacity() const noexcept { return data.capacity() * letters_per_word; }
628
647 void reserve(size_type const new_cap)
648 {
649 data.reserve(new_cap / letters_per_word + (new_cap % letters_per_word != 0));
650 }
651
667 void shrink_to_fit() { data.shrink_to_fit(); }
669
683 void clear() noexcept
684 {
685 data.clear();
686 size_ = 0;
687 }
688
706 iterator insert(const_iterator pos, value_type const value) { return insert(pos, 1, value); }
707
724 iterator insert(const_iterator pos, size_type const count, value_type const value)
725 {
726 auto v = views::repeat_n(value, count);
727 return insert(pos, v.begin(), v.end());
728 }
729
751 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
752 iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
753 requires(std::sentinel_for<end_iterator_type, begin_iterator_type> &&
754 std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>)
755 {
756 //TODO UPDATE DOCUMENTATION TO REFLECT THIS
757 //TODO this is not ideal, always linear
758 size_t const pos_as_num = std::distance(cbegin(), pos);
759 size_t const size_of_insert = std::distance(begin_it, end_it);
761 tmp.resize(size() + size_of_insert);
762 //TODO use constrained algorithms here once proxy_base is out-iterator-compatible
763 std::copy(cbegin(), pos, tmp.begin());
764 std::copy(begin_it, end_it, tmp.begin() + pos_as_num);
765 std::copy(cbegin() + pos_as_num, cend(), tmp.begin() + pos_as_num + size_of_insert);
766
767 std::swap(*this, tmp);
768 return begin() + pos_as_num;
769 }
770
787 {
788 return insert(pos, ilist.begin(), ilist.end());
789 }
790
812 {
813 if (begin_it == end_it)
814 return end();
815
816 //TODO this is not ideal, always linear
817 size_t const begin_pos_of_removal = std::distance(cbegin(), begin_it);
818 size_t const size_of_removal = std::distance(begin_it, end_it);
820 tmp.resize(size() - size_of_removal);
821 //TODO use constrained algorithms here once proxy_base is out-iterator-compatible
822 std::copy(cbegin(), begin_it, tmp.begin());
823 std::copy(end_it, cend(), tmp.begin() + begin_pos_of_removal);
824
825 std::swap(*this, tmp);
826 return begin() + begin_pos_of_removal + size_of_removal;
827 }
828
844 iterator erase(const_iterator pos) { return erase(pos, pos + 1); }
845
861 void push_back(value_type const value)
862 {
863 if (data.size() * letters_per_word == size_) // the last word is "full"
864 data.emplace_back();
865
866 set_rank(data, size(), alphabet::to_rank(value));
867 ++size_;
868 }
869
886 void pop_back()
887 {
888 assert(size() > 0);
889 if ((size_--) % letters_per_word == 1) // the last word contains only one letter
890 data.pop_back();
891 else
892 clear_unused_bits_in_last_word();
893 }
894
921 void resize(size_type const count)
922 {
923 assert(count < max_size());
924 data.resize(count / letters_per_word + (count % letters_per_word != 0));
925 size_ = count;
926 clear_unused_bits_in_last_word();
927 }
928
933 void resize(size_type const count, value_type const value)
934 {
935 assert(count < max_size());
936 size_t old_size = size_;
937 resize(count);
938 for (size_t i = old_size; i < size(); ++i)
939 set_rank(data, i, alphabet::to_rank(value));
940 }
941
953 constexpr void swap(bitcompressed_vector & rhs) noexcept { std::swap(*this, rhs); }
954
956 constexpr void swap(bitcompressed_vector && rhs) noexcept { std::swap(*this, rhs); }
957
970 friend constexpr void swap(bitcompressed_vector & lhs, bitcompressed_vector & rhs) noexcept { std::swap(lhs, rhs); }
971
973 friend constexpr void swap(bitcompressed_vector && lhs, bitcompressed_vector && rhs) noexcept
974 {
975 std::swap(lhs, rhs);
976 }
978
980 friend auto operator<=>(bitcompressed_vector const & lhs, bitcompressed_vector const & rhs) noexcept = default;
981
989 template <typename archive_t>
990 void serialize(archive_t & archive)
991 {
992 archive(size_);
993 archive(data);
994 }
996};
997
998} // namespace bio::ranges
T begin(T... args)
T bit_width(T... args)
A CRTP-base that eases the definition of proxy types returned in place of regular alphabets.
Definition: proxy_base.hpp:63
A space-optimised version of std::vector that compresses multiple letters into a single byte.
Definition: bitcompressed_vector.hpp:63
size_type capacity() const noexcept
Returns the number of elements that the container has currently allocated space for.
Definition: bitcompressed_vector.hpp:627
friend constexpr void swap(bitcompressed_vector &&lhs, bitcompressed_vector &&rhs) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bitcompressed_vector.hpp:973
void resize(size_type const count, value_type const value)
Resizes the container to contain count elements.
Definition: bitcompressed_vector.hpp:933
reference at(size_type const i)
Return the i-th element.
Definition: bitcompressed_vector.hpp:451
const_reference back() const noexcept
Return the last element.
Definition: bitcompressed_vector.hpp:544
friend auto operator<=>(bitcompressed_vector const &lhs, bitcompressed_vector const &rhs) noexcept=default
Comparison operators.
void assign(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitcompressed_vector.hpp:385
bitcompressed_vector()=default
Defaulted.
constexpr void swap(bitcompressed_vector &&rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:956
void assign(other_range_t &&range)
Assign from a different range.
Definition: bitcompressed_vector.hpp:325
void clear() noexcept
Removes all elements from the container.
Definition: bitcompressed_vector.hpp:683
const_reference front() const noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitcompressed_vector.hpp:518
iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
Inserts elements from range [begin_it, end_it) before position in the container.
Definition: bitcompressed_vector.hpp:752
iterator insert(const_iterator pos, value_type const value)
Inserts value before position in the container.
Definition: bitcompressed_vector.hpp:706
alphabet_type const_reference
Equals the alphabet_type / value_type.
Definition: bitcompressed_vector.hpp:197
reference front() noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitcompressed_vector.hpp:511
void reserve(size_type const new_cap)
Increase the capacity to a value that's greater or equal to new_cap.
Definition: bitcompressed_vector.hpp:647
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:432
reference_proxy_type reference
A proxy type that enables assignment, if the underlying data structure also provides a proxy.
Definition: bitcompressed_vector.hpp:195
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:411
alphabet_type value_type
Equals the alphabet_type.
Definition: bitcompressed_vector.hpp:193
constexpr bitcompressed_vector(bitcompressed_vector &&) noexcept=default
Defaulted.
iterator erase(const_iterator begin_it, const_iterator end_it)
Removes specified elements from the container.
Definition: bitcompressed_vector.hpp:811
std::ranges::range_size_t< data_type > size_type
An unsigned integer type (usually std::size_t)
Definition: bitcompressed_vector.hpp:205
constexpr void swap(bitcompressed_vector &rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:953
void push_back(value_type const value)
Appends the given element value to the end of the container.
Definition: bitcompressed_vector.hpp:861
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitcompressed_vector.hpp:557
constexpr bitcompressed_vector(bitcompressed_vector const &)=default
Defaulted.
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:408
iterator begin() noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:405
friend constexpr void swap(bitcompressed_vector &lhs, bitcompressed_vector &rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:970
bitcompressed_vector & operator=(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitcompressed_vector.hpp:305
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitcompressed_vector.hpp:560
reference back() noexcept
Return the last element.
Definition: bitcompressed_vector.hpp:537
void assign(size_type const count, value_type const value)
Assign with count times value.
Definition: bitcompressed_vector.hpp:344
iterator end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:426
const_reference operator[](size_type const i) const noexcept
Return the i-th element.
Definition: bitcompressed_vector.hpp:492
bitcompressed_vector(std::initializer_list< value_type > ilist)
Construct from std::initializer_list.
Definition: bitcompressed_vector.hpp:290
bitcompressed_vector(begin_iterator_type begin_it, end_iterator_type end_it)
Construct from pair of iterators.
Definition: bitcompressed_vector.hpp:272
iterator insert(const_iterator pos, std::initializer_list< value_type > const &ilist)
Inserts elements from initializer list before position in the container.
Definition: bitcompressed_vector.hpp:786
void assign(begin_iterator_type begin_it, end_iterator_type end_it)
Assign from pair of iterators.
Definition: bitcompressed_vector.hpp:366
size_type max_size() const noexcept
Returns the maximum number of elements the container is able to hold due to system or library impleme...
Definition: bitcompressed_vector.hpp:606
iterator erase(const_iterator pos)
Removes specified elements from the container.
Definition: bitcompressed_vector.hpp:844
std::ranges::range_difference_t< data_type > difference_type
A signed integer type (usually std::ptrdiff_t)
Definition: bitcompressed_vector.hpp:203
void pop_back()
Removes the last element of the container.
Definition: bitcompressed_vector.hpp:886
detail::random_access_iterator< bitcompressed_vector const > const_iterator
The const_iterator type of this container (a random access iterator).
Definition: bitcompressed_vector.hpp:201
iterator insert(const_iterator pos, size_type const count, value_type const value)
Inserts count copies of value before position in the container.
Definition: bitcompressed_vector.hpp:724
const_reference at(size_type const i) const
Return the i-th element.
Definition: bitcompressed_vector.hpp:461
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:429
size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition: bitcompressed_vector.hpp:590
detail::random_access_iterator< bitcompressed_vector > iterator
The iterator type of this container (a random access iterator).
Definition: bitcompressed_vector.hpp:199
void resize(size_type const count)
Resizes the container to contain count elements.
Definition: bitcompressed_vector.hpp:921
bool empty() const noexcept
Checks whether the container is empty.
Definition: bitcompressed_vector.hpp:577
bitcompressed_vector(size_type const count, value_type const value)
Construct with count times value.
Definition: bitcompressed_vector.hpp:254
void shrink_to_fit()
Requests the removal of unused capacity.
Definition: bitcompressed_vector.hpp:667
reference operator[](size_type const i) noexcept
Return the i-th element.
Definition: bitcompressed_vector.hpp:485
Refines bio::alphabet::alphabet and adds assignability.
Definition: concept.hpp:688
T copy(T... args)
T distance(T... args)
T end(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:70
decltype(bio::alphabet::to_rank(std::declval< semi_alphabet_type >())) rank_t
The rank_type of the semi-alphabet; defined as the return type of bio::alphabet::to_rank.
Definition: concept.hpp:90
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: concept.hpp:138
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:98
The ranges module's namespace.
Provides bio::alphabet::proxy_base.
Provides the bio::ranges::detail::random_access_iterator class.
Provides bio::views::convert.
Provides bio::views::repeat_n.
T size(T... args)
T swap(T... args)
Provides bio::views::to_char.
Provides bio::views::to_rank.