BioC++ core-0.7.0
The Modern C++ libraries for Bioinformatics.
 
Loading...
Searching...
No Matches
cigar.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2022 deCODE Genetics
3// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/biocpp/biocpp-core/blob/main/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
14#pragma once
15
16#include <charconv>
17#include <cstdint>
18#include <stdexcept>
19
25
26namespace bio::alphabet
27{
28
29// ------------------------------------------------------------------
30// cigar
31// ------------------------------------------------------------------
32
68class cigar : public base<cigar, (1ul << 28) * size<cigar_op>,
69 void> // no char type, because this is only semi_alphabet
70{
71private:
73 using base_t = base<cigar, (1ul << 28) * size<cigar_op>, void>;
74
76 static constexpr size_t wasted_rank = 16ull - size<cigar_op>;
77
79 constexpr uint32_t to_num_rank() const noexcept { return rank >> 4; }
80
82 constexpr uint8_t to_op_rank() const noexcept { return rank & 0b1111; }
83
85 constexpr void assign_num_rank(uint32_t r) noexcept { rank = (rank & 0b1111) | (r << 4); }
86
88 constexpr void assign_op_rank(uint8_t r) noexcept { rank = (rank & ~0b1111) | r; }
89
91 template <typename alphabet_type>
92 class component_proxy : public proxy_base<component_proxy<alphabet_type>, alphabet_type>
93 {
94 private:
96 using base_t = proxy_base<component_proxy<alphabet_type>, alphabet_type>;
98 friend base_t;
99
101 cigar * parent;
102
103 public:
104 //Import from base type:
105 using base_t::operator=;
106
111 component_proxy() = delete;
112 constexpr component_proxy(component_proxy const &) noexcept = default;
113 constexpr component_proxy(component_proxy &&) noexcept = default;
114 ~component_proxy() noexcept = default;
115
117 constexpr component_proxy(cigar & p) : parent{&p} {}
118
120 constexpr component_proxy & operator=(component_proxy const & rhs) // NOLINT(bugprone-unhandled-self-assignment)
121 {
122 return assign_rank(rhs.to_rank());
123 }
124
126 // NOLINTNEXTLINE(bugprone-unhandled-self-assignment)
127 constexpr component_proxy const & operator=(component_proxy const & rhs) const
128 {
129 return assign_rank(rhs.to_rank());
130 }
132
134 constexpr alphabet::rank_t<alphabet_type> to_rank() const noexcept
135 {
136 if constexpr (BIOCPP_IS_SAME(alphabet_type, uint32_t))
137 return parent->to_num_rank();
138 else
139 return parent->to_op_rank();
140 }
141
143 constexpr component_proxy & assign_rank(alphabet::rank_t<alphabet_type> const r) noexcept
144 {
145 if constexpr (BIOCPP_IS_SAME(alphabet_type, uint32_t))
146 parent->assign_num_rank(r);
147 else
148 parent->assign_op_rank(r);
149 return *this;
150 }
151
153 constexpr component_proxy const & assign_rank(alphabet::rank_t<alphabet_type> const r) const noexcept
154 {
155 if constexpr (BIOCPP_IS_SAME(alphabet_type, uint32_t))
156 parent->assign_num_rank(r);
157 else
158 parent->assign_op_rank(r);
159 return *this;
160 }
161
171 friend constexpr bool operator==(cigar const lhs, component_proxy const rhs) noexcept
172 {
173 return get<alphabet_type>(lhs) == static_cast<alphabet_type>(rhs);
174 }
175
177 friend constexpr auto operator<=>(cigar const lhs, component_proxy const rhs) noexcept
178 {
179 return get<alphabet_type>(lhs) <=> static_cast<alphabet_type>(rhs);
180 }
182 };
183
184public:
188 constexpr cigar() noexcept = default;
189 constexpr cigar(cigar const &) noexcept = default;
190 constexpr cigar(cigar &&) noexcept = default;
191 constexpr cigar & operator=(cigar const &) noexcept = default;
192 constexpr cigar & operator=(cigar &&) noexcept = default;
193 ~cigar() noexcept = default;
194
196 constexpr cigar(uint32_t const count, cigar_op op) noexcept : base_t{(count << 4) | op.to_rank()} {}
197
199 explicit constexpr cigar(uint32_t const count) noexcept : base_t{count << 4} {}
200
202 explicit constexpr cigar(cigar_op const op) noexcept : base_t{op.to_rank()} {}
203
205 constexpr cigar & operator=(uint32_t const count) noexcept
206 {
207 assign_num_rank(count);
208 return *this;
209 }
210
212 constexpr cigar & operator=(cigar_op const op) noexcept
213 {
214 assign_op_rank(op.to_rank());
215 return *this;
216 }
218
240 constexpr uint32_t to_rank() const noexcept { return rank < 16u ? rank : rank - wasted_rank; }
241
243 explicit constexpr operator uint32_t() const { return to_num_rank(); }
244
246 explicit constexpr operator cigar_op() const { return alphabet::assign_rank_to(to_op_rank(), cigar_op{}); }
247
260 [[nodiscard("Always check the returned string_view for the size!")]] std::string_view to_string(
261 std::span<char> buffer) const noexcept
262 {
263 auto [ptr, errc] = std::to_chars(buffer.data(), buffer.data() + 10, operator uint32_t());
264
265 // I hope this is optimised down to a single table lookup
266 *ptr = operator cigar_op().to_char();
267 (void)errc;
268
269 return std::string_view{buffer.data(), ptr + 1};
270 }
271
274 {
275 ranges::small_string<10> ret{}; // maximum number of digits for uint32_t + 1 char for the cigar_op
276 ret.resize(10);
277
278 std::string_view s = to_string(ret);
279 ret.resize(s.size());
280 return ret;
281 }
283
303 constexpr cigar & assign_rank(uint32_t const r) noexcept
304 {
305 assert(r < alphabet_size);
306 rank = r < 16u ? r : r + wasted_rank;
307 return *this;
308 }
309
317 {
318 int64_t num{};
319 auto [ptr, errc] = std::from_chars(s.data(), s.data() + s.size(), num);
320
321 if ((errc != std::errc{}) || (ptr != s.data() + s.size() - 1))
322 {
323 throw std::invalid_argument{std::string{"Illegal string assignment to CIGAR: "} +
324 static_cast<std::string>(s)};
325 }
326 else if (num < 0 || num > (2u << 28))
327 {
328 throw std::invalid_argument{"Only numbers between 0 and 2^28 are valid in cigars."};
329 }
330 else
331 {
332 assign_num_rank(num);
333 assign_op_rank(assign_char_strictly_to(*ptr, cigar_op{}).to_rank());
334 }
335
336 return *this;
337 }
339
351 template <meta::one_of<uint32_t, cigar_op> type>
352 friend constexpr type get(cigar const & l) noexcept
353 {
354 return l.operator type();
355 }
356
365 template <meta::one_of<uint32_t, cigar_op> type>
366 friend constexpr auto get(cigar & l) noexcept
367 {
368 return component_proxy<type>{l};
369 }
370
379 template <size_t index>
380 requires(index == 0 || index == 1)
381 friend constexpr auto get(cigar const & l) noexcept
382 {
383 if constexpr (index == 0)
384 return get<uint32_t>(l);
385 else if constexpr (index == 1)
386 return get<cigar_op>(l);
387 }
388
397 template <size_t index>
398 requires(index == 0 || index == 1)
399 friend constexpr auto get(cigar & l) noexcept
400 {
401 if constexpr (index == 0)
402 return get<uint32_t>(l);
403 else if constexpr (index == 1)
404 return get<cigar_op>(l);
405 }
407};
408
409static_assert(sizeof(cigar) == 4, "Something wrong with binary layout of bio::alphabet::cigar. PLEASE REPORT BUG.");
410
411} // namespace bio::alphabet
412
413namespace std
414{
415
// Partial specialisation of std::tuple_element for bio::alphabet::cigar, keyed on the element index `i`.
// NOTE(review): no member is visible between the braces in this view of the file. Together with
// std::tuple_size and the get<index>() friends this looks like the tuple-like protocol, which needs a
// nested `type` alias here (presumably uint32_t for index 0 and cigar_op for index 1, matching what
// get<0>/get<1> return for a const cigar) — confirm against the original header.
422template <std::size_t i>
423struct tuple_element<i, bio::alphabet::cigar>
424{
427};
428
435template <>
436struct tuple_size<bio::alphabet::cigar> : public std::integral_constant<size_t, 2>
437{};
438
439} // namespace std
440
441#if __has_include(<fmt/format.h>)
442
443# include <fmt/format.h>
444
445template <>
446struct fmt::formatter<bio::alphabet::cigar> : fmt::formatter<std::string_view>
447{
448 constexpr auto format(bio::alphabet::cigar const a, auto & ctx) const
449 {
450 auto tmp = a.to_string();
451 std::string_view v{tmp.data(), tmp.size()};
452 return fmt::formatter<std::string_view>::format(v, ctx);
453 }
454};
455#endif
Introduces the cigar_op alphabet.
A CRTP-base that makes defining a custom alphabet easier.
Definition: base.hpp:55
constexpr char_type to_char() const noexcept
Return the letter as a character of char_type.
Definition: base.hpp:104
The cigar operation alphabet.
Definition: cigar_op.hpp:59
The cigar semialphabet pairs a counter with a bio::alphabet::cigar_op letter.
Definition: cigar.hpp:70
constexpr cigar & assign_rank(uint32_t const r) noexcept
Assign from a numeric value.
Definition: cigar.hpp:303
std::string_view to_string(std::span< char > buffer) const noexcept
Convert to string representation.
Definition: cigar.hpp:260
friend constexpr type get(cigar const &l) noexcept
Get one of the two components from the cigar element (by type).
Definition: cigar.hpp:352
ranges::small_string< 10 > to_string() const noexcept
Convert to string representation.
Definition: cigar.hpp:273
constexpr cigar(cigar_op const op) noexcept
Construct from one component.
Definition: cigar.hpp:202
friend constexpr auto get(cigar &l) noexcept
Get one of the two components from the cigar element (by type).
Definition: cigar.hpp:366
constexpr uint32_t to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: cigar.hpp:240
cigar & assign_string(std::string_view const s)
Assign from the string representation.
Definition: cigar.hpp:316
constexpr cigar() noexcept=default
Defaulted.
constexpr cigar & operator=(uint32_t const count) noexcept
Assign from one component.
Definition: cigar.hpp:205
constexpr cigar & operator=(cigar_op const op) noexcept
Assign from one component.
Definition: cigar.hpp:212
friend constexpr auto get(cigar const &l) noexcept
Get one of the two components from the cigar element (by index).
Definition: cigar.hpp:381
constexpr cigar(uint32_t const count) noexcept
Construct from one component.
Definition: cigar.hpp:199
friend constexpr auto get(cigar &l) noexcept
Get one of the two components from the cigar element (by index).
Definition: cigar.hpp:399
Implements a small string that can be used for compile time computations.
Definition: small_string.hpp:47
constexpr void resize(size_type const count) noexcept
Resizes the container to contain count elements.
Definition: small_string.hpp:209
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
T data(T... args)
T from_chars(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:70
constexpr auto assign_char_strictly_to
Assign a character to an alphabet object, throw if the character is not valid.
Definition: concept.hpp:461
#define BIOCPP_IS_SAME(...)
A macro that behaves like std::is_same_v, except that it doesn't need to instantiate the template on ...
Definition: core.hpp:175
The alphabet module's namespace.
Definition: aa10li.hpp:23
The main BioC++ namespace.
Definition: aa10li.hpp:23
T size(T... args)
A constexpr string implementation to manipulate string literals at compile time.
T to_chars(T... args)
Provides bio::alphabet::tuple_base.
Provides alphabet adaptations for standard uint types.