BioC++ core-0.7.0
The Modern C++ libraries for Bioinformatics.
 
Loading...
Searching...
No Matches
dna4.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2022 deCODE Genetics
3// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/biocpp/biocpp-core/blob/main/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
14#pragma once
15
16#include <vector>
17
19
20// ------------------------------------------------------------------
21// dna4
22// ------------------------------------------------------------------
23
24namespace bio::alphabet
25{
26
27class rna4;
28
48class dna4 : public nucleotide_base<dna4, 4>
49{
50private:
53
55 friend base_t;
57 friend base_t::base_t;
60 friend rna4;
61
62public:
66 constexpr dna4() noexcept = default;
67 constexpr dna4(dna4 const &) noexcept = default;
68 constexpr dna4(dna4 &&) noexcept = default;
69 constexpr dna4 & operator=(dna4 const &) noexcept = default;
70 constexpr dna4 & operator=(dna4 &&) noexcept = default;
71 ~dna4() noexcept = default;
72
73 using base_t::base_t;
74
76 template <std::same_as<rna4> t> // Accept incomplete type
77 constexpr dna4(t const & r) noexcept
78 {
79 assign_rank(r.to_rank());
80 }
82
86 constexpr dna4 complement() const noexcept { return dna4{}.assign_rank(to_rank() ^ 0b11); }
87
88protected:
90
92 static constexpr std::array<char_type, alphabet_size> rank_to_char{'A', 'C', 'G', 'T'};
93
95 static constexpr std::array<rank_type, 256> char_to_rank = []() constexpr
96 {
98
99 // reverse mapping for characters and their lowercase
100 for (size_t rnk = 0u; rnk < alphabet_size; ++rnk)
101 {
102 ret[rank_to_char[rnk]] = rnk;
103 ret[detail::to_lower(rank_to_char[rnk])] = rnk;
104 }
105
106 // set U equal to T
107 ret['U'] = ret['T'];
108 ret['u'] = ret['t'];
109
110 // iupac characters get special treatment, because there is no N
111 ret['R'] = ret['A'];
112 ret['r'] = ret['A']; // or G
113 ret['Y'] = ret['C'];
114 ret['y'] = ret['C']; // or T
115 ret['S'] = ret['C'];
116 ret['s'] = ret['C']; // or G
117 ret['W'] = ret['A'];
118 ret['w'] = ret['A']; // or T
119 ret['K'] = ret['G'];
120 ret['k'] = ret['G']; // or T
121 ret['M'] = ret['A'];
122 ret['m'] = ret['A']; // or T
123 ret['B'] = ret['C'];
124 ret['b'] = ret['C']; // or G or T
125 ret['D'] = ret['A'];
126 ret['d'] = ret['A']; // or G or T
127 ret['H'] = ret['A'];
128 ret['h'] = ret['A']; // or C or T
129 ret['V'] = ret['A'];
130 ret['v'] = ret['A']; // or C or G
131
132 return ret;
133 }();
134};
135
136} // namespace bio::alphabet
137
138// ------------------------------------------------------------------
139// literals
140// ------------------------------------------------------------------
141
142namespace bio::alphabet
143{
144
145inline namespace literals
146{
147
156consteval dna4 operator""_dna4(char const c)
157{
158 if (!char_is_valid_for<dna4>(c))
159 throw std::invalid_argument{"Illegal character in character literal."};
160
161 return dna4{}.assign_char(c);
162}
163
173template <meta::detail::literal_buffer_string str>
174constexpr std::vector<dna4> operator""_dna4()
175{
176 return detail::string_literal<str, dna4>();
177}
179
180} // namespace literals
181
182} // namespace bio::alphabet
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition: base.hpp:168
static constexpr size_t alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: base.hpp:177
constexpr derived_type & assign_char(char_type const c) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: base.hpp:145
The four letter DNA alphabet of A,C,G,T.
Definition: dna4.hpp:49
constexpr dna4() noexcept=default
Defaulted.
A CRTP-base that refines bio::alphabet::base and is used by the nucleotides.
Definition: nucleotide_base.hpp:42
The four letter RNA alphabet of A,C,G,U.
Definition: rna4.hpp:48
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:70
The alphabet module's namespace.
Definition: aa10li.hpp:23
Provides bio::alphabet::nucleotide_base.