BioC++ core-0.7.0
The Modern C++ libraries for Bioinformatics.
 
Loading...
Searching...
No Matches
aa10murphy.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2022 deCODE Genetics
3// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/biocpp/biocpp-core/blob/main/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
14#pragma once
15
16#include <vector>
17
20#include <bio/alphabet/detail/to_lower.hpp>
21
22namespace bio::alphabet
23{
24
80class aa10murphy : public aminoacid_base<aa10murphy, 10>
81{
82private:
85
87 friend base_t;
89 friend base_t::base_t;
91
92public:
96 constexpr aa10murphy() noexcept = default;
97 constexpr aa10murphy(aa10murphy const &) noexcept = default;
98 constexpr aa10murphy(aa10murphy &&) noexcept = default;
99 constexpr aa10murphy & operator=(aa10murphy const &) noexcept = default;
100 constexpr aa10murphy & operator=(aa10murphy &&) noexcept = default;
101 ~aa10murphy() noexcept = default;
102
104 using base_t::base_t;
106
107protected:
109 static constexpr std::array<char_type, alphabet_size> rank_to_char{
110 'A',
111 'B',
112 'C',
113 'F',
114 'G',
115 'H',
116 'I',
117 'K',
118 'P',
119 'S',
120 };
121
123 static constexpr std::array<rank_type, 256> char_to_rank = []() constexpr
124 {
126
127 // initialize with UNKNOWN
128 ret.fill(9); // value of 'S', because that appears most frequently
129
130 // reverse mapping for characters and their lowercase
131 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
132 {
133 ret[static_cast<rank_type>(rank_to_char[rnk])] = rnk;
134 ret[static_cast<rank_type>(detail::to_lower(rank_to_char[rnk]))] = rnk;
135 }
136
137 ret['D'] = ret['B'];
138 ret['d'] = ret['B']; // Convert D to B (either D/N).
139 ret['E'] = ret['B'];
140 ret['e'] = ret['B']; // Convert E to B (either D/N).
141 ret['J'] = ret['I'];
142 ret['j'] = ret['I']; // Convert J (either I/L) to I.
143 ret['L'] = ret['I'];
144 ret['l'] = ret['I']; // Convert L to I.
145 ret['M'] = ret['I'];
146 ret['m'] = ret['I']; // Convert M to I.
147 ret['N'] = ret['B'];
148 ret['n'] = ret['B']; // Convert N to B (either D/N).
149 ret['O'] = ret['K'];
150 ret['o'] = ret['K']; // Convert Pyrrolysine to K.
151 ret['Q'] = ret['B'];
152 ret['q'] = ret['B']; // Convert Q to B (either D/N).
153 ret['R'] = ret['K'];
154 ret['r'] = ret['K']; // Convert R to K.
155 ret['T'] = ret['S'];
156 ret['t'] = ret['S']; // Convert T to S.
157 ret['U'] = ret['C'];
158 ret['u'] = ret['C']; // Convert Selenocysteine to C.
159 ret['V'] = ret['I'];
160 ret['v'] = ret['I']; // Convert V to I.
161 ret['W'] = ret['F'];
162 ret['w'] = ret['F']; // Convert W to F.
163 ret['X'] = ret['S'];
164 ret['x'] = ret['S']; // Convert unknown amino acids to Serine.
165 ret['Y'] = ret['F'];
166 ret['y'] = ret['F']; // Convert Y to F.
167 ret['Z'] = ret['B'];
168 ret['z'] = ret['B']; // Convert Z (either E/Q) to B (either D/N).
169 ret['*'] = ret
170 ['F']; // The most common stop codon is UGA. This is most similar to a Tryptophan which in this alphabet gets converted to Phenylalanine.
171 return ret;
172 }();
173};
174
175} // namespace bio::alphabet
176
177// ------------------------------------------------------------------
178// literals
179// ------------------------------------------------------------------
180
181namespace bio::alphabet
182{
183
184inline namespace literals
185{
186
195consteval aa10murphy operator""_aa10murphy(char const c)
196{
197 if (!char_is_valid_for<aa10murphy>(c))
198 throw std::invalid_argument{"Illegal character in character literal."};
199
200 return aa10murphy{}.assign_char(c);
201}
202
212template <meta::detail::literal_buffer_string str>
213constexpr std::vector<aa10murphy> operator""_aa10murphy()
214{
215 return detail::string_literal<str, aa10murphy>();
216}
218
219} // namespace literals
220
221} // namespace bio::alphabet
Provides bio::alphabet::aminoacid.
Provides bio::alphabet::aminoacid_base.
The reduced Murphy amino acid alphabet.
Definition: aa10murphy.hpp:81
static constexpr std::array< char_type, alphabet_size > rank_to_char
Value to char conversion table.
Definition: aa10murphy.hpp:109
constexpr aa10murphy() noexcept=default
Defaulted.
static constexpr std::array< rank_type, 256 > char_to_rank
Char to value conversion table.
Definition: aa10murphy.hpp:123
A CRTP-base that refines bio::alphabet::base and is used by the amino acids.
Definition: aminoacid_base.hpp:31
static constexpr size_t alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: base.hpp:177
constexpr derived_type & assign_char(char_type const c) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: base.hpp:145
T fill(T... args)
The alphabet module's namespace.
Definition: aa10li.hpp:23