BioC++ core-0.7.0
The Modern C++ libraries for Bioinformatics.
 
Loading...
Searching...
No Matches
aa20.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2022 deCODE Genetics
3// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/biocpp/biocpp-core/blob/main/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
14#pragma once
15
16#include <vector>
17
20#include <bio/alphabet/detail/to_lower.hpp>
21
22namespace bio::alphabet
23{
24
62class aa20 : public aminoacid_base<aa20, 20>
63{
64private:
67
69 friend base_t;
71 friend base_t::base_t;
73
74public:
78 constexpr aa20() noexcept = default;
79 constexpr aa20(aa20 const &) noexcept = default;
80 constexpr aa20(aa20 &&) noexcept = default;
81 constexpr aa20 & operator=(aa20 const &) noexcept = default;
82 constexpr aa20 & operator=(aa20 &&) noexcept = default;
83 ~aa20() noexcept = default;
84
85 using base_t::base_t;
87
88protected:
90 static constexpr std::array<char_type, alphabet_size> rank_to_char{
91 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y',
92 };
93
95 static constexpr std::array<rank_type, 256> char_to_rank = []() constexpr
96 {
98
99 // initialize with UNKNOWN
100 ret.fill(15); // value of 'S', because that appears most frequently
101
102 // reverse mapping for characters and their lowercase
103 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
104 {
105 ret[static_cast<rank_type>(rank_to_char[rnk])] = rnk;
106 ret[static_cast<rank_type>(detail::to_lower(rank_to_char[rnk]))] = rnk;
107 }
108
109 ret['B'] = ret['D'];
110 ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
111 ret['J'] = ret['L'];
112 ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
113 ret['O'] = ret['L'];
114 ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
115 ret['U'] = ret['C'];
116 ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
117 ret['X'] = ret['S'];
118 ret['x'] = ret['S']; // Convert unknown amino acids to serine.
119 ret['Z'] = ret['E'];
120 ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
121 ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
122 return ret;
123 }();
124};
125
126} // namespace bio::alphabet
127
128// ------------------------------------------------------------------
129// literals
130// ------------------------------------------------------------------
131
132namespace bio::alphabet
133{
134
135inline namespace literals
136{
137
146consteval aa20 operator""_aa20(char const c)
147{
148 if (!char_is_valid_for<aa20>(c))
149 throw std::invalid_argument{"Illegal character in character literal."};
150
151 return aa20{}.assign_char(c);
152}
153
163template <meta::detail::literal_buffer_string str>
164constexpr std::vector<aa20> operator""_aa20()
165{
166 return detail::string_literal<str, aa20>();
167}
169
170} // namespace literals
171
172} // namespace bio::alphabet
Provides bio::alphabet::aminoacid.
Provides bio::alphabet::aminoacid_base.
The canonical amino acid alphabet.
Definition: aa20.hpp:63
static constexpr std::array< char_type, alphabet_size > rank_to_char
Value to char conversion table.
Definition: aa20.hpp:90
constexpr aa20() noexcept=default
Defaulted.
static constexpr std::array< rank_type, 256 > char_to_rank
Char to value conversion table.
Definition: aa20.hpp:95
A CRTP-base that refines bio::alphabet::base and is used by the amino acids.
Definition: aminoacid_base.hpp:31
static constexpr size_t alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: base.hpp:177
constexpr derived_type & assign_char(char_type const c) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: base.hpp:145
T fill(T... args)
The alphabet module's namespace.
Definition: aa10li.hpp:23