45 #include <unordered_map> 88 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
89 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
91 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
92 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
95 27, 00, 22, 02, 03, 15, 05, 06, 07, 8, 23, 10, 9, 12, 04, 13,
98 14, 16, 17, 18, 19, 20, 21, 11, 25, 01, 24, 27, 27, 27, 27, 27,
101 27, 00, 22, 02, 03, 15, 05, 06, 07, 8, 23, 10, 9, 12, 04, 13,
104 14, 16, 17, 18, 19, 20, 21, 11, 25, 01, 24, 27, 27, 27, 27, 27,
109 struct OPENMS_DLLAPI
Hit {
112 Hit(
T needle_index,
T needle_length,
T query_pos) : needle_index(needle_index), needle_length(needle_length), query_pos(query_pos) {};
124 struct OPENMS_DLLAPI
AA 127 constexpr
AA() : aa_(
AA(
'?').aa_)
147 return aa_ == other.
aa_;
153 return aa_ <= other.
aa_;
159 return aa_ >=
AA(
'B').
aa_;
165 return aa_ !=
AA(
'?').
aa_;
171 return aa_ <=
AA(
'X').
aa_;
178 assert(aa_ <=
AA(
'?').aa_);
187 assert(aa_ <=
AA(
'?').aa_);
216 bool isInvalid()
const;
219 bool isValid()
const;
222 T operator()()
const;
233 T i_ = std::numeric_limits<T>::max();
238 template<>
struct std::hash<
OpenMS::Index>
242 return std::hash<OpenMS::Index::T> {}(s());
256 ACNode(
const AA label,
const uint8_t depth) : edge(label)
258 depth_and_hits.depth = depth;
265 memset(
this, 0,
sizeof *
this);
292 ACSpawn(std::string::const_iterator query_pos,
Index tree_pos, uint8_t max_aa, uint8_t max_mm, uint8_t max_prefix_loss);
303 uint8_t max_aaa_leftover {0};
304 uint8_t max_mm_leftover {0};
305 uint8_t max_prefix_loss_leftover {0};
310 OPENMS_DLLAPI AA
nextValidAA(
const std::string::const_iterator end, std::string::const_iterator& it_q);
319 void setQuery(
const std::string& haystack);
322 size_t textPos()
const;
325 std::string::const_iterator textPosIt()
const;
328 const std::string& getQuery()
const;
353 ACTrie(uint32_t max_aaa = 0, uint32_t max_mm = 0);
361 void addNeedle(
const std::string& needle);
366 void addNeedles(
const std::vector<std::string>& needles);
370 void addNeedlesAndCompress(
const std::vector<std::string>& needles);
382 size_t getNeedleCount()
const;
386 void setMaxAAACount(
const uint32_t max_aaa);
389 uint32_t getMaxAAACount()
const;
393 void setMaxMMCount(
const uint32_t max_mm);
396 uint32_t getMaxMMCount()
const;
426 bool addHits_(
Index i,
const size_t text_pos, std::vector<Hit>& hits)
const;
429 bool addHitsSpawn_(
Index i,
const ACSpawn& spawn,
const size_t text_pos, std::vector<Hit>& hits,
const int current_spawn_depths)
const;
450 void createSpawns_(
const Index i,
const AA fromAA,
const AA toAA,
ACTrieState& state,
const uint32_t aaa_left,
const uint32_t mm_left)
const;
453 void createSubSpawns_(
const ACSpawn& prototype,
const AA fromAA,
const AA toAA,
ACTrieState& state)
const;
456 void createMMSpawns_(
const Index i,
const AA except_fromAA,
const AA except_toAA,
const AA except_edge,
ACTrieState& state,
const uint32_t aaa_left,
const uint32_t mm_left)
const;
459 void createMMSubSpawns_(
const ACSpawn& prototype,
const AA except_fromAA,
const AA except_toAA,
const AA except_edge,
ACTrieState& state)
const;
465 Index findChildBFS_(
const Index parent,
const AA child_label)
const;
468 uint32_t needle_count_ {0};
469 uint32_t max_aaa_ {0};
470 uint32_t max_mm_ {0};
constexpr AA & operator++()
Pre-increment operator (advance to next AA)
Definition: AhoCorasickAmbiguous.h:175
Definition: AhoCorasickAmbiguous.h:109
constexpr bool operator==(const AA other) const
equality operator
Definition: AhoCorasickAmbiguous.h:145
constexpr AA operator-(const AA rhs) const
Decrement operator.
Definition: AhoCorasickAmbiguous.h:192
DepthHits depth_and_hits
depth of node in the tree and one bit if a needle ends in this node or any of its suffices ...
Definition: AhoCorasickAmbiguous.h:279
constexpr bool operator<=(const AA other) const
less-or-equal operator
Definition: AhoCorasickAmbiguous.h:151
std::queue< ACSpawn > spawns
initial spawn points which are currently active and need processing
Definition: AhoCorasickAmbiguous.h:336
Index(T val)
C'tor from T.
Definition: AhoCorasickAmbiguous.h:213
constexpr bool isValidForPeptide() const
is the AA a letter, i.e. A-Z or a-z?
Definition: AhoCorasickAmbiguous.h:169
std::string::const_iterator it_q_
position in query
Definition: AhoCorasickAmbiguous.h:340
internal struct to steal one bit from depth to use as hit indicator
Definition: AhoCorasickAmbiguous.h:262
constexpr bool isValid() const
is AA a letter or '$' ?
Definition: AhoCorasickAmbiguous.h:163
constexpr char const CharToAA[128]
Conversion table from 7-bit ASCII char to internal value representation for an amino acid (AA) ...
Definition: AhoCorasickAmbiguous.h:86
uint32_t T
Definition: AhoCorasickAmbiguous.h:208
std::string query_
current query ( = haystack = text)
Definition: AhoCorasickAmbiguous.h:339
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
const double c
Definition: Constants.h:214
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:156
std::vector< Hit > hits
current hits found
Definition: AhoCorasickAmbiguous.h:334
Definition: AhoCorasickAmbiguous.h:314
bool operator==(const Hit &rhs) const
Definition: AhoCorasickAmbiguous.h:116
uint8_t aa_
internal representation as 1-byte integer
Definition: AhoCorasickAmbiguous.h:200
ACNode()
Default C'tor.
Definition: AhoCorasickAmbiguous.h:253
std::size_t operator()(OpenMS::Index const &s) const noexcept
Definition: AhoCorasickAmbiguous.h:240
constexpr char const AAtoChar[28]
Definition: AhoCorasickAmbiguous.h:52
std::vector< ACNode > trie_
the trie, in either naive structure or BFS order (after compressTrie)
Definition: AhoCorasickAmbiguous.h:467
uint32_t T
Definition: AhoCorasickAmbiguous.h:110
std::string::const_iterator it_query
position in query
Definition: AhoCorasickAmbiguous.h:301
Index tree_pos
position in trie
Definition: AhoCorasickAmbiguous.h:302
ACNode(const AA label, const uint8_t depth)
C'tor from an edge label (from parent to this node) and a depth in the tree.
Definition: AhoCorasickAmbiguous.h:256
constexpr uint8_t operator()() const
yields the internal 8bit representation
Definition: AhoCorasickAmbiguous.h:139
constexpr AA operator++(int)
Post-increment operator (advance to next AA)
Definition: AhoCorasickAmbiguous.h:183
An Aho Corasick trie (a set of nodes with suffix links mainly)
Definition: AhoCorasickAmbiguous.h:344
Index tree_pos
position in trie (for the master)
Definition: AhoCorasickAmbiguous.h:335
Definition: AhoCorasickAmbiguous.h:124
std::unordered_map< Index, std::vector< Index > > umap_index2children_naive_
maps the child nodes of a node for the naive tree; only needed during naive trie construction; storin...
Definition: AhoCorasickAmbiguous.h:475
Definition: AhoCorasickAmbiguous.h:250
DepthHits()
Definition: AhoCorasickAmbiguous.h:263
uint8_t depth
depth of node in the trie
Definition: AhoCorasickAmbiguous.h:269
T needle_length
Definition: AhoCorasickAmbiguous.h:114
std::unordered_map< Index, std::vector< uint32_t > > umap_index2needles_
maps a node to which needles end there (valid for both naive and BFS tree)
Definition: AhoCorasickAmbiguous.h:473
constexpr AA()
Default C'tor; creates an invalid AA.
Definition: AhoCorasickAmbiguous.h:127
Definition: AhoCorasickAmbiguous.h:205
T query_pos
Definition: AhoCorasickAmbiguous.h:115
AA nextValidAA(const std::string::const_iterator end, std::string::const_iterator &it_q)
uint8_t ChildCountType
Definition: AhoCorasickAmbiguous.h:272
friend ACSpawn
Definition: AhoCorasickAmbiguous.h:316
T needle_index
Definition: AhoCorasickAmbiguous.h:112
a spin-off search path through the trie, which can deal with ambiguous AAs and mismatches ...
Definition: AhoCorasickAmbiguous.h:286
constexpr bool isAmbiguous() const
is AA a 'B', 'J', 'Z', 'X', or '$' ?
Definition: AhoCorasickAmbiguous.h:157
constexpr AA(const char c)
Definition: AhoCorasickAmbiguous.h:134