45 #include <unordered_map> 88 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
89 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
91 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
92 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
95 27, 00, 22, 02, 03, 15, 05, 06, 07, 8, 23, 10, 9, 12, 04, 13,
98 14, 16, 17, 18, 19, 20, 21, 11, 25, 01, 24, 27, 27, 27, 27, 27,
101 27, 00, 22, 02, 03, 15, 05, 06, 07, 8, 23, 10, 9, 12, 04, 13,
104 14, 16, 17, 18, 19, 20, 21, 11, 25, 01, 24, 27, 27, 27, 27, 27,
109 struct OPENMS_DLLAPI
Hit {
112 Hit(
T needle_index,
T needle_length,
T query_pos) : needle_index(needle_index), needle_length(needle_length), query_pos(query_pos) {};
124 struct OPENMS_DLLAPI
AA 127 constexpr
AA() : aa_(
AA(
'?').aa_)
147 return aa_ == other.
aa_;
153 return aa_ <= other.
aa_;
159 return aa_ >=
AA(
'B').
aa_;
165 return aa_ !=
AA(
'?').
aa_;
171 return aa_ <=
AA(
'X').
aa_;
178 assert(aa_ <=
AA(
'?').aa_);
187 assert(aa_ <=
AA(
'?').aa_);
216 bool isInvalid()
const;
219 bool isValid()
const;
222 T operator()()
const;
231 T i_ = std::numeric_limits<T>::max();
236 template<>
struct std::hash<
OpenMS::Index>
240 return std::hash<OpenMS::Index::T> {}(s());
254 ACNode(
const AA label,
const uint8_t depth) : edge(label)
256 depth_and_hits.depth = depth;
263 memset(
this, 0,
sizeof *
this);
290 ACSpawn(std::string::const_iterator query_pos,
Index tree_pos, uint8_t max_aa, uint8_t max_mm, uint8_t max_prefix_loss);
301 uint8_t max_aaa_leftover {0};
302 uint8_t max_mm_leftover {0};
303 uint8_t max_prefix_loss_leftover {0};
308 OPENMS_DLLAPI AA
nextValidAA(
const std::string::const_iterator end, std::string::const_iterator& it_q);
317 void setQuery(
const std::string& haystack);
320 size_t textPos()
const;
323 std::string::const_iterator textPosIt()
const;
326 const std::string& getQuery()
const;
351 ACTrie(uint32_t max_aaa = 0, uint32_t max_mm = 0);
359 void addNeedle(
const std::string& needle);
364 void addNeedles(
const std::vector<std::string>& needles);
368 void addNeedlesAndCompress(
const std::vector<std::string>& needles);
380 size_t getNeedleCount()
const;
384 void setMaxAAACount(
const uint32_t max_aaa);
387 uint32_t getMaxAAACount()
const;
391 void setMaxMMCount(
const uint32_t max_mm);
394 uint32_t getMaxMMCount()
const;
424 bool addHits_(
Index i,
const size_t text_pos, std::vector<Hit>& hits)
const;
427 bool addHitsSpawn_(
Index i,
const ACSpawn& spawn,
const size_t text_pos, std::vector<Hit>& hits,
const int current_spawn_depths)
const;
448 void createSpawns_(
const Index i,
const AA fromAA,
const AA toAA,
ACTrieState& state,
const uint32_t aaa_left,
const uint32_t mm_left)
const;
451 void createSubSpawns_(
const ACSpawn& prototype,
const AA fromAA,
const AA toAA,
ACTrieState& state)
const;
454 void createMMSpawns_(
const Index i,
const AA except_fromAA,
const AA except_toAA,
const AA except_edge,
ACTrieState& state,
const uint32_t aaa_left,
const uint32_t mm_left)
const;
457 void createMMSubSpawns_(
const ACSpawn& prototype,
const AA except_fromAA,
const AA except_toAA,
const AA except_edge,
ACTrieState& state)
const;
463 Index findChildBFS_(
const Index parent,
const AA child_label)
const;
466 uint32_t needle_count_ {0};
467 uint32_t max_aaa_ {0};
468 uint32_t max_mm_ {0};
constexpr AA & operator++()
Pre-increment operator (advance to next AA)
Definition: AhoCorasickAmbiguous.h:175
Definition: AhoCorasickAmbiguous.h:109
constexpr bool operator==(const AA other) const
equality operator
Definition: AhoCorasickAmbiguous.h:145
constexpr AA operator-(const AA rhs) const
Decrement operator.
Definition: AhoCorasickAmbiguous.h:192
DepthHits depth_and_hits
depth of node in the tree and one bit if a needle ends in this node or any of its suffices ...
Definition: AhoCorasickAmbiguous.h:277
constexpr bool operator<=(const AA other) const
less-or-equal operator
Definition: AhoCorasickAmbiguous.h:151
std::queue< ACSpawn > spawns
initial spawn points which are currently active and need processing
Definition: AhoCorasickAmbiguous.h:334
Index(T val)
C'tor from T.
Definition: AhoCorasickAmbiguous.h:213
constexpr bool isValidForPeptide() const
is the AA a letter, i.e. A-Z or a-z?
Definition: AhoCorasickAmbiguous.h:169
std::string::const_iterator it_q_
position in query
Definition: AhoCorasickAmbiguous.h:338
internal struct to steal one bit from depth to use as hit indicator
Definition: AhoCorasickAmbiguous.h:260
constexpr bool isValid() const
is AA a letter or '$' ?
Definition: AhoCorasickAmbiguous.h:163
constexpr char const CharToAA[128]
Conversion table from 7-bit ASCII char to internal value representation for an amino acid (AA) ...
Definition: AhoCorasickAmbiguous.h:86
uint32_t T
Definition: AhoCorasickAmbiguous.h:208
std::string query_
current query ( = haystack = text)
Definition: AhoCorasickAmbiguous.h:337
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
const double c
Definition: Constants.h:209
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:156
std::vector< Hit > hits
current hits found
Definition: AhoCorasickAmbiguous.h:332
Definition: AhoCorasickAmbiguous.h:312
bool operator==(const Hit &rhs) const
Definition: AhoCorasickAmbiguous.h:116
uint8_t aa_
internal representation as 1-byte integer
Definition: AhoCorasickAmbiguous.h:200
ACNode()
Default C'tor.
Definition: AhoCorasickAmbiguous.h:251
std::size_t operator()(OpenMS::Index const &s) const noexcept
Definition: AhoCorasickAmbiguous.h:238
constexpr char const AAtoChar[28]
Definition: AhoCorasickAmbiguous.h:52
std::vector< ACNode > trie_
the trie, in either naive structure or BFS order (after compressTrie)
Definition: AhoCorasickAmbiguous.h:465
uint32_t T
Definition: AhoCorasickAmbiguous.h:110
std::string::const_iterator it_query
position in query
Definition: AhoCorasickAmbiguous.h:299
Index tree_pos
position in trie
Definition: AhoCorasickAmbiguous.h:300
ACNode(const AA label, const uint8_t depth)
C'tor from an edge label (from parent to this node) and a depth in the tree.
Definition: AhoCorasickAmbiguous.h:254
constexpr uint8_t operator()() const
yields the internal 8bit representation
Definition: AhoCorasickAmbiguous.h:139
constexpr AA operator++(int)
Post-increment operator (advance to next AA)
Definition: AhoCorasickAmbiguous.h:183
An Aho Corasick trie (a set of nodes with suffix links mainly)
Definition: AhoCorasickAmbiguous.h:342
Index tree_pos
position in trie (for the master)
Definition: AhoCorasickAmbiguous.h:333
Definition: AhoCorasickAmbiguous.h:124
std::unordered_map< Index, std::vector< Index > > umap_index2children_naive_
maps the child nodes of a node for the naive tree; only needed during naive trie construction; storin...
Definition: AhoCorasickAmbiguous.h:473
Definition: AhoCorasickAmbiguous.h:248
DepthHits()
Definition: AhoCorasickAmbiguous.h:261
uint8_t depth
depth of node in the trie
Definition: AhoCorasickAmbiguous.h:267
T needle_length
Definition: AhoCorasickAmbiguous.h:114
std::unordered_map< Index, std::vector< uint32_t > > umap_index2needles_
maps a node to which needles end there (valid for both naive and BFS tree)
Definition: AhoCorasickAmbiguous.h:471
constexpr AA()
Default C'tor; creates an invalid AA.
Definition: AhoCorasickAmbiguous.h:127
Definition: AhoCorasickAmbiguous.h:205
T query_pos
Definition: AhoCorasickAmbiguous.h:115
AA nextValidAA(const std::string::const_iterator end, std::string::const_iterator &it_q)
uint8_t ChildCountType
Definition: AhoCorasickAmbiguous.h:270
friend ACSpawn
Definition: AhoCorasickAmbiguous.h:314
T needle_index
Definition: AhoCorasickAmbiguous.h:112
a spin-off search path through the trie, which can deal with ambiguous AAs and mismatches ...
Definition: AhoCorasickAmbiguous.h:284
constexpr bool isAmbiguous() const
is AA a 'B', 'J', 'Z', 'X', or '$' ?
Definition: AhoCorasickAmbiguous.h:157
constexpr AA(const char c)
Definition: AhoCorasickAmbiguous.h:134