OpenMS  2.8.0
IDBoostGraph.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 // define to get timings for connected components
38 //#define INFERENCE_BENCH
39 
40 #include <OpenMS/ANALYSIS/ID/MessagePasserFactory.h> //included in BPI
41 #include <OpenMS/CONCEPT/Types.h>
46 
47 #include <vector>
48 #include <unordered_map>
49 #include <queue>
50 
51 #include <boost/function.hpp>
52 #include <boost/graph/adjacency_list.hpp>
53 #include <boost/graph/depth_first_search.hpp>
54 #include <boost/graph/filtered_graph.hpp>
55 #include <boost/graph/properties.hpp>
56 #include <boost/variant.hpp>
57 #include <boost/variant/detail/hash_variant.hpp>
58 #include <boost/variant/static_visitor.hpp>
59 
60 namespace OpenMS
61 {
62  struct ScoreToTgtDecLabelPairs;
63 
64  namespace Internal
65  {
66 
79  //TODO Add OPENMS_DLLAPI everywhere
80  class OPENMS_DLLAPI IDBoostGraph
81  {
82 
83  public:
84 
85  // boost has a weird extra semicolon in their strong typedef
86  #pragma clang diagnostic push
87  #pragma clang diagnostic ignored "-Wextra-semi"
88 
90  BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster)
91 
92 
/// Lightweight graph node standing in for a group of proteins.
/// All counters start at zero; `score` is overwritten by SetPosteriorVisitor and
/// `tgts > 0` is what GetScoreTgtVisitor reports as the node's boolean flag.
struct ProteinGroup
{
  int size{0};       ///< presumably the number of member proteins -- TODO confirm against the .cpp
  int tgts{0};       ///< presumably the number of target (non-decoy) members -- TODO confirm
  double score{0.0}; ///< score attached to the group node
};
99 
101  BOOST_STRONG_TYPEDEF(String, Peptide)
102 
103 
104  BOOST_STRONG_TYPEDEF(Size, RunIndex)
105 
106 
107  BOOST_STRONG_TYPEDEF(int, Charge)
108 
109  #pragma clang diagnostic pop
110 
111  //typedefs
112  //TODO rename ProteinGroup type since it collides with the actual OpenMS ProteinGroup
113  typedef boost::variant<ProteinHit*, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit*> IDPointer;
114  typedef boost::variant<const ProteinHit*, const ProteinGroup*, const PeptideCluster*, const Peptide, const RunIndex, const Charge, const PeptideHit*> IDPointerConst;
115  //TODO check the impact of different data structures to store nodes/edges
116  // Directed graphs would make the internal computations much easier (less in/out edge checking) but boost
117  // does not allow computation of "non-strongly" connected components for directed graphs, which is what we would
118  // need. We could consider inserting the nodes into a directed graph while (or after) copying them to the CCs!
119  typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer> Graph;
120  typedef std::vector<Graph> Graphs;
121  typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer> GraphConst;
122 
123  typedef boost::graph_traits<Graph>::vertex_descriptor vertex_t;
124  typedef boost::graph_traits<Graph>::edge_descriptor edge_t;
125 
126  typedef std::set<IDBoostGraph::vertex_t> ProteinNodeSet;
127  typedef std::set<IDBoostGraph::vertex_t> PeptideNodeSet;
128 
129 
132  public boost::default_dfs_visitor
133  {
134  public:
136  : gs(vgs), curr_v(0), next_v(0), m()
137  {}
138 
139  template < typename Vertex, typename Graph >
140  void start_vertex(Vertex u, const Graph & tg)
141  {
142  gs.emplace_back();
143  next_v = boost::add_vertex(tg[u], gs.back());
144  m[u] = next_v;
145  }
146 
147  template < typename Vertex, typename Graph >
148  void discover_vertex(Vertex /*u*/, const Graph & /*tg*/)
149  {
150  curr_v = next_v;
151  }
152 
153  template < typename Edge, typename Graph >
154  void examine_edge(Edge e, const Graph & tg)
155  {
156  if (m.find(e.m_target) == m.end())
157  {
158  next_v = boost::add_vertex(tg[e.m_target], gs.back());
159  m[e.m_target] = next_v;
160  }
161  else
162  {
163  next_v = m[e.m_target];
164  }
165 
166  boost::add_edge(m[e.m_source], next_v, gs.back());
167  }
168 
170  vertex_t curr_v, next_v;
172  std::map<vertex_t, vertex_t> m;
173  };
174 
178  public boost::static_visitor<OpenMS::String>
179  {
180  public:
181 
183  {
184  return pep->getSequence().toString() + "_" + pep->getCharge();
185  }
186 
188  {
189  return prot->getAccession();
190  }
191 
192  OpenMS::String operator()(const ProteinGroup& /*protgrp*/) const
193  {
194  return "PG";
195  }
196 
197  OpenMS::String operator()(const PeptideCluster& /*pc*/) const
198  {
199  return "PepClust";
200  }
201 
202  OpenMS::String operator()(const Peptide& peptide) const
203  {
204  return peptide;
205  }
206 
207  OpenMS::String operator()(const RunIndex& ri) const
208  {
209  return "rep" + String(ri);
210  }
211 
212  OpenMS::String operator()(const Charge& chg) const
213  {
214  return "chg" + String(chg);
215  }
216 
217  };
218 
221  template<class CharT>
223  public boost::static_visitor<>
224  {
225  public:
226 
227  explicit PrintAddressVisitor(std::basic_ostream<CharT> stream):
228  stream_(stream)
229  {}
230 
231  void operator()(PeptideHit* pep) const
232  {
233  stream_ << pep->getSequence().toUnmodifiedString() << ": " << pep << std::endl;
234  }
235 
236  void operator()(ProteinHit* prot) const
237  {
238  stream_ << prot->getAccession() << ": " << prot << std::endl;
239  }
240 
241  void operator()(const ProteinGroup& /*protgrp*/) const
242  {
243  stream_ << "PG" << std::endl;
244  }
245 
246  void operator()(const PeptideCluster& /*pc*/) const
247  {
248  stream_ << "PepClust" << std::endl;
249  }
250 
251  void operator()(const Peptide& peptide) const
252  {
253  stream_ << peptide << std::endl;
254  }
255 
256  void operator()(const RunIndex& ri) const
257  {
258  stream_ << "rep" << ri << std::endl;
259  }
260 
261  void operator()(const Charge& chg) const
262  {
263  stream_ << "chg" << chg << std::endl;
264  }
265 
266  std::basic_ostream<CharT> stream_;
267  };
268 
273  public boost::static_visitor<>
274  {
275  public:
276 
277  void operator()(PeptideHit* pep, double posterior) const
278  {
279  pep->setScore(posterior);
280  }
281 
282  void operator()(ProteinHit* prot, double posterior) const
283  {
284  prot->setScore(posterior);
285  }
286 
287  void operator()(ProteinGroup& pg, double posterior) const
288  {
289  pg.score = posterior;
290  }
291 
292  // Everything else, do nothing for now
293  template <class T>
294  void operator()(T& /*any node type*/, double /*posterior*/) const
295  {
296  // do nothing
297  }
298 
299  };
300 
304  public boost::static_visitor<double>
305  {
306  public:
307 
308  double operator()(PeptideHit* pep) const
309  {
310  return pep->getScore();
311  }
312 
313  double operator()(ProteinHit* prot) const
314  {
315  return prot->getScore();
316  }
317 
318  double operator()(ProteinGroup& pg) const
319  {
320  return pg.score;
321  }
322 
323  // Everything else, do nothing for now
324  template <class T>
325  double operator()(T& /*any node type*/) const
326  {
327  return -1.0;
328  }
329 
330  };
331 
336  public boost::static_visitor<std::pair<double,bool>>
337  {
338  public:
339 
340  std::pair<double,bool> operator()(PeptideHit* pep) const
341  {
342  return {pep->getScore(), pep->getMetaValue("target_decoy").toString()[0] == 't'};
343  }
344 
345  std::pair<double,bool> operator()(ProteinHit* prot) const
346  {
347  return {prot->getScore(), prot->getMetaValue("target_decoy").toString()[0] == 't'};
348  }
349 
350  std::pair<double,bool> operator()(ProteinGroup& pg) const
351  {
352  return {pg.score, pg.tgts > 0};
353  }
354 
355  // Everything else, do nothing for now
356  template <class T>
357  std::pair<double,bool> operator()(T& /*any node type*/) const
358  {
359  return {-1.0, false};
360  }
361  };
362 
365  std::vector<PeptideIdentification>& idedSpectra,
366  Size use_top_psms,
367  bool use_run_info,
368  bool best_psms_annotated,
369  const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
370 
372  ConsensusMap& cmap,
373  Size use_top_psms,
374  bool use_run_info,
375  bool use_unassigned_ids,
376  bool best_psms_annotated,
377  const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
378 
379 
380  //TODO think about templating to avoid wrapping to std::function
381  // although we usually do long-running tasks per CC such that the extra virtual call does not matter much
382  // Instead we gain type erasure.
384  void applyFunctorOnCCs(const std::function<unsigned long(Graph&, unsigned int)>& functor);
386  void applyFunctorOnCCsST(const std::function<void(Graph&)>& functor);
387 
391 
392  //TODO create a new class for an extended Graph and try to reuse as much as possible
393  // use inheritance or templates
397 
404  void annotateIndistProteins(bool addSingletons = true);
405 
409  void calculateAndAnnotateIndistProteins(bool addSingletons = true);
410 
413 
420  void resolveGraphPeptideCentric(bool removeAssociationsInData = true);
421 
422 
423 
426 
430  const Graph& getComponent(Size cc);
431 
435 
436  //TODO docu
437  //void buildExtendedGraph(bool use_all_psms, std::pair<int,int> chargeRange, unsigned int nrReplicates);
438 
442  static void printGraph(std::ostream& out, const Graph& fg);
443 
452  void getUpstreamNodesNonRecursive(std::queue<vertex_t>& q, const Graph& graph, int lvl,
453  bool stop_at_first, std::vector<vertex_t>& result);
454 
463  void getDownstreamNodesNonRecursive(std::queue<vertex_t>& q, const Graph& graph, int lvl,
464  bool stop_at_first, std::vector<vertex_t>& result);
465 
474 
475  private:
476 
478 
479  struct SequenceToReplicateChargeVariantHierarchy;
480 
481 
482  //TODO introduce class hierarchy:
483  /*
484  * IDGraph<UnderlyingIDStruc>
485  *
486  * - BasicGraph<>
487  * - ExtendedGraphClustered<>
488  * - ExtendedGraphClusteredWithRunInfo<>
489  *
490  * in theory extending a basic one is desirable to create the extended one. But it means we have to
491  * copy/move the graph (node by node) because the nodes are of a broader boost::variant type. So we probably have to
492  * duplicate code and offer a from-scratch step-wise building for the extended graph, too.
493  * Note that there could be several levels of extension in the future. For now I keep everything in one
494  * class by having potential storage for the broadest extended type. Differences in the underlying ID structure
495  * e.g. ConsensusMap or PeptideIDs from idXML currently only have an effect during building, so I just overload
496  * the constructors. In theory it would be nice to generalize on that, too, especially when we adapt to the new
497  * ID data structure.
498  */
499 
500 
501  /* ---------------- Either of them is used, preferably second --------------- */
504 
507  /* ---------------------------------------------------------------------------- */
508 
509  #ifdef INFERENCE_BENCH
511  std::vector<std::tuple<vertex_t, vertex_t, unsigned long, double>> sizes_and_times_{1};
512  #endif
513 
514 
515  /* ---- Only used when run information was available --------- */
516 
517  //TODO think about preallocating it, but the number of peptide hits is not easily computed
518  // since they are inside the pepIDs
519  //TODO would multiple sets be better?
520 
523  std::unordered_map<vertex_t, Size> pepHitVtx_to_run_;
524 
529  Size nrPrefractionationGroups_ = 0;
530 
531  /* ----------------------------------------------------------- */
532 
533 
536  vertex_t addVertexWithLookup_(const IDPointer& ptr, std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map);
537  //vertex_t addVertexWithLookup_(IDPointerConst& ptr, std::unordered_map<IDPointerConst, vertex_t, boost::hash<IDPointerConst>>& vertex_map);
538 
539 
541  void annotateIndistProteins_(const Graph& fg, bool addSingletons);
542  void calculateAndAnnotateIndistProteins_(const Graph& fg, bool addSingletons);
543 
554  std::vector<PeptideIdentification>& idedSpectra,
555  Size use_top_psms,
556  bool best_psms_annotated = false);
557 
559  ConsensusMap& cmap,
560  Size use_top_psms,
561  bool use_unassigned_ids,
562  bool best_psms_annotated = false);
563 
566  PeptideIdentification& spectrum,
567  std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map,
568  const std::unordered_map<std::string, ProteinHit*>& accession_map,
569  Size use_top_psms,
570  bool best_psms_annotated);
571 
573  PeptideIdentification& spectrum,
574  std::unordered_map<unsigned, unsigned>& indexToPrefractionationGroup,
575  std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map,
576  std::unordered_map<std::string, ProteinHit*>& accession_map,
577  Size use_top_psms
578  );
579 
587  ConsensusMap& cmap,
588  Size use_top_psms,
589  bool use_unassigned_ids,
590  const ExperimentalDesign& ed);
591 
593  std::vector<PeptideIdentification>& idedSpectra,
594  Size use_top_psms,
595  const ExperimentalDesign& ed);
596 
597 
599  void resolveGraphPeptideCentric_(Graph& fg, bool removeAssociationsInData);
600 
601  template<class NodeType>
602  void getDownstreamNodes(const vertex_t& start, const Graph& graph, std::vector<NodeType>& result)
603  {
604  Graph::adjacency_iterator adjIt, adjIt_end;
605  boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
606  for (;adjIt != adjIt_end; ++adjIt)
607  {
608  if (graph[*adjIt].type() == typeid(NodeType))
609  {
610  result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
611  }
612  else if (graph[*adjIt].which() > graph[start].which())
613  {
614  getDownstreamNodes(*adjIt, graph, result);
615  }
616  }
617  }
618 
619  template<class NodeType>
620  void getUpstreamNodes(const vertex_t& start, const Graph graph, std::vector<NodeType>& result)
621  {
622  Graph::adjacency_iterator adjIt, adjIt_end;
623  boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
624  for (;adjIt != adjIt_end; ++adjIt)
625  {
626  if (graph[*adjIt].type() == typeid(NodeType))
627  {
628  result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
629  }
630  else if (graph[*adjIt].which() < graph[start].which())
631  {
632  getUpstreamNodes(*adjIt, graph, result);
633  }
634  }
635  }
636  };
637 
639  } //namespace Internal
640 } //namespace OpenMS
641 
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
A container for consensus elements.
Definition: ConsensusMap.h:90
String toString(bool full_precision=true) const
Conversion to String full_precision Controls number of fractional digits for all double types or list...
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:243
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:305
double operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:308
double operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:313
double operator()(T &) const
Definition: IDBoostGraph.h:325
double operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:318
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:337
std::pair< double, bool > operator()(T &) const
Definition: IDBoostGraph.h:357
std::pair< double, bool > operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:350
std::pair< double, bool > operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:340
std::pair< double, bool > operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:345
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type creates a label e....
Definition: IDBoostGraph.h:179
OpenMS::String operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:202
OpenMS::String operator()(const Charge &chg) const
Definition: IDBoostGraph.h:212
OpenMS::String operator()(const PeptideHit *pep) const
Definition: IDBoostGraph.h:182
OpenMS::String operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:192
OpenMS::String operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:207
OpenMS::String operator()(const ProteinHit *prot) const
Definition: IDBoostGraph.h:187
OpenMS::String operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:197
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type prints the address...
Definition: IDBoostGraph.h:224
void operator()(const Charge &chg) const
Definition: IDBoostGraph.h:261
std::basic_ostream< CharT > stream_
Definition: IDBoostGraph.h:266
PrintAddressVisitor(std::basic_ostream< CharT > stream)
Definition: IDBoostGraph.h:227
void operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:246
void operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:256
void operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:231
void operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:236
void operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:251
void operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:241
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:274
void operator()(T &, double) const
Definition: IDBoostGraph.h:294
void operator()(PeptideHit *pep, double posterior) const
Definition: IDBoostGraph.h:277
void operator()(ProteinGroup &pg, double posterior) const
Definition: IDBoostGraph.h:287
void operator()(ProteinHit *prot, double posterior) const
Definition: IDBoostGraph.h:282
A boost dfs visitor that copies connected components into a vector of graphs.
Definition: IDBoostGraph.h:133
std::map< vertex_t, vertex_t > m
A mapping from old node id to new node id to not duplicate existing ones in the new graph.
Definition: IDBoostGraph.h:172
void start_vertex(Vertex u, const Graph &tg)
Definition: IDBoostGraph.h:140
void examine_edge(Edge e, const Graph &tg)
Definition: IDBoostGraph.h:154
dfs_ccsplit_visitor(Graphs &vgs)
Definition: IDBoostGraph.h:135
vertex_t curr_v
Definition: IDBoostGraph.h:170
void discover_vertex(Vertex, const Graph &)
Definition: IDBoostGraph.h:148
Graphs & gs
Definition: IDBoostGraph.h:169
Creates and maintains a boost graph based on the OpenMS ID datastructures.
Definition: IDBoostGraph.h:81
void addPeptideIDWithAssociatedProteins_(PeptideIdentification &spectrum, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, const std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms, bool best_psms_annotated)
Used during building.
const ProteinIdentification & getProteinIDs()
Returns the underlying protein identifications for viewing.
void buildGraphWithRunInfo_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, const ExperimentalDesign &ed)
void buildGraph_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool best_psms_annotated=false)
boost::graph_traits< Graph >::vertex_descriptor vertex_t
Definition: IDBoostGraph.h:123
std::unordered_map< vertex_t, Size > pepHitVtx_to_run_
Definition: IDBoostGraph.h:523
boost::variant< const ProteinHit *, const ProteinGroup *, const PeptideCluster *, const Peptide, const RunIndex, const Charge, const PeptideHit * > IDPointerConst
Definition: IDBoostGraph.h:114
void addPeptideAndAssociatedProteinsWithRunInfo_(PeptideIdentification &spectrum, std::unordered_map< unsigned, unsigned > &indexToPrefractionationGroup, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
std::vector< Graph > Graphs
Definition: IDBoostGraph.h:120
double score
Definition: IDBoostGraph.h:97
IDBoostGraph(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_run_info, bool use_unassigned_ids, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
void getDownstreamNodes(const vertex_t &start, const Graph &graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:602
ProteinIdentification & protIDs_
Definition: IDBoostGraph.h:477
void getUpstreamNodes(const vertex_t &start, const Graph graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:620
void computeConnectedComponents()
Splits the initialized graph into connected components and clears it.
void getProteinGroupScoresAndHitchhikingTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
int tgts
Definition: IDBoostGraph.h:96
Size getNrConnectedComponents()
Zero means the graph was not split yet.
void resolveGraphPeptideCentric_(Graph &fg, bool removeAssociationsInData)
see equivalent public method
void getUpstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all upstream nodes from a (set of) start nodes that are lower or equal than a given leve...
const Graph & getComponent(Size cc)
Returns a specific connected component of the graph as a graph itself.
void applyFunctorOnCCsST(const std::function< void(Graph &)> &functor)
Do something on connected components single threaded (your functor object has to inherit from std::function...
Graph g
the initial boost Graph (will be cleared when split into CCs)
Definition: IDBoostGraph.h:479
void annotateIndistProteins_(const Graph &fg, bool addSingletons)
internal function to annotate the underlying ID structures based on the given Graph
void clusterIndistProteinsAndPeptidesAndExtendGraph()
std::set< IDBoostGraph::vertex_t > PeptideNodeSet
Definition: IDBoostGraph.h:127
std::set< IDBoostGraph::vertex_t > ProteinNodeSet
Definition: IDBoostGraph.h:126
void buildGraphWithRunInfo_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, const ExperimentalDesign &ed)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > GraphConst
Definition: IDBoostGraph.h:121
void calculateAndAnnotateIndistProteins(bool addSingletons=true)
static void printGraph(std::ostream &out, const Graph &fg)
Prints a graph (component or if not split, the full graph) in graphviz (i.e. dot) format.
void calculateAndAnnotateIndistProteins_(const Graph &fg, bool addSingletons)
boost::graph_traits< Graph >::edge_descriptor edge_t
Definition: IDBoostGraph.h:124
void annotateIndistProteins(bool addSingletons=true)
void resolveGraphPeptideCentric(bool removeAssociationsInData=true)
IDBoostGraph(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool use_run_info, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
Constructors.
void getProteinGroupScoresAndTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
void buildGraph_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, bool best_psms_annotated=false)
void getProteinScores_(ScoreToTgtDecLabelPairs &scores_and_tgt)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > Graph
Definition: IDBoostGraph.h:119
void getDownstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all downstream nodes from a (set of) start nodes that are higher or equal than a given l...
boost::variant< ProteinHit *, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit * > IDPointer
a (currently unmodified) peptide sequence
Definition: IDBoostGraph.h:113
void applyFunctorOnCCs(const std::function< unsigned long(Graph &, unsigned int)> &functor)
Do something on connected components (your functor object has to inherit from std::function or be a lambda)
Graphs ccs_
the Graph split into connected components
Definition: IDBoostGraph.h:506
vertex_t addVertexWithLookup_(const IDPointer &ptr, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map)
placeholder for peptides with the same parent proteins or protein groups
Definition: IDBoostGraph.h:94
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
Int getCharge() const
returns the charge of the peptide
void setScore(double score)
sets the PSM score
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Representation of a protein hit.
Definition: ProteinHit.h:60
double getScore() const
returns the score of the protein hit
void setScore(const double score)
sets the score of the protein hit
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Definition: IDScoreGetterSetter.h:57