49 #include <unordered_map>
52 #include <boost/function.hpp>
53 #include <boost/graph/adjacency_list.hpp>
54 #include <boost/graph/depth_first_search.hpp>
55 #include <boost/graph/filtered_graph.hpp>
56 #include <boost/graph/properties.hpp>
57 #include <boost/variant.hpp>
58 #include <boost/variant/detail/hash_variant.hpp>
59 #include <boost/variant/static_visitor.hpp>
63 struct ScoreToTgtDecLabelPairs;
87 #pragma clang diagnostic push
88 #pragma clang diagnostic ignored "-Wextra-semi"
91 BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster)
102 BOOST_STRONG_TYPEDEF(
String, Peptide)
105 BOOST_STRONG_TYPEDEF(
Size, RunIndex)
108 BOOST_STRONG_TYPEDEF(
int, Charge)
110 #pragma clang diagnostic pop
114 typedef boost::variant<ProteinHit*, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit*>
IDPointer;
115 typedef boost::variant<const ProteinHit*, const ProteinGroup*, const PeptideCluster*, const Peptide, const RunIndex, const Charge, const PeptideHit*>
IDPointerConst;
120 typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer>
Graph;
122 typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer>
GraphConst;
124 typedef boost::graph_traits<Graph>::vertex_descriptor
vertex_t;
125 typedef boost::graph_traits<Graph>::edge_descriptor
edge_t;
133 public boost::default_dfs_visitor
137 : gs(vgs), curr_v(0), next_v(0), m()
140 template <
typename Vertex,
typename Graph >
144 next_v = boost::add_vertex(tg[u], gs.back());
148 template <
typename Vertex,
typename Graph >
154 template <
typename Edge,
typename Graph >
157 if (m.find(e.m_target) == m.end())
159 next_v = boost::add_vertex(tg[e.m_target], gs.back());
160 m[e.m_target] = next_v;
164 next_v = m[e.m_target];
167 boost::add_edge(m[e.m_source], next_v, gs.back());
173 std::map<vertex_t, vertex_t>
m;
179 public boost::static_visitor<OpenMS::String>
200 return String(
"PepClust");
222 template<
class CharT>
224 public boost::static_visitor<>
239 stream_ << prot->
getAccession() <<
": " << prot << std::endl;
244 stream_ <<
"PG" << std::endl;
249 stream_ <<
"PepClust" << std::endl;
254 stream_ << peptide << std::endl;
259 stream_ <<
"rep" << ri << std::endl;
264 stream_ <<
"chg" << chg << std::endl;
273 public boost::static_visitor<>
289 pg.
score = posterior;
302 public boost::static_visitor<double>
332 std::vector<PeptideIdentification>& idedSpectra,
335 bool best_psms_annotated,
336 const boost::optional<const ExperimentalDesign>& ed = boost::optional<const ExperimentalDesign>());
342 bool use_unassigned_ids,
343 bool best_psms_annotated,
344 const boost::optional<const ExperimentalDesign>& ed = boost::optional<const ExperimentalDesign>());
419 bool stop_at_first, std::vector<vertex_t>& result);
430 bool stop_at_first, std::vector<vertex_t>& result);
445 struct SequenceToReplicateChargeVariantHierarchy;
475 #ifdef INFERENCE_BENCH
477 std::vector<std::tuple<vertex_t, vertex_t, unsigned long, double>> sizes_and_times_{1};
495 Size nrPrefractionationGroups_ = 0;
520 std::vector<PeptideIdentification>& idedSpectra,
522 bool best_psms_annotated =
false);
527 bool use_unassigned_ids,
528 bool best_psms_annotated =
false);
534 const std::unordered_map<std::string, ProteinHit*>& accession_map,
536 bool best_psms_annotated);
540 std::unordered_map<unsigned, unsigned>& indexToPrefractionationGroup,
542 std::unordered_map<std::string, ProteinHit*>& accession_map,
555 bool use_unassigned_ids,
559 std::vector<PeptideIdentification>& idedSpectra,
567 template<
class NodeType>
570 Graph::adjacency_iterator adjIt, adjIt_end;
571 boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
572 for (;adjIt != adjIt_end; ++adjIt)
574 if (graph[*adjIt].type() ==
typeid(NodeType))
576 result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
578 else if (graph[*adjIt].which() > graph[start].which())
580 getDownstreamNodes(*adjIt, graph, result);
585 template<
class NodeType>
588 Graph::adjacency_iterator adjIt, adjIt_end;
589 boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
590 for (;adjIt != adjIt_end; ++adjIt)
592 if (graph[*adjIt].type() ==
typeid(NodeType))
594 result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
596 else if (graph[*adjIt].which() < graph[start].which())
598 getUpstreamNodes(*adjIt, graph, result);
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
A container for consensus elements.
Definition: ConsensusMap.h:88
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:244
Definition: IDBoostGraph.h:303
double operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:306
double operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:311
double operator()(T &) const
Definition: IDBoostGraph.h:323
double operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:316
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type creates a label.
Definition: IDBoostGraph.h:180
OpenMS::String operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:203
OpenMS::String operator()(const Charge &chg) const
Definition: IDBoostGraph.h:213
OpenMS::String operator()(const PeptideHit *pep) const
Definition: IDBoostGraph.h:183
OpenMS::String operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:193
OpenMS::String operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:208
OpenMS::String operator()(const ProteinHit *prot) const
Definition: IDBoostGraph.h:188
OpenMS::String operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:198
Definition: IDBoostGraph.h:225
void operator()(const Charge &chg) const
Definition: IDBoostGraph.h:262
std::basic_ostream< CharT > stream_
Definition: IDBoostGraph.h:267
PrintAddressVisitor(std::basic_ostream< CharT > stream)
Definition: IDBoostGraph.h:228
void operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:247
void operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:257
void operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:232
void operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:237
void operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:252
void operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:242
Definition: IDBoostGraph.h:274
void operator()(T &, double) const
Definition: IDBoostGraph.h:294
void operator()(PeptideHit *pep, double posterior) const
Definition: IDBoostGraph.h:277
void operator()(ProteinGroup &pg, double posterior) const
Definition: IDBoostGraph.h:287
void operator()(ProteinHit *prot, double posterior) const
Definition: IDBoostGraph.h:282
A boost dfs visitor that copies connected components into a vector of graphs.
Definition: IDBoostGraph.h:134
std::map< vertex_t, vertex_t > m
A mapping from old node id to new node id to not duplicate existing ones in the new graph.
Definition: IDBoostGraph.h:173
void start_vertex(Vertex u, const Graph &tg)
Definition: IDBoostGraph.h:141
void examine_edge(Edge e, const Graph &tg)
Definition: IDBoostGraph.h:155
dfs_ccsplit_visitor(Graphs &vgs)
Definition: IDBoostGraph.h:136
vertex_t curr_v
Definition: IDBoostGraph.h:171
void discover_vertex(Vertex, const Graph &)
Definition: IDBoostGraph.h:149
Graphs & gs
Definition: IDBoostGraph.h:170
Creates and maintains a boost graph based on the OpenMS ID datastructures.
Definition: IDBoostGraph.h:82
void addPeptideIDWithAssociatedProteins_(PeptideIdentification &spectrum, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, const std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms, bool best_psms_annotated)
Used during building.
const ProteinIdentification & getProteinIDs()
Returns the underlying protein identifications for viewing.
IDBoostGraph(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool use_run_info, bool best_psms_annotated, const boost::optional< const ExperimentalDesign > &ed=boost::optional< const ExperimentalDesign >())
Constructors.
void buildGraphWithRunInfo_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, const ExperimentalDesign &ed)
void buildGraph_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool best_psms_annotated=false)
boost::graph_traits< Graph >::vertex_descriptor vertex_t
Definition: IDBoostGraph.h:124
std::unordered_map< vertex_t, Size > pepHitVtx_to_run_
Definition: IDBoostGraph.h:489
boost::variant< const ProteinHit *, const ProteinGroup *, const PeptideCluster *, const Peptide, const RunIndex, const Charge, const PeptideHit * > IDPointerConst
Definition: IDBoostGraph.h:115
void addPeptideAndAssociatedProteinsWithRunInfo_(PeptideIdentification &spectrum, std::unordered_map< unsigned, unsigned > &indexToPrefractionationGroup, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
std::vector< Graph > Graphs
Definition: IDBoostGraph.h:121
double score
Definition: IDBoostGraph.h:98
void getDownstreamNodes(const vertex_t &start, const Graph &graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:568
ProteinIdentification & protIDs_
Definition: IDBoostGraph.h:443
void getUpstreamNodes(const vertex_t &start, const Graph graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:586
void computeConnectedComponents()
Splits the initialized graph into connected components and clears it.
void clusterIndistProteinsAndPeptides()
void getProteinGroupScoresAndHitchhikingTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
Size getNrConnectedComponents()
Zero means the graph was not split yet.
void resolveGraphPeptideCentric_(Graph &fg, bool removeAssociationsInData)
see equivalent public method
void getUpstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all upstream nodes from a (set of) start nodes that are lower than or equal to a given leve...
const Graph & getComponent(Size cc)
Returns a specific connected component of the graph as a graph itself.
void applyFunctorOnCCsST(const std::function< void(Graph &)> &functor)
Do something on connected components, single-threaded (your functor object has to inherit from std::function...
Graph g
the initial boost Graph (will be cleared when split into CCs)
Definition: IDBoostGraph.h:445
void annotateIndistProteins_(const Graph &fg, bool addSingletons)
internal function to annotate the underlying ID structures based on the given Graph
void clusterIndistProteinsAndPeptidesAndExtendGraph()
(under development) As above but adds charge, replicate and sequence layer of nodes (untested)
std::set< IDBoostGraph::vertex_t > PeptideNodeSet
Definition: IDBoostGraph.h:128
std::set< IDBoostGraph::vertex_t > ProteinNodeSet
Definition: IDBoostGraph.h:127
void buildGraphWithRunInfo_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, const ExperimentalDesign &ed)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > GraphConst
Definition: IDBoostGraph.h:122
void calculateAndAnnotateIndistProteins(bool addSingletons=true)
static void printGraph(std::ostream &out, const Graph &fg)
Prints a graph (component or if not split, the full graph) in graphviz (i.e. dot) format.
void calculateAndAnnotateIndistProteins_(const Graph &fg, bool addSingletons)
boost::graph_traits< Graph >::edge_descriptor edge_t
Definition: IDBoostGraph.h:125
void annotateIndistProteins(bool addSingletons=true)
void resolveGraphPeptideCentric(bool removeAssociationsInData=true)
void getProteinGroupScoresAndTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
void buildGraph_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, bool best_psms_annotated=false)
IDBoostGraph(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_run_info, bool use_unassigned_ids, bool best_psms_annotated, const boost::optional< const ExperimentalDesign > &ed=boost::optional< const ExperimentalDesign >())
void getProteinScores_(ScoreToTgtDecLabelPairs &scores_and_tgt)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > Graph
Definition: IDBoostGraph.h:120
void getDownstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all downstream nodes from a (set of) start nodes that are higher than or equal to a given l...
boost::variant< ProteinHit *, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit * > IDPointer
a (currently unmodified) peptide sequence
Definition: IDBoostGraph.h:114
void applyFunctorOnCCs(const std::function< unsigned long(Graph &, unsigned int)> &functor)
Do something on connected components (your functor object has to inherit from std::function or be a lambda)
Graphs ccs_
the Graph split into connected components
Definition: IDBoostGraph.h:472
vertex_t addVertexWithLookup_(const IDPointer &ptr, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map)
placeholder for peptides with the same parent proteins or protein groups
Definition: IDBoostGraph.h:95
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
Int getCharge() const
returns the charge of the peptide
void setScore(double score)
sets the PSM score
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Representation of a protein hit.
Definition: ProteinHit.h:60
double getScore() const
returns the score of the protein hit
void setScore(const double score)
sets the score of the protein hit
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:61
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Definition: IDScoreGetterSetter.h:55