OpenMS  2.5.0
PeptideAndProteinQuant.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 namespace OpenMS
45 {
53  class OPENMS_DLLAPI PeptideAndProteinQuant :
54  public DefaultParamHandler
55  {
56 public:
57 
59  typedef std::map<UInt64, double> SampleAbundances;
60 
62  struct PeptideData
63  {
65  std::map<Int, std::map<Int, SampleAbundances>> abundances;
66 
69 
71  std::set<String> accessions;
72 
75 
78  id_count(0) {}
79  };
80 
82  typedef std::map<AASequence, PeptideData> PeptideQuant;
83 
85  struct ProteinData
86  {
88  std::map<String, SampleAbundances> abundances;
89 
92 
95 
98  id_count(0) {}
99  };
100 
102  typedef std::map<String, ProteinData> ProteinQuant;
103 
105  struct Statistics
106  {
109 
112 
115 
117  Size quant_proteins, too_few_peptides;
118 
120  Size quant_peptides, total_peptides;
121 
123  Size quant_features, total_features, blank_features, ambig_features;
124 
127  n_samples(0), quant_proteins(0), too_few_peptides(0),
128  quant_peptides(0), total_peptides(0), quant_features(0),
129  total_features(0), blank_features(0), ambig_features(0) {}
130  };
131 
134 
137 
143  void readQuantData(FeatureMap& features, const ExperimentalDesign& ed);
144 
150  void readQuantData(ConsensusMap& consensus, const ExperimentalDesign& ed);
151 
157  void readQuantData(std::vector<ProteinIdentification>& proteins,
158  std::vector<PeptideIdentification>& peptides,
159  const ExperimentalDesign& ed);
160 
170  void quantifyPeptides(const std::vector<PeptideIdentification>& peptides =
171  std::vector<PeptideIdentification>());
172 
173 
179  void quantifyProteins(const ProteinIdentification& proteins =
181 
183  const Statistics& getStatistics();
184 
186  const PeptideQuant& getPeptideResults();
187 
189  const ProteinQuant& getProteinResults();
190 
192  static void annotateQuantificationsToProteins(
193  const ProteinQuant& protein_quants,
194  ProteinIdentification& proteins,
195  const UInt n_samples);
196 
197 private:
198 
201 
204 
207 
208 
215  PeptideHit getAnnotation_(std::vector<PeptideIdentification>& peptides);
216 
225  void quantifyFeature_(const FeatureHandle& feature,
226  size_t fraction,
227  size_t sample,
228  const PeptideHit& hit);
229 
237  bool getBest_(
238  const std::map<Int, std::map<Int, SampleAbundances>> & peptide_abundances,
239  std::pair<size_t, size_t> & best)
240  {
241  size_t best_n_quant(0);
242  double best_abundance(0);
243  best = std::make_pair(0,0);
244 
245  for (auto & fa : peptide_abundances) // for all fractions
246  {
247  for (auto & ca : fa.second) // for all charge states
248  {
249  const Int & fraction = fa.first;
250  const Int & charge = ca.first;
251 
252  double current_abundance = std::accumulate(
253  std::begin(ca.second),
254  std::end(ca.second),
255  0.0,
256  [] (int value, const SampleAbundances::value_type& p)
257  { return value + p.second; }
258  ); // loop over abundances
259 
260  if (current_abundance <= 0) { continue; }
261 
262  const size_t current_n_quant = ca.second.size();
263  if (current_n_quant > best_n_quant)
264  {
265  best_abundance = current_abundance;
266  best_n_quant = current_n_quant;
267  best = std::make_pair(fraction, charge);
268  }
269  else if (current_n_quant == best_n_quant
270  && current_abundance > best_abundance) // resolve tie by abundance
271  {
272  best_abundance = current_abundance;
273  best = std::make_pair(fraction, charge);
274  }
275  }
276  }
277  return best_abundance > 0.;
278  }
279 
285  template <typename T>
286  void orderBest_(const std::map<T, SampleAbundances> & abundances,
287  std::vector<T>& result)
288  {
289  typedef std::pair<Size, double> PairType;
290  std::multimap<PairType, T, std::greater<PairType> > order;
291  for (typename std::map<T, SampleAbundances>::const_iterator ab_it =
292  abundances.begin(); ab_it != abundances.end(); ++ab_it)
293  {
294  double total = 0.0;
295  for (SampleAbundances::const_iterator samp_it = ab_it->second.begin();
296  samp_it != ab_it->second.end(); ++samp_it)
297  {
298  total += samp_it->second;
299  }
300  if (total <= 0.0) continue; // not quantified
301  PairType key = std::make_pair(ab_it->second.size(), total);
302  order.insert(std::make_pair(key, ab_it->first));
303  }
304  result.clear();
305  for (typename std::multimap<PairType, T, std::greater<PairType> >::
306  iterator ord_it = order.begin(); ord_it != order.end(); ++ord_it)
307  {
308  result.push_back(ord_it->second);
309  }
310  }
311 
312 
313 
317  void normalizePeptides_();
318 
331  String getAccession_(const std::set<String>& pep_accessions,
332  std::map<String, String>& accession_to_leader);
333 
339  void countPeptides_(std::vector<PeptideIdentification>& peptides);
340 
342  void updateMembers_() override;
343 
344  }; // class
345 
346 } // namespace
347 
OpenMS::PeptideAndProteinQuant::Statistics::n_ms_files
Size n_ms_files
number of MS files
Definition: PeptideAndProteinQuant.h:114
OpenMS::PeptideAndProteinQuant::PeptideQuant
std::map< AASequence, PeptideData > PeptideQuant
Mapping: peptide sequence (modified) -> peptide data.
Definition: PeptideAndProteinQuant.h:82
OpenMS::PeptideAndProteinQuant::Statistics::total_features
Size total_features
Definition: PeptideAndProteinQuant.h:123
OpenMS::PeptideAndProteinQuant::PeptideData::total_abundances
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:68
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::PeptideAndProteinQuant::prot_quant_
ProteinQuant prot_quant_
Protein quantification data.
Definition: PeptideAndProteinQuant.h:206
OpenMS::PeptideAndProteinQuant::PeptideData::id_count
Size id_count
number of identifications
Definition: PeptideAndProteinQuant.h:74
OpenMS::PeptideAndProteinQuant::PeptideData::PeptideData
PeptideData()
constructor
Definition: PeptideAndProteinQuant.h:77
OpenMS::PeptideAndProteinQuant::Statistics::n_fractions
Size n_fractions
number of fractions
Definition: PeptideAndProteinQuant.h:111
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
OpenMS::PeptideAndProteinQuant::Statistics::n_samples
Size n_samples
number of samples (or assays in mzTab terms)
Definition: PeptideAndProteinQuant.h:108
OpenMS::PeptideAndProteinQuant::getBest_
bool getBest_(const std::map< Int, std::map< Int, SampleAbundances >> &peptide_abundances, std::pair< size_t, size_t > &best)
Determine fraction and charge state of a peptide with the highest number of abundances.
Definition: PeptideAndProteinQuant.h:237
OpenMS::PeptideAndProteinQuant::ProteinQuant
std::map< String, ProteinData > ProteinQuant
Mapping: protein accession -> protein data.
Definition: PeptideAndProteinQuant.h:102
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:95
OpenMS::PeptideAndProteinQuant::ProteinData::abundances
std::map< String, SampleAbundances > abundances
mapping: peptide (unmodified) -> sample -> abundance
Definition: PeptideAndProteinQuant.h:88
FeatureMap.h
OpenMS::PeptideAndProteinQuant::Statistics
Statistics for processing summary.
Definition: PeptideAndProteinQuant.h:105
ConsensusMap.h
OpenMS::PeptideAndProteinQuant::PeptideData::abundances
std::map< Int, std::map< Int, SampleAbundances > > abundances
mapping: fraction -> charge -> sample -> abundance
Definition: PeptideAndProteinQuant.h:65
int
OpenMS::PeptideAndProteinQuant::PeptideData::accessions
std::set< String > accessions
protein accessions for this peptide
Definition: PeptideAndProteinQuant.h:71
OpenMS::PeptideAndProteinQuant::~PeptideAndProteinQuant
~PeptideAndProteinQuant() override
Destructor.
Definition: PeptideAndProteinQuant.h:136
OpenMS::PeptideAndProteinQuant::ProteinData::total_abundances
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:91
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:79
OpenMS::PeptideAndProteinQuant::orderBest_
void orderBest_(const std::map< T, SampleAbundances > &abundances, std::vector< T > &result)
Order keys (charges/peptides for peptide/protein quantification) according to how many samples they allow to quantify, breaking ties by total abundance.
Definition: PeptideAndProteinQuant.h:286
ProteinIdentification.h
OpenMS::PeptideAndProteinQuant::Statistics::Statistics
Statistics()
constructor
Definition: PeptideAndProteinQuant.h:126
OpenMS::PeptideAndProteinQuant::PeptideData
Quantitative and associated data for a peptide.
Definition: PeptideAndProteinQuant.h:62
DefaultParamHandler.h
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
OpenMS::PeptideAndProteinQuant::stats_
Statistics stats_
Processing statistics for output in the end.
Definition: PeptideAndProteinQuant.h:200
ExperimentalDesign.h
OpenMS::PeptideAndProteinQuant::ProteinData::ProteinData
ProteinData()
constructor
Definition: PeptideAndProteinQuant.h:97
OpenMS::PeptideAndProteinQuant::pep_quant_
PeptideQuant pep_quant_
Peptide quantification data.
Definition: PeptideAndProteinQuant.h:203
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:54
OpenMS::PeptideAndProteinQuant
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition: PeptideAndProteinQuant.h:53
OpenMS::FeatureHandle
Representation of a Peak2D, RichPeak2D or Feature.
Definition: FeatureHandle.h:57
OpenMS::ExperimentalDesign
Representation of the Experimental Design in OpenMS. Instances can be loaded via the ExperimentalDesi...
Definition: ExperimentalDesign.h:85
PeptideIdentification.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::PeptideAndProteinQuant::Statistics::total_peptides
Size total_peptides
Definition: PeptideAndProteinQuant.h:120
OpenMS::PeptideAndProteinQuant::Statistics::too_few_peptides
Size too_few_peptides
Definition: PeptideAndProteinQuant.h:117
OpenMS::PeptideAndProteinQuant::ProteinData::id_count
Size id_count
total number of identifications (of peptides mapping to this protein)
Definition: PeptideAndProteinQuant.h:94
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::PeptideAndProteinQuant::SampleAbundances
std::map< UInt64, double > SampleAbundances
Mapping: sample ID -> abundance.
Definition: PeptideAndProteinQuant.h:59
OpenMS::PeptideAndProteinQuant::ProteinData
Quantitative and associated data for a protein.
Definition: PeptideAndProteinQuant.h:85