OpenMS  2.7.0
PeptideAndProteinQuant.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 namespace OpenMS
45 {
53  class OPENMS_DLLAPI PeptideAndProteinQuant :
54  public DefaultParamHandler
55  {
56 public:
57 
/// Mapping: sample ID -> abundance.
59  typedef std::map<UInt64, double> SampleAbundances;
60 
/// Quantitative and associated data for a peptide.
/// NOTE(review): the generated documentation also lists the members
/// 'total_abundances' (mapping: sample -> total abundance) and
/// 'total_psm_counts' (spectral counting-based abundances) at header lines 71
/// and 74; their declarations are not part of this listing.
62  struct PeptideData
63  {
/// mapping: fraction -> charge -> sample -> abundance
65  std::map<Int, std::map<Int, SampleAbundances>> abundances;
66 
/// documented as "mapping: fraction -> charge -> sample -> abundance" as well;
/// presumably the values are PSM counts rather than abundances -- TODO confirm
68  std::map<Int, std::map<Int, SampleAbundances>> psm_counts;
69 
72 
75 
/// protein accessions for this peptide
77  std::set<String> accessions;
78 
/// counter, starts at 0 (presumably the number of PSMs of this peptide -- TODO confirm)
80  Size psm_count = 0;
81 
/// default constructor (compiler-generated)
83  PeptideData() = default;
84  };
85 
/// Mapping: peptide sequence (modified) -> peptide data.
87  typedef std::map<AASequence, PeptideData> PeptideQuant;
88 
/// Quantitative and associated data for a protein.
/// NOTE(review): the generated documentation also lists the members
/// 'total_abundances' (mapping: sample -> total abundance), 'total_psm_counts'
/// (spectral counting-based abundances) and 'total_distinct_peptides' (number
/// of distinct peptide sequences) at header lines 98, 101 and 104; their
/// declarations are not part of this listing.
90  struct ProteinData
91  {
/// mapping: peptide (unmodified) -> sample -> abundance
93  std::map<String, SampleAbundances> abundances;
94 
/// same container type as 'abundances'; presumably peptide -> sample -> PSM count -- TODO confirm
95  std::map<String, SampleAbundances> psm_counts;
96 
99 
102 
105 
/// counter, starts at 0 (presumably the number of PSMs of this protein -- TODO confirm)
107  Size psm_count = 0;
108 
/// default constructor (compiler-generated)
110  ProteinData() = default;
111  };
112 
/// Mapping: protein accession -> protein data.
114  typedef std::map<String, ProteinData> ProteinQuant;
115 
/// Statistics for processing summary.
/// NOTE(review): the generated documentation also lists the members
/// 'n_samples' (number of samples, or assays in mzTab terms), 'n_fractions'
/// (number of fractions) and 'n_ms_files' (number of MS files) at header lines
/// 120, 123 and 126, and the constructor head 'Statistics()' at line 138;
/// those lines are not part of this listing.
117  struct Statistics
118  {
121 
124 
127 
/// protein statistics
129  Size quant_proteins, too_few_peptides;
130 
/// peptide statistics
132  Size quant_peptides, total_peptides;
133 
/// presumably feature statistics, by analogy with the two groups above -- TODO confirm
135  Size quant_features, total_features, blank_features, ambig_features;
136 
/// constructor: member initializer list setting the listed counters to zero.
/// NOTE(review): 'n_fractions' and 'n_ms_files' do not appear in this
/// initializer list -- confirm that they are initialized elsewhere before use.
139  n_samples(0), quant_proteins(0), too_few_peptides(0),
140  quant_peptides(0), total_peptides(0), quant_features(0),
141  total_features(0), blank_features(0), ambig_features(0) {}
142  };
143 
146 
149 
/// Read quantitative data from a feature map.
155  void readQuantData(FeatureMap& features, const ExperimentalDesign& ed);
156 
/// Read quantitative data from a consensus map.
162  void readQuantData(ConsensusMap& consensus, const ExperimentalDesign& ed);
163 
/// Read quantitative data from identification results (for quantification via spectral counting).
169  void readQuantData(std::vector<ProteinIdentification>& proteins,
170  std::vector<PeptideIdentification>& peptides,
171  const ExperimentalDesign& ed);
172 
/// Compute peptide abundances.
/// 'peptides' defaults to an empty list of peptide identifications.
182  void quantifyPeptides(const std::vector<PeptideIdentification>& peptides =
183  std::vector<PeptideIdentification>());
184 
185 
191  void quantifyProteins(const ProteinIdentification& proteins =
193 
196 
199 
202 
205  const ProteinQuant& protein_quants,
206  ProteinIdentification& proteins,
207  const UInt n_samples,
208  bool remove_unquantified = true);
209 
210 private:
211 
214 
217 
220 
221 
/// Get the "canonical" annotation (a single peptide hit) of a feature/consensus
/// feature from the peptide identifications passed in 'peptides'.
228  PeptideHit getAnnotation_(std::vector<PeptideIdentification>& peptides);
229 
/// Gather quantitative information from a feature.
/// 'fraction' and 'sample' are presumably the fraction and sample the feature
/// belongs to, and 'hit' its peptide annotation -- TODO confirm against the implementation.
238  void quantifyFeature_(const FeatureHandle& feature,
239  size_t fraction,
240  size_t sample,
241  const PeptideHit& hit);
242 
250  bool getBest_(
251  const std::map<Int, std::map<Int, SampleAbundances>> & peptide_abundances,
252  std::pair<size_t, size_t> & best)
253  {
254  size_t best_n_quant(0);
255  double best_abundance(0);
256  best = std::make_pair(0,0);
257 
258  for (auto & fa : peptide_abundances) // for all fractions
259  {
260  for (auto & ca : fa.second) // for all charge states
261  {
262  const Int & fraction = fa.first;
263  const Int & charge = ca.first;
264 
265  double current_abundance = std::accumulate(
266  std::begin(ca.second),
267  std::end(ca.second),
268  0.0,
269  [] (int value, const SampleAbundances::value_type& p)
270  { return value + p.second; }
271  ); // loop over abundances
272 
273  if (current_abundance <= 0) { continue; }
274 
275  const size_t current_n_quant = ca.second.size();
276  if (current_n_quant > best_n_quant)
277  {
278  best_abundance = current_abundance;
279  best_n_quant = current_n_quant;
280  best = std::make_pair(fraction, charge);
281  }
282  else if (current_n_quant == best_n_quant
283  && current_abundance > best_abundance) // resolve tie by abundance
284  {
285  best_abundance = current_abundance;
286  best = std::make_pair(fraction, charge);
287  }
288  }
289  }
290  return best_abundance > 0.;
291  }
292 
298  template <typename T>
299  void orderBest_(const std::map<T, SampleAbundances> & abundances,
300  std::vector<T>& result)
301  {
302  typedef std::pair<Size, double> PairType;
303  std::multimap<PairType, T, std::greater<PairType> > order;
304  for (typename std::map<T, SampleAbundances>::const_iterator ab_it =
305  abundances.begin(); ab_it != abundances.end(); ++ab_it)
306  {
307  double total = 0.0;
308  for (SampleAbundances::const_iterator samp_it = ab_it->second.begin();
309  samp_it != ab_it->second.end(); ++samp_it)
310  {
311  total += samp_it->second;
312  }
313  if (total <= 0.0) continue; // not quantified
314  PairType key = std::make_pair(ab_it->second.size(), total);
315  order.insert(std::make_pair(key, ab_it->first));
316  }
317  result.clear();
318  for (typename std::multimap<PairType, T, std::greater<PairType> >::
319  iterator ord_it = order.begin(); ord_it != order.end(); ++ord_it)
320  {
321  result.push_back(ord_it->second);
322  }
323  }
324 
325 
326 
331 
/// Get the "canonical" protein accession from the list of protein accessions of a peptide.
/// 'accession_to_leader' is presumably a lookup from an accession to the
/// accession that represents it -- TODO confirm against the implementation.
344  String getAccession_(const std::set<String>& pep_accessions,
345  std::map<String, String>& accession_to_leader);
346 
/// Count the number of identifications (best hits only) of each peptide sequence.
/// NOTE(review): the generated description is truncated after "and initializes
/// the res..."; see the implementation for what exactly is initialized.
352  void countPeptides_(std::vector<PeptideIdentification>& peptides, const Size& n_fractions);
353 
/// Clear all data when parameters are set.
355  void updateMembers_() override;
356 
357  }; // class
358 
359 } // namespace
360 
A container for consensus elements.
Definition: ConsensusMap.h:88
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:244
Representation of a Peak2D, RichPeak2D or Feature.
Definition: FeatureHandle.h:60
A container for features.
Definition: FeatureMap.h:105
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition: PeptideAndProteinQuant.h:55
void readQuantData(ConsensusMap &consensus, const ExperimentalDesign &ed)
Read quantitative data from a consensus map.
std::map< AASequence, PeptideData > PeptideQuant
Mapping: peptide sequence (modified) -> peptide data.
Definition: PeptideAndProteinQuant.h:87
void readQuantData(FeatureMap &features, const ExperimentalDesign &ed)
Read quantitative data from a feature map.
void quantifyPeptides(const std::vector< PeptideIdentification > &peptides=std::vector< PeptideIdentification >())
Compute peptide abundances.
std::map< String, ProteinData > ProteinQuant
Mapping: protein accession -> protein data.
Definition: PeptideAndProteinQuant.h:114
const ProteinQuant & getProteinResults()
Get protein abundance data.
void quantifyFeature_(const FeatureHandle &feature, size_t fraction, size_t sample, const PeptideHit &hit)
Gather quantitative information from a feature.
void countPeptides_(std::vector< PeptideIdentification > &peptides, const Size &n_fractions)
Count the number of identifications (best hits only) of each peptide sequence and initializes the res...
bool getBest_(const std::map< Int, std::map< Int, SampleAbundances >> &peptide_abundances, std::pair< size_t, size_t > &best)
Determine fraction and charge state of a peptide with the highest number of abundances.
Definition: PeptideAndProteinQuant.h:250
static void annotateQuantificationsToProteins(const ProteinQuant &protein_quants, ProteinIdentification &proteins, const UInt n_samples, bool remove_unquantified=true)
Annotate protein quant results as meta data to protein ids.
PeptideQuant pep_quant_
Peptide quantification data.
Definition: PeptideAndProteinQuant.h:216
const PeptideQuant & getPeptideResults()
Get peptide abundance data.
~PeptideAndProteinQuant() override
Destructor.
Definition: PeptideAndProteinQuant.h:148
void updateMembers_() override
Clear all data when parameters are set.
void normalizePeptides_()
Normalize peptide abundances across samples by (multiplicative) scaling to equal medians.
String getAccession_(const std::set< String > &pep_accessions, std::map< String, String > &accession_to_leader)
Get the "canonical" protein accession from the list of protein accessions of a peptide.
void quantifyProteins(const ProteinIdentification &proteins=ProteinIdentification())
Compute protein abundances.
Statistics stats_
Processing statistics for output in the end.
Definition: PeptideAndProteinQuant.h:213
PeptideHit getAnnotation_(std::vector< PeptideIdentification > &peptides)
Get the "canonical" annotation (a single peptide hit) of a feature/consensus feature from the associa...
const Statistics & getStatistics()
Get summary statistics.
void orderBest_(const std::map< T, SampleAbundances > &abundances, std::vector< T > &result)
Order keys (charges/peptides for peptide/protein quantification) according to how many samples they a...
Definition: PeptideAndProteinQuant.h:299
std::map< UInt64, double > SampleAbundances
Mapping: sample ID -> abundance.
Definition: PeptideAndProteinQuant.h:59
ProteinQuant prot_quant_
Protein quantification data.
Definition: PeptideAndProteinQuant.h:219
PeptideAndProteinQuant()
Constructor.
void readQuantData(std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const ExperimentalDesign &ed)
Read quantitative data from identification results (for quantification via spectral counting).
Representation of a peptide hit.
Definition: PeptideHit.h:57
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type, e.g. used for a variable which can hold the result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Quantitative and associated data for a peptide.
Definition: PeptideAndProteinQuant.h:63
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition: PeptideAndProteinQuant.h:74
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:71
std::set< String > accessions
protein accessions for this peptide
Definition: PeptideAndProteinQuant.h:77
std::map< Int, std::map< Int, SampleAbundances > > abundances
mapping: fraction -> charge -> sample -> abundance
Definition: PeptideAndProteinQuant.h:65
std::map< Int, std::map< Int, SampleAbundances > > psm_counts
mapping: fraction -> charge -> sample -> abundance
Definition: PeptideAndProteinQuant.h:68
Quantitative and associated data for a protein.
Definition: PeptideAndProteinQuant.h:91
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition: PeptideAndProteinQuant.h:101
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:98
std::map< String, SampleAbundances > abundances
mapping: peptide (unmodified) -> sample -> abundance
Definition: PeptideAndProteinQuant.h:93
std::map< String, SampleAbundances > psm_counts
Definition: PeptideAndProteinQuant.h:95
SampleAbundances total_distinct_peptides
number of distinct peptide sequences
Definition: PeptideAndProteinQuant.h:104
Statistics for processing summary.
Definition: PeptideAndProteinQuant.h:118
Size quant_proteins
protein statistics
Definition: PeptideAndProteinQuant.h:129
Size quant_peptides
peptide statistics
Definition: PeptideAndProteinQuant.h:132
Size n_samples
number of samples (or assays in mzTab terms)
Definition: PeptideAndProteinQuant.h:120
Size n_fractions
number of fractions
Definition: PeptideAndProteinQuant.h:123
Statistics()
constructor
Definition: PeptideAndProteinQuant.h:138
Size n_ms_files
number of MS files
Definition: PeptideAndProteinQuant.h:126
Size ambig_features
Definition: PeptideAndProteinQuant.h:135