OpenMS
OPXLHelper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Eugen Netz $
32 // $Authors: Eugen Netz $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 #include <numeric>
44 
45 namespace OpenMS
46 {
50  class OPENMS_DLLAPI OPXLHelper
51  {
52  public:
53 
59  {
60  bool operator() (const PeptideIdentification& a, const PeptideIdentification& b) const
61  {
62  if (!a.getHits().empty() && !b.getHits().empty())
63  {
64  return a.getHits()[0].getScore() < b.getHits()[0].getScore();
65  }
66  else
67  {
68  return false;
69  }
70  }
71  bool operator() (const PeptideIdentification& a, const double& b) const
72  {
73  if (!a.getHits().empty())
74  {
75  return a.getHits()[0].getScore() < b;
76  }
77  else
78  {
79  return false;
80  }
81  }
82  bool operator() (const double& a, const PeptideIdentification& b) const
83  {
84  if (!b.getHits().empty())
85  {
86  return a < b.getHits()[0].getScore();
87  }
88  else
89  {
90  return false;
91  }
92  }
93  };
94 
/// Enumerates precursor masses for all candidates in an XL-MS search.
/// Returns the enumerated candidates as a vector of OPXLDataStructs::XLPrecursor.
/// NOTE(review): precursor_correction_positions is taken by non-const reference,
/// so it is presumably written by this function — confirm in the implementation.
112  static std::vector<OPXLDataStructs::XLPrecursor> enumerateCrossLinksAndMasses(const std::vector<OPXLDataStructs::AASeqWithMass>& peptides, double cross_link_mass_light, const DoubleList& cross_link_mass_mono_link, const StringList& cross_link_residue1, const StringList& cross_link_residue2, const std::vector< double >& spectrum_precursors, std::vector< int >& precursor_correction_positions, double precursor_mass_tolerance, bool precursor_mass_tolerance_unit_ppm);
113 
/// Digests a database with the given EnzymaticDigestion settings and
/// precomputes masses for all peptides.
/// Returns the resulting peptides as a vector of OPXLDataStructs::AASeqWithMass.
/// Note that fasta_db is passed by value, i.e. the caller's vector is copied.
134  static std::vector<OPXLDataStructs::AASeqWithMass> digestDatabase(std::vector<FASTAFile::FASTAEntry> fasta_db,
135  const EnzymaticDigestion& digestor, Size min_peptide_length, const StringList& cross_link_residue1, const StringList& cross_link_residue2,
136  const ModifiedPeptideGenerator::MapToResidueType& fixed_modifications,
137  const ModifiedPeptideGenerator::MapToResidueType& variable_modifications,
138  Size max_variable_mods_per_peptide);
139 
/// Builds specific cross-link candidates with all possible combinations of
/// linked positions.
/// Returns the candidates as a vector of OPXLDataStructs::ProteinProteinCrossLink.
/// All parameters are read-only (const references or values).
155  static std::vector <OPXLDataStructs::ProteinProteinCrossLink> buildCandidates(const std::vector< OPXLDataStructs::XLPrecursor > & candidates,
156  const std::vector< int > & precursor_corrections,
157  const std::vector< int > & precursor_correction_positions,
158  const std::vector<OPXLDataStructs::AASeqWithMass> & peptide_masses,
159  const StringList & cross_link_residue1,
160  const StringList & cross_link_residue2,
161  double cross_link_mass,
162  const DoubleList & cross_link_mass_mono_link,
163  const std::vector< double >& spectrum_precursor_vector,
164  const std::vector< double >& allowed_error_vector,
165  const String& cross_link_name);
166 
/// Fills up the given FragmentAnnotation vector (frag_annotations) with
/// annotations from a theoretical spectrum.
/// frag_annotations is the only non-const parameter.
/// NOTE(review): matching presumably pairs peak indices of the theoretical and
/// experimental spectrum — confirm the index order in the implementation.
179  static void buildFragmentAnnotations(std::vector<PeptideHit::PeakAnnotation> & frag_annotations, const std::vector< std::pair< Size, Size > > & matching, const PeakSpectrum & theoretical_spectrum, const PeakSpectrum & experiment_spectrum);
180 
/// Builds PeptideIdentifications and PeptideHits.
/// peptide_ids and all_top_csms are taken by non-const reference; the
/// remaining parameters are read-only.
/// NOTE(review): presumably results are appended to peptide_ids and
/// all_top_csms is updated at all_top_csms_current_index — confirm.
191  static void buildPeptideIDs(std::vector<PeptideIdentification> & peptide_ids, const std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > & top_csms_spectrum, std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > > & all_top_csms, Size all_top_csms_current_index, const PeakMap & spectra, Size scan_index, Size scan_index_heavy);
192 
/// Adds MetaValues for cross-link positions to PeptideHits.
/// peptide_ids is taken by non-const reference.
197  static void addProteinPositionMetaValues(std::vector< PeptideIdentification > & peptide_ids);
198 
/// Adds the xl_target_decoy MetaValue that combines alpha and beta
/// target_decoy info.
/// peptide_ids is taken by non-const reference.
203  static void addXLTargetDecoyMV(std::vector< PeptideIdentification > & peptide_ids);
204 
/// Adds the accessions_beta MetaValue to alpha peptides for TOPPView
/// visualization and CSV table output.
/// peptide_ids is taken by non-const reference.
209  static void addBetaAccessions(std::vector< PeptideIdentification > & peptide_ids);
210 
/// Removes beta peptides from cross-link IDs, since all info is already
/// contained in the alpha peptide.
/// peptide_ids is taken by non-const reference.
215  static void removeBetaPeptideHits(std::vector< PeptideIdentification > & peptide_ids);
216 
222 
/// Sorts PeptideHits for each PeptideIdentification by score and adds the
/// delta score as a MetaValue.
/// peptide_ids is taken by non-const reference.
227  static void computeDeltaScores(std::vector< PeptideIdentification >& peptide_ids);
228 
/// Combines all hits to spectrum pairs with the same light spectrum into one
/// ranked list.
/// Returns the combined PeptideIdentifications as a new vector.
/// NOTE(review): peptide_ids is a non-const reference, so the input may be
/// modified; number_top_hits presumably caps the ranked list length — confirm.
239  static std::vector< PeptideIdentification > combineTopRanksFromPairs(std::vector< PeptideIdentification > & peptide_ids, Size number_top_hits);
240 
/// Searches for cross-link candidates for a MS/MS spectrum.
/// Returns the candidates as a vector of OPXLDataStructs::ProteinProteinCrossLink.
/// use_sequence_tags defaults to false and tags defaults to an empty vector.
/// NOTE(review): tags is presumably only consulted when use_sequence_tags is
/// true — confirm in the implementation.
/// cross_link_name is passed by value (copied on each call).
260  static std::vector <OPXLDataStructs::ProteinProteinCrossLink> collectPrecursorCandidates(const IntList& precursor_correction_steps,
261  double precursor_mass,
262  double precursor_mass_tolerance,
263  bool precursor_mass_tolerance_unit_ppm,
264  const std::vector<OPXLDataStructs::AASeqWithMass>& filtered_peptide_masses,
265  double cross_link_mass,
266  const DoubleList& cross_link_mass_mono_link,
267  const StringList& cross_link_residue1,
268  const StringList& cross_link_residue2,
269  String cross_link_name,
270  bool use_sequence_tags = false,
271  const std::vector<std::string>& tags = std::vector<std::string>());
272 
/// Computes the mass error of a precursor mass to a hit.
/// Takes the cross-link spectrum match (csm) plus the precursor m/z and charge
/// and returns the error as a double.
/// NOTE(review): the unit of the returned error is not visible here — confirm.
280  static double computePrecursorError(const OPXLDataStructs::CrossLinkSpectrumMatch& csm, double precursor_mz, int precursor_charge);
281 
/// NOTE(review): presumably computes mean isotope-peak counts (taken from
/// num_iso_peaks_array) over the matched peak pairs of the linear and
/// cross-linked ions of the alpha and beta peptides — confirm against the
/// implementation.
/// csm is a non-const reference and returns void, so results are presumably
/// stored in csm; the other parameters are also non-const references.
289  static void isoPeakMeans(OPXLDataStructs::CrossLinkSpectrumMatch& csm, DataArrays::IntegerDataArray& num_iso_peaks_array, std::vector< std::pair< Size, Size > >& matched_spec_linear_alpha, std::vector< std::pair< Size, Size > >& matched_spec_linear_beta, std::vector< std::pair< Size, Size > >& matched_spec_xlinks_alpha, std::vector< std::pair< Size, Size > >& matched_spec_xlinks_beta);
290 
/// Filters the list of candidates for cases that include at least one of the
/// given tags.
/// candidates and precursor_correction_positions are taken by non-const
/// reference; tags is read-only.
/// NOTE(review): precursor_correction_positions is presumably filtered in
/// step with candidates — confirm in the implementation.
297  static void filterPrecursorsByTags(std::vector <OPXLDataStructs::XLPrecursor>& candidates, std::vector< int >& precursor_correction_positions, const std::vector<std::string>& tags);
298  };
299 }
Integer data array class.
Definition: DataArrays.h:55
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:64
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
Definition: ModifiedPeptideGenerator.h:57
The CrossLinkSpectrumMatch struct represents a PSM between a ProteinProteinCrossLink and a spectrum i...
Definition: OPXLDataStructs.h:113
The OPXLHelper class contains functions needed by OpenPepXL and OpenPepXLLF to reduce duplicated code...
Definition: OPXLHelper.h:51
static void filterPrecursorsByTags(std::vector< OPXLDataStructs::XLPrecursor > &candidates, std::vector< int > &precursor_correction_positions, const std::vector< std::string > &tags)
Filters the list of candidates for cases that include at least one of the tags in at least one of the...
static std::vector< PeptideIdentification > combineTopRanksFromPairs(std::vector< PeptideIdentification > &peptide_ids, Size number_top_hits)
combines all hits to spectrum pairs with the same light spectrum into one ranked list
static void addBetaAccessions(std::vector< PeptideIdentification > &peptide_ids)
adds accessions_beta MetaValue to alpha peptides for TOPPView visualization and CSV table output
static void computeDeltaScores(std::vector< PeptideIdentification > &peptide_ids)
sorts PeptideHits for each PeptideIdentification by score and adds the delta score as a MetaValue
static std::vector< OPXLDataStructs::ProteinProteinCrossLink > collectPrecursorCandidates(const IntList &precursor_correction_steps, double precursor_mass, double precursor_mass_tolerance, bool precursor_mass_tolerance_unit_ppm, const std::vector< OPXLDataStructs::AASeqWithMass > &filtered_peptide_masses, double cross_link_mass, const DoubleList &cross_link_mass_mono_link, const StringList &cross_link_residue1, const StringList &cross_link_residue2, String cross_link_name, bool use_sequence_tags=false, const std::vector< std::string > &tags=std::vector< std::string >())
Searches for cross-link candidates for a MS/MS spectrum.
static void buildFragmentAnnotations(std::vector< PeptideHit::PeakAnnotation > &frag_annotations, const std::vector< std::pair< Size, Size > > &matching, const PeakSpectrum &theoretical_spectrum, const PeakSpectrum &experiment_spectrum)
Fills up the given FragmentAnnotation vector with annotations from a theoretical spectrum.
static void addProteinPositionMetaValues(std::vector< PeptideIdentification > &peptide_ids)
adds MetaValues for cross-link positions to PeptideHits
static void buildPeptideIDs(std::vector< PeptideIdentification > &peptide_ids, const std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > &top_csms_spectrum, std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > > &all_top_csms, Size all_top_csms_current_index, const PeakMap &spectra, Size scan_index, Size scan_index_heavy)
Builds PeptideIdentifications and PeptideHits.
static void addPercolatorFeatureList(ProteinIdentification &prot_id)
adds the list of features that percolator should use for OpenPepXL
static void isoPeakMeans(OPXLDataStructs::CrossLinkSpectrumMatch &csm, DataArrays::IntegerDataArray &num_iso_peaks_array, std::vector< std::pair< Size, Size > > &matched_spec_linear_alpha, std::vector< std::pair< Size, Size > > &matched_spec_linear_beta, std::vector< std::pair< Size, Size > > &matched_spec_xlinks_alpha, std::vector< std::pair< Size, Size > > &matched_spec_xlinks_beta)
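Computes mean isotope-peak counts (from num_iso_peaks_array) for the matched linear and cross-linked fragment peaks of the alpha and beta peptides of a cross-link spectrum match.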
static void addXLTargetDecoyMV(std::vector< PeptideIdentification > &peptide_ids)
adds xl_target_decoy MetaValue that combines alpha and beta target_decoy info
static void removeBetaPeptideHits(std::vector< PeptideIdentification > &peptide_ids)
removes beta peptides from cross-link IDs, since all info is already contained in the alpha peptide hit
static std::vector< OPXLDataStructs::AASeqWithMass > digestDatabase(std::vector< FASTAFile::FASTAEntry > fasta_db, const EnzymaticDigestion &digestor, Size min_peptide_length, const StringList &cross_link_residue1, const StringList &cross_link_residue2, const ModifiedPeptideGenerator::MapToResidueType &fixed_modifications, const ModifiedPeptideGenerator::MapToResidueType &variable_modifications, Size max_variable_mods_per_peptide)
Digests a database with the given EnzymaticDigestion settings and precomputes masses for all peptides...
static double computePrecursorError(const OPXLDataStructs::CrossLinkSpectrumMatch &csm, double precursor_mz, int precursor_charge)
Computes the mass error of a precursor mass to a hit.
static std::vector< OPXLDataStructs::ProteinProteinCrossLink > buildCandidates(const std::vector< OPXLDataStructs::XLPrecursor > &candidates, const std::vector< int > &precursor_corrections, const std::vector< int > &precursor_correction_positions, const std::vector< OPXLDataStructs::AASeqWithMass > &peptide_masses, const StringList &cross_link_residue1, const StringList &cross_link_residue2, double cross_link_mass, const DoubleList &cross_link_mass_mono_link, const std::vector< double > &spectrum_precursor_vector, const std::vector< double > &allowed_error_vector, const String &cross_link_name)
Builds specific cross-link candidates with all possible combinations of linked positions from peptide...
static std::vector< OPXLDataStructs::XLPrecursor > enumerateCrossLinksAndMasses(const std::vector< OPXLDataStructs::AASeqWithMass > &peptides, double cross_link_mass_light, const DoubleList &cross_link_mass_mono_link, const StringList &cross_link_residue1, const StringList &cross_link_residue2, const std::vector< double > &spectrum_precursors, std::vector< int > &precursor_correction_positions, double precursor_mass_tolerance, bool precursor_mass_tolerance_unit_ppm)
Enumerates precursor masses for all candidates in an XL-MS search.
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:62
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
A comparator for PeptideIdentifications that compares the scores in the first PeptideHit.
Definition: OPXLHelper.h:59