OpenMS
PepXMLFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow, Hendrik Weisser $
6 // $Authors: Chris Bielow, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 #include <OpenMS/FORMAT/XMLFile.h>
20 
21 #include <vector>
22 #include <map>
23 #include <set>
24 
25 
26 namespace OpenMS
27 {
/// Used to load and store PepXML files.
/// Derives from Internal::XMLHandler (protected) and Internal::XMLFile (public).
/// NOTE(review): this listing is incomplete -- several declarations (e.g. the constructor
/// and a number of data members) do not appear in it; see the complete header before editing.
38  class OPENMS_DLLAPI PepXMLFile :
39  protected Internal::XMLHandler,
40  public Internal::XMLFile
41  {
42 public:
43 
46 
 /// Destructor.
48  ~PepXMLFile() override;
49 
 /// Loads peptide sequences with modifications out of a PepXML file.
62  void load(const String& filename,
63  std::vector<ProteinIdentification>& proteins,
64  PeptideIdentificationList& peptides,
65  const String& experiment_name,
66  const SpectrumMetaDataLookup& lookup);
67 
 /// Load function with empty defaults for some parameters (see the overload above).
74  void load(const String& filename,
75  std::vector<ProteinIdentification>& proteins,
76  PeptideIdentificationList& peptides,
77  const String& experiment_name = "");
78 
 /// Stores idXML as PepXML file.
84  void store(const String& filename, std::vector<ProteinIdentification>& protein_ids,
85  PeptideIdentificationList& peptide_ids, const String& mz_file = "",
86  const String& mz_name = "", bool peptideprophet_analyzed = false, double rt_tolerance = 0.01);
87 
 /// Sets whether we should keep the native spectrum name of the pepXML
 /// (stores the flag in keep_native_name_).
95  void keepNativeSpectrumName(bool keep)
96  {
97  keep_native_name_ = keep;
98  }
99 
 /// Sets the preferred fixed modifications.
101  void setPreferredFixedModifications(const std::vector<const ResidueModification*>& mods);
102 
 /// Sets the preferred variable modifications.
104  void setPreferredVariableModifications(const std::vector<const ResidueModification*>& mods);
105 
 /// Sets whether unknown scores should be parsed during load.
107  void setParseUnknownScores(bool parse_unknown_scores);
108 
109 protected:
110 
 /// Docu in base class.
112  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
113 
 /// Docu in base class.
115  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
116 
117 private:
118 
 /// Fill scan_map_.
120  void makeScanMap_();
121 
 /// Read RT, m/z, charge information from attributes of "spectrum_query".
123  void readRTMZCharge_(const xercesc::Attributes& attributes);
124 
 // NOTE(review): the declaration line of the nested class that opens here is missing from
 // this listing; the virtual destructor below indicates it is AminoAcidModification -- confirm
 // against the complete header.
126  {
127  private:
128 
130  double massdiff_;
131  double mass_;
135  bool is_protein_terminus_; // "true" if protein terminus, "false" if peptide terminus
137  std::vector<String> errors_;
139 
140  const ResidueModification* lookupModInPreferredMods_(const std::vector<const ResidueModification*>& preferred_fixed_mods,
141  const String& aminoacid,
142  double massdiff,
143  const String& description,
144  const ResidueModification::TermSpecificity term_spec,
145  double tolerance);
146 
147  public:
149 
 // NOTE(review): the line carrying the name of this declaration is missing from the listing;
 // the parameter list below presumably belongs to the AminoAcidModification constructor -- confirm.
155  const String& aminoacid, const String& massdiff, const String& mass,
156  String variable, const String& description, String terminus, const String& protein_terminus,
157  const std::vector<const ResidueModification*>& preferred_fixed_mods,
158  const std::vector<const ResidueModification*>& preferred_var_mods,
159  double tolerance);
160 
162 
163  virtual ~AminoAcidModification() = default;
164 
166 
168 
169  const String& getDescription() const;
170 
171  bool isVariable() const;
172 
174 
175  double getMassDiff() const;
176 
177  double getMass() const;
178 
179  const String& getTerminus() const;
180 
181  const String& getAminoAcid() const;
182 
183  const std::vector<String>& getErrors() const;
184  };
185 
 /// Pointer to the list of identified proteins.
187  std::vector<ProteinIdentification>* proteins_;
188 
191 
194 
197 
200 
206 
208  bool use_precursor_data_{};
209 
 /// Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
211  std::map<Size, Size> scan_map_;
212 
215 
218 
221 
224 
226  bool search_summary_{};
227 
229  bool wrong_experiment_{};
230 
232  bool seen_experiment_{};
233 
235  bool checked_base_name_{};
236 
238  bool has_decoys_{};
239 
241  bool parse_unknown_scores_{};
242 
245 
248 
 /// References to currently active ProteinIdentifications.
250  std::vector<std::vector<ProteinIdentification>::iterator> current_proteins_;
251 
254 
258 
261 
264 
267 
270 
272  double rt_{}, mz_{};
273 
275  Size scannr_{};
276 
278  Int charge_{};
279 
281  UInt search_id_{};
282 
285 
288 
290  double hydrogen_mass_{};
291 
 /// The modifications of the current peptide hit (position is 1-based).
293  std::vector<std::pair<const ResidueModification*, Size> > current_modifications_;
294 
 /// Fixed aminoacid modifications as parsed from the header.
296  std::vector<AminoAcidModification> fixed_modifications_;
297 
 /// Variable aminoacid modifications as parsed from the header.
299  std::vector<AminoAcidModification> variable_modifications_;
300 
303  std::vector<const ResidueModification*> preferred_fixed_modifications_;
304 
307  std::vector<const ResidueModification*> preferred_variable_modifications_;
308 
310 
311  static const double mod_tol_;
312  static const double xtandem_artificial_mod_tol_;
313 
316  bool lookupAddFromHeader_(double modification_mass,
317  Size modification_position,
318  std::vector<AminoAcidModification> const& header_mods);
319 
320  //static std::vector<int> getIsotopeErrorsFromIntSetting_(int intSetting);
321  };
322 } // namespace OpenMS
char16_t XMLCh
Definition: ClassTest.h:28
DateTime Class.
Definition: DateTime.h:33
Representation of an element.
Definition: Element.h:32
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:23
Base class for XML handlers.
Definition: XMLHandler.h:328
Used to load and store PepXML files.
Definition: PepXMLFile.h:41
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
static const double xtandem_artificial_mod_tol_
Definition: PepXMLFile.h:312
void load(const String &filename, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const String &experiment_name="")
Load function with empty defaults for some parameters (see above).
bool search_score_summary_
Are we currently in a "search_score_summary" element (should be skipped)?
Definition: PepXMLFile.h:223
String exp_name_
Name of the associated experiment (filename of the data file, extension will be removed)
Definition: PepXMLFile.h:196
void setParseUnknownScores(bool parse_unknown_scores)
Sets whether unknown scores should be parsed during load.
String current_base_name_
current base name
Definition: PepXMLFile.h:247
static const double mod_tol_
Definition: PepXMLFile.h:311
String current_sequence_
Sequence of the current peptide hit.
Definition: PepXMLFile.h:269
PeptideHit peptide_hit_
PeptideHit instance currently being processed.
Definition: PepXMLFile.h:266
std::vector< AminoAcidModification > variable_modifications_
Variable aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:299
void keepNativeSpectrumName(bool keep)
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:95
PeptideIdentification current_peptide_
PeptideIdentification instance currently being processed.
Definition: PepXMLFile.h:260
String search_engine_
Set name of search engine.
Definition: PepXMLFile.h:199
std::vector< std::pair< const ResidueModification *, Size > > current_modifications_
The modifications of the current peptide hit (position is 1-based)
Definition: PepXMLFile.h:293
String enzyme_
Enzyme name associated with the current identification run.
Definition: PepXMLFile.h:256
String enzyme_cuttingsite_
Definition: PepXMLFile.h:257
std::vector< std::vector< ProteinIdentification >::iterator > current_proteins_
References to currently active ProteinIdentifications.
Definition: PepXMLFile.h:250
std::vector< const ResidueModification * > preferred_fixed_modifications_
Definition: PepXMLFile.h:303
PeptideIdentificationList * peptides_
Pointer to the list of identified peptides.
Definition: PepXMLFile.h:190
~PepXMLFile() override
Destructor.
void load(const String &filename, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
PeptideHit::PepXMLAnalysisResult current_analysis_result_
Analysis result instance currently being processed.
Definition: PepXMLFile.h:263
std::vector< const ResidueModification * > preferred_variable_modifications_
Definition: PepXMLFile.h:307
bool analysis_summary_
Are we currently in an "analysis_summary" element (should be skipped)?
Definition: PepXMLFile.h:217
const SpectrumMetaDataLookup * lookup_
Pointer to wrapper for looking up spectrum meta data.
Definition: PepXMLFile.h:193
String native_spectrum_name_
Several optional attributes of spectrum_query.
Definition: PepXMLFile.h:202
PepXMLFile()
Constructor.
String prot_id_
Identifier linking PeptideIdentifications and ProteinIdentifications.
Definition: PepXMLFile.h:284
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
Docu in base class.
bool keep_native_name_
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:220
std::vector< ProteinIdentification > * proteins_
Pointer to the list of identified proteins.
Definition: PepXMLFile.h:187
DateTime date_
Date the pepXML file was generated.
Definition: PepXMLFile.h:287
std::vector< AminoAcidModification > fixed_modifications_
Fixed aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:296
String decoy_prefix_
In case it has decoys, what is the prefix?
Definition: PepXMLFile.h:244
String swath_assay_
Definition: PepXMLFile.h:204
bool lookupAddFromHeader_(double modification_mass, Size modification_position, std::vector< AminoAcidModification > const &header_mods)
Element hydrogen_
Hydrogen data (for mass types)
Definition: PepXMLFile.h:214
String experiment_label_
Definition: PepXMLFile.h:203
ProteinIdentification::SearchParameters params_
Search parameters of the current identification run.
Definition: PepXMLFile.h:253
String status_
Definition: PepXMLFile.h:205
void readRTMZCharge_(const xercesc::Attributes &attributes)
Read RT, m/z, charge information from attributes of "spectrum_query".
void setPreferredFixedModifications(const std::vector< const ResidueModification * > &mods)
sets the preferred fixed modifications
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
Docu in base class.
std::map< Size, Size > scan_map_
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
Definition: PepXMLFile.h:211
void setPreferredVariableModifications(const std::vector< const ResidueModification * > &mods)
sets the preferred variable modifications
void makeScanMap_()
Fill scan_map_.
Analysis Result (containing search engine / prophet results)
Definition: PeptideHit.h:168
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
Representation of a modification on an amino acid residue.
Definition: ResidueModification.h:53
TermSpecificity
Position where the modification is allowed to occur.
Definition: ResidueModification.h:72
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:118
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Definition: PepXMLFile.h:126
std::vector< String > errors_
Definition: PepXMLFile.h:137
double massdiff_
Definition: PepXMLFile.h:130
double mass_
Definition: PepXMLFile.h:131
String aminoacid_
Definition: PepXMLFile.h:129
bool is_protein_terminus_
Definition: PepXMLFile.h:135
String description_
Definition: PepXMLFile.h:133
const ResidueModification * registered_mod_
Definition: PepXMLFile.h:138
AminoAcidModification(const String &aminoacid, const String &massdiff, const String &mass, String variable, const String &description, String terminus, const String &protein_terminus, const std::vector< const ResidueModification * > &preferred_fixed_mods, const std::vector< const ResidueModification * > &preferred_var_mods, double tolerance)
AminoAcidModification(const AminoAcidModification &rhs)=default
ResidueModification::TermSpecificity term_spec_
Definition: PepXMLFile.h:136
const ResidueModification * lookupModInPreferredMods_(const std::vector< const ResidueModification * > &preferred_fixed_mods, const String &aminoacid, double massdiff, const String &description, const ResidueModification::TermSpecificity term_spec, double tolerance)
const std::vector< String > & getErrors() const
const ResidueModification * getRegisteredMod() const
AminoAcidModification & operator=(const AminoAcidModification &rhs)=default
bool is_variable_
Definition: PepXMLFile.h:132
String terminus_
Definition: PepXMLFile.h:134
Search parameters of the DB search.
Definition: ProteinIdentification.h:248