OpenMS
XTandemXMLFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Andreas Bertsch $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
12 #include <OpenMS/FORMAT/XMLFile.h>
15 #include <stack>
16 
17 namespace OpenMS
18 {
19  class String;
20  class ProteinIdentification;
21 
// XTandemXMLFile -- "Used to load XTandemXML files." (summary from this page's member index).
// Derives from Internal::XMLHandler (protected) and Internal::XMLFile (public).
// NOTE(review): this is a rendered listing; several declarations named in the member index
// (constructors, copy operations and a number of data members) do not appear in the body below.
// Where a comment's declaration is absent, the comment names the member according to that index.
30  class OPENMS_DLLAPI XTandemXMLFile :
31  protected Internal::XMLHandler,
32  public Internal::XMLFile
33  {
34 public:
35 
38 
40  ~XTandemXMLFile() override;
// Loads data from an X! Tandem XML file (brief from the member index).
// NOTE(review): the three non-const reference parameters are presumably outputs filled
// from the file -- confirm against the implementation.
54  void load(const String& filename, ProteinIdentification& protein_identification, std::vector<PeptideIdentification>& id_data, ModificationDefinitionsSet& mod_def_set);
55 
56 
57 protected:
58 
59  // Parser callback override; documentation is in the base class
60  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
61 
62  // Parser callback override; documentation is in the base class
63  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
64 
65  // Parser callback override; documentation is in the base class
66  void characters(const XMLCh* const chars, const XMLSize_t /*length*/) override;
67 
69 
71 
72 private:
73 
75 
76  // true during "note" element containing protein accession (member: bool is_protein_note_)
78 
79  // true during "note" element containing spectrum ID (member: bool is_spectrum_note_)
81 
82  // true after non-new protein entries, so that with the next "protein note" the
83  // accession will not be updated again (member: bool skip_protein_acc_update_)
85 
86  // peptide hits per spectrum
87  std::map<UInt, std::vector<PeptideHit> > peptide_hits_;
88 
89  // protein hits
90  std::vector<ProteinHit> protein_hits_;
91 
92  // protein unique IDs (assigned by X! Tandem), to keep track of which proteins were already seen
93  std::set<UInt> protein_uids_;
94 
95  // accession of the current protein (member: String current_protein_)
97 
98  // charge of current peptide (member: Int current_charge_)
100 
101  // X! Tandem ID of current peptide (member: UInt current_id_)
103 
104  // name of the current tag (member: String tag_) -- NOTE(review): presumably the XML element being parsed; confirm
106 
107  // start position of current peptide in protein sequence (member: UInt current_start_)
109 
110  // stop position of current peptide in protein sequence (member: UInt current_stop_)
112 
113  // previous peptide sequence (member: String previous_seq_)
115 
116  // mapping from X! Tandem ID to spectrum ID
117  std::map<UInt, String> spectrum_ids_;
118 
119  // modification definitions (member: ModificationDefinitionsSet mod_def_set_)
121 
122  // modifications used by X! Tandem by default (member: ModificationDefinitionsSet default_nterm_mods_)
124 
125  // the possible values of the "type" attribute of "group" elements
126  enum class GroupType
127  {
128  MODEL,
129  PARAMETERS,
130  SUPPORT
131  };
132 
133  // stack of the types of the currently open "group" elements
134  // they can be nested (e.g. a support group in a model group)
135  // parsing of child elements sometimes depends on the group type
136  std::stack<GroupType> group_type_stack_;
137 
138  };
139 
140 } // namespace OpenMS
141 
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:23
Base class for XML handlers.
Definition: XMLHandler.h:300
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:33
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:33
String current_protein_
Definition: XTandemXMLFile.h:96
ProteinIdentification * protein_identification_
Definition: XTandemXMLFile.h:74
bool is_spectrum_note_
Definition: XTandemXMLFile.h:80
std::map< UInt, std::vector< PeptideHit > > peptide_hits_
Definition: XTandemXMLFile.h:87
GroupType
Definition: XTandemXMLFile.h:127
ModificationDefinitionsSet default_nterm_mods_
Definition: XTandemXMLFile.h:123
String tag_
Definition: XTandemXMLFile.h:105
XTandemXMLFile & operator=(const XTandemXMLFile &rhs)
Int current_charge_
Definition: XTandemXMLFile.h:99
std::vector< ProteinHit > protein_hits_
Definition: XTandemXMLFile.h:90
ModificationDefinitionsSet mod_def_set_
Definition: XTandemXMLFile.h:120
std::stack< GroupType > group_type_stack_
Definition: XTandemXMLFile.h:136
bool skip_protein_acc_update_
Definition: XTandemXMLFile.h:84
std::set< UInt > protein_uids_
Definition: XTandemXMLFile.h:93
UInt current_id_
Definition: XTandemXMLFile.h:102
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
~XTandemXMLFile() override
Destructor.
void characters(const XMLCh *const chars, const XMLSize_t) override
std::map< UInt, String > spectrum_ids_
Definition: XTandemXMLFile.h:117
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
UInt current_stop_
Definition: XTandemXMLFile.h:111
UInt current_start_
Definition: XTandemXMLFile.h:108
bool is_protein_note_
Definition: XTandemXMLFile.h:77
String previous_seq_
Definition: XTandemXMLFile.h:114
XTandemXMLFile(const XTandemXMLFile &rhs)
XTandemXMLFile()
Default constructor.
int Int
Signed integer type.
Definition: Types.h:76
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
Loads data from an X! Tandem XML file.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22