Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Marc Sturm $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
42 
45 
47 
52 #include <OpenMS/FORMAT/Base64.h>
57 #include <OpenMS/CONCEPT/Helpers.h>
58 
59 #include <OpenMS/SYSTEM/File.h>
60 
61 #include <sstream>
62 #include <boost/shared_ptr.hpp>
63 #include <iostream>
64 
65 #include <QRegExp>
66 
67 //MISSING:
68 // - more than one selected ion per precursor (warning if more than one)
69 // - scanWindowList for each acquisition separately (currently for the whole spectrum only)
70 // - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
71 // - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
72 
73 // xs:id/xs:idref prefix list
74 // - sf_ru : sourceFile (run)
75 // - sf_sp : sourceFile (spectrum)
76 // - sf_pr : sourceFile (precursor)
77 // - sf_ac : sourceFile (acquisition)
78 // - sa : sample
79 // - ic : instrumentConfiguration
80 // - so_dp : software (data processing)
81 // - so_in : software (instrument)
82 // - dp_sp : dataProcessing (spectrum)
83 // - dp_bi : dataProcessing (binary data array)
84 // - dp_ch : dataProcessing (chromatogram)
85 
86 namespace OpenMS
87 {
88  class ControlledVocabulary;
89  namespace Internal
90  {
91 
109  typedef PeakMap MapType;
110  typedef MSSpectrum SpectrumType;
111  typedef MSChromatogram ChromatogramType;
112 
113  class OPENMS_DLLAPI MzMLHandler :
114  public XMLHandler
115  {
116 public:
119 
121  MzMLHandler(MapType& exp, const String& filename, const String& version, ProgressLogger& logger);
122 
124  MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
125 
127  ~MzMLHandler() override;
129 
132 
133  // Docu in base class
134  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
135 
136  // Docu in base class
137  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
138 
139  // Docu in base class
140  void characters(const XMLCh* const chars, const XMLSize_t length) override;
141 
142  //Docu in base class
143  void writeTo(std::ostream& os) override;
144 
146 
157 
159  void setOptions(const PeakFileOptions& opt)
160  {
 // Keep a copy of the caller's peak file options in this handler.
161  options_ = opt;
 // Reserve capacity in both pending-data vectors up front, using the maximum
 // data pool size taken from the options that were just stored.
162  spectrum_data_.reserve(options_.getMaxDataPoolSize());
163  chromatogram_data_.reserve(options_.getMaxDataPoolSize());
164  }
165 
168  {
169  return options_;
170  }
171 
173 
175  void getCounts(Size& spectra_counts, Size& chromatogram_counts)
176  {
 // Both results are returned through the output references: the current
 // values of the scan_count and chromatogram_count members are copied out.
177  spectra_counts = scan_count;
178  chromatogram_counts = chromatogram_count;
179  }
180 
183  {
184  consumer_ = consumer;
185  }
186 
187 protected:
188 
197 
199 
200  void writeSpectrum_(std::ostream& os, const SpectrumType& spec, Size s,
201  Internal::MzMLValidator& validator, bool renew_native_ids,
202  std::vector<std::vector< ConstDataProcessingPtr > >& dps);
203 
204  void writeChromatogram_(std::ostream& os, const ChromatogramType& chromatogram, Size c, Internal::MzMLValidator& validator);
205 
206  template <typename ContainerT>
207  void writeContainerData(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, String array_type);
208 
215  void populateSpectraWithData();
216 
223  void populateChromatogramsWithData();
224 
225  void addSpectrumMetaData_(const std::vector<MzMLHandlerHelper::BinaryData>& input_data,
226  const Size n, SpectrumType& spectrum) const;
227 
238  void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
239  Size& default_arr_length, const PeakFileOptions& peak_file_options,
240  SpectrumType& spectrum);
241 
249  void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
250  Size& default_arr_length, const PeakFileOptions& peak_file_options,
251  ChromatogramType& inp_chromatogram);
252 
253  template <typename DataType>
254  void writeBinaryDataArray(std::ostream& os, const PeakFileOptions& pf_options_, std::vector<DataType> data_to_encode, bool is32bit, String array_type);
255 
256  void writeHeader_(std::ostream& os, const MapType& exp, std::vector<std::vector< ConstDataProcessingPtr > >& dps, Internal::MzMLValidator& validator);
257 
261  const MapType* cexp_;
262 
265 
268  SpectrumType spec_;
273  std::vector<BinaryData> data_;
294 
303  {
304  std::vector<BinaryData> data;
307  bool skip_data;
308  };
309 
311  std::vector<SpectrumData> spectrum_data_;
312 
321  {
322  std::vector<BinaryData> data;
325  };
326 
328  std::vector<ChromatogramData> chromatogram_data_;
329 
331 
333  std::vector<std::pair<std::string, long> > spectra_offsets;
334  std::vector<std::pair<std::string, long> > chromatograms_offsets;
336 
339 
342 
345 
349 
353 
354  // Remember whether the RT of the spectrum was set or not
355  bool rt_set_;
356 
360  //~ Internal::MzMLValidator validator_;
361 
364 
365  /*
367  void fillData_();
368  */
369 
371  void fillChromatogramData_();
372 
374  void handleCVParam_(const String& parent_parent_tag, const String& parent_tag, /* const String & cvref, */ const String& accession, const String& name, const String& value, const String& unit_accession = "");
375 
377  void handleUserParam_(const String& parent_parent_tag, const String& parent_tag, const String& name, const String& type, const String& value);
378 
380  void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, String path, Internal::MzMLValidator& validator) const;
381 
383  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
384 
386  void writeSoftware_(std::ostream& os, const String& id, const Software& software, Internal::MzMLValidator& validator);
387 
389  void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, Internal::MzMLValidator& validator);
390 
392  void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, Internal::MzMLValidator& validator);
393 
395  void writePrecursor_(std::ostream& os, const Precursor& precursor, Internal::MzMLValidator& validator);
396 
398  void writeProduct_(std::ostream& os, const Product& product, Internal::MzMLValidator& validator);
399 
401  String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const;
402 
404  bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const;
405  };
406 
407  //--------------------------------------------------------------------------------
408 
409  } // namespace Internal
410 } // namespace OpenMS
411 
PeakFileOptions options_
Options that can be set for loading/storing.
Definition: MzMLHandler.h:264
Representation of a CV term.
Definition: ControlledVocabulary.h:60
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzMLHandler.h:311
A more convenient string class.
Definition: String.h:57
Precursor meta information.
Definition: Precursor.h:57
Class to encode and decode Base64.
Definition: Base64.h:67
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzMLHandler.h:358
Product meta information.
Definition: Product.h:48
The representation of a chromatogram.
Definition: MSChromatogram.h:54
bool rt_set_
Definition: MzMLHandler.h:355
Interfaces::IMSDataConsumer * consumer_
Consumer class to work on spectra.
Definition: MzMLHandler.h:344
Data necessary to generate a single spectrum.
Definition: MzMLHandler.h:302
Semantically validates mzML files.
Definition: MzMLValidator.h:48
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
Map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of DataProcessing steps.
Definition: MzMLHandler.h:291
std::vector< BinaryData > data
Definition: MzMLHandler.h:322
Base class for XML handlers.
Definition: XMLHandler.h:148
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:46
Binary data representation.
Definition: MzMLHandlerHelper.h:57
Description of the software used for processing.
Definition: Software.h:48
const double c
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
MSChromatogram ChromatogramType
Definition: MzDataHandler.h:62
std::vector< std::pair< std::string, long > > spectra_offsets
Definition: MzMLHandler.h:333
ChromatogramType chromatogram_
The current chromatogram.
Definition: MzMLHandler.h:271
Map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition: MzMLHandler.h:283
const ProgressLogger & logger_
Progress logger.
Definition: MzMLHandler.h:341
Map< String, Software > software_
The software list: id => Software.
Definition: MzMLHandler.h:287
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:54
Size default_array_length
Definition: MzMLHandler.h:305
Size default_array_length_
The default number of peaks in the current spectrum.
Definition: MzMLHandler.h:275
Data necessary to generate a single chromatogram.
Definition: MzMLHandler.h:320
SpectrumType spectrum
Definition: MzMLHandler.h:306
Definition: MzMLHandler.h:113
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
MapType * exp_
map pointer for reading
Definition: MzMLHandler.h:259
bool skip_spectrum_
Definition: MzMLHandler.h:352
Base64 decoder_
Decoder/Encoder for Base64-data in MzML.
Definition: MzMLHandler.h:338
Map< String, Sample > samples_
The sample list: id => Sample.
Definition: MzMLHandler.h:285
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition: MzMLHandler.h:192
String default_processing_
id of the default data processing (used when no processing is defined)
Definition: MzMLHandler.h:293
PeakFileOptions & getOptions()
Get the peak file options.
Definition: MzMLHandler.h:167
MSExperiment PeakMap
Two-dimensional map of raw data points or peaks.
Definition: StandardTypes.h:61
CVMappings mapping_
Definition: MzMLHandler.h:359
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setMSDataConsumer(Interfaces::IMSDataConsumer *consumer)
Set the IMSDataConsumer consumer which will consume the read data.
Definition: MzMLHandler.h:182
std::vector< BinaryData > data_
The spectrum data (or chromatogram data)
Definition: MzMLHandler.h:273
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:55
bool in_spectrum_list_
Flag that indicates that we're inside a spectrum (in contrast to a chromatogram)
Definition: MzMLHandler.h:277
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
void getCounts(Size &spectra_counts, Size &chromatogram_counts)
Get the spectra and chromatogram counts of a file.
Definition: MzMLHandler.h:175
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition: MzMLHandler.h:328
PeakMap MapType
XML handler for MzDataFile.
Definition: MzDataHandler.h:60
UInt scan_count
Counting spectra and chromatograms.
Definition: MzMLHandler.h:347
bool skip_chromatogram_
Flag that indicates whether this chromatogram should be skipped (due to options)
Definition: MzMLHandler.h:351
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:67
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
UInt selected_ion_count_
Count of selected ions.
Definition: MzMLHandler.h:363
MapType::PeakType PeakType
Peak type.
Definition: MzMLHandler.h:190
void setOptions(const PeakFileOptions &opt)
Set the peak file options.
Definition: MzMLHandler.h:159
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:54
std::vector< std::pair< std::string, long > > chromatograms_offsets
Definition: MzMLHandler.h:334
MzMLHandlerHelper::BinaryData BinaryData
Definition: MzMLHandler.h:198
Map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition: MzMLHandler.h:289
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software, ...
Definition: MzMLHandler.h:279
std::vector< BinaryData > data
Definition: MzMLHandler.h:304
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition: CVMappings.h:56
Options for loading files containing peak data.
Definition: PeakFileOptions.h:47
Map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition: MzMLHandler.h:281
MSSpectrum SpectrumType
Spectrum type.
Definition: MzMLHandler.h:194
bool skip_data
Definition: MzMLHandler.h:307
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
const MapType * cexp_
map pointer for writing
Definition: MzMLHandler.h:261
MSChromatogram ChromatogramType
Chromatogram type.
Definition: MzMLHandler.h:196
ChromatogramType chromatogram
Definition: MzMLHandler.h:324
UInt chromatogram_count
Definition: MzMLHandler.h:348
Size default_array_length
Definition: MzMLHandler.h:323
MSSpectrum SpectrumType
Definition: MzDataHandler.h:61

OpenMS / TOPP release 2.3.0 Documentation generated on Wed Apr 18 2018 19:29:07 using doxygen 1.8.14