OpenMS
Loading...
Searching...
No Matches
MzMLHandler.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Chris Bielow, Hannes Roest $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13
15
18
21
25
26#include <map>
27
28
29//MISSING:
30// - more than one selected ion per precursor (warning if more than one)
31// - scanWindowList for each acquisition separately (currently for the whole spectrum only)
32// - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
33// - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
34
35// xs:id/xs:idref prefix list
36// - sf_ru : sourceFile (run)
37// - sf_sp : sourceFile (spectrum)
38// - sf_pr : sourceFile (precursor)
39// - sf_ac : sourceFile (acquisition)
40// - sa : sample
41// - ic : instrumentConfiguration
42// - so_dp : software (data processing)
43// - so_in : software (instrument)
44// - dp_sp : dataProcessing (spectrum)
45// - dp_bi : dataProcessing (binary data array)
46// - dp_ch : dataProcessing (chromatogram)
47
48namespace OpenMS
49{
50 namespace Interfaces
51 {
52 class IMSDataConsumer;
53 }
54
55 namespace Internal
56 {
57 class MzMLValidator;
58
59 typedef PeakMap MapType;
62
/// Handler for mzML file format.
/// NOTE(review): this listing is an extract; the original doc-comment lines and several
/// declarations (typedefs, nested struct headers, some members) are absent from it.
92 class OPENMS_DLLAPI MzMLHandler :
93 public XMLHandler
94 {
95public:
96
99
 /// Constructor for a read-only handler.
101 MzMLHandler(MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
102
 /// Constructor for a write-only handler.
104 MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
105
 /// Destructor.
107 ~MzMLHandler() override;
109
115
 /// Docu in base class XMLHandler::endElement.
117 void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
118
 /// Docu in base class XMLHandler::startElement.
120 void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
121
 /// Docu in base class XMLHandler::characters.
123 void characters(const XMLCh* const chars, const XMLSize_t length) override;
124
 /// Docu in base class XMLHandler::writeTo.
126 void writeTo(std::ostream& os) override;
127
129
140
 /// Set the peak file options.
142 void setOptions(const PeakFileOptions& opt);
143
146
148
 /// Get the spectra and chromatogram counts of a file (returned via the two reference parameters).
150 void getCounts(Size& spectra_counts, Size& chromatogram_counts);
151
161
165
 /// Handlers which support partial loading implement this method.
167 LOADDETAIL getLoadDetail() const override;
168
 /// Handlers which support partial loading implement this method.
170 void setLoadDetail(const LOADDETAIL d) override;
171
172protected:
173
 /// Delegated constructor for the two public versions.
175 MzMLHandler(const String& filename, const String& version, const ProgressLogger& logger);
176
185
187
192
200
208
 /// Fill a single spectrum with data from input.
224 void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
225 Size& length,
226 const PeakFileOptions& peak_file_options,
227 SpectrumType& spectrum);
228
 /// Fill a single chromatogram with data from input.
241 void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
242 Size& length,
243 const PeakFileOptions& peak_file_options,
244 ChromatogramType& chromatogram);
245
248
 /// Handles CV terms.
250 void handleCVParam_(const String& parent_parent_tag,
251 const String& parent_tag,
252 const String& accession,
253 const String& name,
254 const String& value,
255 const String& unit_accession = "");
256
 /// Handles user terms.
258 void handleUserParam_(const String& parent_parent_tag,
259 const String& parent_tag,
260 const String& name,
261 const String& type,
262 const String& value,
263 const String& unit_accession = "");
265
271
 /// Write out XML header including everything up to spectrumList / chromatogramList.
273 void writeHeader_(std::ostream& os,
274 const MapType& exp,
275 std::vector<std::vector< ConstDataProcessingPtr > >& dps,
276 const Internal::MzMLValidator& validator);
277
278
 /// Write out a single spectrum.
280 void writeSpectrum_(std::ostream& os,
281 const SpectrumType& spec,
282 Size spec_idx,
283 const Internal::MzMLValidator& validator,
284 bool renew_native_ids,
285 std::vector<std::vector< ConstDataProcessingPtr > >& dps);
286
 /// Write out a single chromatogram.
288 void writeChromatogram_(std::ostream& os,
289 const ChromatogramType& chromatogram,
290 Size chrom_idx,
291 const Internal::MzMLValidator& validator);
292
 /// NOTE(review): no brief is available for this template in the extract; presumably it
 /// writes the data of a spectrum/chromatogram container to `os` -- confirm against the implementation.
293 template <typename ContainerT>
294 void writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, const String& array_type);
295
 /// Write a single <binaryDataArray> element to the output.
308 template <typename DataType>
309 void writeBinaryDataArray_(std::ostream& os,
310 const PeakFileOptions& options,
311 std::vector<DataType>& data,
312 bool is32bit,
313 String array_type);
314
 /// Write a single <binaryDataArray> element for a float data array to the output.
 /// NOTE(review): listing line 331 is absent from this extract; per the symbol index the
 /// missing parameter is `const OpenMS::DataArrays::FloatDataArray& array`.
329 void writeBinaryFloatDataArray_(std::ostream& os,
330 const PeakFileOptions& options,
332 const Size spec_chrom_idx,
333 const Size array_idx,
334 bool is_spectrum,
335 const Internal::MzMLValidator& validator);
336
 /// Writes user terms.
338 void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, const String& path, const Internal::MzMLValidator& validator, const std::set<String>& exclude = {}) const;
339
 /// Helper method that writes a software.
341 void writeSoftware_(std::ostream& os, const String& id, const Software& software, const Internal::MzMLValidator& validator);
342
 /// Helper method that writes a source file.
344 void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, const Internal::MzMLValidator& validator);
345
 /// Helper method that writes a data processing list.
347 void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, const Internal::MzMLValidator& validator);
348
 /// Helper method that writes precursor information from spectra and chromatograms.
350 void writePrecursor_(std::ostream& os, const Precursor& precursor, const Internal::MzMLValidator& validator);
351
 /// Helper method that writes product information (takes a `Product`).
 /// NOTE(review): the brief in the symbol index says "precursor", which looks like a copy-paste slip.
353 void writeProduct_(std::ostream& os, const Product& product, const Internal::MzMLValidator& validator);
354
 /// Helper method to write a CV based on a meta value.
356 String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const;
357
 /// Helper method to validate if the given CV is allowed in the current location (path).
359 bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const;
360
 /// Helper method to look up a child CV term of `parent_accession` with the name `name`.
 /// NOTE(review): behavior when no such term is found is cut off in the extract -- check the original docs.
362 ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
363
365
366 // MEMBERS
367
 /// Mutable experiment pointer, null by default (presumably set by the read-only constructor -- confirm).
369 MapType* exp_{ nullptr };
370
 /// Const experiment pointer, null by default (presumably set by the write-only constructor -- confirm).
372 const MapType* cexp_{ nullptr };
373
376
 /// The spectrum data (or chromatogram data).
384 std::vector<BinaryData> bin_data_;
388 bool in_spectrum_list_{ false };
390 bool skip_spectrum_{ false };
392 bool skip_chromatogram_{ false };
394 bool rt_set_{ false };
 /// The referencing param groups: id => array (accession, value).
398 std::map<String, std::vector<SemanticValidator::CVTerm> > ref_param_;
 /// The source files: id => SourceFile.
400 std::map<String, SourceFile> source_files_;
 /// The sample list: id => Sample.
402 std::map<String, Sample> samples_;
 /// The software list: id => Software.
404 std::map<String, Software> software_;
 /// The instrument list: id => Instrument.
406 std::map<String, Instrument> instruments_;
 /// CV terms-path-combinations that have been checked in validateCV_() (mutable: written from const methods).
408 mutable std::map<std::pair<String, String>, bool> cached_terms_;
 /// The data processing list: id => vector of DataProcessingPtr.
410 std::map<String, std::vector< DataProcessingPtr > > processing_;
414 UInt selected_ion_count_{ 0 };
415
 /// Data necessary to generate a single spectrum.
 /// NOTE(review): the `SpectrumData` struct header and its members `default_array_length`
 /// (listing line 426) and `spectrum` (listing line 427) are absent from this extract.
424 {
425 std::vector<BinaryData> data;
428 };
429
 /// Vector of spectrum data stored for later parallel processing.
431 std::vector<SpectrumData> spectrum_data_;
432
446
 /// Vector of chromatogram data stored for later parallel processing.
 /// NOTE(review): the `ChromatogramData` struct definition (listing line 441 ff.) is absent from this extract.
448 std::vector<ChromatogramData> chromatogram_data_;
449
451
 /// Stores binary offsets for each <spectrum> tag.
459 std::vector<std::pair<std::string, Int64> > spectra_offsets_;
 /// Stores binary offsets for each <chromatogram> tag.
460 std::vector<std::pair<std::string, Int64> > chromatograms_offsets_;
462
465
 /// Consumer for the read data, null by default (presumably set via setMSDataConsumer() -- confirm).
467 Interfaces::IMSDataConsumer* consumer_{ nullptr };
468
 /// Counters; the *_total_ values start at -1 (presumably meaning "not yet known" -- confirm).
470 UInt scan_count_{ 0 };
471 UInt chromatogram_count_{ 0 };
472 Int scan_count_total_{ -1 };
473 Int chrom_count_total_{ -1 };
475
479
480 };
481
482 //--------------------------------------------------------------------------------
483
484 } // namespace Internal
485} // namespace OpenMS
486
char16_t XMLCh
Definition ClassTest.h:28
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition CVMappings.h:31
A 1-dimensional raw data point or peak for chromatograms.
Definition ChromatogramPeak.h:29
Definition ControlledVocabulary.h:29
Float data array class.
Definition DataArrays.h:25
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition DataValue.h:34
The interface of a consumer of spectra and chromatograms.
Definition IMSDataConsumer.h:46
Handler for mzML file format.
Definition MzMLHandler.h:94
ControlledVocabulary::CVTerm getChildWithName_(const String &parent_accession, const String &name) const
Helper method to look up a child CV term of parent_accession with the name name. If no such term is f...
MzMLHandler(const String &filename, const String &version, const ProgressLogger &logger)
delegated constructor for the two public versions
Size default_array_length_
The default number of peaks in the current spectrum.
Definition MzMLHandler.h:386
std::map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of DataProcessing.
Definition MzMLHandler.h:410
ChromatogramType chromatogram_
The current chromatogram.
Definition MzMLHandler.h:382
MzMLHandler(const MapType &exp, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a write-only handler.
void writeProduct_(std::ostream &os, const Product &product, const Internal::MzMLValidator &validator)
Helper method that writes product information from spectra and chromatograms.
ChromatogramType chromatogram
Definition MzMLHandler.h:444
MSChromatogram ChromatogramType
Chromatogram type.
Definition MzMLHandler.h:184
LOADDETAIL getLoadDetail() const override
Handlers which support partial loading implement this method.
void writeBinaryDataArray_(std::ostream &os, const PeakFileOptions &options, std::vector< DataType > &data, bool is32bit, String array_type)
Write a single <binaryDataArray> element to the output.
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition MzMLHandler.h:448
const ControlledVocabulary & cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition MzMLHandler.h:477
~MzMLHandler() override
Destructor.
void writeTo(std::ostream &os) override
Docu in base class XMLHandler::writeTo.
MzMLHandler(MapType &exp, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a read-only handler.
std::vector< std::pair< std::string, Int64 > > chromatograms_offsets_
Stores binary offsets for each <chromatogram> tag.
Definition MzMLHandler.h:460
void writeHeader_(std::ostream &os, const MapType &exp, std::vector< std::vector< ConstDataProcessingPtr > > &dps, const Internal::MzMLValidator &validator)
Write out XML header including everything up to spectrumList / chromatogramList.
void populateChromatogramsWithData_()
Populate all chromatograms on the stack with data from input.
std::vector< BinaryData > data
Definition MzMLHandler.h:425
std::vector< std::pair< std::string, Int64 > > spectra_offsets_
Stores binary offsets for each <spectrum> tag.
Definition MzMLHandler.h:459
CVMappings mapping_
Definition MzMLHandler.h:478
std::map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition MzMLHandler.h:406
std::map< String, Software > software_
The software list: id => Software.
Definition MzMLHandler.h:404
const ProgressLogger & logger_
Progress logger.
Definition MzMLHandler.h:464
SpectrumType spec_
The current spectrum.
Definition MzMLHandler.h:380
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software,...
Definition MzMLHandler.h:396
void setLoadDetail(const LOADDETAIL d) override
Handlers which support partial loading implement this method.
MSSpectrum SpectrumType
Spectrum type.
Definition MzMLHandler.h:182
void setMSDataConsumer(Interfaces::IMSDataConsumer *consumer)
Set the IMSDataConsumer consumer which will consume the read data.
PeakFileOptions options_
Options that can be set for loading/storing.
Definition MzMLHandler.h:375
void getCounts(Size &spectra_counts, Size &chromatogram_counts)
Get the spectra and chromatogram counts of a file.
Size default_array_length
Definition MzMLHandler.h:426
void handleUserParam_(const String &parent_parent_tag, const String &parent_tag, const String &name, const String &type, const String &value, const String &unit_accession="")
Handles user terms.
MapType::PeakType PeakType
Peak type.
Definition MzMLHandler.h:178
std::map< std::pair< String, String >, bool > cached_terms_
CV terms-path-combinations that have been checked in validateCV_()
Definition MzMLHandler.h:408
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition MzMLHandler.h:431
void populateSpectraWithData_()
Populate all spectra on the stack with data from input.
MzMLHandlerHelper::BinaryData BinaryData
Definition MzMLHandler.h:186
void populateSpectraWithData_(std::vector< MzMLHandlerHelper::BinaryData > &input_data, Size &length, const PeakFileOptions &peak_file_options, SpectrumType &spectrum)
Fill a single spectrum with data from input.
void writeBinaryFloatDataArray_(std::ostream &os, const PeakFileOptions &options, const OpenMS::DataArrays::FloatDataArray &array, const Size spec_chrom_idx, const Size array_idx, bool is_spectrum, const Internal::MzMLValidator &validator)
Write a single <binaryDataArray> element for a float data array to the output.
void writeSpectrum_(std::ostream &os, const SpectrumType &spec, Size spec_idx, const Internal::MzMLValidator &validator, bool renew_native_ids, std::vector< std::vector< ConstDataProcessingPtr > > &dps)
Write out a single spectrum.
void writeSoftware_(std::ostream &os, const String &id, const Software &software, const Internal::MzMLValidator &validator)
Helper method that writes a software.
PeakFileOptions & getOptions()
Get the peak file options.
void fillChromatogramData_()
Fills the current chromatogram with data points and meta data.
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
Docu in base class XMLHandler::startElement.
void writeChromatogram_(std::ostream &os, const ChromatogramType &chromatogram, Size chrom_idx, const Internal::MzMLValidator &validator)
Write out a single chromatogram.
void handleCVParam_(const String &parent_parent_tag, const String &parent_tag, const String &accession, const String &name, const String &value, const String &unit_accession="")
Handles CV terms.
void writeUserParam_(std::ostream &os, const MetaInfoInterface &meta, UInt indent, const String &path, const Internal::MzMLValidator &validator, const std::set< String > &exclude={}) const
Writes user terms.
std::vector< BinaryData > bin_data_
The spectrum data (or chromatogram data)
Definition MzMLHandler.h:384
void writeContainerData_(std::ostream &os, const PeakFileOptions &pf_options_, const ContainerT &container, const String &array_type)
String writeCV_(const ControlledVocabulary::CVTerm &c, const DataValue &metaValue) const
Helper method to write a CV based on a meta value.
bool validateCV_(const ControlledVocabulary::CVTerm &c, const String &path, const Internal::MzMLValidator &validator) const
Helper method to validate if the given CV is allowed in the current location (path)
String default_processing_
id of the default data processing (used when no processing is defined)
Definition MzMLHandler.h:412
void writeSourceFile_(std::ostream &os, const String &id, const SourceFile &software, const Internal::MzMLValidator &validator)
Helper method that writes a source file.
void characters(const XMLCh *const chars, const XMLSize_t length) override
Docu in base class XMLHandler::characters.
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
Docu in base class XMLHandler::endElement.
std::map< String, Sample > samples_
The sample list: id => Sample.
Definition MzMLHandler.h:402
void setOptions(const PeakFileOptions &opt)
Set the peak file options.
std::map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition MzMLHandler.h:398
std::map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition MzMLHandler.h:400
void populateChromatogramsWithData_(std::vector< MzMLHandlerHelper::BinaryData > &input_data, Size &length, const PeakFileOptions &peak_file_options, ChromatogramType &chromatogram)
Fill a single chromatogram with data from input.
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition MzMLHandler.h:180
void writePrecursor_(std::ostream &os, const Precursor &precursor, const Internal::MzMLValidator &validator)
Helper method that writes precursor information from spectra and chromatograms.
SpectrumType spectrum
Definition MzMLHandler.h:427
void writeDataProcessing_(std::ostream &os, const String &id, const std::vector< ConstDataProcessingPtr > &dps, const Internal::MzMLValidator &validator)
Helper method that writes a data processing list.
Data necessary to generate a single chromatogram.
Definition MzMLHandler.h:441
Data necessary to generate a single spectrum.
Definition MzMLHandler.h:424
Semantically validates mzML files.
Definition MzMLValidator.h:25
Base class for XML handlers.
Definition XMLHandler.h:328
LOADDETAIL
Definition XMLHandler.h:351
The representation of a chromatogram.
Definition MSChromatogram.h:30
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition MetaInfoInterface.h:36
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
Options for loading files containing peak data.
Definition PeakFileOptions.h:22
Precursor meta information.
Definition Precursor.h:37
Product meta information.
Definition Product.h:26
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Description of the software used for processing.
Definition Software.h:26
Description of a file location, used to store the origin of (meta) data.
Definition SourceFile.h:23
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
MSChromatogram ChromatogramType
Definition MzDataHandler.h:35
MSSpectrum SpectrumType
Definition MzDataHandler.h:34
PeakMap MapType
XML handler for MzDataFile.
Definition MzDataHandler.h:33
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Representation of a CV term.
Definition ControlledVocabulary.h:50
Representation for binary data in mzML.
Definition MzMLHandlerHelper.h:44