OpenMS
Loading...
Searching...
No Matches
MascotGenericFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: Andreas Bertsch, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
16#include <OpenMS/SYSTEM/File.h>
18
19#include <vector>
20#include <fstream>
21
22#ifdef _OPENMP
23#include <omp.h>
24#endif
25
26namespace OpenMS
27{
37 class OPENMS_DLLAPI MascotGenericFile :
38 public ProgressLogger,
40 {
41public:
42
45
48
/// Docu in base class.
50 void updateMembers_() override;
51
/// Stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell execution.
/// NOTE(review): @p compact presumably selects the compact output format (no zero-intensity peaks,
/// limited number of decimal places) -- confirm against the implementation.
53 void store(const String& filename, const PeakMap& experiment,
54 bool compact = false);
55
/// Stores the experiment data in a MascotGenericFile; the output is written to the given stream @p os.
/// NOTE(review): @p filename is not opened by this declaration's visible contract -- confirm how the
/// implementation uses it.
57 void store(std::ostream& os, const String& filename,
58 const PeakMap& experiment, bool compact = false);
59
67 template <typename MapType>
68 void load(const String& filename, MapType& exp)
69 {
70 if (!File::exists(filename))
71 {
72 throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
73 }
74
75 exp.reset();
76
77 std::ifstream is(filename.c_str());
78 // get size of file
79 is.seekg(0, std::ios::end);
80 startProgress(0, is.tellg(), "loading MGF");
81 is.seekg(0, std::ios::beg);
82
83 UInt spectrum_number(0);
84 Size line_number(0); // carry line number for error messages within getNextSpectrum()
85
86 typename MapType::SpectrumType spectrum;
87 spectrum.setMSLevel(2);
88 spectrum.getPrecursors().resize(1);
89 spectrum.setType(SpectrumSettings::SpectrumType::CENTROID); // MGF is always centroided, by definition
90 while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
91 {
92 exp.addSpectrum(spectrum);
93 setProgress(is.tellg());
94 ++spectrum_number;
95 } // next spectrum
96 exp.updateRanges();
97 endProgress();
98 }
99
/// Returns the enclosing Strings of the peak list body for HTTP submission.
/// NOTE(review): presumably .first precedes and .second follows the peak list body -- confirm against the implementation.
107 std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
108
/// Writes a spectrum in MGF format to an ostream.
110 void writeSpectrum(std::ostream& os, const PeakSpectrum& spec, const String& filename, const String& native_id_type_accession);
111
112protected:
113
116
/// Mapping of modifications with specificity groups that have to be treated specially.
118 std::map<String, String> mod_group_map_;
119
/// Writes a parameter header.
121 void writeParameterHeader_(const String& name, std::ostream& os);
122
/// Writes a list of (fixed or variable) modifications; @p variable_mods selects which kind is written.
124 void writeModifications_(const std::vector<String>& mods, std::ostream& os,
125 bool variable_mods = false);
126
/// Writes the full header.
128 void writeHeader_(std::ostream& os);
129
/// Writes the MSExperiment.
131 void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
132
134 template <typename SpectrumType>
135 bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
136 {
137 spectrum.resize(0);
138 spectrum.setNativeID(String("index=") + (spectrum_number));
139
140 if (spectrum.metaValueExists("TITLE"))
141 {
142 spectrum.removeMetaValue("TITLE");
143 }
144 typename SpectrumType::PeakType p;
145
146 String line;
147 // seek to next peak list block
148 while (getline(is, line, '\n'))
149 {
150 ++line_number;
151
152 line.trim(); // remove whitespaces, line-endings etc
153
154 // found peak list block?
155 if (line == "BEGIN IONS")
156 {
157 while (getline(is, line, '\n'))
158 {
159 ++line_number;
160 line.trim(); // remove whitespaces, line-endings etc
161
162 if (line.empty()) continue;
163
164 if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
165 {
166 std::vector<String> split;
167 do
168 {
169 if (line.empty())
170 {
171 continue;
172 }
173
174 line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
175 line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
176 if (line.split(' ', split, false))
177 {
178 try
179 {
180 p.setPosition(split[0].toDouble());
181 p.setIntensity(split[1].toDouble());
182 }
183 catch (Exception::ConversionError& /*e*/)
184 {
185 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
186 }
187 spectrum.push_back(p);
188 }
189 else
190 {
191 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
192 }
193 }
194 while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
195
196 if (line == "END IONS")
197 {
198 return true; // found end of spectrum
199 }
200 else
201 {
202 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, R"(Reached end of file. Found "BEGIN IONS" but not the corresponding "END IONS"!)", "");
203 }
204 }
205 else if (line.hasPrefix("PEPMASS")) // parse precursor position
206 {
207 String tmp = line.substr(8); // copy since we might need the original line for error reporting later
208 tmp.substitute('\t', ' ');
209 std::vector<String> split;
210 tmp.split(' ', split);
211 if (split.size() == 1)
212 {
213 spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
214 }
215 else if (split.size() == 2)
216 {
217 spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
218 spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
219 }
220 else
221 {
222 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
223 }
224 }
225 else if (line.hasPrefix("CHARGE"))
226 {
227 String tmp = line.substr(7);
228 tmp.remove('+');
229 spectrum.getPrecursors()[0].setCharge(tmp.toInt());
230 }
231 else if (line.hasPrefix("RTINSECONDS"))
232 {
233 String tmp = line.substr(12);
234 spectrum.setRT(tmp.toDouble());
235 }
236 else if (line.hasPrefix("TITLE"))
237 {
238 // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
239 if (line.hasSubstring("min"))
240 {
241 try
242 {
243 std::vector<String> split;
244 line.split(',', split);
245 if (!split.empty())
246 {
247 for (Size i = 0; i != split.size(); ++i)
248 {
249 if (split[i].hasSubstring("min"))
250 {
251 std::vector<String> split2;
252 split[i].trim().split(' ', split2);
253 if (!split2.empty())
254 {
255 spectrum.setRT(split2[0].trim().toDouble() * 60.0);
256 }
257 }
258 }
259 }
260 }
261 catch (Exception::BaseException& /*e*/)
262 {
263 // just do nothing and write the whole title to spec
264 std::vector<String> split;
265 if (line.split('=', split))
266 {
267 if (!split[1].empty()) spectrum.setMetaValue("TITLE", split[1]);
268 }
269 }
270 }
271 else // just write the title as metainfo to the spectrum and add native ID to make the titles unique
272 {
273 Size firstEqual = line.find('=', 4);
274 if (firstEqual != std::string::npos)
275 {
276 if (String(spectrum.getMetaValue("TITLE")).hasSubstring(spectrum.getNativeID()))
277 {
278 spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1));
279 }
280 else
281 {
282 spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1) + "_" + spectrum.getNativeID());
283 }
284 }
285 }
286 }
287 else if (line.hasPrefix("NAME"))
288 {
289 String tmp = line.substr(5);
290 spectrum.setMetaValue(Constants::UserParam::MSM_METABOLITE_NAME, tmp);
291 }
292 else if (line.hasPrefix("COMPOUND_NAME"))
293 {
294 String tmp = line.substr(14);
295 spectrum.setMetaValue(Constants::UserParam::MSM_METABOLITE_NAME, tmp);
296 }
297 else if (line.hasPrefix("INCHI="))
298 {
299 String tmp = line.substr(6);
300 spectrum.setMetaValue(Constants::UserParam::MSM_INCHI_STRING, tmp);
301 }
302 else if (line.hasPrefix("SMILES"))
303 {
304 String tmp = line.substr(7);
305 spectrum.setMetaValue(Constants::UserParam::MSM_SMILES_STRING, tmp);
306 }
307 else if (line.hasPrefix("IONMODE"))
308 {
309 String tmp = line.substr(8);
310 spectrum.setMetaValue("IONMODE", tmp);
311 }
312 else if (line.hasPrefix("MSLEVEL"))
313 {
314 String tmp = line.substr(8);
315 try
316 {
317 int ms_level = std::stoi(tmp);
318 spectrum.setMSLevel(ms_level);
319 }
320 catch (const std::invalid_argument& /*e*/)
321 {
322 // Default to MS2 if parsing fails
323 spectrum.setMSLevel(2);
324 spectrum.setMetaValue("MSLEVEL", "2");
325 }
326 catch (const std::out_of_range& /*e*/)
327 {
328 spectrum.setMSLevel(2);
329 }
330 }
331 else if (line.hasPrefix("SOURCE_INSTRUMENT"))
332 {
333 String tmp = line.substr(18);
334 spectrum.setMetaValue("SOURCE_INSTRUMENT", tmp);
335 }
336 else if (line.hasPrefix("ORGANISM"))
337 {
338 String tmp = line.substr(9);
339 spectrum.setMetaValue("ORGANISM", tmp);
340 }
341 else if (line.hasPrefix("PI"))
342 {
343 String tmp = line.substr(3);
344 spectrum.setMetaValue("PI", tmp);
345 }
346 else if (line.hasPrefix("DATACOLLECTOR"))
347 {
348 String tmp = line.substr(14);
349 spectrum.setMetaValue("DATACOLLECTOR", tmp);
350 }
351 else if (line.hasPrefix("LIBRARYQUALITY"))
352 {
353 String tmp = line.substr(15);
354 spectrum.setMetaValue("LIBRARYQUALITY", tmp);
355 }
356 else if (line.hasPrefix("SPECTRUMID"))
357 {
358 String tmp = line.substr(11);
359 spectrum.setMetaValue("GNPS_Spectrum_ID", tmp);
360 }
361 else if (line.hasPrefix("SCANS="))
362 {
363 String tmp = line.substr(6);
364 spectrum.setMetaValue("Scan_ID", tmp);
365 }
366 }
367 }
368 }
369
370 return false; // found end of file
371 }
372
373 };
374} // namespace OpenMS
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Exception base class.
Definition Exception.h:63
Invalid conversion exception.
Definition Exception.h:331
File not found exception.
Definition Exception.h:475
Parse Error exception.
Definition Exception.h:593
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
void reset()
Clear all internal data (spectra, ranges, metadata)
void updateRanges()
Updates the m/z, intensity, mobility, and retention time ranges of all spectra and chromatograms.
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
void setMSLevel(UInt ms_level)
Sets the MS level.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Read/write Mascot generic files (MGF).
Definition MascotGenericFile.h:40
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)?
Definition MascotGenericFile.h:115
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void writeHeader_(std::ostream &os)
writes the full header
void writeModifications_(const std::vector< String > &mods, std::ostream &os, bool variable_mods=false)
write a list of (fixed or variable) modifications
void writeParameterHeader_(const String &name, std::ostream &os)
writes a parameter header
void writeMSExperiment_(std::ostream &os, const String &filename, const PeakMap &experiment)
writes the MSExperiment
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition MascotGenericFile.h:68
std::pair< String, String > getHTTPPeakListEnclosure(const String &filename) const
enclosing Strings of the peak list body for HTTP submission
~MascotGenericFile() override
destructor
void writeSpectrum(std::ostream &os, const PeakSpectrum &spec, const String &filename, const String &native_id_type_accession)
writes a spectrum in MGF format to an ostream
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file
Definition MascotGenericFile.h:135
void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact=false)
store the experiment data in a MascotGenericFile; the output is written to the given stream,...
void updateMembers_() override
docu in base class
MascotGenericFile()
constructor
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e....
Definition MascotGenericFile.h:118
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition Peak1D.h:86
void setPosition(PositionType const &position)
Mutable access to the position.
Definition Peak1D.h:125
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setType(SpectrumType type)
sets the spectrum type
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A more convenient string class.
Definition String.h:34
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
Int toInt() const
Conversion to Int.
double toDouble() const
Conversion to double.
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & remove(char what)
Remove all occurrences of the character what.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & simplify()
merges subsequent whitespaces to one blank character
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19