OpenMS
MascotGenericFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Andreas Bertsch, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
16 #include <OpenMS/SYSTEM/File.h>
18 
19 #include <vector>
20 #include <fstream>
21 
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 
26 namespace OpenMS
27 {
/// Read/write Mascot generic files (MGF).
///
/// Derives from ProgressLogger (progress reporting, used by load()) and from
/// DefaultParamHandler (handling of default parameters, see updateMembers_()).
37  class OPENMS_DLLAPI MascotGenericFile :
38  public ProgressLogger,
39  public DefaultParamHandler
40  {
41 public:
42 
45 
/// destructor
47  ~MascotGenericFile() override;
48 
/// Overrides the base-class hook; see the base class for its documentation.
50  void updateMembers_() override;
51 
/// Stores the experiment data in a Mascot generic file that can be used as input for MASCOT shell execution.
///
/// @param filename Path of the file to store to
/// @param experiment Spectra to be stored
/// @param compact Presumably selects the compact storage format (no zero-intensity peaks,
///        limited number of decimal places) -- TODO confirm against the implementation
53  void store(const String& filename, const PeakMap& experiment,
54  bool compact = false);
55 
/// Stores the experiment data in Mascot generic format; the output is written to the given stream.
///
/// @param os Stream that receives the output
/// @param filename NOTE(review): role not visible in this header (the output goes to @p os) -- confirm against the implementation
/// @param experiment Spectra to be stored
/// @param compact Presumably selects the compact storage format (no zero-intensity peaks,
///        limited number of decimal places) -- TODO confirm against the implementation
57  void store(std::ostream& os, const String& filename,
58  const PeakMap& experiment, bool compact = false);
59 
67  template <typename MapType>
68  void load(const String& filename, MapType& exp)
69  {
70  if (!File::exists(filename))
71  {
72  throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
73  }
74 
75  exp.reset();
76 
77  std::ifstream is(filename.c_str());
78  // get size of file
79  is.seekg(0, std::ios::end);
80  startProgress(0, is.tellg(), "loading MGF");
81  is.seekg(0, std::ios::beg);
82 
83  UInt spectrum_number(0);
84  Size line_number(0); // carry line number for error messages within getNextSpectrum()
85 
86  typename MapType::SpectrumType spectrum;
87  spectrum.setMSLevel(2);
88  spectrum.getPrecursors().resize(1);
89  spectrum.setType(SpectrumSettings::SpectrumType::CENTROID); // MGF is always centroided, by definition
90  while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
91  {
92  exp.addSpectrum(spectrum);
93  setProgress(is.tellg());
94  ++spectrum_number;
95  } // next spectrum
96 
97  endProgress();
98  }
99 
/// Returns the enclosing strings of the peak list body for HTTP submission.
///
/// @param filename NOTE(review): how the file name enters the enclosure is not visible in this header -- confirm against the implementation
/// @return Pair of strings; presumably (text before the peak list, text after the peak list) -- TODO confirm
107  std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
108 
/// Writes a spectrum in MGF format to an ostream.
///
/// @param os Stream that receives the output
/// @param spec Spectrum to be written
/// @param filename NOTE(review): role not visible in this header -- confirm against the implementation
/// @param native_id_type_accession NOTE(review): presumably the accession describing the native ID format of @p spec -- confirm
110  void writeSpectrum(std::ostream& os, const PeakSpectrum& spec, const String& filename, const String& native_id_type_accession);
111 
112 protected:
113 
116 
/// Mapping of modifications with specificity groups that have to be treated specially.
/// NOTE(review): std::map is used here; make sure <map> is included (not visible in this listing).
118  std::map<String, String> mod_group_map_;
119 
/// Writes a parameter header.
///
/// @param name Name of the parameter
/// @param os Stream that receives the output
121  void writeParameterHeader_(const String& name, std::ostream& os);
122 
/// Writes a list of (fixed or variable) modifications.
///
/// @param mods Modifications to be written
/// @param os Stream that receives the output
/// @param variable_mods Presumably true if @p mods are variable modifications, false (default) for fixed ones -- TODO confirm
124  void writeModifications_(const std::vector<String>& mods, std::ostream& os,
125  bool variable_mods = false);
126 
/// Writes the full header.
///
/// @param os Stream that receives the output
128  void writeHeader_(std::ostream& os);
129 
/// Writes the MSExperiment.
///
/// @param os Stream that receives the output
/// @param filename NOTE(review): role not visible in this header -- confirm against the implementation
/// @param experiment Experiment whose spectra are written
131  void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
132 
/// Reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file.
///
/// Before parsing, the peaks of @p spectrum are removed, its native ID is set to
/// "index=<spectrum_number>" and a "TITLE" meta value (if present) is removed. Nothing else is
/// reset in this function, i.e. precursor data, RT and other meta values keep the state they had
/// on entry unless the block overwrites them.
///
/// Header lines handled: PEPMASS, CHARGE, RTINSECONDS, TITLE, NAME, INCHI=, SMILES, SPECTRUMID, SCANS=.
/// Everything else before the first peak line is ignored.
///
/// @param is Stream to read from, starting at its current position
/// @param spectrum Receives peaks and header information; precursor index 0 is accessed, so it must exist
/// @param line_number Running line counter, incremented for every line read; used in error messages
/// @param spectrum_number Number that is appended to "index=" to form the native ID
/// @return true if a peak list was read up to its 'END IONS' line; false if the stream ended before
///         a peak list was started
/// @throws Exception::ParseError if a peak line or a PEPMASS entry is malformed, or if the stream
///         ends inside a peak list
/// @note Number conversions in the header branches (toDouble()/toInt()) are not wrapped in a
///       try block here, so presumably their conversion errors propagate to the caller.
134  template <typename SpectrumType>
135  bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
136  {
// start from an empty peak list; the native ID encodes the running spectrum number
137  spectrum.resize(0);
138  spectrum.setNativeID(String("index=") + (spectrum_number));
139 
// the title of a previously parsed block must not leak into this one
140  if (spectrum.metaValueExists("TITLE"))
141  {
142  spectrum.removeMetaValue("TITLE");
143  }
144  typename SpectrumType::PeakType p;
145 
146  String line;
147  // seek to next peak list block
148  while (getline(is, line, '\n'))
149  {
150  ++line_number;
151 
152  line.trim(); // remove whitespaces, line-endings etc
153 
154  // found peak list block?
155  if (line == "BEGIN IONS")
156  {
// inside a block: header lines come first, followed by the peak list
157  while (getline(is, line, '\n'))
158  {
159  ++line_number;
160  line.trim(); // remove whitespaces, line-endings etc
161 
162  if (line.empty()) continue;
163 
// NOTE(review): line[0] is a plain char; isdigit() requires a value representable as
// unsigned char (or EOF), so a cast to unsigned char would be safer for non-ASCII input.
164  if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
165  {
166  std::vector<String> split;
// consume peak lines until 'END IONS' (or the end of the stream) is reached;
// the loop condition at the bottom reads and trims the next line
167  do
168  {
169  if (line.empty())
170  {
// 'continue' in a do/while jumps to the loop condition, i.e. the next line is fetched
171  continue;
172  }
173 
174  line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
175  line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
// split() is used as "at least two columns present" check before split[1] is accessed
176  if (line.split(' ', split, false))
177  {
178  try
179  {
// only the first two columns (m/z, intensity) are used; further columns are ignored
180  p.setPosition(split[0].toDouble());
181  p.setIntensity(split[1].toDouble());
182  }
183  catch (Exception::ConversionError& /*e*/)
184  {
185  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
186  }
187  spectrum.push_back(p);
188  }
189  else
190  {
191  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
192  }
193  }
194  while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
195 
// the loop above ends either on 'END IONS' or because no further line could be read
196  if (line == "END IONS")
197  {
198  return true; // found end of spectrum
199  }
200  else
201  {
202  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, R"(Reached end of file. Found "BEGIN IONS" but not the corresponding "END IONS"!)", "");
203  }
204  }
// NOTE(review): none of the header branches below matches 'END IONS'. As far as visible
// here, a block without any peak line is therefore not closed at its 'END IONS'; reading
// simply continues with the following lines -- confirm whether this is intended.
205  else if (line.hasPrefix("PEPMASS")) // parse precursor position
206  {
// substr(8) drops the keyword and the separator character that follows it
207  String tmp = line.substr(8); // copy since we might need the original line for error reporting later
208  tmp.substitute('\t', ' ');
209  std::vector<String> split;
210  tmp.split(' ', split);
// one entry: precursor m/z only; two entries: precursor m/z and intensity
211  if (split.size() == 1)
212  {
213  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
214  }
215  else if (split.size() == 2)
216  {
217  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
218  spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
219  }
220  else
221  {
222  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
223  }
224  }
225  else if (line.hasPrefix("CHARGE"))
226  {
// every '+' is removed before the integer conversion (e.g. "2+" becomes "2")
227  String tmp = line.substr(7);
228  tmp.remove('+');
229  spectrum.getPrecursors()[0].setCharge(tmp.toInt());
230  }
231  else if (line.hasPrefix("RTINSECONDS"))
232  {
233  String tmp = line.substr(12);
234  spectrum.setRT(tmp.toDouble());
235  }
236  else if (line.hasPrefix("TITLE"))
237  {
238  // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
239  if (line.hasSubstring("min"))
240  {
// try to extract a retention time given in minutes from the comma-separated title;
// if that works, only the RT is set and the title itself is not stored as meta value
241  try
242  {
243  std::vector<String> split;
244  line.split(',', split);
245  if (!split.empty())
246  {
247  for (Size i = 0; i != split.size(); ++i)
248  {
249  if (split[i].hasSubstring("min"))
250  {
251  std::vector<String> split2;
252  split[i].trim().split(' ', split2);
253  if (!split2.empty())
254  {
// minutes are converted to seconds
255  spectrum.setRT(split2[0].trim().toDouble() * 60.0);
256  }
257  }
258  }
259  }
260  }
261  catch (Exception::BaseException& /*e*/)
262  {
263  // just do nothing and write the whole title to spec
264  std::vector<String> split;
265  if (line.split('=', split))
266  {
267  if (!split[1].empty()) spectrum.setMetaValue("TITLE", split[1]);
268  }
269  }
270  }
271  else // just write the title as metainfo to the spectrum and add native ID to make the titles unique
272  {
273  Size firstEqual = line.find('=', 4);
274  if (firstEqual != std::string::npos)
275  {
// NOTE(review): the check below inspects the "TITLE" meta value currently stored on the
// spectrum, not the title found in this line -- confirm that this is the intended test.
276  if (String(spectrum.getMetaValue("TITLE")).hasSubstring(spectrum.getNativeID()))
277  {
278  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1));
279  }
280  else
281  {
282  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1) + "_" + spectrum.getNativeID());
283  }
284  }
285  }
286  }
// NOTE(review): in the NAME, INCHI= and SMILES branches 'tmp' is not used by any visible
// statement. This listing skips the inner line numbers 290, 295 and 300, so a statement
// may be missing from each branch -- confirm against the original header.
287  else if (line.hasPrefix("NAME"))
288  {
289  String tmp = line.substr(5);
291  }
292  else if (line.hasPrefix("INCHI="))
293  {
294  String tmp = line.substr(6);
296  }
297  else if (line.hasPrefix("SMILES"))
298  {
299  String tmp = line.substr(7);
301  }
302  else if (line.hasPrefix("SPECTRUMID"))
303  {
304  String tmp = line.substr(11);
305  spectrum.setMetaValue("GNPS_Spectrum_ID", tmp);
306  }
307  else if (line.hasPrefix("SCANS="))
308  {
309  String tmp = line.substr(6);
310  spectrum.setMetaValue("Scan_ID", tmp);
311  }
312  }
313  }
314  }
315 
316  return false; // found end of file
317  }
318 
319  };
320 
321 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Exception base class.
Definition: Exception.h:65
Invalid conversion exception.
Definition: Exception.h:330
File not found exception.
Definition: Exception.h:485
Parse Error exception.
Definition: Exception.h:598
static bool exists(const String &file)
Method used to test if a file exists.
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
void reset()
Clear all internal data (spectra, ranges, metadata)
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
void setMSLevel(UInt ms_level)
Sets the MS level.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:40
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)?
Definition: MascotGenericFile.h:115
std::pair< String, String > getHTTPPeakListEnclosure(const String &filename) const
enclosing Strings of the peak list body for HTTP submission
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void writeHeader_(std::ostream &os)
writes the full header
void writeModifications_(const std::vector< String > &mods, std::ostream &os, bool variable_mods=false)
write a list of (fixed or variable) modifications
void writeParameterHeader_(const String &name, std::ostream &os)
writes a parameter header
void writeMSExperiment_(std::ostream &os, const String &filename, const PeakMap &experiment)
writes the MSExperiment
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition: MascotGenericFile.h:68
~MascotGenericFile() override
destructor
void writeSpectrum(std::ostream &os, const PeakSpectrum &spec, const String &filename, const String &native_id_type_accession)
writes a spectrum in MGF format to an ostream
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file
Definition: MascotGenericFile.h:135
void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact=false)
store the experiment data in a MascotGenericFile; the output is written to the given stream,...
void updateMembers_() override
docu in base class
MascotGenericFile()
constructor
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e....
Definition: MascotGenericFile.h:118
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:84
void setPosition(PositionType const &position)
Mutable access to the position.
Definition: Peak1D.h:123
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
void setType(SpectrumType type)
sets the spectrum type
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A more convenient string class.
Definition: String.h:34
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
String & simplify()
merges subsequent whitespaces to one blank character
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & remove(char what)
Remove all occurrences of the character what.
Int toInt() const
Conversion to Int.
double toDouble() const
Conversion to double.
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespace (space, tab, line feed, carriage return) at the beginning and the end of the string
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
const std::string MSM_SMILES_STRING
Definition: Constants.h:541
const std::string MSM_INCHI_STRING
Definition: Constants.h:536
const std::string MSM_METABOLITE_NAME
Definition: Constants.h:531
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:204
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:340
static bool hasSubstring(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:112
static double toDouble(const String &this_s)
Definition: StringUtils.h:216
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22