OpenMS  2.7.0
MascotGenericFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Andreas Bertsch, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
42 #include <OpenMS/SYSTEM/File.h>
43 
44 #include <vector>
45 #include <fstream>
46 
47 #ifdef _OPENMP
48 #include <omp.h>
49 #endif
50 
51 namespace OpenMS
52 {
62  class OPENMS_DLLAPI MascotGenericFile :
63  public ProgressLogger,
64  public DefaultParamHandler
65  {
66 public:
67 
70 
/// Destructor.
72  ~MascotGenericFile() override;
73 
/// Overrides the base-class hook (declared `override`); see the base class for its documentation.
75  void updateMembers_() override;
76 
/// Stores the experiment data in a Mascot generic file (MGF) named @p filename,
/// which can be used as input for MASCOT.
/// NOTE(review): @p compact presumably selects the compact storage format
/// (no zero-intensity peaks, limited number of decimal places) -- confirm in the implementation.
78  void store(const String& filename, const PeakMap& experiment,
79  bool compact = false);
80 
/// Stores the experiment data in MGF format; the output is written to the given stream @p os
/// instead of a file.
/// NOTE(review): how @p filename is used when writing to a stream is not visible in this header -- confirm in the implementation.
82  void store(std::ostream& os, const String& filename,
83  const PeakMap& experiment, bool compact = false);
84 
92  template <typename MapType>
93  void load(const String& filename, MapType& exp)
94  {
95  if (!File::exists(filename))
96  {
97  throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
98  }
99 
100  exp.reset();
101 
102  std::ifstream is(filename.c_str());
103  // get size of file
104  is.seekg(0, std::ios::end);
105  startProgress(0, is.tellg(), "loading MGF");
106  is.seekg(0, std::ios::beg);
107 
108  UInt spectrum_number(0);
109  Size line_number(0); // carry line number for error messages within getNextSpectrum()
110 
111  typename MapType::SpectrumType spectrum;
112  spectrum.setMSLevel(2);
113  spectrum.getPrecursors().resize(1);
114  spectrum.setType(SpectrumSettings::SpectrumType::CENTROID); // MGF is always centroided, by definition
115  while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
116  {
117  exp.addSpectrum(spectrum);
118  setProgress(is.tellg());
119  ++spectrum_number;
120  } // next spectrum
121 
122  endProgress();
123  }
124 
/// Returns the enclosing Strings of the peak list body for HTTP submission.
132  std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
133 
/// Writes a spectrum in MGF format to an ostream.
135  void writeSpectrum(std::ostream& os, const PeakSpectrum& spec, const String& filename, const String& native_id_type_accession);
136 
137 protected:
138 
141 
/// Mapping of modifications with specificity groups that have to be treated specially.
143  std::map<String, String> mod_group_map_;
144 
/// Writes a parameter header.
146  void writeParameterHeader_(const String& name, std::ostream& os);
147 
/// Writes a list of (fixed or variable) modifications.
149  void writeModifications_(const std::vector<String>& mods, std::ostream& os,
150  bool variable_mods = false);
151 
/// Writes the full header.
153  void writeHeader_(std::ostream& os);
154 
/// Writes the MSExperiment.
156  void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
157 
159  template <typename SpectrumType>
160  bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
161  {
162  spectrum.resize(0);
163  spectrum.setNativeID(String("index=") + (spectrum_number));
164 
165  if (spectrum.metaValueExists("TITLE"))
166  {
167  spectrum.removeMetaValue("TITLE");
168  }
169  typename SpectrumType::PeakType p;
170 
171  String line;
172  // seek to next peak list block
173  while (getline(is, line, '\n'))
174  {
175  ++line_number;
176 
177  line.trim(); // remove whitespaces, line-endings etc
178 
179  // found peak list block?
180  if (line == "BEGIN IONS")
181  {
182  while (getline(is, line, '\n'))
183  {
184  ++line_number;
185  line.trim(); // remove whitespaces, line-endings etc
186 
187  if (line.empty()) continue;
188 
189  if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
190  {
191  std::vector<String> split;
192  do
193  {
194  if (line.empty())
195  {
196  continue;
197  }
198 
199  line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
200  line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
201  if (line.split(' ', split, false))
202  {
203  try
204  {
205  p.setPosition(split[0].toDouble());
206  p.setIntensity(split[1].toDouble());
207  }
208  catch (Exception::ConversionError& /*e*/)
209  {
210  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
211  }
212  spectrum.push_back(p);
213  }
214  else
215  {
216  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
217  }
218  }
219  while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
220 
221  if (line == "END IONS")
222  {
223  return true; // found end of spectrum
224  }
225  else
226  {
227  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Reached end of file. Found \"BEGIN IONS\" but not the corresponding \"END IONS\"!", "");
228  }
229  }
230  else if (line.hasPrefix("PEPMASS")) // parse precursor position
231  {
232  String tmp = line.substr(8); // copy since we might need the original line for error reporting later
233  tmp.substitute('\t', ' ');
234  std::vector<String> split;
235  tmp.split(' ', split);
236  if (split.size() == 1)
237  {
238  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
239  }
240  else if (split.size() == 2)
241  {
242  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
243  spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
244  }
245  else
246  {
247  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
248  }
249  }
250  else if (line.hasPrefix("CHARGE"))
251  {
252  String tmp = line.substr(7);
253  tmp.remove('+');
254  spectrum.getPrecursors()[0].setCharge(tmp.toInt());
255  }
256  else if (line.hasPrefix("RTINSECONDS"))
257  {
258  String tmp = line.substr(12);
259  spectrum.setRT(tmp.toDouble());
260  }
261  else if (line.hasPrefix("TITLE"))
262  {
263  // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
264  if (line.hasSubstring("min"))
265  {
266  try
267  {
268  std::vector<String> split;
269  line.split(',', split);
270  if (!split.empty())
271  {
272  for (Size i = 0; i != split.size(); ++i)
273  {
274  if (split[i].hasSubstring("min"))
275  {
276  std::vector<String> split2;
277  split[i].trim().split(' ', split2);
278  if (!split2.empty())
279  {
280  spectrum.setRT(split2[0].trim().toDouble() * 60.0);
281  }
282  }
283  }
284  }
285  }
286  catch (Exception::BaseException& /*e*/)
287  {
288  // just do nothing and write the whole title to spec
289  std::vector<String> split;
290  if (line.split('=', split))
291  {
292  if (split[1] != "") spectrum.setMetaValue("TITLE", split[1]);
293  }
294  }
295  }
296  else // just write the title as metainfo to the spectrum and add native ID to make the titles unique
297  {
298  Size firstEqual = line.find('=', 4);
299  if (firstEqual != std::string::npos)
300  {
301  if (String(spectrum.getMetaValue("TITLE")).hasSubstring(spectrum.getNativeID()))
302  {
303  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1));
304  }
305  else
306  {
307  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1) + "_" + spectrum.getNativeID());
308  }
309  }
310  }
311  }
312  }
313  }
314  }
315 
316  return false; // found end of file
317  }
318 
319  };
320 
321 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Exception base class.
Definition: Exception.h:92
Invalid conversion exception.
Definition: Exception.h:356
File not found exception.
Definition: Exception.h:517
Parse Error exception.
Definition: Exception.h:630
static bool exists(const String &file)
Method used to test if a file exists.
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
void reset()
Resets all internal values.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:71
void setMSLevel(UInt ms_level)
Sets the MS level.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:65
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)?
Definition: MascotGenericFile.h:140
std::pair< String, String > getHTTPPeakListEnclosure(const String &filename) const
enclosing Strings of the peak list body for HTTP submission
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void writeHeader_(std::ostream &os)
writes the full header
void writeModifications_(const std::vector< String > &mods, std::ostream &os, bool variable_mods=false)
write a list of (fixed or variable) modifications
void writeParameterHeader_(const String &name, std::ostream &os)
writes a parameter header
void writeMSExperiment_(std::ostream &os, const String &filename, const PeakMap &experiment)
writes the MSExperiment
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition: MascotGenericFile.h:93
~MascotGenericFile() override
destructor
void writeSpectrum(std::ostream &os, const PeakSpectrum &spec, const String &filename, const String &native_id_type_accession)
writes a spectrum in MGF format to an ostream
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file
Definition: MascotGenericFile.h:160
void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact=false)
store the experiment data in a MascotGenericFile; the output is written to the given stream,...
void updateMembers_() override
docu in base class
MascotGenericFile()
constructor
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e....
Definition: MascotGenericFile.h:143
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:104
void setPosition(PositionType const &position)
Mutable access to the position.
Definition: Peak1D.h:143
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void setType(SpectrumType type)
sets the spectrum type
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A more convenient string class.
Definition: String.h:61
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
String & simplify()
merges subsequent whitespaces to one blank character
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & remove(char what)
Remove all occurrences of the character what.
Int toInt() const
Conversion to int.
double toDouble() const
Conversion to double.
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47