OpenMS
OpenSwathBase.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hannes Roest$
6 // $Authors: Hannes Roest$
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 // Consumers
14 
15 // Files
23 
24 // Kernel and implementations
30 
31 // Helpers
35 
36 // Algorithms
42 
44 
45 #include <cassert>
46 #include <limits>
47 
49 namespace OpenMS
50 {
51 
53  public TOPPBase
54 {
55 
56 public:
57 
58  TOPPOpenSwathBase(String name, String description, bool official = true) :
59  TOPPBase(name, description, official)
60  {
61  }
62 
63 private:
64 
65  void loadSwathFiles_(const StringList& file_list,
66  const bool split_file,
67  const String& tmp,
68  const String& readoptions,
69  boost::shared_ptr<ExperimentalSettings > & exp_meta,
70  std::vector< OpenSwath::SwathMap > & swath_maps,
71  Interfaces::IMSDataConsumer* plugin_consumer)
72  {
73  SwathFile swath_file;
74  swath_file.setLogType(log_type_);
75 
76  if (split_file || file_list.size() > 1)
77  {
78  // TODO cannot use data reduction here any more ...
79  swath_maps = swath_file.loadSplit(file_list, tmp, exp_meta, readoptions);
80  }
81  else
82  {
83  FileTypes::Type in_file_type = FileHandler::getTypeByFileName(file_list[0]);
84  if (in_file_type == FileTypes::MZML)
85  {
86  swath_maps = swath_file.loadMzML(file_list[0], tmp, exp_meta, readoptions, plugin_consumer);
87  }
88  else if (in_file_type == FileTypes::MZXML)
89  {
90  swath_maps = swath_file.loadMzXML(file_list[0], tmp, exp_meta, readoptions);
91  }
92  else if (in_file_type == FileTypes::SQMASS)
93  {
94  swath_maps = swath_file.loadSqMass(file_list[0], exp_meta);
95  }
96  else
97  {
98  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
99  "Input file needs to have ending mzML or mzXML");
100  }
101  }
102  }
103 
104 protected:
105 
131  bool loadSwathFiles(const StringList& file_list,
132  boost::shared_ptr<ExperimentalSettings >& exp_meta,
133  std::vector< OpenSwath::SwathMap >& swath_maps,
134  const bool split_file,
135  const String& tmp,
136  const String& readoptions,
137  const String& swath_windows_file,
138  const double min_upper_edge_dist,
139  const bool force,
140  const bool sort_swath_maps,
141  const bool prm,
142  const bool pasef,
143  Interfaces::IMSDataConsumer* plugin_consumer = nullptr)
144  {
145  // (i) Load files
146  loadSwathFiles_(file_list, split_file, tmp, readoptions, exp_meta, swath_maps, plugin_consumer);
147 
148  // (ii) Allow the user to specify the SWATH windows
149  if (!swath_windows_file.empty())
150  {
151  SwathWindowLoader::annotateSwathMapsFromFile(swath_windows_file, swath_maps, sort_swath_maps, force);
152  }
153 
154  for (Size i = 0; i < swath_maps.size(); i++)
155  {
156  OPENMS_LOG_DEBUG << "Found swath map " << i
157  << " with lower " << swath_maps[i].lower
158  << " and upper " << swath_maps[i].upper
159  << " and im Lower bounds of " << swath_maps[i].imLower
160  << " and im Upper bounds of " << swath_maps[i].imUpper
161  << " and " << swath_maps[i].sptr->getNrSpectra()
162  << " spectra." << std::endl;
163  }
164 
165  // (iii) Sanity check: there should be no overlap between the windows:
166  std::vector<std::pair<double, double>> sw_windows;
167  for (Size i = 0; i < swath_maps.size(); i++)
168  {
169  if (!swath_maps[i].ms1)
170  {
171  sw_windows.push_back(std::make_pair(swath_maps[i].lower, swath_maps[i].upper));
172  }
173  }
174  // sort by lower bound (first entry in pair)
175  std::sort(sw_windows.begin(), sw_windows.end());
176 
177  for (Size i = 1; i < sw_windows.size(); i++)
178  {
179  double lower_map_end = sw_windows[i-1].second - min_upper_edge_dist;
180  double upper_map_start = sw_windows[i].first;
181  OPENMS_LOG_DEBUG << "Extraction will go up to " << lower_map_end << " and continue at " << upper_map_start << std::endl;
182 
183  if (prm) {continue;} // skip next step as expect them to overlap and have gaps...
184 
185  if (upper_map_start - lower_map_end > 0.01)
186  {
187  OPENMS_LOG_WARN << "Extraction will have a gap between " << lower_map_end << " and " << upper_map_start << std::endl;
188  if (!force)
189  {
190  OPENMS_LOG_ERROR << "Extraction windows have a gap. Will abort (override with -force)" << std::endl;
191  return false;
192  }
193  }
194 
195  if (pasef) {continue;} // skip this step, expect there to be overlap ...
196 
197  if (lower_map_end - upper_map_start > 0.01)
198  {
199  OPENMS_LOG_WARN << "Extraction will overlap between " << lower_map_end << " and " << upper_map_start << "!\n"
200  << "This will lead to multiple extraction of the transitions in the overlapping region "
201  << "which will lead to duplicated output. It is very unlikely that you want this." << "\n"
202  << "Please fix this by providing an appropriate extraction file with -swath_windows_file" << "\n"
203  << "Did you mean to set the -pasef Flag?" << std::endl;
204  if (!force)
205  {
206  OPENMS_LOG_ERROR << "Extraction windows overlap. Will abort (override with -force)" << std::endl;
207  return false;
208  }
209  }
210  }
211  return true;
212  }
213 
227  void prepareChromOutput(Interfaces::IMSDataConsumer ** chromatogramConsumer,
228  const boost::shared_ptr<ExperimentalSettings>& exp_meta,
229  const OpenSwath::LightTargetedExperiment& transition_exp,
230  const String& out_chrom,
231  const UInt64 run_id)
232  {
233  if (!out_chrom.empty())
234  {
235  String tmp = out_chrom;
236  if (tmp.toLower().hasSuffix(".sqmass"))
237  {
238  bool full_meta = false; // can lead to very large files in memory
239  bool lossy_compression = true;
240  *chromatogramConsumer = new MSDataSqlConsumer(out_chrom, run_id, 500, full_meta, lossy_compression);
241  }
242  else
243  {
244  PlainMSDataWritingConsumer * chromConsumer = new PlainMSDataWritingConsumer(out_chrom);
245  int expected_chromatograms = transition_exp.transitions.size();
246  chromConsumer->setExpectedSize(0, expected_chromatograms);
247  chromConsumer->setExperimentalSettings(*exp_meta);
248  chromConsumer->getOptions().setWriteIndex(true); // ensure that we write the index
250 
251  // prepare data structures for lossy compression
253  MSNumpressCoder::NumpressConfig npconfig_int;
254  npconfig_mz.estimate_fixed_point = true; // critical
255  npconfig_int.estimate_fixed_point = true; // critical
256  npconfig_mz.numpressErrorTolerance = -1.0; // skip check, faster
257  npconfig_int.numpressErrorTolerance = -1.0; // skip check, faster
258  npconfig_mz.setCompression("linear");
259  npconfig_int.setCompression("slof");
260  npconfig_mz.linear_fp_mass_acc = 0.05; // set the desired RT accuracy in seconds
261 
262  chromConsumer->getOptions().setNumpressConfigurationMassTime(npconfig_mz);
263  chromConsumer->getOptions().setNumpressConfigurationIntensity(npconfig_int);
264  chromConsumer->getOptions().setCompression(true);
265 
266  *chromatogramConsumer = chromConsumer;
267  }
268  }
269  else
270  {
271  *chromatogramConsumer = new NoopMSDataWritingConsumer("");
272  }
273  }
274 
284  const String& tr_file,
285  const Param& tsv_reader_param)
286  {
287  OpenSwath::LightTargetedExperiment transition_exp;
288  ProgressLogger progresslogger;
289  progresslogger.setLogType(log_type_);
290  if (tr_type == FileTypes::TRAML)
291  {
292  progresslogger.startProgress(0, 1, "Load TraML file");
293  TargetedExperiment targeted_exp;
294  FileHandler().loadTransitions(tr_file, targeted_exp, {FileTypes::TRAML});
295  OpenSwathDataAccessHelper::convertTargetedExp(targeted_exp, transition_exp);
296  progresslogger.endProgress();
297  }
298  else if (tr_type == FileTypes::PQP)
299  {
300  progresslogger.startProgress(0, 1, "Load PQP file");
301  TransitionPQPFile().convertPQPToTargetedExperiment(tr_file.c_str(), transition_exp);
302  progresslogger.endProgress();
303  }
304  else if (tr_type == FileTypes::TSV)
305  {
306  progresslogger.startProgress(0, 1, "Load TSV file");
307  TransitionTSVFile tsv_reader;
308  tsv_reader.setParameters(tsv_reader_param);
309  tsv_reader.convertTSVToTargetedExperiment(tr_file.c_str(), tr_type, transition_exp);
310  progresslogger.endProgress();
311  }
312  else
313  {
314  OPENMS_LOG_ERROR << "Provide valid TraML, TSV or PQP transition file." << std::endl;
315  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Need to provide valid input file.");
316  }
317  return transition_exp;
318  }
319 
353  String irt_tr_file,
354  std::vector< OpenSwath::SwathMap > & swath_maps,
355  double min_rsq,
356  double min_coverage,
357  const Param& feature_finder_param,
358  const ChromExtractParams& cp_irt,
359  const Param& irt_detection_param,
360  const Param& calibration_param,
361  Size debug_level,
362  bool pasef,
363  bool load_into_memory,
364  const String& irt_trafo_out,
365  const String& irt_mzml_out)
366  {
367  TransformationDescription trafo_rtnorm;
368 
369  if (!trafo_in.empty())
370  {
371  // get read RT normalization file
372  FileHandler().loadTransformations(trafo_in, trafo_rtnorm, false, {FileTypes::TRANSFORMATIONXML});
373  Param model_params = getParam_().copy("model:", true);
374  model_params.setValue("symmetric_regression", "false");
375  model_params.setValue("span", irt_detection_param.getValue("lowess:span"));
376  model_params.setValue("num_nodes", irt_detection_param.getValue("b_spline:num_nodes"));
377  String model_type = irt_detection_param.getValue("alignmentMethod").toString();
378  trafo_rtnorm.fitModel(model_type, model_params);
379  }
380  else if (!irt_tr_file.empty())
381  {
382  // Loading iRT file
383  std::cout << "Will load iRT transitions and try to find iRT peptides" << std::endl;
384  FileTypes::Type tr_type = FileHandler::getType(irt_tr_file);
385  Param tsv_reader_param = TransitionTSVFile().getDefaults();
386  OpenSwath::LightTargetedExperiment irt_transitions = loadTransitionList(tr_type, irt_tr_file, tsv_reader_param);
387 
388  // If pasef flag is set, validate that IM is present
389  if (pasef)
390  {
391  const auto& transitions = irt_transitions.getTransitions();
392 
393  for ( Size k=0; k < (Size)transitions.size(); k++ )
394  {
395  if (transitions[k].precursor_im == -1)
396  {
397  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: iRT Transition " + transitions[k].getNativeID() + " does not have a valid IM value, this must be set to use the -pasef flag");
398  }
399  }
400  }
401 
402  // perform extraction
404  wf.setLogType(log_type_);
405  TransformationDescription im_trafo;
406  trafo_rtnorm = wf.performRTNormalization(irt_transitions, swath_maps, im_trafo,
407  min_rsq, min_coverage,
408  feature_finder_param,
409  cp_irt, irt_detection_param,
410  calibration_param, irt_mzml_out, debug_level, pasef,
411  load_into_memory);
412 
413  if (!irt_trafo_out.empty())
414  {
415  FileHandler().storeTransformations(irt_trafo_out, trafo_rtnorm, {FileTypes::TRANSFORMATIONXML});
416  }
417  }
418  return trafo_rtnorm;
419  }
420 
421 
422 };
423 
424 }
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:454
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:439
@ SMOOTHING
Smoothing of the signal to reduce noise.
Definition: DataProcessing.h:37
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
A method or algorithm argument contains illegal values.
Definition: Exception.h:629
Facilitates file handling by file type recognition.
Definition: FileHandler.h:45
void storeTransformations(const String &filename, const TransformationDescription &map, const std::vector< FileTypes::Type > allowed_types={})
Store Transformations.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
static FileTypes::Type getTypeByFileName(const String &filename)
Try to get the file type from the filename.
void loadTransformations(const String &filename, TransformationDescription &map, bool fit_model=true, const std::vector< FileTypes::Type > allowed_types={})
Loads a file into Transformations.
void loadTransitions(const String &filename, TargetedExperiment &library, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Load transitions of a spectral library.
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:46
PeakFileOptions & getOptions()
Get the peak file options.
A data consumer that inserts MS data into a SQLite database.
Definition: MSDataSqlConsumer.h:36
void setExpectedSize(Size expectedSpectra, Size expectedChromatograms) override
Set expected size of spectra and chromatograms to be written.
virtual void addDataProcessing(DataProcessing d)
Optionally add a data processing method to each chromatogram and spectrum.
void setExperimentalSettings(const ExperimentalSettings &exp) override
Set experimental settings for the whole file.
Consumer class that perform no operation.
Definition: MSDataWritingConsumer.h:233
Execute all steps for retention time and m/z calibration of SWATH-MS data.
Definition: OpenSwathWorkflow.h:231
TransformationDescription performRTNormalization(const OpenSwath::LightTargetedExperiment &irt_transitions, std::vector< OpenSwath::SwathMap > &swath_maps, TransformationDescription &im_trafo, double min_rsq, double min_coverage, const Param &feature_finder_param, const ChromExtractParams &cp_irt, const Param &irt_detection_param, const Param &calibration_param, const String &irt_mzml_out, Size debug_level, bool pasef=false, bool load_into_memory=false)
Perform RT and m/z correction of the input data using RT-normalization peptides.
static void convertTargetedExp(const OpenMS::TargetedExperiment &transition_exp_, OpenSwath::LightTargetedExperiment &transition_exp)
convert from the OpenMS TargetedExperiment to the LightTargetedExperiment
std::string toString(bool full_precision=true) const
Convert ParamValue to string.
Management and storage of parameters / INI files.
Definition: Param.h:44
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
void setNumpressConfigurationIntensity(MSNumpressCoder::NumpressConfig config)
Set numpress configuration options for intensity dimension.
void setCompression(bool compress)
void setWriteIndex(bool write_index)
Whether to write an index at the end of the file (e.g. indexedmzML file format)
void setNumpressConfigurationMassTime(MSNumpressCoder::NumpressConfig config)
Set numpress configuration options for m/z or rt dimension.
Consumer class that writes MS data to disk using the mzML format.
Definition: MSDataWritingConsumer.h:215
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
A more convenient string class.
Definition: String.h:34
String & toLower()
Converts the string to lowercase.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
File adapter for Swath files.
Definition: SwathFile.h:43
std::vector< OpenSwath::SwathMap > loadMzML(const String &file, const String &tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, const String &readoptions="normal", Interfaces::IMSDataConsumer *plugin_consumer=nullptr)
Loads a Swath run from a single mzML file.
std::vector< OpenSwath::SwathMap > loadSqMass(const String &file, boost::shared_ptr< ExperimentalSettings > &)
Loads a Swath run from a single sqMass file.
std::vector< OpenSwath::SwathMap > loadSplit(StringList file_list, const String &tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, const String &readoptions="normal")
Loads a Swath run from a list of split mzML files.
std::vector< OpenSwath::SwathMap > loadMzXML(const String &file, const String &tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, const String &readoptions="normal")
Loads a Swath run from a single mzXML file.
static void annotateSwathMapsFromFile(const std::string &filename, std::vector< OpenSwath::SwathMap > &swath_maps, bool do_sort, bool force)
Annotate a Swath map using a Swath window file specifying the individual windows.
Base class for TOPP applications.
Definition: TOPPBase.h:122
Param const & getParam_() const
Return all parameters relevant to this TOPP tool.
ProgressLogger::LogType log_type_
Type of progress logging.
Definition: TOPPBase.h:949
DataProcessing getProcessingInfo_(DataProcessing::ProcessingAction action) const
Returns the data processing information.
Definition: OpenSwathBase.h:54
void prepareChromOutput(Interfaces::IMSDataConsumer **chromatogramConsumer, const boost::shared_ptr< ExperimentalSettings > &exp_meta, const OpenSwath::LightTargetedExperiment &transition_exp, const String &out_chrom, const UInt64 run_id)
Prepare chromatogram output.
Definition: OpenSwathBase.h:227
TOPPOpenSwathBase(String name, String description, bool official=true)
Definition: OpenSwathBase.h:58
TransformationDescription performCalibration(String trafo_in, String irt_tr_file, std::vector< OpenSwath::SwathMap > &swath_maps, double min_rsq, double min_coverage, const Param &feature_finder_param, const ChromExtractParams &cp_irt, const Param &irt_detection_param, const Param &calibration_param, Size debug_level, bool pasef, bool load_into_memory, const String &irt_trafo_out, const String &irt_mzml_out)
Perform retention time and m/z calibration.
Definition: OpenSwathBase.h:352
OpenSwath::LightTargetedExperiment loadTransitionList(const FileTypes::Type &tr_type, const String &tr_file, const Param &tsv_reader_param)
Loads transition list from TraML / TSV or PQP.
Definition: OpenSwathBase.h:283
bool loadSwathFiles(const StringList &file_list, boost::shared_ptr< ExperimentalSettings > &exp_meta, std::vector< OpenSwath::SwathMap > &swath_maps, const bool split_file, const String &tmp, const String &readoptions, const String &swath_windows_file, const double min_upper_edge_dist, const bool force, const bool sort_swath_maps, const bool prm, const bool pasef, Interfaces::IMSDataConsumer *plugin_consumer=nullptr)
Load the DIA files into internal data structures.
Definition: OpenSwathBase.h:131
void loadSwathFiles_(const StringList &file_list, const bool split_file, const String &tmp, const String &readoptions, boost::shared_ptr< ExperimentalSettings > &exp_meta, std::vector< OpenSwath::SwathMap > &swath_maps, Interfaces::IMSDataConsumer *plugin_consumer)
Definition: OpenSwathBase.h:65
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:39
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:37
void fitModel(const String &model_type, const Param &params=Param())
Fits a model to the data.
This class supports reading and writing of PQP files.
Definition: TransitionPQPFile.h:191
void convertPQPToTargetedExperiment(const char *filename, OpenMS::TargetedExperiment &targeted_exp, bool legacy_traml_id=false)
Read in a PQP file and construct a targeted experiment (TraML structure)
Definition: TransitionTSVFile.h:121
void convertTSVToTargetedExperiment(const char *filename, FileTypes::Type filetype, OpenMS::TargetedExperiment &targeted_exp)
Read in a tsv/mrm file and construct a targeted experiment (TraML structure)
uint64_t UInt64
Unsigned integer type (64bit)
Definition: Types.h:47
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
const double k
Definition: Constants.h:132
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
ChromatogramExtractor parameters.
Definition: OpenSwathWorkflow.h:56
Type
Actual file types enum.
Definition: FileTypes.h:31
@ TRAML
TraML (HUPO PSI format) for transitions (.traML)
Definition: FileTypes.h:53
@ PQP
OpenSWATH Peptide Query Parameter (PQP) SQLite DB, see TransitionPQPFile.
Definition: FileTypes.h:75
@ TSV
any TSV file, for example msInspect file or OpenSWATH transition file (see TransitionTSVFile)
Definition: FileTypes.h:59
@ TRANSFORMATIONXML
Transformation description file (.trafoXML)
Definition: FileTypes.h:43
@ MZML
MzML file (.mzML)
Definition: FileTypes.h:44
@ SQMASS
SqLite format for mass and chromatograms, see SqMassFile.
Definition: FileTypes.h:74
@ MZXML
MzXML file (.mzXML)
Definition: FileTypes.h:36
Configuration class for MSNumpress.
Definition: MSNumpressCoder.h:63
double numpressErrorTolerance
Check error tolerance after encoding.
Definition: MSNumpressCoder.h:82
void setCompression(const std::string &compression)
Set compression using a string mapping to enum NumpressCompression.
Definition: MSNumpressCoder.h:123
double linear_fp_mass_acc
Desired mass accuracy for linear encoding.
Definition: MSNumpressCoder.h:105
bool estimate_fixed_point
Whether to estimate the fixed point used for encoding (highly recommended)
Definition: MSNumpressCoder.h:97
Definition: TransitionExperiment.h:185
std::vector< LightTransition > transitions
Definition: TransitionExperiment.h:193
std::vector< LightTransition > & getTransitions()
Definition: TransitionExperiment.h:196