OpenMS
MapAlignmentAlgorithmIdentification.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hendrik Weisser $
6 // $Authors: Eva Lange, Clemens Groepl, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
21 
22 #include <cmath> // for "abs"
23 #include <limits> // for "max"
24 #include <map>
25 
26 namespace OpenMS
27 {
47  public DefaultParamHandler,
48  public ProgressLogger
49  {
50 public:
53 
56 
57  // Set a reference for the alignment
58  template <typename DataType> void setReference(DataType& data)
59  {
60  reference_.clear();
61  if (data.empty()) return; // empty input resets the reference
62  SeqToList rt_data;
63  // set these here because "checkParameters_" may not have been called yet:
64  use_feature_rt_ = param_.getValue("use_feature_rt").toBool();
65  score_cutoff_ = param_.getValue("score_cutoff").toBool();
66  score_type_ = (std::string)param_.getValue("score_type");
67  bool sorted = getRetentionTimes_(data, rt_data);
68  computeMedians_(rt_data, reference_, sorted);
69 
70  if (reference_.empty())
71  {
72  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not extract retention time information from the reference file");
73  }
74  }
75 
85  template <typename DataType>
86  void align(std::vector<DataType>& data,
87  std::vector<TransformationDescription>& transformations,
88  Int reference_index = -1)
89  {
90  checkParameters_(data.size());
91  startProgress(0, 3, "aligning maps");
92 
93  reference_index_ = reference_index;
94  // is reference one of the input files?
95  bool use_internal_reference = (reference_index >= 0);
96  if (use_internal_reference)
97  {
98  if (reference_index >= Int(data.size()))
99  {
100  throw Exception::IndexOverflow(__FILE__, __LINE__,
101  OPENMS_PRETTY_FUNCTION,
102  reference_index, data.size());
103  }
104  setReference(data[reference_index]);
105  }
106 
107  // one set of RT data for each input map, except reference (if any):
108  std::vector<SeqToList> rt_data(data.size() - use_internal_reference);
109  bool all_sorted = true;
110  for (Size i = 0, j = 0; i < data.size(); ++i)
111  {
112  if ((reference_index >= 0) && (i == Size(reference_index)))
113  {
114  continue; // skip reference map, if any
115  }
116  all_sorted &= getRetentionTimes_(data[i], rt_data[j++]);
117  }
118  setProgress(1);
119 
120  computeTransformations_(rt_data, transformations, all_sorted);
121  setProgress(2);
122 
123  setProgress(3);
124  endProgress();
125  }
126 
127 protected:
128 
130  typedef std::map<String, DoubleList> SeqToList;
131 
133  typedef std::map<String, double> SeqToValue;
134 
137 
140 
143 
145  bool use_feature_rt_{};
146 
148  bool use_adducts_{};
149 
151  double min_score_;
152 
154  bool score_cutoff_{};
155 
158 
160  bool (*better_) (double, double) = [](double, double) {return true;};
161 
171  void computeMedians_(SeqToList& rt_data, SeqToValue& medians,
172  bool sorted = false);
173 
182  bool getRetentionTimes_(std::vector<PeptideIdentification>& peptides,
183  SeqToList& rt_data);
184 
193  // "id_data" can't be "const" here or template resolution will fail
195 
204  bool getRetentionTimes_(PeakMap& experiment, SeqToList& rt_data);
205 
220  template <typename MapType>
221  bool getRetentionTimes_(MapType& features, SeqToList& rt_data)
222  {
223  if (!score_cutoff_)
224  {
225  better_ = [](double, double)
226  {return true;};
227  }
228  else if (features[0].getPeptideIdentifications()[0].isHigherScoreBetter())
229  {
230  better_ = [](double a, double b)
231  { return a >= b; };
232  }
233  else
234  {
235  better_ = [](double a, double b)
236  { return a <= b; };
237  }
238 
239  for (typename MapType::Iterator feat_it = features.begin();
240  feat_it != features.end(); ++feat_it)
241  {
242  if (use_feature_rt_)
243  {
244  // find the peptide ID closest in RT to the feature centroid:
245  String sequence;
246  double rt_distance = std::numeric_limits<double>::max();
247  bool any_hit = false;
248  for (std::vector<PeptideIdentification>::iterator pep_it =
249  feat_it->getPeptideIdentifications().begin(); pep_it !=
250  feat_it->getPeptideIdentifications().end(); ++pep_it)
251  {
252  if (!pep_it->getHits().empty())
253  {
254  any_hit = true;
255  double current_distance = fabs(pep_it->getRT() -
256  feat_it->getRT());
257  if (current_distance < rt_distance)
258  {
259  pep_it->sort();
260  if (better_(pep_it->getHits()[0].getScore(), min_score_))
261  {
262  sequence = pep_it->getHits()[0].getSequence().toString();
263  rt_distance = current_distance;
264  }
265  }
266  }
267  }
268 
269  if (any_hit) rt_data[sequence].push_back(feat_it->getRT());
270  }
271  else
272  {
273  getRetentionTimes_(feat_it->getPeptideIdentifications(), rt_data);
274  }
275  }
276 
277  if (!use_feature_rt_ &&
278  param_.getValue("use_unassigned_peptides").toBool())
279  {
280  getRetentionTimes_(features.getUnassignedPeptideIdentifications(),
281  rt_data);
282  }
283 
284  // remove duplicates (can occur if a peptide ID was assigned to several
285  // features due to overlap or annotation tolerance):
286  for (SeqToList::iterator rt_it = rt_data.begin(); rt_it != rt_data.end();
287  ++rt_it)
288  {
289  DoubleList& rt_values = rt_it->second;
290  sort(rt_values.begin(), rt_values.end());
291  DoubleList::iterator it = unique(rt_values.begin(), rt_values.end());
292  rt_values.resize(it - rt_values.begin());
293  }
294  return true; // RTs were already sorted for duplicate detection
295  }
296 
304  void computeTransformations_(std::vector<SeqToList>& rt_data,
305  std::vector<TransformationDescription>&
306  transforms, bool sorted = false);
307 
315  void checkParameters_(const Size runs);
316 
323 
330 
331 private:
332 
335 
338 
339  };
340 
341 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Int overflow exception.
Definition: Exception.h:211
Not all required information provided.
Definition: Exception.h:155
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:77
Iterator begin() noexcept
Definition: MSExperiment.h:156
Iterator end()
Definition: MSExperiment.h:171
A map alignment algorithm based on peptide identifications from MS2 spectra.
Definition: MapAlignmentAlgorithmIdentification.h:49
void computeTransformations_(std::vector< SeqToList > &rt_data, std::vector< TransformationDescription > &transforms, bool sorted=false)
Compute retention time transformations from RT data grouped by peptide sequence.
bool getRetentionTimes_(std::vector< PeptideIdentification > &peptides, SeqToList &rt_data)
Collect retention time data from peptide IDs.
bool getRetentionTimes_(PeakMap &experiment, SeqToList &rt_data)
Collect retention time data from peptide IDs annotated to spectra.
~MapAlignmentAlgorithmIdentification() override
Destructor.
std::map< String, double > SeqToValue
Type to store one representative retention time per peptide sequence.
Definition: MapAlignmentAlgorithmIdentification.h:133
void checkParameters_(const Size runs)
Check that parameter values are valid.
void getReference_()
Get reference retention times.
String score_type_
Score type to use for filtering.
Definition: MapAlignmentAlgorithmIdentification.h:157
Int reference_index_
Index of input file to use as reference (if any)
Definition: MapAlignmentAlgorithmIdentification.h:136
void align(std::vector< DataType > &data, std::vector< TransformationDescription > &transformations, Int reference_index=-1)
Align feature maps, consensus maps, peak maps, or peptide identifications.
Definition: MapAlignmentAlgorithmIdentification.h:86
SeqToValue reference_
Reference retention times (per peptide sequence)
Definition: MapAlignmentAlgorithmIdentification.h:139
double min_score_
Minimum score to reach for a peptide to be considered.
Definition: MapAlignmentAlgorithmIdentification.h:151
Size min_run_occur_
Minimum number of runs a peptide must occur in.
Definition: MapAlignmentAlgorithmIdentification.h:142
std::map< String, DoubleList > SeqToList
Type to store retention times given for individual peptide sequences.
Definition: MapAlignmentAlgorithmIdentification.h:130
bool getRetentionTimes_(IdentificationData &id_data, SeqToList &rt_data)
Collect retention time data from spectrum matches.
MapAlignmentAlgorithmIdentification & operator=(const MapAlignmentAlgorithmIdentification &)
Assignment operator intentionally not implemented -> private.
MapAlignmentAlgorithmIdentification()
Default constructor.
IdentificationData::ScoreTypeRef handleIdDataScoreType_(const IdentificationData &id_data)
Helper function to find/define the score type for processing IdentificationData.
void setReference(DataType &data)
Definition: MapAlignmentAlgorithmIdentification.h:58
void computeMedians_(SeqToList &rt_data, SeqToValue &medians, bool sorted=false)
Compute the median retention time for each peptide sequence.
MapAlignmentAlgorithmIdentification(const MapAlignmentAlgorithmIdentification &)
Copy constructor intentionally not implemented -> private.
bool getRetentionTimes_(MapType &features, SeqToList &rt_data)
Collect retention time data from peptide IDs contained in feature maps or consensus maps.
Definition: MapAlignmentAlgorithmIdentification.h:221
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:36
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20