Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
IDMapper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Marc Sturm, Hendrik Weisser, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_ID_IDMAPPER_H
36 #define OPENMS_ANALYSIS_ID_IDMAPPER_H
37 
42 
44 
46 
48 
49 #include <algorithm>
50 #include <limits>
51 
52 namespace OpenMS
53 {
67  class OPENMS_DLLAPI IDMapper :
68  public DefaultParamHandler
69  {
70 public:
71  enum Measure {MEASURE_PPM = 0, MEASURE_DA}; ///< Measure used for m/z. NOTE(review): presumably parts-per-million vs. absolute Dalton -- confirm where measure_ is evaluated.
72 
74  IDMapper(); ///< Default constructor (declaration only; defined out of line).
75 
77  IDMapper(const IDMapper& cp); ///< Copy constructor (declaration only; defined out of line).
78 
80  IDMapper& operator=(const IDMapper& rhs); ///< Copy assignment operator (declaration only; defined out of line).
81 
97  void annotate(PeakMap& map, const std::vector<PeptideIdentification>& peptide_ids, const std::vector<ProteinIdentification>& protein_ids, const bool clear_ids = false, const bool mapMS1 = false)
98  {
99  checkHits_(peptide_ids);
100 
101  if (clear_ids)
102  { // start with empty IDs
103  std::vector<PeptideIdentification> empty_ids;
104  for (PeakMap::iterator it = map.begin(); it != map.end(); ++it)
105  {
106  it->setPeptideIdentifications(empty_ids);
107  }
108  std::vector<ProteinIdentification> empty_prot_ids;
109  map.setProteinIdentifications(empty_prot_ids);
110  }
111 
112  if (peptide_ids.empty()) return;
113 
114  // append protein identifications
115  map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end());
116 
117  // store mapping of scan RT to index
118  std::multimap<double, Size> experiment_precursors;
119  for (Size i = 0; i < map.size(); i++)
120  {
121  experiment_precursors.insert(std::make_pair(map[i].getRT(), i));
122  }
123 
124  // store mapping of identification RT to index (ignore empty hits)
125  std::multimap<double, Size> identifications_precursors;
126  for (Size i = 0; i < peptide_ids.size(); ++i)
127  {
128  if (!peptide_ids[i].empty())
129  {
130  identifications_precursors.insert(std::make_pair(peptide_ids[i].getRT(), i));
131  }
132  }
133  // note that mappings are sorted by key via multimap (we rely on that down below)
134 
135  // remember which peptides were mapped (for stats later)
136  std::set<Size> peptides_mapped;
137 
138  // calculate the actual mapping
139  std::multimap<double, Size>::const_iterator experiment_iterator = experiment_precursors.begin();
140  std::multimap<double, Size>::const_iterator identifications_iterator = identifications_precursors.begin();
141  // to achieve O(n) complexity we now move along the spectra
142  // and for each spectrum we look at the peptide id's with the allowed RT range
143  // once we finish a spectrum, we simply move back in the peptide id window a little to get from the
144  // right end of the old interval to the left end of the new interval
145  while (experiment_iterator != experiment_precursors.end())
146  {
147  // maybe we hit end() of IDs during the last scan .. go back to a real value
148  if (identifications_iterator == identifications_precursors.end())
149  {
150  --identifications_iterator; // this is valid, since we have at least one peptide ID
151  }
152 
153  // go to left border of RT interval
154  while (identifications_iterator != identifications_precursors.begin() &&
155  (experiment_iterator->first - identifications_iterator->first) < rt_tolerance_) // do NOT use fabs() here, since we want the LEFT border
156  {
157  --identifications_iterator;
158  }
159  // ... we might have stepped too far left
160  if (identifications_iterator != identifications_precursors.end() && ((experiment_iterator->first - identifications_iterator->first) > rt_tolerance_))
161  {
162  ++identifications_iterator; // get into interval again (we can potentially be at end() afterwards)
163  }
164 
165  if (identifications_iterator == identifications_precursors.end())
166  { // no more ID's, so we don't have any chance of matching the next spectra
167  break; // ... do NOT put this block below, since hitting the end of ID's for one spec, still allows to match stuff in the next (when going to left border)
168  }
169 
170  // run through RT interval
171  while (identifications_iterator != identifications_precursors.end() &&
172  (identifications_iterator->first - experiment_iterator->first) < rt_tolerance_) // fabs() not required here, since are definitely within left border, and wait until exceeding the right
173  {
174  if (mapMS1 ||
175  // testing whether the m/z fits
176  ((!map[experiment_iterator->second].getPrecursors().empty()) &&
177  isMatch_(0, peptide_ids[identifications_iterator->second].getMZ(), map[experiment_iterator->second].getPrecursors()[0].getMZ())))
178  {
179  map[experiment_iterator->second].getPeptideIdentifications().push_back(peptide_ids[identifications_iterator->second]);
180  peptides_mapped.insert(identifications_iterator->second);
181  }
182  ++identifications_iterator;
183  }
184  // we are the right border now (or likely even beyond)
185  ++experiment_iterator;
186  }
187 
188  // some statistics output
189  LOG_INFO << "Peptides assigned to a precursor: " << peptides_mapped.size() << "\n"
190  << " Unassigned peptides: " << peptide_ids.size() - peptides_mapped.size() << "\n"
191  << " Unmapped (empty) peptides: " << peptide_ids.size() - identifications_precursors.size() << std::endl;
192 
193  }
194 
211  void annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false)
212  {
213  const std::vector<ProteinIdentification>& protein_ids = fmap.getProteinIdentifications();
214  std::vector<PeptideIdentification> peptide_ids;
215 
216  for (FeatureMap::const_iterator it = fmap.begin(); it != fmap.end(); ++it)
217  {
218  const std::vector<PeptideIdentification>& pi = it->getPeptideIdentifications();
219  for (std::vector<PeptideIdentification>::const_iterator itp = pi.begin(); itp != pi.end(); ++itp)
220  {
221  peptide_ids.push_back(*itp);
222  // if pepID has no m/z or RT, use the values of the feature
223  if (!itp->hasMZ()) peptide_ids.back().setMZ(it->getMZ());
224  if (!itp->hasRT()) peptide_ids.back().setRT(it->getRT());
225  }
226 
227  }
228  annotate(map, peptide_ids, protein_ids, clear_ids, mapMS1);
229  }
230 
250  void annotate(FeatureMap& map, const std::vector<PeptideIdentification>& ids, const std::vector<ProteinIdentification>& protein_ids, bool use_centroid_rt = false, bool use_centroid_mz = false, const PeakMap& spectra = PeakMap()); ///< Annotates a FeatureMap with peptide identifications (declaration only; the body is not part of this header). NOTE(review): use_centroid_rt / use_centroid_mz presumably select the feature centroid as reference position -- confirm in the implementation.
251 
268  void annotate(ConsensusMap& map, const std::vector<PeptideIdentification>& ids,
269  const std::vector<ProteinIdentification>& protein_ids,
270  bool measure_from_subelements = false,
271  bool annotate_ids_with_subelements = false,
272  const PeakMap& spectra = PeakMap()); ///< Annotates a ConsensusMap with peptide identifications (declaration only; the body is not part of this header). NOTE(review): the meaning of the two *_subelements flags is not visible here -- confirm in the implementation.
273 
274 
279  {
280  std::vector<Size> no_precursors;
281  std::vector<Size> identified;
282  std::vector<Size> unidentified;
283  };
284 
301  const std::vector<PeptideIdentification>& ids,
302  double mz_tol = 0.001,
303  double rt_tol = 0.001)
304  {
306  for (Size spectrum_index = 0; spectrum_index < spectra.size(); ++spectrum_index)
307  {
308  const MSSpectrum& spectrum = spectra[spectrum_index];
309  if (!spectrum.getPrecursors().empty())
310  {
311  bool identified(false);
312  const std::vector<Precursor>& precursors = spectrum.getPrecursors();
313 
314  // check if precursor has been identified
315  for (Size i_p = 0; i_p < precursors.size(); ++i_p)
316  {
317  // check by precursor mass and spectrum RT
318  double mz_p = precursors[i_p].getMZ();
319  double rt_s = spectrum.getRT();
320 
321  for (Size i_id = 0; i_id != ids.size(); ++i_id)
322  {
323  const PeptideIdentification& pid = ids[i_id];
324 
325  // do not count empty ids as identification of a spectrum
326  if (pid.getHits().empty()) continue;
327 
328  double mz_id = pid.getMZ();
329  double rt_id = pid.getRT();
330 
331  if ( fabs(mz_id - mz_p) < mz_tol && fabs(rt_s - rt_id) < rt_tol )
332  {
333  identified = true;
334  break;
335  }
336  }
337  }
338  if (!identified)
339  {
340  ret.unidentified.push_back(spectrum_index);
341  }
342  else
343  {
344  ret.identified.push_back(spectrum_index);
345  }
346  }
347  else
348  {
349  ret.no_precursors.push_back(spectrum_index);
350  }
351  }
352  return ret;
353  }
354 
355 
356 protected:
357  void updateMembers_();
358 
367 
371  double getAbsoluteMZTolerance_(const double mz) const;
372 
374  bool isMatch_(const double rt_distance, const double mz_theoretical, const double mz_observed) const;
375 
377  void checkHits_(const std::vector<PeptideIdentification>& ids) const;
378 
382  void getIDDetails_(const PeptideIdentification& id, double& rt_pep, DoubleList& mz_values, IntList& charges, bool use_avg_mass = false) const;
383 
385  void increaseBoundingBox_(DBoundingBox<2>& box);
386 
389  bool checkMassType_(const std::vector<DataProcessing>& processing) const;
390 
391  };
392 
393 } // namespace OpenMS
394 
395 #endif // OPENMS_ANALYSIS_ID_IDMAPPER_H
double rt_tolerance_
Allowed RT deviation.
Definition: IDMapper.h:360
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein ProteinIdentification vector
double mz_tolerance_
Allowed m/z deviation.
Definition: IDMapper.h:362
#define LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:455
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:66
void annotate(PeakMap &map, FeatureMap fmap, const bool clear_ids=false, const bool mapMS1=false)
Mapping method for peak maps.
Definition: IDMapper.h:211
std::vector< Size > no_precursors
Definition: IDMapper.h:280
Result of a partitioning by identification state with mapPrecursorsToIdentifications().
Definition: IDMapper.h:278
A container for features.
Definition: FeatureMap.h:94
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications.
Definition: IDMapper.h:67
std::vector< Size > unidentified
Definition: IDMapper.h:282
Measure
Definition: IDMapper.h:71
Iterator begin()
Definition: MSExperiment.h:162
A container for consensus elements.
Definition: ConsensusMap.h:72
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:59
Size size() const
Definition: MSExperiment.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double getMZ() const
returns the MZ of the MS2 spectrum
Iterator end()
Definition: MSExperiment.h:172
Base::iterator iterator
Definition: MSExperiment.h:129
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
void annotate(PeakMap &map, const std::vector< PeptideIdentification > &peptide_ids, const std::vector< ProteinIdentification > &protein_ids, const bool clear_ids=false, const bool mapMS1=false)
Mapping method for peak maps.
Definition: IDMapper.h:97
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
MSExperiment PeakMap
Two-dimensional map of raw data points or peaks.
Definition: StandardTypes.h:59
bool ignore_charge_
Ignore charge states during matching?
Definition: IDMapper.h:366
bool empty() const
Definition: MSExperiment.h:142
std::vector< Size > identified
Definition: IDMapper.h:281
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
static SpectraIdentificationState mapPrecursorsToIdentifications(const PeakMap &spectra, const std::vector< PeptideIdentification > &ids, double mz_tol=0.001, double rt_tol=0.001)
Mapping of peptide identifications to spectra. This helper function partitions all spectra into those ...
Definition: IDMapper.h:300
Measure measure_
Measure used for m/z.
Definition: IDMapper.h:364
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
double getRT() const
const std::vector< ProteinIdentification > & getProteinIdentifications() const
non-mutable access to the protein identifications
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13