Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MRMTransitionGroupPicker.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
46 
49 
50 // Cross-correlation
53 
54 #include <numeric>
55 
56 //#define DEBUG_TRANSITIONGROUPPICKER
57 
58 namespace OpenMS
59 {
60 
79  class OPENMS_DLLAPI MRMTransitionGroupPicker :
80  public DefaultParamHandler
81  {
82 
83 public:
84 
88 
90  ~MRMTransitionGroupPicker() override;
92 
112  template <typename SpectrumT, typename TransitionT>
114  {
115  OPENMS_PRECONDITION(transition_group.isInternallyConsistent(), "Consistent state required")
116  OPENMS_PRECONDITION(transition_group.chromatogramIdsMatch(), "Chromatogram native IDs need to match keys in transition group")
117 
118  std::vector<MSChromatogram > picked_chroms_;
119  std::vector<MSChromatogram > smoothed_chroms_;
120 
121  // Pick fragment ion chromatograms
122  for (Size k = 0; k < transition_group.getChromatograms().size(); k++)
123  {
124  MSChromatogram& chromatogram = transition_group.getChromatograms()[k];
125  String native_id = chromatogram.getNativeID();
126 
127  // only pick detecting transitions (skip all others)
128  if (transition_group.getTransitions().size() > 0 &&
129  transition_group.hasTransition(native_id) &&
130  !transition_group.getTransition(native_id).isDetectingTransition() )
131  {
132  continue;
133  }
134 
135  MSChromatogram picked_chrom, smoothed_chrom;
136  picker_.pickChromatogram(chromatogram, picked_chrom, smoothed_chrom);
137  picked_chrom.sortByIntensity();
138  picked_chroms_.push_back(picked_chrom);
139  smoothed_chroms_.push_back(smoothed_chrom);
140  }
141 
142  // Pick precursor chromatograms
143  if (use_precursors_)
144  {
145  for (Size k = 0; k < transition_group.getPrecursorChromatograms().size(); k++)
146  {
147  SpectrumT picked_chrom, smoothed_chrom;
148  SpectrumT& chromatogram = transition_group.getPrecursorChromatograms()[k];
149  String native_id = chromatogram.getNativeID();
150 
151  picker_.pickChromatogram(chromatogram, picked_chrom, smoothed_chrom);
152  picked_chrom.sortByIntensity();
153  picked_chroms_.push_back(picked_chrom);
154  smoothed_chroms_.push_back(smoothed_chrom);
155  }
156  }
157 
158  // Find features (peak groups) in this group of transitions.
159  // While there are still peaks left, one will be picked and used to create
160  // a feature. Whenever we run out of peaks, we will get -1 back as index
161  // and terminate.
162  int chr_idx, peak_idx, cnt = 0;
163  std::vector<MRMFeature> features;
164  while (true)
165  {
166  chr_idx = -1; peak_idx = -1;
167 
168  if (boundary_selection_method_ == "largest")
169  {
170  findLargestPeak(picked_chroms_, chr_idx, peak_idx);
171  }
172  else if (boundary_selection_method_ == "widest")
173  {
174  findWidestPeakIndices(picked_chroms_, chr_idx, peak_idx);
175  }
176 
177  if (chr_idx == -1 && peak_idx == -1) break;
178 
179  // Compute a feature from the individual chromatograms and add non-zero features
180  MRMFeature mrm_feature = createMRMFeature(transition_group, picked_chroms_, smoothed_chroms_, chr_idx, peak_idx);
181  if (mrm_feature.getIntensity() > 0)
182  {
183  features.push_back(mrm_feature);
184  }
185 
186  cnt++;
187  if (stop_after_feature_ > 0 && cnt > stop_after_feature_) {break;}
188  if (mrm_feature.getIntensity() > 0 &&
189  mrm_feature.getIntensity() / (double)mrm_feature.getMetaValue("total_xic") < stop_after_intensity_ratio_)
190  {
191  break;
192  }
193  }
194 
195  // Check for completely overlapping features
196  for (Size i = 0; i < features.size(); i++)
197  {
198  MRMFeature& mrm_feature = features[i];
199  bool skip = false;
200  for (Size j = 0; j < i; j++)
201  {
202  if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") &&
203  (double)mrm_feature.getMetaValue("rightWidth") <= (double)features[j].getMetaValue("rightWidth") )
204  { skip = true; }
205  }
206  if (mrm_feature.getIntensity() > 0 && !skip)
207  {
208  transition_group.addFeature(mrm_feature);
209  }
210  }
211 
212  }
213 
215  template <typename SpectrumT, typename TransitionT>
217  std::vector<SpectrumT>& picked_chroms, std::vector<SpectrumT>& smoothed_chroms, const int chr_idx, const int peak_idx)
218  {
219  OPENMS_PRECONDITION(transition_group.isInternallyConsistent(), "Consistent state required")
220  OPENMS_PRECONDITION(transition_group.chromatogramIdsMatch(), "Chromatogram native IDs need to match keys in transition group")
221 
222  MRMFeature mrmFeature;
223  mrmFeature.setIntensity(0.0);
224  double best_left = picked_chroms[chr_idx].getFloatDataArrays()[1][peak_idx];
225  double best_right = picked_chroms[chr_idx].getFloatDataArrays()[2][peak_idx];
226  double peak_apex = picked_chroms[chr_idx][peak_idx].getRT();
227  LOG_DEBUG << "**** Creating MRMFeature for peak " << chr_idx << " " << peak_idx << " " <<
228  picked_chroms[chr_idx][peak_idx] << " with borders " << best_left << " " <<
229  best_right << " (" << best_right - best_left << ")" << std::endl;
230 
231  if (recalculate_peaks_)
232  {
233  // This may change best_left / best_right
234  recalculatePeakBorders_(picked_chroms, best_left, best_right, recalculate_peaks_max_z_);
235  if (peak_apex < best_left || peak_apex > best_right)
236  {
237  // apex fell out of range, lets correct it
238  peak_apex = (best_left + best_right) / 2.0;
239  }
240  }
241  picked_chroms[chr_idx][peak_idx].setIntensity(0.0);
242 
243  // Remove other, overlapping, picked peaks (in this and other
244  // chromatograms) and then ensure that at least one peak is set to zero
245  // (the currently best peak).
246  remove_overlapping_features(picked_chroms, best_left, best_right);
247 
248  // Check for minimal peak width -> return empty feature (Intensity zero)
249  if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_)
250  {
251  return mrmFeature;
252  }
253 
254  if (compute_peak_quality_)
255  {
256  String outlier = "none";
257  double qual = computeQuality_(transition_group, picked_chroms, chr_idx, best_left, best_right, outlier);
258  if (qual < min_qual_)
259  {
260  return mrmFeature;
261  }
262  mrmFeature.setMetaValue("potentialOutlier", outlier);
263  mrmFeature.setMetaValue("initialPeakQuality", qual);
264  mrmFeature.setOverallQuality(qual);
265  }
266 
267  // Prepare linear resampling of all the chromatograms, here creating the
268  // empty master_peak_container with the same RT (m/z) values as the reference
269  // chromatogram.
270  SpectrumT master_peak_container;
271  const SpectrumT& ref_chromatogram = selectChromHelper_(transition_group, picked_chroms[chr_idx].getNativeID());
272  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left, best_right);
273 
274  // Iterate over initial transitions / chromatograms (note that we may
275  // have a different number of picked chromatograms than total transitions
276  // as not all are detecting transitions).
277  double total_intensity = 0; double total_peak_apices = 0; double total_xic = 0;
278  for (Size k = 0; k < transition_group.getTransitions().size(); k++)
279  {
280  const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID());
281  if (transition_group.getTransitions()[k].isDetectingTransition())
282  {
283  for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++)
284  {
285  total_xic += it->getIntensity();
286  }
287  }
288 
289  SpectrumT used_chromatogram;
290  // resample the current chromatogram
291  if (peak_integration_ == "original")
292  {
293  used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
294  // const SpectrumT& used_chromatogram = chromatogram; // instead of resampling
295  }
296  else if (peak_integration_ == "smoothed" && smoothed_chroms.size() <= k)
297  {
298  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
299  "Tried to calculate peak area and height without any smoothed chromatograms");
300  }
301  else if (peak_integration_ == "smoothed")
302  {
303  used_chromatogram = resampleChromatogram_(smoothed_chroms[k], master_peak_container, best_left, best_right);
304  }
305  else
306  {
307  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
308  String("Peak integration chromatogram ") + peak_integration_ + " is not a valid method for MRMTransitionGroupPicker");
309  }
310 
311  Feature f;
312  double quality = 0;
313  f.setQuality(0, quality);
314  f.setOverallQuality(quality);
315 
316  PeakIntegrator::PeakArea pa = pi_.integratePeak(used_chromatogram, best_left, best_right);
317  double peak_integral = pa.area;
318  double peak_apex_int = pa.height;
319  f.setMetaValue("peak_apex_position", pa.apex_pos);
320  if (background_subtraction_ != "none")
321  {
322  double background{0};
323  double avg_noise_level{0};
324  if ((peak_integration_ == "smoothed") && smoothed_chroms.size() <= k)
325  {
326  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
327  "Tried to calculate background estimation without any smoothed chromatograms");
328  }
329  else if (background_subtraction_ == "original")
330  {
331  const double intensity_left = chromatogram.PosBegin(best_left)->getIntensity();
332  const double intensity_right = (chromatogram.PosEnd(best_right) - 1)->getIntensity();
333  const UInt n_points = std::distance(chromatogram.PosBegin(best_left), chromatogram.PosEnd(best_right));
334  avg_noise_level = (intensity_right + intensity_left) / 2;
335  background = avg_noise_level * n_points;
336  }
337  else if (background_subtraction_ == "exact")
338  {
339  PeakIntegrator::PeakBackground pb = pi_.estimateBackground(used_chromatogram, best_left, best_right, pa.apex_pos);
340  background = pb.area;
341  avg_noise_level = pb.height;
342  }
343  peak_integral -= background;
344  peak_apex_int -= avg_noise_level;
345  if (peak_integral < 0) {peak_integral = 0;}
346  if (peak_apex_int < 0) {peak_apex_int = 0;}
347 
348  f.setMetaValue("area_background_level", background);
349  f.setMetaValue("noise_background_level", avg_noise_level);
350  }
351 
352  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
353  f.setIntensity(peak_integral);
354  ConvexHull2D hull;
355  hull.setHullPoints(pa.hull_points);
356  f.getConvexHulls().push_back(hull);
357  if (chromatogram.metaValueExists("product_mz"))
358  {
359  f.setMetaValue("MZ", chromatogram.getMetaValue("product_mz"));
360  f.setMZ(chromatogram.getMetaValue("product_mz"));
361  }
362  else
363  {
364  LOG_WARN << "Please set meta value 'product_mz' on chromatogram to populate feature m/z value" << std::endl;
365  }
366  f.setMetaValue("native_id", chromatogram.getNativeID());
367  f.setMetaValue("peak_apex_int", peak_apex_int);
368 
369  if (transition_group.getTransitions()[k].isDetectingTransition())
370  {
371  total_intensity += peak_integral;
372  total_peak_apices += peak_apex_int;
373  }
374 
375  // for backwards compatibility with TOPP tests
376  // Calculate peak shape metrics that will be used for later QC
377  if (compute_peak_shape_metrics_)
378  {
379  PeakIntegrator::PeakShapeMetrics psm = pi_.calculatePeakShapeMetrics(used_chromatogram, best_left, best_right, peak_apex_int, pa.apex_pos);
380  f.setMetaValue("width_at_5", psm.width_at_5);
381  f.setMetaValue("width_at_10", psm.width_at_10);
382  f.setMetaValue("width_at_50", psm.width_at_50);
383  f.setMetaValue("start_position_at_5", psm.start_position_at_5);
384  f.setMetaValue("start_position_at_10", psm.start_position_at_10);
385  f.setMetaValue("start_position_at_50", psm.start_position_at_50);
386  f.setMetaValue("end_position_at_5", psm.end_position_at_5);
387  f.setMetaValue("end_position_at_10", psm.end_position_at_10);
388  f.setMetaValue("end_position_at_50", psm.end_position_at_50);
389  f.setMetaValue("total_width", psm.total_width);
390  f.setMetaValue("tailing_factor", psm.tailing_factor);
391  f.setMetaValue("asymmetry_factor", psm.asymmetry_factor);
392  f.setMetaValue("slope_of_baseline", psm.slope_of_baseline);
393  f.setMetaValue("baseline_delta_2_height", psm.baseline_delta_2_height);
394  f.setMetaValue("points_across_baseline", psm.points_across_baseline);
395  f.setMetaValue("points_across_half_height", psm.points_across_half_height);
396  }
397 
398  mrmFeature.addFeature(f, chromatogram.getNativeID()); //map index and feature
399  }
400 
401  // Also pick the precursor chromatogram(s); note total_xic is not
402  // extracted here, only for fragment traces
403  for (Size k = 0; k < transition_group.getPrecursorChromatograms().size(); k++)
404  {
405 
406  const SpectrumT& chromatogram = transition_group.getPrecursorChromatograms()[k];
407  Size prec_idx = transition_group.getChromatograms().size() + k;
408 
409  SpectrumT used_chromatogram;
410  // resample the current chromatogram
411  if (peak_integration_ == "original")
412  {
413  used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
414  // const SpectrumT& used_chromatogram = chromatogram; // instead of resampling
415  }
416  else if (peak_integration_ == "smoothed" && smoothed_chroms.size() <= prec_idx)
417  {
418  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
419  "Tried to calculate peak area and height without any smoothed chromatograms");
420  }
421  else if (peak_integration_ == "smoothed")
422  {
423  used_chromatogram = resampleChromatogram_(smoothed_chroms[prec_idx], master_peak_container, best_left, best_right);
424  }
425  else
426  {
427  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
428  String("Peak integration chromatogram ") + peak_integration_ + " is not a valid method for MRMTransitionGroupPicker");
429  }
430 
431  Feature f;
432  double quality = 0;
433  f.setQuality(0, quality);
434  f.setOverallQuality(quality);
435 
436  PeakIntegrator::PeakArea pa = pi_.integratePeak(used_chromatogram, best_left, best_right);
437  double peak_integral = pa.area;
438  double peak_apex_int = pa.height;
439 
440  if (background_subtraction_ != "none")
441  {
442  double background{0};
443  double avg_noise_level{0};
444  if ((peak_integration_ == "smoothed") && smoothed_chroms.size() <= prec_idx)
445  {
446  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
447  "Tried to calculate background estimation without any smoothed chromatograms");
448  }
449  else if (background_subtraction_ == "original")
450  {
451  const double intensity_left = chromatogram.PosBegin(best_left)->getIntensity();
452  const double intensity_right = (chromatogram.PosEnd(best_right) - 1)->getIntensity();
453  const UInt n_points = std::distance(chromatogram.PosBegin(best_left), chromatogram.PosEnd(best_right));
454  avg_noise_level = (intensity_right + intensity_left) / 2;
455  background = avg_noise_level * n_points;
456  }
457  else if (background_subtraction_ == "exact")
458  {
459  PeakIntegrator::PeakBackground pb = pi_.estimateBackground(used_chromatogram, best_left, best_right, pa.apex_pos);
460  background = pb.area;
461  avg_noise_level = pb.height;
462  }
463  peak_integral -= background;
464  peak_apex_int -= avg_noise_level;
465  if (peak_integral < 0) {peak_integral = 0;}
466  if (peak_apex_int < 0) {peak_apex_int = 0;}
467 
468  f.setMetaValue("area_background_level", background);
469  f.setMetaValue("noise_background_level", avg_noise_level);
470  }
471 
472  if (chromatogram.metaValueExists("precursor_mz"))
473  {
474  f.setMZ(chromatogram.getMetaValue("precursor_mz"));
475  mrmFeature.setMZ(chromatogram.getMetaValue("precursor_mz"));
476  }
477 
478  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
479  f.setIntensity(peak_integral);
480  ConvexHull2D hull;
481  hull.setHullPoints(pa.hull_points);
482  f.getConvexHulls().push_back(hull);
483  f.setMetaValue("native_id", chromatogram.getNativeID());
484  f.setMetaValue("peak_apex_int", peak_apex_int);
485 
486  if (use_precursors_ && transition_group.getTransitions().empty())
487  {
488  total_intensity += peak_integral;
489  }
490 
491  mrmFeature.addPrecursorFeature(f, chromatogram.getNativeID());
492  }
493 
494  mrmFeature.setRT(peak_apex);
495  mrmFeature.setIntensity(total_intensity);
496  mrmFeature.setMetaValue("PeptideRef", transition_group.getTransitionGroupID());
497  mrmFeature.setMetaValue("leftWidth", best_left);
498  mrmFeature.setMetaValue("rightWidth", best_right);
499  mrmFeature.setMetaValue("total_xic", total_xic);
500  mrmFeature.setMetaValue("peak_apices_sum", total_peak_apices);
501 
502  mrmFeature.ensureUniqueId();
503  return mrmFeature;
504  }
505 
506  // maybe private, but we have tests
507 
519  template <typename SpectrumT>
520  void remove_overlapping_features(std::vector<SpectrumT>& picked_chroms, double best_left, double best_right)
521  {
522  // delete all seeds that lie within the current seed
523  //std::cout << "Removing features for peak between " << best_left << " " << best_right << std::endl;
524  for (Size k = 0; k < picked_chroms.size(); k++)
525  {
526  for (Size i = 0; i < picked_chroms[k].size(); i++)
527  {
528  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
529  {
530  //std::cout << "For Chrom " << k << " removing peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
531  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
532  picked_chroms[k][i].setIntensity(0.0);
533  }
534  }
535  }
536 
537  // delete all seeds that overlap within the current seed
538  for (Size k = 0; k < picked_chroms.size(); k++)
539  {
540  for (Size i = 0; i < picked_chroms[k].size(); i++)
541  {
542  if (picked_chroms[k][i].getIntensity() <= 0.0) {continue; }
543 
544  double left = picked_chroms[k].getFloatDataArrays()[1][i];
545  double right = picked_chroms[k].getFloatDataArrays()[2][i];
546  if ((left > best_left && left < best_right)
547  || (right > best_left && right < best_right))
548  {
549  //std::cout << "= For Chrom " << k << " removing contained peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
550  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
551  picked_chroms[k][i].setIntensity(0.0);
552  }
553  }
554  }
555  }
556 
558  void findLargestPeak(std::vector<MSChromatogram >& picked_chroms, int& chr_idx, int& peak_idx);
559 
568  void findWidestPeakIndices(const std::vector<MSChromatogram>& picked_chroms, Int& chrom_idx, Int& point_idx) const;
569 
570 protected:
571 
573  void updateMembers_() override;
574 
576  MRMTransitionGroupPicker& operator=(const MRMTransitionGroupPicker& rhs);
577 
581  template <typename SpectrumT, typename TransitionT>
582  const SpectrumT& selectChromHelper_(MRMTransitionGroup<SpectrumT, TransitionT>& transition_group, String native_id)
583  {
584  if (transition_group.hasChromatogram(native_id))
585  {
586  return transition_group.getChromatogram(native_id);
587  }
588  else if (transition_group.hasPrecursorChromatogram(native_id))
589  {
590  return transition_group.getPrecursorChromatogram(native_id);
591  }
592  else
593  {
594  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Did not find chromatogram for id '" + native_id + "'.");
595  }
596  }
597 
614  template <typename SpectrumT, typename TransitionT>
616  std::vector<SpectrumT>& picked_chroms, const int chr_idx,
617  const double best_left, const double best_right, String& outlier)
618  {
619 
620  // Resample all chromatograms around the current estimated peak and
621  // collect the raw intensities. For resampling, use a bit more on either
622  // side to correctly identify shoulders etc.
623  double resample_boundary = resample_boundary_; // sample 15 seconds more on each side
624  SpectrumT master_peak_container;
625  const SpectrumT& ref_chromatogram = selectChromHelper_(transition_group, picked_chroms[chr_idx].getNativeID());
626  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
627  std::vector<std::vector<double> > all_ints;
628  for (Size k = 0; k < picked_chroms.size(); k++)
629  {
630  const SpectrumT chromatogram = selectChromHelper_(transition_group, picked_chroms[k].getNativeID());
631  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram,
632  master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
633 
634  std::vector<double> int_here;
635  for (Size i = 0; i < used_chromatogram.size(); i++)
636  {
637  int_here.push_back(used_chromatogram[i].getIntensity());
638  }
639  all_ints.push_back(int_here);
640  }
641 
642  // Compute the cross-correlation for the collected intensities
643  std::vector<double> mean_shapes;
644  std::vector<double> mean_coel;
645  for (Size k = 0; k < all_ints.size(); k++)
646  {
647  std::vector<double> shapes;
648  std::vector<double> coel;
649  for (Size i = 0; i < all_ints.size(); i++)
650  {
651  if (i == k) {continue;}
653  all_ints[k], all_ints[i], boost::numeric_cast<int>(all_ints[i].size()), 1);
654 
655  // the first value is the x-axis (retention time) and should be an int -> it show the lag between the two
656  double res_coelution = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->first);
657  double res_shape = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->second);
658 
659  shapes.push_back(res_shape);
660  coel.push_back(res_coelution);
661  }
662 
663  // We have computed the cross-correlation of chromatogram k against
664  // all others. Use the mean of these computations as the value for k.
666  msc = std::for_each(shapes.begin(), shapes.end(), msc);
667  double shapes_mean = msc.mean();
668  msc = std::for_each(coel.begin(), coel.end(), msc);
669  double coel_mean = msc.mean();
670 
671  // mean shape scores below 0.5-0.6 should be a real sign of trouble ... !
672  // mean coel scores above 3.5 should be a real sign of trouble ... !
673  mean_shapes.push_back(shapes_mean);
674  mean_coel.push_back(coel_mean);
675  }
676 
677  // find the chromatogram with the minimal shape score and the maximal
678  // coelution score -> if it is the same chromatogram, the chance is
679  // pretty good that it is different from the others...
680  int min_index_shape = std::distance(mean_shapes.begin(), std::min_element(mean_shapes.begin(), mean_shapes.end()));
681  int max_index_coel = std::distance(mean_coel.begin(), std::max_element(mean_coel.begin(), mean_coel.end()));
682 
683  // Look at the picked peaks that are within the current left/right borders
684  int missing_peaks = 0;
685  int multiple_peaks = 0;
686 
687  // collect all seeds that lie within the current seed
688  std::vector<double> left_borders;
689  std::vector<double> right_borders;
690  for (Size k = 0; k < picked_chroms.size(); k++)
691  {
692  double l_tmp;
693  double r_tmp;
694  double max_int = -1;
695 
696  int pfound = 0;
697  l_tmp = -1;
698  r_tmp = -1;
699  for (Size i = 0; i < picked_chroms[k].size(); i++)
700  {
701  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
702  {
703  pfound++;
704  if (picked_chroms[k][i].getIntensity() > max_int)
705  {
706  max_int = picked_chroms[k][i].getIntensity() > max_int;
707  l_tmp = picked_chroms[k].getFloatDataArrays()[1][i];
708  r_tmp = picked_chroms[k].getFloatDataArrays()[2][i];
709  }
710  }
711  }
712 
713  if (l_tmp > 0.0) left_borders.push_back(l_tmp);
714  if (r_tmp > 0.0) right_borders.push_back(r_tmp);
715 
716  if (pfound == 0) missing_peaks++;
717  if (pfound > 1) multiple_peaks++;
718  }
719 
720  // Check how many chromatograms had exactly one peak picked between our
721  // current left/right borders -> this would be a sign of consistency.
722  LOG_DEBUG << " Overall found missing : " << missing_peaks << " and multiple : " << multiple_peaks << std::endl;
723 
725 
726  // Is there one transitions that is very different from the rest (e.g.
727  // the same element has a bad shape and a bad coelution score) -> potential outlier
728  if (min_index_shape == max_index_coel)
729  {
730  LOG_DEBUG << " element " << min_index_shape << " is a candidate for removal ... " << std::endl;
731  outlier = String(picked_chroms[min_index_shape].getNativeID());
732  }
733  else
734  {
735  outlier = "none";
736  }
737 
738  // For the final score (larger is better), consider these scores:
739  // - missing_peaks (the more peaks are missing, the worse)
740  // - multiple_peaks
741  // - mean of the shapes (1 is very good, 0 is bad)
742  // - mean of the co-elution scores (0 is good, 1 is ok, above 1 is pretty bad)
743  double shape_score = std::accumulate(mean_shapes.begin(), mean_shapes.end(), 0.0) / mean_shapes.size();
744  double coel_score = std::accumulate(mean_coel.begin(), mean_coel.end(), 0.0) / mean_coel.size();
745  coel_score = (coel_score - 1.0) / 2.0;
746 
747  double score = shape_score - coel_score - 1.0 * missing_peaks / picked_chroms.size();
748 
749  LOG_DEBUG << " computed score " << score << " (from " << shape_score <<
750  " - " << coel_score << " - " << 1.0 * missing_peaks / picked_chroms.size() << ")" << std::endl;
751 
752  return score;
753  }
754 
764  template <typename SpectrumT>
765  void recalculatePeakBorders_(std::vector<SpectrumT>& picked_chroms, double& best_left, double& best_right, double max_z)
766  {
767  // 1. Collect all seeds that lie within the current seed
768  // - Per chromatogram only the most intense one counts, otherwise very
769  // - low intense peaks can contribute disproportionally to the voting
770  // - procedure.
771  std::vector<double> left_borders;
772  std::vector<double> right_borders;
773  for (Size k = 0; k < picked_chroms.size(); k++)
774  {
775  double max_int = -1;
776  double left = -1;
777  double right = -1;
778  for (Size i = 0; i < picked_chroms[k].size(); i++)
779  {
780  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
781  {
782  if (picked_chroms[k].getFloatDataArrays()[0][i] > max_int)
783  {
784  max_int = picked_chroms[k].getFloatDataArrays()[0][i];
785  left = picked_chroms[k].getFloatDataArrays()[1][i];
786  right = picked_chroms[k].getFloatDataArrays()[2][i];
787  }
788  }
789  }
790  if (max_int > -1 )
791  {
792  left_borders.push_back(left);
793  right_borders.push_back(right);
794  LOG_DEBUG << " * " << k << " left boundary " << left_borders.back() << " with int " << max_int << std::endl;
795  LOG_DEBUG << " * " << k << " right boundary " << right_borders.back() << " with int " << max_int << std::endl;
796  }
797  }
798 
799  // Return for empty peak list
800  if (right_borders.empty())
801  {
802  return;
803  }
804 
805  // FEATURE IDEA: instead of Z-score use modified Z-score for small data sets
806  // http://d-scholarship.pitt.edu/7948/1/Seo.pdf
807  // http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
808  // 1. calculate median
809  // 2. MAD = calculate difference to median for each value -> take median of that
810  // 3. Mi = 0.6745*(xi - median) / MAD
811 
812  // 2. Calculate mean and standard deviation
813  // If the coefficient of variation is too large for one border, we use a
814  // "pseudo-median" instead of the border of the most intense peak.
815  double mean, stdev;
816 
817  // Right borders
818  mean = std::accumulate(right_borders.begin(), right_borders.end(), 0.0) / (double) right_borders.size();
819  stdev = std::sqrt(std::inner_product(right_borders.begin(), right_borders.end(), right_borders.begin(), 0.0)
820  / right_borders.size() - mean * mean);
821  std::sort(right_borders.begin(), right_borders.end());
822 
823  LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best "
824  << best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev
825  << " coefficient of variation" << std::endl;
826 
827  // Compare right borders of best transition with the mean
828  if (std::fabs(best_right - mean) / stdev > max_z)
829  {
830  best_right = right_borders[right_borders.size() / 2]; // pseudo median
831  LOG_DEBUG << " - Setting right boundary to " << best_right << std::endl;
832  }
833 
834  // Left borders
835  mean = std::accumulate(left_borders.begin(), left_borders.end(), 0.0) / (double) left_borders.size();
836  stdev = std::sqrt(std::inner_product(left_borders.begin(), left_borders.end(), left_borders.begin(), 0.0)
837  / left_borders.size() - mean * mean);
838  std::sort(left_borders.begin(), left_borders.end());
839 
840  LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best "
841  << best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev
842  << " coefficient of variation" << std::endl;
843 
844  // Compare left borders of best transition with the mean
845  if (std::fabs(best_left - mean) / stdev > max_z)
846  {
847  best_left = left_borders[left_borders.size() / 2]; // pseudo median
848  LOG_DEBUG << " - Setting left boundary to " << best_left << std::endl;
849  }
850 
851  }
852 
854 
855 
870  template <typename SpectrumT>
871  void prepareMasterContainer_(const SpectrumT& ref_chromatogram,
872  SpectrumT& master_peak_container, double left_boundary, double right_boundary)
873  {
874  OPENMS_PRECONDITION(master_peak_container.empty(), "Master peak container must be empty")
875 
876  // get the start / end point of this chromatogram => then add one more
877  // point beyond the two boundaries to make the resampling accurate also
878  // at the edge.
879  typename SpectrumT::const_iterator begin = ref_chromatogram.begin();
880  while (begin != ref_chromatogram.end() && begin->getMZ() < left_boundary) {begin++; }
881  if (begin != ref_chromatogram.begin()) {begin--; }
882 
883  typename SpectrumT::const_iterator end = begin;
884  while (end != ref_chromatogram.end() && end->getMZ() < right_boundary) {end++; }
885  if (end != ref_chromatogram.end()) {end++; }
886 
887  // resize the master container and set the m/z values to the ones of the master container
888  master_peak_container.resize(distance(begin, end)); // initialize to zero
889  typename SpectrumT::iterator it = master_peak_container.begin();
890  for (typename SpectrumT::const_iterator chrom_it = begin; chrom_it != end; chrom_it++, it++)
891  {
892  it->setMZ(chrom_it->getMZ());
893  }
894  }
895 
906  template <typename SpectrumT>
907  SpectrumT resampleChromatogram_(const SpectrumT& chromatogram,
908  const SpectrumT& master_peak_container, double left_boundary, double right_boundary)
909  {
910  // get the start / end point of this chromatogram => then add one more
911  // point beyond the two boundaries to make the resampling accurate also
912  // at the edge.
913  typename SpectrumT::const_iterator begin = chromatogram.begin();
914  while (begin != chromatogram.end() && begin->getMZ() < left_boundary) {begin++;}
915  if (begin != chromatogram.begin()) {begin--;}
916 
917  typename SpectrumT::const_iterator end = begin;
918  while (end != chromatogram.end() && end->getMZ() < right_boundary) {end++;}
919  if (end != chromatogram.end()) {end++;}
920 
921  SpectrumT resampled_peak_container = master_peak_container; // copy the master container, which contains the RT values
922  LinearResamplerAlign lresampler;
923  lresampler.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end());
924 
925  return resampled_peak_container;
926  }
927 
929 
930  // Members
937  double min_qual_;
938 
944 
951 
954  };
955 }
956 
957 
const double k
double total_width
Definition: PeakIntegrator.h:165
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
bool hasTransition(String key) const
Definition: MRMTransitionGroup.h:158
double min_qual_
Definition: MRMTransitionGroupPicker.h:937
String background_subtraction_
Definition: MRMTransitionGroupPicker.h:932
double recalculate_peaks_max_z_
Definition: MRMTransitionGroupPicker.h:942
A more convenient string class.
Definition: String.h:57
bool compute_peak_quality_
Definition: MRMTransitionGroupPicker.h:935
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
double mean() const
Definition: StatsHelpers.h:207
The representation of a chromatogram.
Definition: MSChromatogram.h:54
const std::vector< TransitionType > & getTransitions() const
Definition: MRMTransitionGroup.h:142
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:106
Definition: PeakIntegrator.h:106
bool hasPrecursorChromatogram(String key) const
Definition: MRMTransitionGroup.h:236
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OPENSWATHALGO_DLLAPI XCorrArrayType::const_iterator xcorrArrayGetMaxPeak(const XCorrArrayType &array)
Find best peak in an cross-correlation (highest apex)
const std::vector< ChromatogramType > & getPrecursorChromatograms() const
Definition: MRMTransitionGroup.h:208
Int points_across_half_height
Definition: PeakIntegrator.h:207
double start_position_at_50
Definition: PeakIntegrator.h:149
The PeakPickerMRM finds peaks a single chromatogram.
Definition: PeakPickerMRM.h:68
PeakIntegrator pi_
Definition: MRMTransitionGroupPicker.h:953
double width_at_5
Definition: PeakIntegrator.h:129
double slope_of_baseline
Definition: PeakIntegrator.h:194
double start_position_at_5
Definition: PeakIntegrator.h:141
int stop_after_feature_
Definition: MRMTransitionGroupPicker.h:939
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
const std::vector< ConvexHull2D > & getConvexHulls() const
Non-mutable access to the convex hulls.
IntensityType getIntensity() const
Definition: Peak2D.h:166
String peak_integration_
Definition: MRMTransitionGroupPicker.h:931
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling...
Definition: ConvexHull2D.h:72
ChromatogramType & getChromatogram(String key)
Definition: MRMTransitionGroup.h:197
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:458
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:134
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string (or DataValue::EMPTY if not found)
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:172
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:450
Definition: Scoring.h:59
double start_position_at_10
Definition: PeakIntegrator.h:145
void sortByIntensity(bool reverse=false)
Lexicographically sorts the peaks by their intensity.
double min_peak_width_
Definition: MRMTransitionGroupPicker.h:941
double resample_boundary_
Definition: MRMTransitionGroupPicker.h:943
const SpectrumT & selectChromHelper_(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, String native_id)
Select matching precursor or fragment ion chromatogram.
Definition: MRMTransitionGroupPicker.h:582
bool isInternallyConsistent() const
Check whether internal state is consistent, e.g. same number of chromatograms and transitions are pre...
Definition: MRMTransitionGroup.h:273
bool hasChromatogram(String key) const
Definition: MRMTransitionGroup.h:192
const std::vector< ChromatogramType > & getChromatograms() const
Definition: MRMTransitionGroup.h:174
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
The MRMTransitionGroupPicker finds peaks in chromatograms that belong to the same precursors...
Definition: MRMTransitionGroupPicker.h:79
double end_position_at_10
Definition: PeakIntegrator.h:157
bool recalculate_peaks_
Definition: MRMTransitionGroupPicker.h:933
ConvexHull2D::PointArrayType hull_points
Definition: PeakIntegrator.h:98
void setQuality(Size index, QualityType q)
Set the quality in dimension c.
double width_at_50
Definition: PeakIntegrator.h:137
void raster(SpecT &container)
Applies the resampling algorithm to a container (MSSpectrum or MSChromatogram).
Definition: LinearResamplerAlign.h:80
bool compute_peak_shape_metrics_
Definition: MRMTransitionGroupPicker.h:936
Linear Resampling of raw data with alignment.
Definition: LinearResamplerAlign.h:57
void prepareMasterContainer_(const SpectrumT &ref_chromatogram, SpectrumT &master_peak_container, double left_boundary, double right_boundary)
Create an empty master peak container that has the correct mz / RT values set.
Definition: MRMTransitionGroupPicker.h:871
void setHullPoints(const PointArrayType &points)
accessor for the outer(!) points (no checking is performed if this is actually a convex hull) ...
const String & getTransitionGroupID() const
Definition: MRMTransitionGroup.h:130
double computeQuality_(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, const int chr_idx, const double best_left, const double best_right, String &outlier)
Compute transition group quality (higher score is better)
Definition: MRMTransitionGroupPicker.h:615
Int points_across_baseline
Definition: PeakIntegrator.h:203
An LC-MS feature.
Definition: Feature.h:69
ChromatogramType & getPrecursorChromatogram(String key)
Definition: MRMTransitionGroup.h:241
functor to compute the mean and stddev of sequence using the std::foreach algorithm ...
Definition: StatsHelpers.h:169
double end_position_at_5
Definition: PeakIntegrator.h:153
double area
Definition: PeakIntegrator.h:86
void addFeature(MRMFeature &feature)
Definition: MRMTransitionGroup.h:262
Definition: PeakIntegrator.h:124
double asymmetry_factor
Definition: PeakIntegrator.h:189
void setOverallQuality(QualityType q)
Set the overall quality.
double end_position_at_50
Definition: PeakIntegrator.h:161
SpectrumT resampleChromatogram_(const SpectrumT &chromatogram, const SpectrumT &master_peak_container, double left_boundary, double right_boundary)
Resample a container at the positions indicated by the master peak container.
Definition: MRMTransitionGroupPicker.h:907
double height
Definition: PeakIntegrator.h:115
double width_at_10
Definition: PeakIntegrator.h:133
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software. ...
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void pickTransitionGroup(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group)
Pick a group of chromatograms belonging to the same peptide.
Definition: MRMTransitionGroupPicker.h:113
Compute the area, background and shape metrics of a peak.
Definition: PeakIntegrator.h:68
PeakPickerMRM picker_
Definition: MRMTransitionGroupPicker.h:952
bool chromatogramIdsMatch()
Ensure that chromatogram native ids match their keys in the map.
Definition: MRMTransitionGroup.h:282
Definition: PeakIntegrator.h:81
MRMFeature createMRMFeature(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, std::vector< SpectrumT > &smoothed_chroms, const int chr_idx, const int peak_idx)
Create feature from a vector of chromatograms and a specified peak.
Definition: MRMTransitionGroupPicker.h:216
double area
Definition: PeakIntegrator.h:111
bool use_precursors_
Definition: MRMTransitionGroupPicker.h:934
double apex_pos
Definition: PeakIntegrator.h:94
OPENSWATHALGO_DLLAPI XCorrArrayType normalizedCrossCorrelation(std::vector< double > &data1, std::vector< double > &data2, const int &maxdelay, const int &lag)
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
void remove_overlapping_features(std::vector< SpectrumT > &picked_chroms, double best_left, double best_right)
Remove overlapping features.
Definition: MRMTransitionGroupPicker.h:520
String boundary_selection_method_
Which method to use for selecting peaks&#39; boundaries.
Definition: MRMTransitionGroupPicker.h:950
double stop_after_intensity_ratio_
Definition: MRMTransitionGroupPicker.h:940
A multi-chromatogram MRM feature.
Definition: MRMFeature.h:49
void recalculatePeakBorders_(std::vector< SpectrumT > &picked_chroms, double &best_left, double &best_right, double max_z)
Recalculate the borders of the peak.
Definition: MRMTransitionGroupPicker.h:765
double height
Definition: PeakIntegrator.h:90
int Int
Signed integer type.
Definition: Types.h:102
double tailing_factor
Definition: PeakIntegrator.h:179
const TransitionType & getTransition(String key)
Definition: MRMTransitionGroup.h:163
double baseline_delta_2_height
Definition: PeakIntegrator.h:199

OpenMS / TOPP release 2.3.0 Documentation generated on Wed Apr 18 2018 19:29:06 using doxygen 1.8.14