OpenMS
Loading...
Searching...
No Matches
SpectraMerger.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
7// --------------------------------------------------------------------------
8//
9#pragma once
10
20
21#include <vector>
22
23namespace OpenMS
24{
25
44 class OPENMS_DLLAPI SpectraMerger :
46 {
47
48protected:
49
50 /* Determine distance between two spectra
51
52 Distance is determined as
53
54 (d_rt/rt_max_ + d_mz/mz_max_) / 2
55
56 */
59 {
60public:
62 DefaultParamHandler("SpectraDistance")
63 {
64 defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
65 defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
66 defaultsToParam_(); // calls updateMembers_
67 }
68
69 void updateMembers_() override
70 {
71 rt_max_ = (double) param_.getValue("rt_tolerance");
72 mz_max_ = (double) param_.getValue("mz_tolerance");
73 }
74
75 double getSimilarity(const double d_rt, const double d_mz) const
76 {
77 // 1 - distance
78 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
79 }
80
81 // measure of SIMILARITY (not distance, i.e. 1-distance)!!
82 double operator()(const BaseFeature& first, const BaseFeature& second) const
83 {
84 // get RT distance:
85 double d_rt = fabs(first.getRT() - second.getRT());
86 double d_mz = fabs(first.getMZ() - second.getMZ());
87
88 if (d_rt > rt_max_ || d_mz > mz_max_)
89 {
90 return 0;
91 }
92
93 // calculate similarity (0-1):
94 double sim = getSimilarity(d_rt, d_mz);
95
96 return sim;
97 }
98
99protected:
100 double rt_max_;
101 double mz_max_;
102
103 }; // end of SpectraDistance
104
105public:
106
108 typedef std::map<Size, std::vector<Size> > MergeBlocks;
109
111 typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
112
113 // @name Constructors and Destructors
114 // @{
117
120
122 SpectraMerger(SpectraMerger&& source) = default;
123
125 ~SpectraMerger() override;
126 // @}
127
128 // @name Operators
129 // @{
132
135 // @}
136
137 // @name Merging functions
138 // @{
139
142 template <typename MapType>
144 {
145 IntList ms_levels = param_.getValue("block_method:ms_levels");
146
147 // now actually using an UNSIGNED int, so we can increase it by 1 even if the value is INT_MAX without overflow
148 UInt rt_block_size(param_.getValue("block_method:rt_block_size"));
149 double rt_max_length = (param_.getValue("block_method:rt_max_length"));
150
151 if (rt_max_length == 0) // no rt restriction set?
152 {
153 rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
154 }
155
156 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
157 {
158 MergeBlocks spectra_to_merge;
159 Size idx_block(0);
160 UInt block_size_count(rt_block_size + 1);
161 Size idx_spectrum(0);
162 for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
163 {
164 if (Int(it1->getMSLevel()) == *it_mslevel)
165 {
166 // block full if it contains a maximum number of scans or if maximum rt length spanned
167 if (++block_size_count >= rt_block_size ||
168 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
169 {
170 block_size_count = 0;
171 idx_block = idx_spectrum;
172 }
173 else
174 {
175 spectra_to_merge[idx_block].push_back(idx_spectrum);
176 }
177 }
178
179 ++idx_spectrum;
180 }
181 // check if last block had sacrifice spectra
182 if (block_size_count == 0) //block just got initialized
183 {
184 spectra_to_merge[idx_block] = std::vector<Size>();
185 }
186
187 // merge spectra, remove all old MS spectra and add new consensus spectra
188 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
189 }
190
191 exp.sortSpectra();
192 }
193
195 template <typename MapType>
197 {
198
199 // convert spectra's precursors to clusterizable data
200 Size data_size;
201 std::vector<BinaryTreeNode> tree;
202 std::map<Size, Size> index_mapping;
203 // local scope to save memory - we do not need the clustering stuff later
204 {
205 std::vector<BaseFeature> data;
206
207 for (Size i = 0; i < exp.size(); ++i)
208 {
209 if (exp[i].getMSLevel() != 2)
210 {
211 continue;
212 }
213
214 // remember which index in distance data ==> experiment index
215 index_mapping[data.size()] = i;
216
217 // make cluster element
218 BaseFeature bf;
219 bf.setRT(exp[i].getRT());
220 const auto& pcs = exp[i].getPrecursors();
221 // keep the first Precursor
222 if (pcs.empty())
223 {
224 throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
225 }
226 if (pcs.size() > 1)
227 {
228 OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
229 }
230 bf.setMZ(pcs[0].getMZ());
231 data.push_back(bf);
232 }
233 data_size = data.size();
234
236 llc.setParameters(param_.copy("precursor_method:", true));
237 SingleLinkage sl;
238 DistanceMatrix<float> dist; // will be filled
240
241 // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
242 ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
243 }
244
245 // extract the clusters
247 std::vector<std::vector<Size> > clusters;
248 // count number of real tree nodes (not the -1 ones):
249 Size node_count = 0;
250 for (Size ii = 0; ii < tree.size(); ++ii)
251 {
252 if (tree[ii].distance >= 1)
253 {
254 tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
255 }
256 if (tree[ii].distance != -1)
257 {
258 ++node_count;
259 }
260 }
261 ca.cut(data_size - node_count, tree, clusters);
262
263 // convert to blocks
264 MergeBlocks spectra_to_merge;
265
266 for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
267 {
268 if (clusters[i_outer].size() <= 1)
269 {
270 continue;
271 }
272 // init block with first cluster element
273 Size cl_index0 = clusters[i_outer][0];
274 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
275 // add all other elements
276 for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
277 {
278 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
279 }
280 }
281
282 // do it
283 mergeSpectra_(exp, spectra_to_merge, 2);
284
285 exp.sortSpectra();
286 }
287
296 static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
297 {
298 if (mz1 == mz2 || tol_ppm <= 0)
299 {
300 return true;
301 }
302
303 const int min_c = 1;
304 const int max_iso_diff = 5; // maximum charge difference 5 is more than enough
305 const double max_charge_diff_ratio = 3.0; // maximum ratio between charges (large / small charge)
306
307 for (int c1 = min_c; c1 <= max_c; ++c1)
308 {
309 double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
310
311 for (int c2 = min_c; c2 <= max_c; ++c2)
312 {
313 if (c1 / c2 > max_charge_diff_ratio)
314 {
315 continue;
316 }
317 if (c2 / c1 > max_charge_diff_ratio)
318 {
319 break;
320 }
321
322 double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
323
324 if (fabs(mass1 - mass2) > max_iso_diff)
325 {
326 continue;
327 }
328 for (int i = -max_iso_diff; i <= max_iso_diff; ++i)
329 {
330 if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
331 {
332 return true;
333 }
334 }
335 }
336 }
337 return false;
338 }
339
347 template <typename MapType>
348 void average(MapType& exp, const String& average_type, int ms_level = -1)
349 {
350 // MS level to be averaged
351 if (ms_level < 0)
352 {
353 ms_level = param_.getValue("average_gaussian:ms_level");
354 if (average_type == "tophat")
355 {
356 ms_level = param_.getValue("average_tophat:ms_level");
357 }
358 }
359
360 // spectrum type (profile, centroid or automatic)
361 std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
362 if (average_type == "tophat")
363 {
364 spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
365 }
366
367 // parameters for Gaussian averaging
368 double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
369 double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
370 double cutoff(param_.getValue("average_gaussian:cutoff"));
371 double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
372 int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
373
374 // parameters for Top-Hat averaging
375 bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
376 double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
377 double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
378 int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
379 if ((range_scans % 2) == 0)
380 {
381 ++range_scans;
382 }
383 range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
384
385 AverageBlocks spectra_to_average_over;
386
387 // loop over RT
388 int n(0); // spectrum index
389 int cntr(0); // spectrum counter
390 for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
391 {
392 if (Int(it_rt->getMSLevel()) == ms_level)
393 {
394 int m; // spectrum index
395 int steps;
396 bool terminate_now;
397 typename MapType::const_iterator it_rt_2;
398
399 // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
400 steps = 0;
401 m = n + 1;
402 it_rt_2 = it_rt + 1;
403 terminate_now = false;
404 while (it_rt_2 != exp.end() && !terminate_now)
405 {
406 if (Int(it_rt_2->getMSLevel()) == ms_level)
407 {
408 bool add = true;
409 // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
410 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
411 it_rt_2->getPrecursors().size() > 0)
412 {
413 double mz1 = it_rt->getPrecursors()[0].getMZ();
414 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
415 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
416 }
417
418 if (add)
419 {
420 double weight = 1;
421 if (average_type == "gaussian")
422 {
423 //factor * (rt_2 -rt)^2
424 double base = it_rt_2->getRT() - it_rt->getRT();
425 weight = std::exp(factor * base * base);
426 }
427 std::pair<Size, double> p(m, weight);
428 spectra_to_average_over[n].push_back(p);
429 }
430 ++steps;
431 }
432 if (average_type == "gaussian")
433 {
434 // Gaussian
435 double base = it_rt_2->getRT() - it_rt->getRT();
436 terminate_now = std::exp(factor * base * base) < cutoff;
437 }
438 else if (unit)
439 {
440 // Top-Hat with RT unit = scans
441 terminate_now = (steps > range_scans);
442 }
443 else
444 {
445 // Top-Hat with RT unit = seconds
446 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
447 }
448 ++m;
449 ++it_rt_2;
450 }
451
452 // go backward
453 steps = 0;
454 m = n;
455 it_rt_2 = it_rt;
456 terminate_now = false;
457 while (it_rt_2 != exp.begin() && !terminate_now)
458 {
459 if (Int(it_rt_2->getMSLevel()) == ms_level)
460 {
461 bool add = true;
462 // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
463 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
464 it_rt_2->getPrecursors().size() > 0)
465 {
466 double mz1 = it_rt->getPrecursors()[0].getMZ();
467 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
468 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
469 }
470 if (add)
471 {
472 double weight = 1;
473 if (average_type == "gaussian")
474 {
475 double base = it_rt_2->getRT() - it_rt->getRT();
476 weight = std::exp(factor * base * base);
477 }
478 std::pair<Size, double> p(m, weight);
479 spectra_to_average_over[n].push_back(p);
480 }
481 ++steps;
482 }
483 if (average_type == "gaussian")
484 {
485 // Gaussian
486 double base = it_rt_2->getRT() - it_rt->getRT();
487 terminate_now = std::exp(factor * base * base) < cutoff;
488 }
489 else if (unit)
490 {
491 // Top-Hat with RT unit = scans
492 terminate_now = (steps > range_scans);
493 }
494 else
495 {
496 // Top-Hat with RT unit = seconds
497 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
498 }
499 --m;
500 --it_rt_2;
501 }
502 ++cntr;
503 }
504 ++n;
505 }
506
507 if (cntr == 0)
508 {
509 //return;
510 throw Exception::InvalidParameter(__FILE__,
511 __LINE__,
512 OPENMS_PRETTY_FUNCTION,
513 "Input mzML does not have any spectra of MS level specified by ms_level.");
514 }
515
516 // normalize weights (such that their sum == 1)
517 for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
518 {
519 double sum(0.0);
520 for (const auto& weight: it->second)
521 {
522 sum += weight.second;
523 }
524
525 for (auto& weight: it->second)
526 {
527 weight.second /= sum;
528 }
529 }
530
531 // determine type of spectral data (profile or centroided)
533 if (spectrum_type == "automatic")
534 {
535 Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
536 type = exp[idx].getType(true);
537 }
538 else if (spectrum_type == "profile")
539 {
540 type = SpectrumSettings::PROFILE;
541 }
542 else if (spectrum_type == "centroid")
543 {
544 type = SpectrumSettings::CENTROID;
545 }
546 else
547 {
548 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
549 }
550
551 // generate new spectra
552 if (type == SpectrumSettings::CENTROID)
553 {
554 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
555 }
556 else
557 {
558 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
559 }
560
561 exp.sortSpectra();
562 }
563
564 // @}
565
566protected:
567
578 template <typename MapType>
579 void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
580 {
581 double mz_binning_width(param_.getValue("mz_binning_width"));
582 std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
583
584 // merge spectra
585 MapType merged_spectra;
586
587 std::map<Size, Size> cluster_sizes;
588 std::set<Size> merged_indices;
589
590 // set up alignment
592 Param p;
593 p.setValue("tolerance", mz_binning_width);
594 if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
595 {
596 throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
597 }
598
599 p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
600 sas.setParameters(p);
601 std::vector<std::pair<Size, Size> > alignment;
602
603 Size count_peaks_aligned(0);
604 Size count_peaks_overall(0);
605
606 // each BLOCK
607 for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
608 {
609 ++cluster_sizes[it->second.size() + 1]; // for stats
610
611 typename MapType::SpectrumType consensus_spec = exp[it->first];
612 consensus_spec.setMSLevel(ms_level);
613
614 merged_indices.insert(it->first);
615
616 double rt_average = consensus_spec.getRT();
617 double precursor_mz_average = 0.0;
618 Size precursor_count(0);
619 if (!consensus_spec.getPrecursors().empty())
620 {
621 precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
622 ++precursor_count;
623 }
624
625 count_peaks_overall += consensus_spec.size();
626
627 String consensus_native_id = consensus_spec.getNativeID();
628
629 // block elements
630 for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
631 {
632 consensus_spec.unify(exp[*sit]); // append meta info
633 merged_indices.insert(*sit);
634
635 rt_average += exp[*sit].getRT();
636 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
637 {
638 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
639 ++precursor_count;
640 }
641
642 // add native ID to consensus native ID, comma separated
643 consensus_native_id += ",";
644 consensus_native_id += exp[*sit].getNativeID();
645
646 // merge data points
647 sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
648 count_peaks_aligned += alignment.size();
649 count_peaks_overall += exp[*sit].size();
650
651 Size align_index(0);
652 Size spec_b_index(0);
653
654 // sanity check for number of peaks
655 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
656 for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
657 {
658 if (alignment.empty() || alignment[align_index].second != spec_b_index)
659 // ... add unaligned peak
660 {
661 consensus_spec.push_back(*pit);
662 }
663 // or add aligned peak height to ALL corresponding existing peaks
664 else
665 {
666 Size counter(0);
667 Size copy_of_align_index(align_index);
668
669 while (!alignment.empty() &&
670 copy_of_align_index < alignment.size() &&
671 alignment[copy_of_align_index].second == spec_b_index)
672 {
673 ++copy_of_align_index;
674 ++counter;
675 } // Count the number of peaks which correspond to a single b peak.
676
677 while (!alignment.empty() &&
678 align_index < alignment.size() &&
679 alignment[align_index].second == spec_b_index)
680 {
681 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
682 (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
683 ++align_index; // this aligned peak was explained, wait for next aligned peak ...
684 if (align_index == alignment.size())
685 {
686 alignment.clear(); // end reached -> avoid going into this block again
687 }
688 }
689 align_size = align_size + 1 - counter; //Decrease align_size by number of
690 }
691 ++spec_b_index;
692 }
693 consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
694 if (spec_a + spec_b - align_size != consensus_spec.size())
695 {
696 OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
697 }
698 }
699 rt_average /= it->second.size() + 1;
700 consensus_spec.setRT(rt_average);
701
702 // set new consensus native ID
703 consensus_spec.setNativeID(consensus_native_id);
704
705 if (ms_level >= 2)
706 {
707 if (precursor_count)
708 {
709 precursor_mz_average /= precursor_count;
710 }
711 auto& pcs = consensus_spec.getPrecursors();
712 pcs.resize(1);
713 pcs[0].setMZ(precursor_mz_average);
714 consensus_spec.setPrecursors(pcs);
715 }
716
717 if (consensus_spec.empty())
718 {
719 continue;
720 }
721 else
722 {
723 merged_spectra.addSpectrum(std::move(consensus_spec));
724 }
725 }
726
727 OPENMS_LOG_INFO << "Cluster sizes:\n";
728 for (const auto& cl_size : cluster_sizes)
729 {
730 OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
731 }
732
733 char buffer[200];
734 sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
735 (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
736 OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
737
738 // remove all spectra that were within a cluster
739 typename MapType::SpectrumType empty_spec;
740 MapType exp_tmp;
741 for (Size i = 0; i < exp.size(); ++i)
742 {
743 if (merged_indices.count(i) == 0) // save unclustered ones
744 {
745 exp_tmp.addSpectrum(exp[i]);
746 exp[i] = empty_spec;
747 }
748 }
749
750 //Meta_Data will not be cleared
751 exp.clear(false);
752 exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
753 std::make_move_iterator(exp_tmp.end()));
754
755 // ... and add consensus spectra
756 exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
757 std::make_move_iterator(merged_spectra.end()));
758
759 }
760
781 template <typename MapType>
782 void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
783 {
784 MapType exp_tmp; // temporary experiment for averaged spectra
785
786 double mz_binning_width(param_.getValue("mz_binning_width"));
787 std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
788
789 unsigned progress = 0;
790 std::stringstream progress_message;
791 progress_message << "averaging profile spectra of MS level " << ms_level;
792 startProgress(0, spectra_to_average_over.size(), progress_message.str());
793
794 // loop over blocks
795 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
796 {
797 setProgress(++progress);
798
799 // loop over spectra in blocks
800 std::vector<double> mz_positions_all; // m/z positions from all spectra
801 for (const auto& spec : it->second)
802 {
803 // loop over m/z positions
804 for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
805 {
806 mz_positions_all.push_back(it_mz->getMZ());
807 }
808 }
809
810 sort(mz_positions_all.begin(), mz_positions_all.end());
811
812 std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
813 std::vector<double> intensities;
814 double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
815 double delta_mz(mz_binning_width); // for m/z unit Da
816 for (const auto mz_pos : mz_positions_all)
817 {
818 if (mz_binning_unit == "ppm")
819 {
820 delta_mz = mz_binning_width * mz_pos / 1000000;
821 }
822
823 if ((mz_pos - last_mz) > delta_mz)
824 {
825 mz_positions.push_back(mz_pos);
826 intensities.push_back(0.0);
827 last_mz = mz_pos;
828 }
829 }
830
831 // loop over spectra in blocks
832 for (const auto& spec : it->second)
833 {
834 SplineInterpolatedPeaks spline(exp[spec.first]);
836
837 // loop over m/z positions
838 for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
839 {
840 if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
841 {
842 intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
843 }
844 }
845 }
846
847 // update spectrum
848 typename MapType::SpectrumType average_spec = exp[it->first];
849 average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
850
851 // refill spectrum
852 for (Size i = 0; i < mz_positions.size(); ++i)
853 {
854 typename MapType::PeakType peak;
855 peak.setMZ(mz_positions[i]);
856 peak.setIntensity(intensities[i]);
857 average_spec.push_back(peak);
858 }
859
860 // store spectrum temporarily
861 exp_tmp.addSpectrum(std::move(average_spec));
862 }
863
864 endProgress();
865
866 // loop over blocks
867 int n(0);
868 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
869 {
870 exp[it->first] = exp_tmp[n];
871 ++n;
872 }
873 }
874
890 template <typename MapType>
891 void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
892 {
893 MapType exp_tmp; // temporary experiment for averaged spectra
894
895 double mz_binning_width(param_.getValue("mz_binning_width"));
896 std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
897
898 unsigned progress = 0;
899 ProgressLogger logger;
900 std::stringstream progress_message;
901 progress_message << "averaging centroid spectra of MS level " << ms_level;
902 logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
903
904 // loop over blocks
905 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
906 {
907 logger.setProgress(++progress);
908
909 // collect peaks from all spectra
910 // loop over spectra in blocks
911 std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
912 for (const auto& weightedMZ: it->second)
913 {
914 // loop over m/z positions
915 for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
916 {
917 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
918 mz_intensity_all.push_back(mz_intensity);
919 }
920 }
921
922 sort(mz_intensity_all.begin(), mz_intensity_all.end());
923
924 // generate new spectrum
925 std::vector<double> mz_new;
926 std::vector<double> intensity_new;
927 double last_mz = std::numeric_limits<double>::min();
928 double delta_mz = mz_binning_width;
929 double sum_mz(0);
930 double sum_intensity(0);
931 Size count(0);
932 for (const auto& mz_pos : mz_intensity_all)
933 {
934 if (mz_binning_unit == "ppm")
935 {
936 delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
937 }
938
939 if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
940 {
941 mz_new.push_back(sum_mz / count);
942 intensity_new.push_back(sum_intensity); // intensities already weighted
943
944 sum_mz = 0;
945 sum_intensity = 0;
946
947 last_mz = mz_pos.first;
948 count = 0;
949 }
950
951 sum_mz += mz_pos.first;
952 sum_intensity += mz_pos.second;
953 ++count;
954 }
955 if (count > 0)
956 {
957 mz_new.push_back(sum_mz / count);
958 intensity_new.push_back(sum_intensity); // intensities already weighted
959 }
960
961 // update spectrum
962 typename MapType::SpectrumType average_spec = exp[it->first];
963 average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
964
965 // refill spectrum
966 for (Size i = 0; i < mz_new.size(); ++i)
967 {
968 typename MapType::PeakType peak;
969 peak.setMZ(mz_new[i]);
970 peak.setIntensity(intensity_new[i]);
971 average_spec.push_back(peak);
972 }
973
974 // store spectrum temporarily
975 exp_tmp.addSpectrum(std::move(average_spec));
976 }
977
978 logger.endProgress();
979
980 // loop over blocks
981 int n(0);
982 for (const auto& spectral_index : spectra_to_average_over)
983 {
984 exp[spectral_index.first] = std::move(exp_tmp[n]);
985 ++n;
986 }
987 }
988 };
989}
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition LogStream.h:452
A basic LC-MS feature.
Definition BaseFeature.h:34
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition ClusterAnalyzer.h:26
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Hierarchical clustering with generic clustering functions.
Definition ClusterHierarchical.h:37
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition ClusterHierarchical.h:84
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition DistanceMatrix.h:42
Illegal self operation exception.
Definition Exception.h:346
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
Not all required information provided.
Definition Exception.h:155
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin() noexcept
Size size() const noexcept
The number of spectra.
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition MSExperiment.h:98
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition MSExperiment.h:86
void clear(bool clear_meta_data)
Clears all data and meta data.
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition Param.h:46
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition Peak1D.h:86
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition Peak1D.h:95
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition Peak2D.h:173
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition Peak2D.h:179
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition Peak2D.h:191
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition Peak2D.h:185
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
SingleLinkage ClusterMethod.
Definition SingleLinkage.h:31
Definition SpectraMerger.h:59
double getSimilarity(const double d_rt, const double d_mz) const
Definition SpectraMerger.h:75
SpectraDistance_()
Definition SpectraMerger.h:61
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition SpectraMerger.h:82
double mz_max_
Definition SpectraMerger.h:101
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition SpectraMerger.h:69
double rt_max_
Definition SpectraMerger.h:100
Offers spectra merging and averaging algorithms to increase the quality of a spectrum.
Definition SpectraMerger.h:46
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition SpectraMerger.h:348
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition SpectraMerger.h:296
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition SpectraMerger.h:891
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition SpectraMerger.h:111
~SpectraMerger() override
destructor
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition SpectraMerger.h:196
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition SpectraMerger.h:782
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition SpectraMerger.h:579
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition SpectraMerger.h:108
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition SpectraMerger.h:143
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition SpectrumAlignment.h:43
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition SpectrumAlignment.h:62
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
SpectrumType
Spectrum peak type.
Definition SpectrumSettings.h:50
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
iterator class for access of spline packages
Definition SplineInterpolatedPeaks.h:84
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition SplineInterpolatedPeaks.h:34
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition ListUtils.h:29
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19