83 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
84 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
90 rt_max_ = (
double) param_.getValue(
"rt_tolerance");
91 mz_max_ = (
double) param_.getValue(
"mz_tolerance");
97 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
104 double d_rt = fabs(first.
getRT() - second.
getRT());
105 double d_mz = fabs(first.
getMZ() - second.
getMZ());
107 if (d_rt > rt_max_ || d_mz > mz_max_)
113 double sim = getSimilarity(d_rt, d_mz);
130 typedef std::map<Size, std::vector<std::pair<Size, double> > >
AverageBlocks;
159 template <
typename MapType>
162 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
163 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
164 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
166 if (rt_max_length == 0)
168 rt_max_length = (std::numeric_limits<double>::max)();
171 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
175 SignedSize block_size_count(rt_block_size + 1);
176 Size idx_spectrum(0);
179 if (
Int(it1->getMSLevel()) == *it_mslevel)
182 if (++block_size_count >= rt_block_size ||
183 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
185 block_size_count = 0;
186 idx_block = idx_spectrum;
190 spectra_to_merge[idx_block].push_back(idx_spectrum);
197 if (block_size_count == 0)
199 spectra_to_merge[idx_block] = std::vector<Size>();
203 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
210 template <
typename MapType>
216 std::vector<BinaryTreeNode> tree;
217 std::map<Size, Size> index_mapping;
220 std::vector<BaseFeature> data;
222 for (
Size i = 0; i < exp.
size(); ++i)
224 if (exp[i].getMSLevel() != 2)
230 index_mapping[data.size()] = i;
234 bf.
setRT(exp[i].getRT());
235 const auto& pcs = exp[i].getPrecursors();
243 OPENMS_LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
245 bf.
setMZ(pcs[0].getMZ());
248 data_size = data.size();
262 std::vector<std::vector<Size> > clusters;
265 for (
Size ii = 0; ii < tree.size(); ++ii)
267 if (tree[ii].distance >= 1)
269 tree[ii].distance = -1;
271 if (tree[ii].distance != -1)
276 ca.
cut(data_size - node_count, tree, clusters);
281 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
283 if (clusters[i_outer].size() <= 1)
288 Size cl_index0 = clusters[i_outer][0];
289 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
291 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
293 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
298 mergeSpectra_(exp, spectra_to_merge, 2);
313 if (mz1 == mz2 || tol_ppm <= 0)
319 const int max_iso_diff = 5;
320 const double max_charge_diff_ratio = 3.0;
322 for (
int c1 = min_c; c1 <= max_c; ++c1)
326 for (
int c2 = min_c; c2 <= max_c; ++c2)
328 if (c1 / c2 > max_charge_diff_ratio)
332 if (c2 / c1 > max_charge_diff_ratio)
339 if (fabs(mass1 - mass2) > max_iso_diff)
343 for (
int i = -max_iso_diff; i <= max_iso_diff; ++i)
362 template <
typename MapType>
368 ms_level = param_.getValue(
"average_gaussian:ms_level");
369 if (average_type ==
"tophat")
371 ms_level = param_.getValue(
"average_tophat:ms_level");
376 std::string spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
377 if (average_type ==
"tophat")
379 spectrum_type = std::string(param_.getValue(
"average_tophat:spectrum_type"));
383 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
384 double factor = -4 * log(2.0) / (fwhm * fwhm);
385 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
386 double precursor_mass_ppm = param_.getValue(
"average_gaussian:precursor_mass_tol");
387 int precursor_max_charge = param_.getValue(
"average_gaussian:precursor_max_charge");
390 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
391 double range(param_.getValue(
"average_tophat:rt_range"));
392 double range_seconds = range / 2;
393 int range_scans =
static_cast<int>(range);
394 if ((range_scans % 2) == 0)
398 range_scans = (range_scans - 1) / 2;
407 if (
Int(it_rt->getMSLevel()) == ms_level)
418 terminate_now =
false;
419 while (it_rt_2 != exp.
end() && !terminate_now)
421 if (
Int(it_rt_2->getMSLevel()) == ms_level)
425 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
426 it_rt_2->getPrecursors().size() > 0)
428 double mz1 = it_rt->getPrecursors()[0].getMZ();
429 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
430 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
436 if (average_type ==
"gaussian")
439 double base = it_rt_2->getRT() - it_rt->getRT();
440 weight = std::exp(factor * base * base);
442 std::pair<Size, double> p(m, weight);
443 spectra_to_average_over[n].push_back(p);
447 if (average_type ==
"gaussian")
450 double base = it_rt_2->getRT() - it_rt->getRT();
451 terminate_now = std::exp(factor * base * base) < cutoff;
456 terminate_now = (steps > range_scans);
461 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
471 terminate_now =
false;
472 while (it_rt_2 != exp.
begin() && !terminate_now)
474 if (
Int(it_rt_2->getMSLevel()) == ms_level)
478 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
479 it_rt_2->getPrecursors().size() > 0)
481 double mz1 = it_rt->getPrecursors()[0].getMZ();
482 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
483 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
488 if (average_type ==
"gaussian")
490 double base = it_rt_2->getRT() - it_rt->getRT();
491 weight = std::exp(factor * base * base);
493 std::pair<Size, double> p(m, weight);
494 spectra_to_average_over[n].push_back(p);
498 if (average_type ==
"gaussian")
501 double base = it_rt_2->getRT() - it_rt->getRT();
502 terminate_now = std::exp(factor * base * base) < cutoff;
507 terminate_now = (steps > range_scans);
512 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
527 OPENMS_PRETTY_FUNCTION,
528 "Input mzML does not have any spectra of MS level specified by ms_level.");
532 for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
535 for (
const auto& weight: it->second)
537 sum += weight.second;
540 for (
auto& weight: it->second)
542 weight.second /=
sum;
548 if (spectrum_type ==
"automatic")
550 Size idx = spectra_to_average_over.begin()->first;
551 type = exp[idx].getType(
true);
553 else if (spectrum_type ==
"profile")
557 else if (spectrum_type ==
"centroid")
563 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,
"Spectrum type has to be one of automatic, profile or centroid.");
569 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
573 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
593 template <
typename MapType>
596 double mz_binning_width(param_.getValue(
"mz_binning_width"));
597 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
602 std::map<Size, Size> cluster_sizes;
603 std::set<Size> merged_indices;
608 p.
setValue(
"tolerance", mz_binning_width);
609 if (!(mz_binning_unit ==
"Da" || mz_binning_unit ==
"ppm"))
614 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
616 std::vector<std::pair<Size, Size> > alignment;
618 Size count_peaks_aligned(0);
619 Size count_peaks_overall(0);
622 for (
auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
624 ++cluster_sizes[it->second.size() + 1];
629 merged_indices.insert(it->first);
631 double rt_average = consensus_spec.
getRT();
632 double precursor_mz_average = 0.0;
633 Size precursor_count(0);
636 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
640 count_peaks_overall += consensus_spec.size();
645 for (
auto sit = it->second.begin(); sit != it->second.end(); ++sit)
647 consensus_spec.
unify(exp[*sit]);
648 merged_indices.insert(*sit);
650 rt_average += exp[*sit].getRT();
651 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
653 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
658 consensus_native_id +=
",";
659 consensus_native_id += exp[*sit].getNativeID();
663 count_peaks_aligned += alignment.size();
664 count_peaks_overall += exp[*sit].
size();
667 Size spec_b_index(0);
670 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
671 for (
auto pit = exp[*sit].begin(); pit != exp[*sit].
end(); ++pit)
673 if (alignment.empty() || alignment[align_index].second != spec_b_index)
676 consensus_spec.push_back(*pit);
682 Size copy_of_align_index(align_index);
684 while (!alignment.empty() &&
685 copy_of_align_index < alignment.size() &&
686 alignment[copy_of_align_index].second == spec_b_index)
688 ++copy_of_align_index;
692 while (!alignment.empty() &&
693 align_index < alignment.size() &&
694 alignment[align_index].second == spec_b_index)
696 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
697 (pit->getIntensity() / (
double)counter));
699 if (align_index == alignment.size())
704 align_size = align_size + 1 - counter;
709 if (spec_a + spec_b - align_size != consensus_spec.size())
711 OPENMS_LOG_WARN <<
"wrong number of features after merge. Expected: " << spec_a + spec_b - align_size <<
" got: " << consensus_spec.size() <<
"\n";
714 rt_average /= it->second.size() + 1;
715 consensus_spec.
setRT(rt_average);
724 precursor_mz_average /= precursor_count;
728 pcs[0].setMZ(precursor_mz_average);
732 if (consensus_spec.empty())
738 merged_spectra.addSpectrum(std::move(consensus_spec));
743 for (
const auto& cl_size : cluster_sizes)
745 OPENMS_LOG_INFO <<
" size " << cl_size.first <<
": " << cl_size.second <<
"x\n";
749 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
750 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
756 for (
Size i = 0; i < exp.
size(); ++i)
758 if (merged_indices.count(i) == 0)
768 std::make_move_iterator(exp_tmp.
end()));
771 exp.
getSpectra().insert(exp.
end(), std::make_move_iterator(merged_spectra.begin()),
772 std::make_move_iterator(merged_spectra.end()));
796 template <
typename MapType>
801 double mz_binning_width(param_.getValue(
"mz_binning_width"));
802 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
804 unsigned progress = 0;
805 std::stringstream progress_message;
806 progress_message <<
"averaging profile spectra of MS level " << ms_level;
807 startProgress(0, spectra_to_average_over.size(), progress_message.str());
810 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
812 setProgress(++progress);
815 std::vector<double> mz_positions_all;
816 for (
const auto& spec : it->second)
821 mz_positions_all.push_back(it_mz->getMZ());
825 sort(mz_positions_all.begin(), mz_positions_all.end());
827 std::vector<double> mz_positions;
828 std::vector<double> intensities;
829 double last_mz = std::numeric_limits<double>::min();
830 double delta_mz(mz_binning_width);
831 for (
const auto mz_pos : mz_positions_all)
833 if (mz_binning_unit ==
"ppm")
835 delta_mz = mz_binning_width * mz_pos / 1000000;
838 if ((mz_pos - last_mz) > delta_mz)
840 mz_positions.push_back(mz_pos);
841 intensities.push_back(0.0);
847 for (
const auto& spec : it->second)
853 for (
Size i = spline.
getPosMin(); i < mz_positions.size(); ++i)
857 intensities[i] += nav.
eval(mz_positions[i]) * (spec.second);
864 average_spec.
clear(
false);
867 for (
Size i = 0; i < mz_positions.size(); ++i)
870 peak.
setMZ(mz_positions[i]);
872 average_spec.push_back(peak);
883 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
885 exp[it->first] = exp_tmp[n];
905 template <
typename MapType>
910 double mz_binning_width(param_.getValue(
"mz_binning_width"));
911 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
913 unsigned progress = 0;
915 std::stringstream progress_message;
916 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
917 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
920 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
926 std::vector<std::pair<double, double> > mz_intensity_all;
927 for (
const auto& weightedMZ: it->second)
932 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second));
933 mz_intensity_all.push_back(mz_intensity);
937 sort(mz_intensity_all.begin(), mz_intensity_all.end());
940 std::vector<double> mz_new;
941 std::vector<double> intensity_new;
942 double last_mz = std::numeric_limits<double>::min();
943 double delta_mz = mz_binning_width;
945 double sum_intensity(0);
947 for (
const auto& mz_pos : mz_intensity_all)
949 if (mz_binning_unit ==
"ppm")
951 delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
954 if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
956 mz_new.push_back(sum_mz / count);
957 intensity_new.push_back(sum_intensity);
962 last_mz = mz_pos.first;
966 sum_mz += mz_pos.first;
967 sum_intensity += mz_pos.second;
972 mz_new.push_back(sum_mz / count);
973 intensity_new.push_back(sum_intensity);
978 average_spec.
clear(
false);
981 for (
Size i = 0; i < mz_new.size(); ++i)
984 peak.
setMZ(mz_new[i]);
986 average_spec.push_back(peak);
997 for (
const auto& spectral_index : spectra_to_average_over)
999 exp[spectral_index.first] = std::move(exp_tmp[n]);
A more convenient string class.
Definition: String.h:58
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:204
void sortByPosition()
Lexicographically sorts the peaks by their position.
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra, i.e. the ones being merged into the master-spectrum)
Definition: SpectraMerger.h:127
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:51
void endProgress() const
Ends the progress display.
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:70
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:594
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:67
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
#define OPENMS_LOG_INFO
Macro to use when information, e.g. a status, should be reported.
Definition: LogStream.h:470
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
profile data
Definition: SpectrumSettings.h:74
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:160
Iterator begin()
Definition: MSExperiment.h:182
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:63
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition: MSExperiment.h:117
Size size() const
The number of spectra.
Definition: MSExperiment.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setParameters(const Param &param)
Sets the parameters.
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:126
SpectraDistance_()
Definition: SpectraMerger.h:80
A basic LC-MS feature.
Definition: BaseFeature.h:58
const double PROTON_MASS_U
Definition: Constants.h:116
Iterator end()
Definition: MSExperiment.h:192
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:88
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:216
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:198
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
double rt_max_
Definition: SpectraMerger.h:119
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:101
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:94
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
void setProgress(SignedSize value) const
Sets the current progress.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:53
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: SpectraMerger.h:76
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:339
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:311
Aligns the peaks of two sorted spectra. Method 1: Using a banded (width via 'tolerance' parameter) alignment...
Definition: SpectrumAlignment.h:67
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:363
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:69
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:906
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:55
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:59
Illegal self operation exception.
Definition: Exception.h:370
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:105
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
double mz_max_
Definition: SpectraMerger.h:120
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:109
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void clear(bool clear_meta_data)
Clears all data and meta data.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:797
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:211
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software. ...
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:63
centroid data or stick data
Definition: SpectrumSettings.h:73
int Int
Signed integer type.
Definition: Types.h:102
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:130
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:465