82 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
83 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
89 rt_max_ = (double) param_.getValue(
"rt_tolerance");
90 mz_max_ = (double) param_.getValue(
"mz_tolerance");
96 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
103 double d_rt = fabs(first.
getRT() - second.
getRT());
104 double d_mz = fabs(first.
getMZ() - second.
getMZ());
106 if (d_rt > rt_max_ || d_mz > mz_max_)
112 double sim = getSimilarity(d_rt, d_mz);
129 typedef std::map<Size, std::vector<std::pair<Size, double> > >
AverageBlocks;
158 template <
typename MapType>
161 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
162 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
163 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
165 if (rt_max_length == 0)
167 rt_max_length = (std::numeric_limits<double>::max)();
170 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
174 SignedSize block_size_count(rt_block_size + 1);
175 Size idx_spectrum(0);
178 if (
Int(it1->getMSLevel()) == *it_mslevel)
181 if (++block_size_count >= rt_block_size ||
182 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
184 block_size_count = 0;
185 idx_block = idx_spectrum;
189 spectra_to_merge[idx_block].push_back(idx_spectrum);
196 if (block_size_count == 0)
198 spectra_to_merge[idx_block] = std::vector<Size>();
202 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
209 template <
typename MapType>
215 std::vector<BinaryTreeNode> tree;
216 std::map<Size, Size> index_mapping;
219 std::vector<BaseFeature> data;
221 for (
Size i = 0; i < exp.
size(); ++i)
223 if (exp[i].getMSLevel() != 2)
229 index_mapping[data.size()] = i;
233 bf.
setRT(exp[i].getRT());
234 const auto& pcs = exp[i].getPrecursors();
242 OPENMS_LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
244 bf.
setMZ(pcs[0].getMZ());
247 data_size = data.size();
262 std::vector<std::vector<Size> > clusters;
265 for (
Size ii = 0; ii < tree.size(); ++ii)
267 if (tree[ii].distance >= 1)
269 tree[ii].distance = -1;
271 if (tree[ii].distance != -1)
276 ca.
cut(data_size - node_count, tree, clusters);
284 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
286 if (clusters[i_outer].size() <= 1)
291 Size cl_index0 = clusters[i_outer][0];
292 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
294 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
296 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
301 mergeSpectra_(exp, spectra_to_merge, 2);
316 if (mz1 == mz2 || tol_ppm <= 0)
322 const int max_iso_diff = 5;
323 const double max_charge_diff_ratio = 3.0;
325 for (
int c1 = min_c; c1 <= max_c; ++c1)
329 for (
int c2 = min_c; c2 <= max_c; ++c2)
331 if (c1 / c2 > max_charge_diff_ratio)
335 if (c2 / c1 > max_charge_diff_ratio)
342 if (fabs(mass1 - mass2) > max_iso_diff)
346 for (
int i = -max_iso_diff; i <= max_iso_diff; ++i)
365 template <
typename MapType>
371 ms_level = param_.getValue(
"average_gaussian:ms_level");
372 if (average_type ==
"tophat")
374 ms_level = param_.getValue(
"average_tophat:ms_level");
379 std::string spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
380 if (average_type ==
"tophat")
382 spectrum_type = std::string(param_.getValue(
"average_tophat:spectrum_type"));
386 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
387 double factor = -4 * log(2.0) / (fwhm * fwhm);
388 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
389 double precursor_mass_ppm = param_.getValue(
"average_gaussian:precursor_mass_tol");
390 int precursor_max_charge = param_.getValue(
"average_gaussian:precursor_max_charge");
393 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
394 double range(param_.getValue(
"average_tophat:rt_range"));
395 double range_seconds = range / 2;
396 int range_scans =
static_cast<int>(range);
397 if ((range_scans % 2) == 0)
401 range_scans = (range_scans - 1) / 2;
410 if (
Int(it_rt->getMSLevel()) == ms_level)
421 terminate_now =
false;
422 while (it_rt_2 != exp.
end() && !terminate_now)
424 if (
Int(it_rt_2->getMSLevel()) == ms_level)
428 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
429 it_rt_2->getPrecursors().size() > 0)
431 double mz1 = it_rt->getPrecursors()[0].getMZ();
432 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
433 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
439 if (average_type ==
"gaussian")
442 double base = it_rt_2->getRT() - it_rt->getRT();
443 weight = std::exp(factor * base * base);
445 std::pair<Size, double> p(m, weight);
446 spectra_to_average_over[n].push_back(p);
450 if (average_type ==
"gaussian")
453 double base = it_rt_2->getRT() - it_rt->getRT();
454 terminate_now = std::exp(factor * base * base) < cutoff;
459 terminate_now = (steps > range_scans);
464 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
474 terminate_now =
false;
475 while (it_rt_2 != exp.
begin() && !terminate_now)
477 if (
Int(it_rt_2->getMSLevel()) == ms_level)
481 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
482 it_rt_2->getPrecursors().size() > 0)
484 double mz1 = it_rt->getPrecursors()[0].getMZ();
485 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
486 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
491 if (average_type ==
"gaussian")
493 double base = it_rt_2->getRT() - it_rt->getRT();
494 weight = std::exp(factor * base * base);
496 std::pair<Size, double> p(m, weight);
497 spectra_to_average_over[n].push_back(p);
501 if (average_type ==
"gaussian")
504 double base = it_rt_2->getRT() - it_rt->getRT();
505 terminate_now = std::exp(factor * base * base) < cutoff;
510 terminate_now = (steps > range_scans);
515 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
530 OPENMS_PRETTY_FUNCTION,
531 "Input mzML does not have any spectra of MS level specified by ms_level.");
535 for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
538 for (
const auto& weight: it->second)
540 sum += weight.second;
543 for (
auto& weight: it->second)
545 weight.second /=
sum;
551 if (spectrum_type ==
"automatic")
553 Size idx = spectra_to_average_over.begin()->first;
554 type = exp[idx].getType(
true);
556 else if (spectrum_type ==
"profile")
560 else if (spectrum_type ==
"centroid")
566 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,
"Spectrum type has to be one of automatic, profile or centroid.");
572 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
576 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
596 template <
typename MapType>
599 double mz_binning_width(param_.getValue(
"mz_binning_width"));
600 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
605 std::map<Size, Size> cluster_sizes;
606 std::set<Size> merged_indices;
611 p.
setValue(
"tolerance", mz_binning_width);
612 if (!(mz_binning_unit ==
"Da" || mz_binning_unit ==
"ppm"))
617 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
619 std::vector<std::pair<Size, Size> > alignment;
621 Size count_peaks_aligned(0);
622 Size count_peaks_overall(0);
625 for (
auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
627 ++cluster_sizes[it->second.size() + 1];
633 merged_indices.insert(it->first);
636 double rt_average = consensus_spec.
getRT();
637 double precursor_mz_average = 0.0;
638 Size precursor_count(0);
641 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
645 count_peaks_overall += consensus_spec.size();
648 for (
auto sit = it->second.begin(); sit != it->second.end(); ++sit)
650 consensus_spec.
unify(exp[*sit]);
651 merged_indices.insert(*sit);
653 rt_average += exp[*sit].getRT();
654 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
656 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
663 count_peaks_aligned += alignment.size();
664 count_peaks_overall += exp[*sit].
size();
667 Size spec_b_index(0);
670 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
671 for (
auto pit = exp[*sit].begin(); pit != exp[*sit].
end(); ++pit)
673 if (alignment.empty() || alignment[align_index].second != spec_b_index)
676 consensus_spec.push_back(*pit);
682 Size copy_of_align_index(align_index);
684 while (!alignment.empty() &&
685 copy_of_align_index < alignment.size() &&
686 alignment[copy_of_align_index].second == spec_b_index)
688 ++copy_of_align_index;
692 while (!alignment.empty() &&
693 align_index < alignment.size() &&
694 alignment[align_index].second == spec_b_index)
696 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
697 (pit->getIntensity() / (
double)counter));
699 if (align_index == alignment.size())
704 align_size = align_size + 1 - counter;
709 if (spec_a + spec_b - align_size != consensus_spec.size())
711 OPENMS_LOG_WARN <<
"wrong number of features after merge. Expected: " << spec_a + spec_b - align_size <<
" got: " << consensus_spec.size() <<
"\n";
714 rt_average /= it->second.size() + 1;
715 consensus_spec.
setRT(rt_average);
721 precursor_mz_average /= precursor_count;
726 pcs[0].setMZ(precursor_mz_average);
730 if (consensus_spec.empty())
736 merged_spectra.
addSpectrum(std::move(consensus_spec));
741 for (
const auto& cl_size : cluster_sizes)
743 OPENMS_LOG_INFO <<
" size " << cl_size.first <<
": " << cl_size.second <<
"x\n";
747 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
748 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
754 for (
Size i = 0; i < exp.
size(); ++i)
756 if (merged_indices.count(i) == 0)
768 std::make_move_iterator(exp_tmp.
end()));
774 std::make_move_iterator(merged_spectra.
end()));
798 template <
typename MapType>
803 double mz_binning_width(param_.getValue(
"mz_binning_width"));
804 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
806 unsigned progress = 0;
807 std::stringstream progress_message;
808 progress_message <<
"averaging profile spectra of MS level " << ms_level;
809 startProgress(0, spectra_to_average_over.size(), progress_message.str());
812 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
814 setProgress(++progress);
817 std::vector<double> mz_positions_all;
818 for (
const auto& spec : it->second)
823 mz_positions_all.push_back(it_mz->getMZ());
827 sort(mz_positions_all.begin(), mz_positions_all.end());
829 std::vector<double> mz_positions;
830 std::vector<double> intensities;
831 double last_mz = std::numeric_limits<double>::min();
832 double delta_mz(mz_binning_width);
833 for (
const auto mz_pos : mz_positions_all)
835 if (mz_binning_unit ==
"ppm")
837 delta_mz = mz_binning_width * mz_pos / 1000000;
840 if ((mz_pos - last_mz) > delta_mz)
842 mz_positions.push_back(mz_pos);
843 intensities.push_back(0.0);
849 for (
const auto& spec : it->second)
855 for (
Size i = spline.
getPosMin(); i < mz_positions.size(); ++i)
859 intensities[i] += nav.
eval(mz_positions[i]) * (spec.second);
866 average_spec.
clear(
false);
870 for (
Size i = 0; i < mz_positions.size(); ++i)
873 peak.
setMZ(mz_positions[i]);
875 average_spec.push_back(peak);
887 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
889 exp[it->first] = exp_tmp[n];
910 template <
typename MapType>
915 double mz_binning_width(param_.getValue(
"mz_binning_width"));
916 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
918 unsigned progress = 0;
920 std::stringstream progress_message;
921 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
922 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
925 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
931 std::vector<std::pair<double, double> > mz_intensity_all;
932 for (
const auto& weightedMZ: it->second)
937 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second));
938 mz_intensity_all.push_back(mz_intensity);
942 sort(mz_intensity_all.begin(), mz_intensity_all.end());
945 std::vector<double> mz_new;
946 std::vector<double> intensity_new;
947 double last_mz = std::numeric_limits<double>::min();
948 double delta_mz = mz_binning_width;
950 double sum_intensity(0);
952 for (
const auto& mz_pos : mz_intensity_all)
954 if (mz_binning_unit ==
"ppm")
956 delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
959 if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
961 mz_new.push_back(sum_mz / count);
962 intensity_new.push_back(sum_intensity);
967 last_mz = mz_pos.first;
971 sum_mz += mz_pos.first;
972 sum_intensity += mz_pos.second;
977 mz_new.push_back(sum_mz / count);
978 intensity_new.push_back(sum_intensity);
983 average_spec.
clear(
false);
987 for (
Size i = 0; i < mz_new.size(); ++i)
990 peak.
setMZ(mz_new[i]);
992 average_spec.push_back(peak);
1003 for (
const auto& spectral_index : spectra_to_average_over)
1005 exp[spectral_index.first] = std::move(exp_tmp[n]);
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:465
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:470
A basic LC-MS feature.
Definition: BaseFeature.h:59
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
void setParameters(const Param ¶m)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
Illegal self operation exception.
Definition: Exception.h:372
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin()
Definition: MSExperiment.h:149
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:159
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Size size() const
Definition: MSExperiment.h:119
Base::const_iterator const_iterator
Definition: MSExperiment.h:117
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:105
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:198
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:204
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:216
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
Definition: SpectraMerger.h:77
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:93
SpectraDistance_()
Definition: SpectraMerger.h:79
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:100
double mz_max_
Definition: SpectraMerger.h:119
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:87
double rt_max_
Definition: SpectraMerger.h:118
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:366
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:314
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:911
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:129
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:210
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:799
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:597
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:126
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:159
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:69
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:110
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:60
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:126
const double PROTON_MASS_U
Definition: Constants.h:116
FLASHIda C++ to C# (or vice versa) bridge functions The functions here are called in C# to invoke fun...
Definition: FeatureDeconvolution.h:48