24#include <OpenMS/config.h>
30#include <unordered_set>
37 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
41 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
46 !std::is_same_v<T, std::vector<PeptideIdentification>> &&
47 !std::is_same_v<T, std::vector<ProteinIdentification>> &&
48 !std::is_same_v<T, PeptideIdentificationList>;
92 template<
class HitType>
99 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
105 if (higher_score_better)
107 return hit.getScore() >= score;
109 return hit.getScore() <= score;
118 template<
class HitType>
136 return found == value;
141 template<
class HitType>
157 return double(found) <= value;
168 template<
class HitType>
201 return static_cast<double>(found) >= value;
221 template<
class HitType>
234 target_decoy(
"target_decoy",
"decoy"),
235 is_decoy(
"isDecoy",
"true")
252 return target_decoy(hit) || is_decoy(hit);
261 template<
class HitType>
268 accessions(accessions_)
276 if (accessions.count(it) > 0)
298 template<
class HitType>
312 if (accessions.count(it) > 0)
334 template<
class HitType,
class Entry>
342 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
344 items[getKey(*rec_it)] = &(*rec_it);
359 return items.count(getHitKey(hit)) > 0;
369 if (!exists(evidence))
371 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
373 return *(items.find(getHitKey(evidence))->second);
388 struct HasMinPeptideLength;
394 struct HasLowMZError;
401 struct HasMatchingModification;
408 struct HasMatchingSequence;
411 struct HasNoEvidence;
441 const auto& fun = [&](
const Int missed_cleavages) {
442 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
443 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
444 return max_filter || min_filter;
451 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
471 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
483 if (accession_resolver_.
exists(evidence))
486 ignore_missed_cleavages_, methionine_cleavage_);
492 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
504 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
515 template<
class IdentificationType>
521 return id.getHits().empty();
548 template<
class Container,
class Predicate>
551 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
555 template<
class Container,
class Predicate>
558 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
562 template<
class Container,
class Predicate>
565 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
566 std::move(part, items.end(), std::back_inserter(target));
567 items.erase(part, items.end());
571 template<
class IDContainer,
class Predicate>
574 for (
auto& item : items)
576 removeMatchingItems(item.getHits(), pred);
581 template<
class IDContainer,
class Predicate>
584 for (
auto& item : items)
586 keepMatchingItems(item.getHits(), pred);
590 template<
class MapType,
class Predicate>
593 for (
auto& feat : prot_and_pep_ids)
595 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
597 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
600 template<
class MapType,
class Predicate>
603 for (
auto& feat : prot_and_pep_ids)
605 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
607 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
610 template<IsFeatureOrConsensusMap MapType,
class Predicate>
613 for (
auto& feat : prot_and_pep_ids)
615 removeMatchingItems(feat.getPeptideIdentifications(), pred);
617 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
621 template<
class Predicate>
624 removeMatchingItems(pep_ids, pred);
634 template<
class IdentificationType>
638 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
640 counter += id_it->getHits().size();
649 for (
const auto&
id : ids)
651 counter +=
id.getHits().size();
659 std::vector<PeptideIdentification>& vec = ids.
getData();
660 filterHitsByRank(vec, min_rank, max_rank);
666 std::vector<PeptideIdentification>& vec = ids.
getData();
667 removeHitsMatchingProteins(vec, accessions);
673 std::vector<PeptideIdentification>& vec = ids.
getData();
674 keepHitsMatchingProteins(vec, accessions);
680 std::vector<PeptideIdentification>& vec = ids.
getData();
681 return getBestHit(vec, assume_sorted, best_hit);
687 std::vector<PeptideIdentification>& vec = ids.
getData();
688 removeEmptyIdentifications(vec);
704 template<
class IdentificationType>
705 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
707 if (identifications.empty())
710 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
711 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
713 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
715 if (id_it->getHits().empty())
718 if (best_id_it == identifications.end())
721 best_hit_it = id_it->getHits().begin();
723 else if (best_id_it->getScoreType() != id_it->getScoreType())
725 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
728 bool higher_better = best_id_it->isHigherScoreBetter();
729 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
731 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
733 best_hit_it = hit_it;
740 if (best_id_it == identifications.end())
745 best_hit = *best_hit_it;
770 template<
class Ev
idenceFilter>
775 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
777 std::vector<PeptideEvidence> evidences;
778 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
779 hit_it->setPeptideEvidences(evidences);
855 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
863 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
871 template<IsPept
ideOrProteinIdentification IdentificationType>
874 struct HasNoHits<IdentificationType> empty_filter;
875 removeMatchingItems(ids, empty_filter);
883 template<
class IdentificationType>
886 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
888 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
889 keepMatchingItems(id_it->getHits(), score_filter);
906 template<class IdentificationType>
907 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
910 bool at_least_one_found =
false;
911 for (IdentificationType&
id : ids)
913 if (switcher.
isScoreType(
id.getScoreType(), score_type))
915 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
916 keepMatchingItems(id.getHits(), score_filter);
921 auto result = switcher.
findScoreType<IdentificationType>(id, score_type);
922 if (!result.score_name.empty())
924 String metaval = result.score_name;
927 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
928 keepMatchingItems(id.getHits(), score_filter);
932 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
933 keepMatchingItems(id.getHits(), score_filter);
935 at_least_one_found = true;
939 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
948 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
955 template<class IdentificationType>
956 static void filterHitsByScore(IdentificationType& id, double threshold_score)
958 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
959 keepMatchingItems(id.getHits(), score_filter);
967 template<class IdentificationType>
968 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
970 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
973 if (n < id_it->getHits().size())
974 id_it->getHits().resize(n);
986 static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
988 std::vector<PeptideIdentification>& vec = pep_ids.getData();
989 keepNBestHits(vec, n);
1006 template<class IdentificationType>
1007 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
1009 for (auto& id : ids)
1011 auto& hits = id.getHits();
1012 if (hits.empty()) continue;
1017 if (max_rank < min_rank) max_rank = hits.size();
1020 double last_score = hits.front().getScore();
1024 std::remove_if(hits.begin(), hits.end(),
1025 [&](const auto& hit) {
1026 if (hit.getScore() != last_score)
1029 last_score = hit.getScore();
1031 return rank < min_rank || rank > max_rank;
1045 template<class IdentificationType>
1046 static void removeDecoyHits(std::vector<IdentificationType>& ids)
1048 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1049 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1051 removeMatchingItems(id_it->getHits(), decoy_filter);
1062 template<class IdentificationType>
1063 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1065 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1066 for (auto& id_it : ids)
1068 removeMatchingItems(id_it.getHits(), acc_filter);
1079 template<IsPeptideOrProteinIdentification IdentificationType>
1080 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1082 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1083 keepMatchingItems(id.getHits(), acc_filter);
1093 template<class IdentificationType>
1094 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1096 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1111 static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1121 static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1131 static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1134 static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1137 static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1150 static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1159 template<class Filter>
1160 static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1173 static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1176 static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1178 static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1181 static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1190 static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1199 static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1202 static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1210 static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1219 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1220 double peptide_threshold_score,
1221 double protein_threshold_score)
1224 filterHitsByScore(annotated_data.getProteinIdentifications(),
1225 protein_threshold_score);
1230 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1232 filterHitsByScore(peptide_id, peptide_threshold_score);
1234 removeDanglingProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1238 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1242 PeptideIdentificationList all_peptides;
1244 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1247 PeptideIdentificationList temp_vec = {peptide_id};
1248 keepNBestHits(temp_vec, n);
1250 if (!temp_vec.empty())
1252 peptide_id = temp_vec[0];
1256 peptide_id.getHits().clear();
1261 temp_vec = {peptide_id};
1262 removeDanglingProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1263 all_peptides.push_back(peptide_id);
1266 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1271 static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1274 template<class MapType>
1275 static void keepNBestPeptideHits(MapType& map, Size n)
1279 for (auto& feat : map)
1281 keepNBestHits(feat.getPeptideIdentifications(), n);
1283 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1286 template<IsNotIdentificationVector MapType>
1287 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1289 const auto pred = HasNoHits<PeptideIdentification>();
1290 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1294 static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1296 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1297 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1298 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1301 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1303 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1304 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1305 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1309 template<class MapType>
1310 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1312 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1314 RunToSequenceToChargeToPepHitP best_peps_per_run;
1315 for (const auto& idrun : prot_ids)
1317 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1320 for (auto& feat : prot_and_pep_ids)
1322 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1325 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1328 template<class MapType>
1329 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1331 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1332 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1333 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1338 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1339 Size nr_best_spectrum)
1341 RunToSequenceToChargeToPepHitP best_peps_per_run;
1342 for (const auto& id : prot_ids)
1344 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1346 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1352 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1353 Size nr_best_spectrum)
1355 for (auto& pep : pep_ids)
1357 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1358 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1365 static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1367 SequenceToChargeToPepHitP best_pep;
1368 for (auto& pep : pep_ids)
1370 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1378 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1380 bool higher_score_better = pep.isHigherScoreBetter();
1384 auto pepIt = pep.getHits().begin();
1385 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1386 for (; pepIt != pepItEnd; ++pepIt)
1388 PeptideHit& hit = *pepIt;
1393 lookup_seq = hit.getSequence().toUnmodifiedString();
1397 lookup_seq = hit.getSequence().toString();
1400 int lookup_charge = 0;
1401 if (!ignore_charges)
1403 lookup_charge = hit.getCharge();
1407 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1408 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1410 PeptideHit*& p = it_inserted_chg.first->second;
1411 if (!it_inserted_chg.second)
1413 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1415 p->setMetaValue(
"best_per_peptide", 0);
1416 hit.setMetaValue(
"best_per_peptide", 1);
1422 hit.setMetaValue(
"best_per_peptide", 0);
1427 hit.setMetaValue(
"best_per_peptide", 1);
1435 const std::vector<FASTAFile::FASTAEntry>& proteins)
1437 std::set<String> accessions;
1438 for (
auto it = proteins.begin(); it != proteins.end(); ++it)
1440 accessions.insert(it->identifier);
1448 for (
auto [spectrum, peptide_id] : experiment)
1450 if (spectrum.getMSLevel() == 2)
1452 keepHitsMatchingProteins(peptide_id, accessions);
1499 removeDecoyHits(ids.
getData());
1504 filterHitsByScore(ids.
getData(), threshold_score);
1509 removeUnreferencedProteins(proteins, ids.
getData());
#define OPENMS_LOG_WARN
Macro to use when a warning (a piece of information which should be read by the user) should be logged.
Definition LogStream.h:447
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
Class for storing MS run data with peptide and protein identifications.
Definition AnnotatedMSRun.h:38
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition AnnotatedMSRun.h:85
A container for consensus elements.
Definition ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition DataValue.h:34
bool isEmpty() const
Test if the value is empty.
Class for the enzymatic digestion of sequences.
Definition EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
Invalid value exception.
Definition Exception.h:306
const VecMember & getData() const
read-only access to the underlying data
Definition ExposedVector.h:328
typename VecMember::iterator iterator
Definition ExposedVector.h:68
iterator begin() noexcept
Definition ExposedVector.h:104
iterator end() noexcept
Definition ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition IDFilter.h:420
Int max_cleavages_
Definition IDFilter.h:424
EnzymaticDigestion & digestion_
Definition IDFilter.h:422
PeptideHit argument_type
Definition IDFilter.h:427
Int min_cleavages_
Definition IDFilter.h:423
bool operator()(PeptideHit &p) const
Definition IDFilter.h:439
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition IDFilter.h:449
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition IDFilter.h:428
static Int disabledValue()
Definition IDFilter.h:432
Collection of functions for filtering peptide and protein identifications.
Definition IDFilter.h:71
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:664
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition IDFilter.h:884
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDanglingProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits using a reference protein run.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition IDFilter.h:563
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition IDFilter.h:82
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:591
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition IDFilter.h:549
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition IDFilter.h:81
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition IDFilter.h:1497
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition IDFilter.h:872
static void removeEmptyIdentifications(PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:685
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition IDFilter.h:582
static void removeDanglingProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits in a ConsensusMap.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:671
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition IDFilter.h:556
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition IDFilter.h:1433
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:601
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:657
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:646
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:678
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition IDFilter.h:572
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition IDFilter.h:1502
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:611
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition IDFilter.h:771
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition IDFilter.h:635
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition IDFilter.h:1507
static void removeDanglingProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition IDFilter.h:705
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition IDFilter.h:622
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition IDFilter.h:80
This class is used to switch identification scores within identification or consensus feature maps.
Definition IDScoreSwitcherAlgorithm.h:42
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition IDScoreSwitcherAlgorithm.h:139
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition IDScoreSwitcherAlgorithm.h:75
ScoreSearchResult findScoreType(const IDType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition IDScoreSwitcherAlgorithm.h:176
Definition IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Representation of a peptide evidence.
Definition PeptideEvidence.h:28
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available, the empty string is returned.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition ProteinHit.h:35
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition ProteinIdentification.h:54
A more convenient string class.
Definition String.h:34
Concept to exclude std::vector of identification types (used to disambiguate template overloads)
Definition IDFilter.h:45
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type, e.g. used for variables which hold the result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition FASTAFile.h:46
String identifier
Definition FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition IDFilter.h:461
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition IDFilter.h:470
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition IDFilter.h:465
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition IDFilter.h:502
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:475
bool ignore_missed_cleavages_
Definition IDFilter.h:467
PeptideEvidence argument_type
Definition IDFilter.h:462
ProteaseDigestion & digestion_
Definition IDFilter.h:466
bool methionine_cleavage_
Definition IDFilter.h:468
Builds a map index of data that have a String index to find matches and return the objects.
Definition IDFilter.h:335
std::map< String, Entry * > ItemMap
Definition IDFilter.h:337
GetMatchingItems()
Definition IDFilter.h:348
const String & getHitKey(const PeptideEvidence &p) const
Definition IDFilter.h:362
ItemMap items
Definition IDFilter.h:338
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition IDFilter.h:352
HitType argument_type
Definition IDFilter.h:336
bool exists(const HitType &hit) const
Definition IDFilter.h:357
GetMatchingItems(std::vector< Entry > &records)
Definition IDFilter.h:340
const Entry & getValue(const PeptideEvidence &evidence) const
Definition IDFilter.h:367
Is this a decoy hit?
Definition IDFilter.h:223
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition IDFilter.h:247
HitType argument_type
Definition IDFilter.h:224
HasDecoyAnnotation()
Default constructor.
Definition IDFilter.h:233
Is the score of this hit at least as good as the given value?
Definition IDFilter.h:93
bool operator()(const HitType &hit) const
Definition IDFilter.h:103
double score
Definition IDFilter.h:96
HitType argument_type
Definition IDFilter.h:94
HasGoodScore(double score_, bool higher_score_better_)
Definition IDFilter.h:99
bool higher_score_better
Definition IDFilter.h:97
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition IDFilter.h:262
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition IDFilter.h:267
HitType argument_type
Definition IDFilter.h:263
const std::unordered_set< String > & accessions
Definition IDFilter.h:265
bool operator()(const PeptideHit &hit) const
Definition IDFilter.h:272
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:287
bool operator()(const ProteinHit &hit) const
Definition IDFilter.h:282
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition IDFilter.h:299
HitType argument_type
Definition IDFilter.h:300
bool operator()(const PeptideHit &hit) const
Definition IDFilter.h:308
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:323
const std::set< String > & accessions
Definition IDFilter.h:302
HasMatchingAccession(const std::set< String > &accessions_)
Definition IDFilter.h:304
bool operator()(const ProteinHit &hit) const
Definition IDFilter.h:318
Is the list of hits of this peptide/protein ID empty?
Definition IDFilter.h:516
bool operator()(const IdentificationType &id) const
Definition IDFilter.h:519
IdentificationType argument_type
Definition IDFilter.h:517
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition MetaData.h:20