OpenMS
Loading...
Searching...
No Matches
IDFilter.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Mathias Walzer $
6// $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
24#include <OpenMS/config.h>
25#include <algorithm>
26#include <climits>
27#include <functional>
28#include <map>
29#include <set>
30#include <unordered_set>
31#include <vector>
32
33namespace OpenMS
34{
35 template<typename T>
37 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
38
39 template<typename T>
41 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
42
44 template<typename T>
46 !std::is_same_v<T, std::vector<PeptideIdentification>> &&
47 !std::is_same_v<T, std::vector<ProteinIdentification>> &&
48 !std::is_same_v<T, PeptideIdentificationList>;
49
70 class OPENMS_DLLAPI IDFilter
71 {
72 public:
74 IDFilter() = default;
75
77 virtual ~IDFilter() = default;
78
80 typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
81 typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
82 typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
83
90
92 template<class HitType>
93 struct HasGoodScore {
94 typedef HitType argument_type; // for use as a predicate
95
96 double score;
98
99 HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
100 {
101 }
102
103 bool operator()(const HitType& hit) const
104 {
105 if (higher_score_better)
106 {
107 return hit.getScore() >= score;
108 }
109 return hit.getScore() <= score;
110 }
111 };
112
118 template<class HitType>
120 typedef HitType argument_type; // for use as a predicate
121
124
125 HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
126 {
127 }
128
129 bool operator()(const HitType& hit) const
130 {
131 DataValue found = hit.getMetaValue(key);
132 if (found.isEmpty())
133 return false; // meta value "key" not set
134 if (value.isEmpty())
135 return true; // "key" is set, value doesn't matter
136 return found == value;
137 }
138 };
139
141 template<class HitType>
143 typedef HitType argument_type; // for use as a predicate
144
146 double value;
147
148 HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
149 {
150 }
151
152 bool operator()(const HitType& hit) const
153 {
154 DataValue found = hit.getMetaValue(key);
155 if (found.isEmpty())
156 return false; // meta value "key" not set
157 return double(found) <= value;
158 }
159 };
160
168 template<class HitType>
170 {
171 typedef HitType argument_type; // for use as a predicate
172
174 double value;
175
182 HasMinMetaValue(const String& key_, const double& value_) :
183 key(key_),
184 value(value_)
185 {
186 }
187
194 bool operator()(const HitType& hit) const
195 {
196 DataValue found = hit.getMetaValue(key);
197 if (found.isEmpty())
198 {
199 return false; // meta value "key" not set
200 }
201 return static_cast<double>(found) >= value;
202 }
203 };
204
206
221 template<class HitType>
223 {
224 typedef HitType argument_type; // for use as a predicate
225
226 struct HasMetaValue<HitType> target_decoy, is_decoy;
227
234 target_decoy("target_decoy", "decoy"),
235 is_decoy("isDecoy", "true")
236 {
237 }
238
247 bool operator()(const HitType& hit) const
248 {
249 // @TODO: this could be done slightly more efficiently by returning
250 // false if the "target_decoy" meta value is "target" or "target+decoy",
251 // without checking for an "isDecoy" meta value in that case
252 return target_decoy(hit) || is_decoy(hit);
253 }
254 };
255
261 template<class HitType>
263 typedef HitType argument_type; // for use as a predicate
264
265 const std::unordered_set<String>& accessions;
266
267 HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) :
268 accessions(accessions_)
269 {
270 }
271
272 bool operator()(const PeptideHit& hit) const
273 {
274 for (const auto& it : hit.extractProteinAccessionsSet())
275 {
276 if (accessions.count(it) > 0)
277 return true;
278 }
279 return false;
280 }
281
282 bool operator()(const ProteinHit& hit) const
283 {
284 return (accessions.count(hit.getAccession()) > 0);
285 }
286
287 bool operator()(const PeptideEvidence& evidence) const
288 {
289 return (accessions.count(evidence.getProteinAccession()) > 0);
290 }
291 };
292
298 template<class HitType>
300 typedef HitType argument_type; // for use as a predicate
301
302 const std::set<String>& accessions;
303
304 HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
305 {
306 }
307
308 bool operator()(const PeptideHit& hit) const
309 {
310 for (const auto& it : hit.extractProteinAccessionsSet())
311 {
312 if (accessions.count(it) > 0)
313 return true;
314 }
315 return false;
316 }
317
318 bool operator()(const ProteinHit& hit) const
319 {
320 return (accessions.count(hit.getAccession()) > 0);
321 }
322
323 bool operator()(const PeptideEvidence& evidence) const
324 {
325 return (accessions.count(evidence.getProteinAccession()) > 0);
326 }
327 };
328
334 template<class HitType, class Entry>
336 typedef HitType argument_type; // for use as a predicate
337 typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
339
340 GetMatchingItems(std::vector<Entry>& records)
341 {
342 for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
343 {
344 items[getKey(*rec_it)] = &(*rec_it);
345 }
346 }
347
349 {
350 }
351
352 const String& getKey(const FASTAFile::FASTAEntry& entry) const
353 {
354 return entry.identifier;
355 }
356
357 bool exists(const HitType& hit) const
358 {
359 return items.count(getHitKey(hit)) > 0;
360 }
361
362 const String& getHitKey(const PeptideEvidence& p) const
363 {
364 return p.getProteinAccession();
365 }
366
367 const Entry& getValue(const PeptideEvidence& evidence) const
368 {
369 if (!exists(evidence))
370 {
371 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
372 }
373 return *(items.find(getHitKey(evidence))->second);
374 }
375 };
376
378
379
386
388 struct HasMinPeptideLength;
389
391 struct HasMinCharge;
392
394 struct HasLowMZError;
395
401 struct HasMatchingModification;
402
408 struct HasMatchingSequence;
409
411 struct HasNoEvidence;
412
413
420 {
421 private:
425
426 public:
428 PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
429 {
430 }
431
432 static inline Int disabledValue()
433 {
434 return -1;
435 }
436
439 bool operator()(PeptideHit& p) const
440 {
441 const auto& fun = [&](const Int missed_cleavages) {
442 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
443 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
444 return max_filter || min_filter;
445 };
446 return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
447 }
448
449 void filterPeptideSequences(std::vector<PeptideHit>& hits)
450 {
451 hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
452 }
453 };
454
455
463
464 // Build an accession index to avoid the linear search cost
469
470 DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
471 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
472 {
473 }
474
475 bool operator()(const PeptideEvidence& evidence) const
476 {
477 if (!evidence.hasValidLimits())
478 {
479 OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
480 return true;
481 }
482
483 if (accession_resolver_.exists(evidence))
484 {
485 return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
486 ignore_missed_cleavages_, methionine_cleavage_);
487 }
488 else
489 {
490 if (evidence.getProteinAccession().empty())
491 {
492 OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
493 }
494 else
495 {
496 OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
497 }
498 return true;
499 }
500 }
501
503 {
504 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
505 }
506 };
507
509
510
513
/// Predicate on identifications: true when an ID's hit container is empty.
template<class IdentificationType>
struct HasNoHits
{
  using argument_type = IdentificationType; // for use as a predicate

  /// @return true if @p id.getHits() is empty
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
524
526
527
530
532 struct HasRTInRange;
533
535 struct HasMZInRange;
536
538
539
546
/// Erase every element of @p items for which @p pred returns true (erase/remove idiom).
template<class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto kept_end = std::remove_if(items.begin(), items.end(), pred);
  items.erase(kept_end, items.end());
}
553
/// Keep only the elements of @p items for which @p pred returns true; everything else is erased.
template<class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  // remove_if drops what the negated predicate accepts, i.e. what "pred" rejects
  const auto kept_end = std::remove_if(items.begin(), items.end(), std::not_fn(pred));
  items.erase(kept_end, items.end());
}
560
/// Move every element of @p items for which @p pred returns true to the back of @p target
/// and erase it from @p items. Uses std::partition, so relative order is not guaranteed.
template<class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // non-matching elements are gathered in front; [first_match, end) holds the matches
  const auto first_match = std::partition(items.begin(), items.end(), std::not_fn(pred));
  const auto matches_end = items.end();
  std::move(first_match, matches_end, std::back_inserter(target));
  items.erase(first_match, matches_end);
}
569
/// For each element of @p items, remove the hits (from its getHits()) for which @p pred returns true.
template<class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    removeMatchingItems(hits, pred);
  }
}
579
/// For each element of @p items, keep only the hits (from its getHits()) for which @p pred returns true.
template<class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    keepMatchingItems(hits, pred);
  }
}
589
/// Keep only peptide hits matching @p pred, both in the peptide IDs attached to each element
/// of @p prot_and_pep_ids and in its unassigned peptide IDs.
template<class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // IDs attached to the individual map elements
  for (auto& element : prot_and_pep_ids)
  {
    auto& assigned = element.getPeptideIdentifications();
    keepMatchingItemsUnroll(assigned, pred);
  }
  // IDs that are not attached to any element
  auto& unassigned = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  keepMatchingItemsUnroll(unassigned, pred);
}
599
/// Remove peptide hits matching @p pred, both from the peptide IDs attached to each element
/// of @p prot_and_pep_ids and from its unassigned peptide IDs.
template<class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // IDs attached to the individual map elements
  for (auto& element : prot_and_pep_ids)
  {
    auto& assigned = element.getPeptideIdentifications();
    removeMatchingItemsUnroll(assigned, pred);
  }
  // IDs that are not attached to any element
  auto& unassigned = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  removeMatchingItemsUnroll(unassigned, pred);
}
609
610 template<IsFeatureOrConsensusMap MapType, class Predicate>
611 static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
612 {
613 for (auto& feat : prot_and_pep_ids)
614 {
615 removeMatchingItems(feat.getPeptideIdentifications(), pred);
616 }
617 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
618 }
619
620 // Specialization for PeptideIdentificationList
621 template<class Predicate>
623 {
624 removeMatchingItems(pep_ids, pred);
625 }
626
628
629
632
634 template<class IdentificationType>
635 static Size countHits(const std::vector<IdentificationType>& ids)
636 {
637 Size counter = 0;
638 for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
639 {
640 counter += id_it->getHits().size();
641 }
642 return counter;
643 }
644
647 {
648 Size counter = 0;
649 for (const auto& id : ids)
650 {
651 counter += id.getHits().size();
652 }
653 return counter;
654 }
655
657 static void filterHitsByRank(PeptideIdentificationList& ids, Size min_rank, Size max_rank)
658 {
659 std::vector<PeptideIdentification>& vec = ids.getData();
660 filterHitsByRank(vec, min_rank, max_rank);
661 }
662
664 static void removeHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
665 {
666 std::vector<PeptideIdentification>& vec = ids.getData();
667 removeHitsMatchingProteins(vec, accessions);
668 }
669
671 static void keepHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
672 {
673 std::vector<PeptideIdentification>& vec = ids.getData();
674 keepHitsMatchingProteins(vec, accessions);
675 }
676
678 static bool getBestHit(PeptideIdentificationList& ids, bool assume_sorted, PeptideHit& best_hit)
679 {
680 std::vector<PeptideIdentification>& vec = ids.getData();
681 return getBestHit(vec, assume_sorted, best_hit);
682 }
683
686 {
687 std::vector<PeptideIdentification>& vec = ids.getData();
688 removeEmptyIdentifications(vec);
689 }
690
704 template<class IdentificationType>
705 static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
706 {
707 if (identifications.empty())
708 return false;
709
710 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
711 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
712
713 for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
714 {
715 if (id_it->getHits().empty())
716 continue;
717
718 if (best_id_it == identifications.end()) // no previous "best" hit
719 {
720 best_id_it = id_it;
721 best_hit_it = id_it->getHits().begin();
722 }
723 else if (best_id_it->getScoreType() != id_it->getScoreType())
724 {
725 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
726 }
727
728 bool higher_better = best_id_it->isHigherScoreBetter();
729 for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
730 {
731 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
732 {
733 best_hit_it = hit_it;
734 }
735 if (assume_sorted)
736 break; // only consider the first hit
737 }
738 }
739
740 if (best_id_it == identifications.end())
741 {
742 return false; // no hits in any IDs
743 }
744
745 best_hit = *best_hit_it;
746 return true;
747 }
748
756 static void extractPeptideSequences(const PeptideIdentificationList& peptides, std::set<String>& sequences, bool ignore_mods = false);
757
763 static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
764
770 template<class EvidenceFilter>
771 static void FilterPeptideEvidences(EvidenceFilter& filter, PeptideIdentificationList& peptides)
772 {
773 for (PeptideIdentificationList::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
774 {
775 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
776 {
777 std::vector<PeptideEvidence> evidences;
778 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
779 hit_it->setPeptideEvidences(evidences);
780 }
781 }
782 }
783
785
786
791 static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
792
794 static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const PeptideIdentificationList& peptides);
797
813 static void removeDanglingProteinReferences(PeptideIdentificationList& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
814
829 static void removeDanglingProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
830
845 static void removeDanglingProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
846
855 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
856
863 static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
865
866
869
871 template<IsPeptideOrProteinIdentification IdentificationType>
872 static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
873 {
874 struct HasNoHits<IdentificationType> empty_filter;
875 removeMatchingItems(ids, empty_filter);
876 }
877
883 template<class IdentificationType>
884 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
885 {
886 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
887 {
888 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
889 keepMatchingItems(id_it->getHits(), score_filter);
890 }
891 }
892
906 template<class IdentificationType>
907 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
908 {
910 bool at_least_one_found = false;
911 for (IdentificationType& id : ids)
912 {
913 if (switcher.isScoreType(id.getScoreType(), score_type))
914 {
915 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
916 keepMatchingItems(id.getHits(), score_filter);
917 }
918 else
919 {
920 // If one assumes they are all the same in the vector, this could be done in the beginning.
921 auto result = switcher.findScoreType<IdentificationType>(id, score_type);
922 if (!result.score_name.empty())
923 {
924 String metaval = result.score_name;
925 if (switcher.isScoreTypeHigherBetter(score_type))
926 {
927 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
928 keepMatchingItems(id.getHits(), score_filter);
929 }
930 else
931 {
932 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
933 keepMatchingItems(id.getHits(), score_filter);
934 }
935 at_least_one_found = true;
936 }
937 }
938 }
939 if (!at_least_one_found) OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
940 }
941
948 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
949
955 template<class IdentificationType>
956 static void filterHitsByScore(IdentificationType& id, double threshold_score)
957 {
958 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
959 keepMatchingItems(id.getHits(), score_filter);
960 }
961
967 template<class IdentificationType>
968 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
969 {
970 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
971 {
972 id_it->sort();
973 if (n < id_it->getHits().size())
974 id_it->getHits().resize(n);
975 }
976 }
977
986 static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
987 {
988 std::vector<PeptideIdentification>& vec = pep_ids.getData();
989 keepNBestHits(vec, n);
990 }
991
1006 template<class IdentificationType>
1007 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
1008 {
1009 for (auto& id : ids)
1010 {
1011 auto& hits = id.getHits();
1012 if (hits.empty()) continue;
1013
1014 id.sort(); // Ensure hits are properly sorted
1015
1016 // ignore max_rank?
1017 if (max_rank < min_rank) max_rank = hits.size();
1018
1019 Size rank = 1;
1020 double last_score = hits.front().getScore();
1021
1022 // Remove hits not within [min_rank, max_rank], while computing rank on the fly
1023 hits.erase(
1024 std::remove_if(hits.begin(), hits.end(),
1025 [&](const auto& hit) {
1026 if (hit.getScore() != last_score)
1027 {
1028 ++rank;
1029 last_score = hit.getScore();
1030 }
1031 return rank < min_rank || rank > max_rank;
1032 }),
1033 hits.end()
1034 );
1035 }
1036 }
1037
1045 template<class IdentificationType>
1046 static void removeDecoyHits(std::vector<IdentificationType>& ids)
1047 {
1048 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1049 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1050 {
1051 removeMatchingItems(id_it->getHits(), decoy_filter);
1052 }
1053 }
1054
1062 template<class IdentificationType>
1063 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1064 {
1065 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1066 for (auto& id_it : ids)
1067 {
1068 removeMatchingItems(id_it.getHits(), acc_filter);
1069 }
1070 }
1071
1079 template<IsPeptideOrProteinIdentification IdentificationType>
1080 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1081 {
1082 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1083 keepMatchingItems(id.getHits(), acc_filter);
1084 }
1085
1093 template<class IdentificationType>
1094 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1095 {
1096 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1097 }
1098
1100
1101
1104
1111 static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1112
1121 static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1122
1131 static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1132
1134 static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1135
1137 static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1138
1150 static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1151
1152
1159 template<class Filter>
1160 static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1161
1173 static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1174
1176 static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1177
1178 static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1179
1181 static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1182
1190 static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1191
1199 static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1200
1202 static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1203
1210 static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1211
1213
1214
1217
1219 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1220 double peptide_threshold_score,
1221 double protein_threshold_score)
1222 {
1223 // filter protein hits:
1224 filterHitsByScore(annotated_data.getProteinIdentifications(),
1225 protein_threshold_score);
1226 // don't remove empty protein IDs - they contain search meta data and may
1227 // be referenced by peptide IDs (via run ID)
1228
1229 // filter peptide hits:
1230 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1231 {
1232 filterHitsByScore(peptide_id, peptide_threshold_score);
1233 }
1234 removeDanglingProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1235 }
1236
1238 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1239 {
1240 // don't filter the protein hits by "N best" here - filter the peptides
1241 // and update the protein hits!
1242 PeptideIdentificationList all_peptides; // IDs from all spectra
1243 // filter peptide hits:
1244 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1245 {
1246 // Create a temporary vector with a single PeptideIdentification
1247 PeptideIdentificationList temp_vec = {peptide_id};
1248 keepNBestHits(temp_vec, n);
1249 // Copy back the filtered hits
1250 if (!temp_vec.empty())
1251 {
1252 peptide_id = temp_vec[0];
1253 }
1254 else
1255 {
1256 peptide_id.getHits().clear();
1257 }
1258
1259 // Since we're working with individual PeptideIdentifications, we don't need to remove empty ones
1260 // but we still need to update protein references
1261 temp_vec = {peptide_id};
1262 removeDanglingProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1263 all_peptides.push_back(peptide_id);
1264 }
1265 // update protein hits:
1266 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1267 }
1268
1271 static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1272
1274 template<class MapType>
1275 static void keepNBestPeptideHits(MapType& map, Size n)
1276 {
1277 // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1278 // which sorts Hits first.
1279 for (auto& feat : map)
1280 {
1281 keepNBestHits(feat.getPeptideIdentifications(), n);
1282 }
1283 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1284 }
1285
1286 template<IsNotIdentificationVector MapType>
1287 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1288 {
1289 const auto pred = HasNoHits<PeptideIdentification>();
1290 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1291 }
1292
1294 static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1295 {
1296 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1297 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1298 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1299 }
1300
1301 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1302 {
1303 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1304 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1305 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1306 }
1307
1308 // TODO allow skipping unassigned?
1309 template<class MapType>
1310 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1311 {
1312 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1313
1314 RunToSequenceToChargeToPepHitP best_peps_per_run;
1315 for (const auto& idrun : prot_ids)
1316 {
1317 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1318 }
1319
1320 for (auto& feat : prot_and_pep_ids)
1321 {
1322 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1323 }
1324
1325 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1326 }
1327
1328 template<class MapType>
1329 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1330 {
1331 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1332 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1333 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1334 }
1335
1338 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1339 Size nr_best_spectrum)
1340 {
1341 RunToSequenceToChargeToPepHitP best_peps_per_run;
1342 for (const auto& id : prot_ids)
1343 {
1344 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1345 }
1346 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1347 }
1348
1352 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1353 Size nr_best_spectrum)
1354 {
1355 for (auto& pep : pep_ids)
1356 {
1357 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1358 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1359 }
1360 }
1361
1365 static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1366 {
1367 SequenceToChargeToPepHitP best_pep;
1368 for (auto& pep : pep_ids)
1369 {
1370 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1371 }
1372 }
1373
1378 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1379 {
1380 bool higher_score_better = pep.isHigherScoreBetter();
1381 // make sure that first = best hit
1382 pep.sort();
1383
1384 auto pepIt = pep.getHits().begin();
1385 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1386 for (; pepIt != pepItEnd; ++pepIt)
1387 {
1388 PeptideHit& hit = *pepIt;
1389
1390 String lookup_seq;
1391 if (ignore_mods)
1392 {
1393 lookup_seq = hit.getSequence().toUnmodifiedString();
1394 }
1395 else
1396 {
1397 lookup_seq = hit.getSequence().toString();
1398 }
1399
1400 int lookup_charge = 0;
1401 if (!ignore_charges)
1402 {
1403 lookup_charge = hit.getCharge();
1404 }
1405
1406 // try to insert
1407 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1408 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1409
1410 PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1411 if (!it_inserted_chg.second) // was already present -> possibly update
1412 {
1413 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1414 {
1415 p->setMetaValue("best_per_peptide", 0);
1416 hit.setMetaValue("best_per_peptide", 1);
1417 p = &hit;
1418 }
1419 else // note that this was def. not the best
1420 {
1421 // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1422 hit.setMetaValue("best_per_peptide", 0);
1423 }
1424 }
1425 else // newly inserted -> first for that sequence (and optionally charge)
1426 {
1427 hit.setMetaValue("best_per_peptide", 1);
1428 }
1429 }
1430 }
1431
1434 AnnotatedMSRun& experiment,
1435 const std::vector<FASTAFile::FASTAEntry>& proteins)
1436 {
1437 std::set<String> accessions;
1438 for (auto it = proteins.begin(); it != proteins.end(); ++it)
1439 {
1440 accessions.insert(it->identifier);
1441 }
1442
1443 // filter protein hits:
1444 keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1445
1446 // filter peptide hits:
1447 // std::pair<OpenMS::MSSpectrum&, OpenMS::PeptideIdentification&>
1448 for (auto [spectrum, peptide_id] : experiment)
1449 {
1450 if (spectrum.getMSLevel() == 2)
1451 {
1452 keepHitsMatchingProteins(peptide_id, accessions);
1453 }
1454 }
1455 removeEmptyIdentifications(experiment.getPeptideIdentifications());
1456 }
1457
1459
1460
1463
1474
1487
1493 static void removeDecoys(IdentificationData& id_data);
1495
1496 // Specific overloads for PeptideIdentificationList to ensure correct template resolution
1498 {
1499 removeDecoyHits(ids.getData());
1500 }
1501
1502 static void filterHitsByScore(PeptideIdentificationList& ids, double threshold_score)
1503 {
1504 filterHitsByScore(ids.getData(), threshold_score);
1505 }
1506
1507 static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, PeptideIdentificationList& ids)
1508 {
1509 removeUnreferencedProteins(proteins, ids.getData());
1510 }
1511 };
1512
1513} // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
Class for storing MS run data with peptide and protein identifications.
Definition AnnotatedMSRun.h:38
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition AnnotatedMSRun.h:85
A container for consensus elements.
Definition ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition DataValue.h:34
bool isEmpty() const
Test if the value is empty.
Class for the enzymatic digestion of sequences.
Definition EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
Invalid value exception.
Definition Exception.h:306
const VecMember & getData() const
read-only access to the underlying data
Definition ExposedVector.h:328
typename VecMember::iterator iterator
Definition ExposedVector.h:68
iterator begin() noexcept
Definition ExposedVector.h:104
iterator end() noexcept
Definition ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition IDFilter.h:420
Int max_cleavages_
Definition IDFilter.h:424
EnzymaticDigestion & digestion_
Definition IDFilter.h:422
PeptideHit argument_type
Definition IDFilter.h:427
Int min_cleavages_
Definition IDFilter.h:423
bool operator()(PeptideHit &p) const
Definition IDFilter.h:439
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition IDFilter.h:449
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition IDFilter.h:428
static Int disabledValue()
Definition IDFilter.h:432
Collection of functions for filtering peptide and protein identifications.
Definition IDFilter.h:71
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:664
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition IDFilter.h:884
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDanglingProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits using a reference protein run.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition IDFilter.h:563
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition IDFilter.h:82
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:591
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition IDFilter.h:549
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition IDFilter.h:81
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition IDFilter.h:1497
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition IDFilter.h:872
static void removeEmptyIdentifications(PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:685
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition IDFilter.h:582
static void removeDanglingProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits in a ConsensusMap.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:671
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition IDFilter.h:556
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition IDFilter.h:1433
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:601
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:657
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:646
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition IDFilter.h:678
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition IDFilter.h:572
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition IDFilter.h:1502
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:611
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition IDFilter.h:771
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition IDFilter.h:635
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition IDFilter.h:1507
static void removeDanglingProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition IDFilter.h:705
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition IDFilter.h:622
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition IDFilter.h:80
This class is used to switch identification scores within identification or consensus feature maps.
Definition IDScoreSwitcherAlgorithm.h:42
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition IDScoreSwitcherAlgorithm.h:139
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition IDScoreSwitcherAlgorithm.h:75
ScoreSearchResult findScoreType(const IDType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition IDScoreSwitcherAlgorithm.h:176
Definition IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Representation of a peptide evidence.
Definition PeptideEvidence.h:28
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition ProteinHit.h:35
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition ProteinIdentification.h:54
A more convenient string class.
Definition String.h:34
Definition IDFilter.h:40
Concept to exclude std::vector of identification types (used to disambiguate template overloads)
Definition IDFilter.h:45
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition FASTAFile.h:46
String identifier
Definition FASTAFile.h:47
Is the peptide evidence a digestion product of some protein?
Definition IDFilter.h:461
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition IDFilter.h:470
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition IDFilter.h:465
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition IDFilter.h:502
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:475
bool ignore_missed_cleavages_
Definition IDFilter.h:467
PeptideEvidence argument_type
Definition IDFilter.h:462
ProteaseDigestion & digestion_
Definition IDFilter.h:466
bool methionine_cleavage_
Definition IDFilter.h:468
Builds a map index of data that have a String index to find matches and return the objects.
Definition IDFilter.h:335
std::map< String, Entry * > ItemMap
Definition IDFilter.h:337
GetMatchingItems()
Definition IDFilter.h:348
const String & getHitKey(const PeptideEvidence &p) const
Definition IDFilter.h:362
ItemMap items
Definition IDFilter.h:338
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition IDFilter.h:352
HitType argument_type
Definition IDFilter.h:336
bool exists(const HitType &hit) const
Definition IDFilter.h:357
GetMatchingItems(std::vector< Entry > &records)
Definition IDFilter.h:340
const Entry & getValue(const PeptideEvidence &evidence) const
Definition IDFilter.h:367
Is this a decoy hit?
Definition IDFilter.h:223
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition IDFilter.h:247
HitType argument_type
Definition IDFilter.h:224
HasDecoyAnnotation()
Default constructor.
Definition IDFilter.h:233
Is the score of this hit at least as good as the given value?
Definition IDFilter.h:93
bool operator()(const HitType &hit) const
Definition IDFilter.h:103
double score
Definition IDFilter.h:96
HitType argument_type
Definition IDFilter.h:94
HasGoodScore(double score_, bool higher_score_better_)
Definition IDFilter.h:99
bool higher_score_better
Definition IDFilter.h:97
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition IDFilter.h:262
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition IDFilter.h:267
HitType argument_type
Definition IDFilter.h:263
const std::unordered_set< String > & accessions
Definition IDFilter.h:265
bool operator()(const PeptideHit &hit) const
Definition IDFilter.h:272
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:287
bool operator()(const ProteinHit &hit) const
Definition IDFilter.h:282
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition IDFilter.h:299
HitType argument_type
Definition IDFilter.h:300
bool operator()(const PeptideHit &hit) const
Definition IDFilter.h:308
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:323
const std::set< String > & accessions
Definition IDFilter.h:302
HasMatchingAccession(const std::set< String > &accessions_)
Definition IDFilter.h:304
bool operator()(const ProteinHit &hit) const
Definition IDFilter.h:318
Does a meta value of this hit have at most the given value?
Definition IDFilter.h:142
bool operator()(const HitType &hit) const
Definition IDFilter.h:152
HasMaxMetaValue(const String &key_, const double &value_)
Definition IDFilter.h:148
HitType argument_type
Definition IDFilter.h:143
String key
Definition IDFilter.h:145
double value
Definition IDFilter.h:146
Is a meta value with given key and value set on this hit?
Definition IDFilter.h:119
bool operator()(const HitType &hit) const
Definition IDFilter.h:129
DataValue value
Definition IDFilter.h:123
HitType argument_type
Definition IDFilter.h:120
HasMetaValue(const String &key_, const DataValue &value_)
Definition IDFilter.h:125
String key
Definition IDFilter.h:122
Predicate to check if a HitType object has a minimum meta value.
Definition IDFilter.h:170
bool operator()(const HitType &hit) const
Operator() function to check if a HitType object has a minimum meta value.
Definition IDFilter.h:194
HitType argument_type
Definition IDFilter.h:171
String key
Definition IDFilter.h:173
HasMinMetaValue(const String &key_, const double &value_)
Constructor for HasMinMetaValue.
Definition IDFilter.h:182
double value
Definition IDFilter.h:174
Is the list of hits of this peptide/protein ID empty?
Definition IDFilter.h:516
bool operator()(const IdentificationType &id) const
Definition IDFilter.h:519
IdentificationType argument_type
Definition IDFilter.h:517
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition MetaData.h:20