OpenMS
IDFilter.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
21 #include <OpenMS/config.h>
22 #include <algorithm>
23 #include <climits>
24 #include <functional>
25 #include <map>
26 #include <set>
27 #include <unordered_set>
28 #include <vector>
29 
30 namespace OpenMS
31 {
52  class OPENMS_DLLAPI IDFilter
53  {
54  public:
56  IDFilter() = default;
57 
59  virtual ~IDFilter() = default;
60 
62  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
63  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
64  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
65 
72 
74  template<class HitType>
75  struct HasGoodScore {
76  typedef HitType argument_type; // for use as a predicate
77 
78  double score;
80 
81  HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
82  {
83  }
84 
85  bool operator()(const HitType& hit) const
86  {
87  if (higher_score_better)
88  {
89  return hit.getScore() >= score;
90  }
91  return hit.getScore() <= score;
92  }
93  };
94 
100  template<class HitType>
101  struct HasMaxRank {
102  typedef HitType argument_type; // for use as a predicate
103 
105 
106  HasMaxRank(Size rank_) : rank(rank_)
107  {
108  if (rank_ == 0)
109  {
110  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
111  }
112  }
113 
114  bool operator()(const HitType& hit) const
115  {
116  Size hit_rank = hit.getRank();
117  if (hit_rank == 0)
118  {
119  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
120  }
121  return hit_rank <= rank;
122  }
123  };
124 
130  template<class HitType>
131  struct HasMetaValue {
132  typedef HitType argument_type; // for use as a predicate
133 
136 
137  HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
138  {
139  }
140 
141  bool operator()(const HitType& hit) const
142  {
143  DataValue found = hit.getMetaValue(key);
144  if (found.isEmpty())
145  return false; // meta value "key" not set
146  if (value.isEmpty())
147  return true; // "key" is set, value doesn't matter
148  return found == value;
149  }
150  };
151 
153  template<class HitType>
155  typedef HitType argument_type; // for use as a predicate
156 
158  double value;
159 
160  HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
161  {
162  }
163 
164  bool operator()(const HitType& hit) const
165  {
166  DataValue found = hit.getMetaValue(key);
167  if (found.isEmpty())
168  return false; // meta value "key" not set
169  return double(found) <= value;
170  }
171  };
172 
174  template<class HitType>
176  typedef HitType argument_type; // for use as a predicate
177 
178  struct HasMetaValue<HitType> target_decoy, is_decoy;
179 
180  HasDecoyAnnotation() : target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
181  {
182  }
183 
184  bool operator()(const HitType& hit) const
185  {
186  // @TODO: this could be done slightly more efficiently by returning
187  // false if the "target_decoy" meta value is "target" or "target+decoy",
188  // without checking for an "isDecoy" meta value in that case
189  return target_decoy(hit) || is_decoy(hit);
190  }
191  };
192 
198  template<class HitType>
200  typedef HitType argument_type; // for use as a predicate
201 
202  const std::unordered_set<String>& accessions;
203 
204  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) : accessions(accessions_)
205  {
206  }
207 
208  bool operator()(const PeptideHit& hit) const
209  {
210  for (const auto& it : hit.extractProteinAccessionsSet())
211  {
212  if (accessions.count(it) > 0)
213  return true;
214  }
215  return false;
216  }
217 
218  bool operator()(const ProteinHit& hit) const
219  {
220  return (accessions.count(hit.getAccession()) > 0);
221  }
222 
223  bool operator()(const PeptideEvidence& evidence) const
224  {
225  return (accessions.count(evidence.getProteinAccession()) > 0);
226  }
227  };
228 
234  template<class HitType>
236  typedef HitType argument_type; // for use as a predicate
237 
238  const std::set<String>& accessions;
239 
240  HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
241  {
242  }
243 
244  bool operator()(const PeptideHit& hit) const
245  {
246  for (const auto& it : hit.extractProteinAccessionsSet())
247  {
248  if (accessions.count(it) > 0)
249  return true;
250  }
251  return false;
252  }
253 
254  bool operator()(const ProteinHit& hit) const
255  {
256  return (accessions.count(hit.getAccession()) > 0);
257  }
258 
259  bool operator()(const PeptideEvidence& evidence) const
260  {
261  return (accessions.count(evidence.getProteinAccession()) > 0);
262  }
263  };
264 
270  template<class HitType, class Entry>
272  typedef HitType argument_type; // for use as a predicate
273  typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
275 
276  GetMatchingItems(std::vector<Entry>& records)
277  {
278  for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
279  {
280  items[getKey(*rec_it)] = &(*rec_it);
281  }
282  }
283 
285  {
286  }
287 
288  const String& getKey(const FASTAFile::FASTAEntry& entry) const
289  {
290  return entry.identifier;
291  }
292 
293  bool exists(const HitType& hit) const
294  {
295  return items.count(getHitKey(hit)) > 0;
296  }
297 
298  const String& getHitKey(const PeptideEvidence& p) const
299  {
300  return p.getProteinAccession();
301  }
302 
303  const Entry& getValue(const PeptideEvidence& evidence) const
304  {
305  if (!exists(evidence))
306  {
307  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
308  }
309  return *(items.find(getHitKey(evidence))->second);
310  }
311  };
312 
314 
315 
322 
324  struct HasMinPeptideLength;
325 
327  struct HasMinCharge;
328 
330  struct HasLowMZError;
331 
337  struct HasMatchingModification;
338 
344  struct HasMatchingSequence;
345 
347  struct HasNoEvidence;
348 
349 
356  {
357  private:
361 
362  public:
364  PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
365  {
366  }
367 
368  static inline Int disabledValue()
369  {
370  return -1;
371  }
372 
375  bool operator()(PeptideHit& p) const
376  {
377  const auto& fun = [&](const Int missed_cleavages) {
378  bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
379  bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
380  return max_filter || min_filter;
381  };
382  return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
383  }
384 
385  void filterPeptideSequences(std::vector<PeptideHit>& hits)
386  {
387  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
388  }
389  };
390 
391 
399 
400  // Build an accession index to avoid the linear search cost
405 
406  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
407  accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
408  {
409  }
410 
411  bool operator()(const PeptideEvidence& evidence) const
412  {
413  if (!evidence.hasValidLimits())
414  {
415  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
416  return true;
417  }
418 
419  if (accession_resolver_.exists(evidence))
420  {
421  return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
422  ignore_missed_cleavages_, methionine_cleavage_);
423  }
424  else
425  {
426  if (evidence.getProteinAccession().empty())
427  {
428  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
429  }
430  else
431  {
432  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
433  }
434  return true;
435  }
436  }
437 
438  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
439  {
440  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
441  }
442  };
443 
445 
446 
449 
451  template<class IdentificationType>
452  struct HasNoHits {
453  typedef IdentificationType argument_type; // for use as a predicate
454 
455  bool operator()(const IdentificationType& id) const
456  {
457  return id.getHits().empty();
458  }
459  };
460 
462 
463 
466 
468  struct HasRTInRange;
469 
471  struct HasMZInRange;
472 
474 
475 
482 
484  template<class Container, class Predicate>
485  static void removeMatchingItems(Container& items, const Predicate& pred)
486  {
487  items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
488  }
489 
491  template<class Container, class Predicate>
492  static void keepMatchingItems(Container& items, const Predicate& pred)
493  {
494  items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
495  }
496 
498  template<class Container, class Predicate>
499  static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
500  {
501  auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
502  std::move(part, items.end(), std::back_inserter(target));
503  items.erase(part, items.end());
504  }
505 
507  template<class IDContainer, class Predicate>
508  static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
509  {
510  for (auto& item : items)
511  {
512  removeMatchingItems(item.getHits(), pred);
513  }
514  }
515 
517  template<class IDContainer, class Predicate>
518  static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
519  {
520  for (auto& item : items)
521  {
522  keepMatchingItems(item.getHits(), pred);
523  }
524  }
525 
526  template<class MapType, class Predicate>
527  static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
528  {
529  for (auto& feat : prot_and_pep_ids)
530  {
531  keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
532  }
533  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
534  }
535 
536  template<class MapType, class Predicate>
537  static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
538  {
539  for (auto& feat : prot_and_pep_ids)
540  {
541  removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
542  }
543  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
544  }
545 
546  template<class MapType, class Predicate>
547  static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
548  {
549  for (auto& feat : prot_and_pep_ids)
550  {
551  removeMatchingItems(feat.getPeptideIdentifications(), pred);
552  }
553  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
554  }
555 
557 
558 
561 
563  template<class IdentificationType>
564  static Size countHits(const std::vector<IdentificationType>& ids)
565  {
566  Size counter = 0;
567  for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
568  {
569  counter += id_it->getHits().size();
570  }
571  return counter;
572  }
573 
587  template<class IdentificationType>
588  static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
589  {
590  if (identifications.empty())
591  return false;
592 
593  typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
594  typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
595 
596  for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
597  {
598  if (id_it->getHits().empty())
599  continue;
600 
601  if (best_id_it == identifications.end()) // no previous "best" hit
602  {
603  best_id_it = id_it;
604  best_hit_it = id_it->getHits().begin();
605  }
606  else if (best_id_it->getScoreType() != id_it->getScoreType())
607  {
608  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
609  }
610 
611  bool higher_better = best_id_it->isHigherScoreBetter();
612  for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
613  {
614  if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
615  {
616  best_hit_it = hit_it;
617  }
618  if (assume_sorted)
619  break; // only consider the first hit
620  }
621  }
622 
623  if (best_id_it == identifications.end())
624  {
625  return false; // no hits in any IDs
626  }
627 
628  best_hit = *best_hit_it;
629  return true;
630  }
631 
639  static void extractPeptideSequences(const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences, bool ignore_mods = false);
640 
646  static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
647 
653  template<class EvidenceFilter>
654  static void FilterPeptideEvidences(EvidenceFilter& filter, std::vector<PeptideIdentification>& peptides)
655  {
656  for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
657  {
658  for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
659  {
660  std::vector<PeptideEvidence> evidences;
661  remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
662  hit_it->setPeptideEvidences(evidences);
663  }
664  }
665  }
666 
668 
669 
672 
674  template<class IdentificationType>
675  static void updateHitRanks(std::vector<IdentificationType>& ids)
676  {
677  for (typename std::vector<IdentificationType>::iterator it = ids.begin(); it != ids.end(); ++it)
678  {
679  it->assignRanks();
680  }
681  }
682 
685  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
686 
688  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const std::vector<PeptideIdentification>& peptides);
690  static void removeUnreferencedProteins(ProteinIdentification& proteins, const std::vector<PeptideIdentification>& peptides);
691 
699  static void updateProteinReferences(std::vector<PeptideIdentification>& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
700 
708  static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
709 
717  static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
718 
727  static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
728 
735  static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
737 
738 
741 
743  template<class IdentificationType>
744  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
745  {
746  struct HasNoHits<IdentificationType> empty_filter;
747  removeMatchingItems(ids, empty_filter);
748  }
749 
755  template<class IdentificationType>
756  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
757  {
758  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
759  {
760  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
761  keepMatchingItems(id_it->getHits(), score_filter);
762  }
763  }
764 
771  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
772 
778  template<class IdentificationType>
779  static void filterHitsByScore(IdentificationType& id, double threshold_score)
780  {
781  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
782  keepMatchingItems(id.getHits(), score_filter);
783  }
784 
790  template<class IdentificationType>
791  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
792  {
793  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
794  {
795  id_it->sort();
796  if (n < id_it->getHits().size())
797  id_it->getHits().resize(n);
798  }
799  }
800 
815  template<class IdentificationType>
816  static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
817  {
818  updateHitRanks(ids);
819  if (min_rank > 1)
820  {
821  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
822  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
823  {
824  removeMatchingItems(id_it->getHits(), rank_filter);
825  }
826  }
827  if (max_rank >= min_rank)
828  {
829  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
830  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
831  {
832  keepMatchingItems(id_it->getHits(), rank_filter);
833  }
834  }
835  }
836 
844  template<class IdentificationType>
845  static void removeDecoyHits(std::vector<IdentificationType>& ids)
846  {
847  struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
848  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
849  {
850  removeMatchingItems(id_it->getHits(), decoy_filter);
851  }
852  }
853 
861  template<class IdentificationType>
862  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
863  {
864  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
865  for (auto& id_it : ids)
866  {
867  removeMatchingItems(id_it.getHits(), acc_filter);
868  }
869  }
870 
878  template<class IdentificationType>
879  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
880  {
881  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
882  for (auto& id_it : ids)
883  {
884  keepMatchingItems(id_it.getHits(), acc_filter);
885  }
886  }
887 
889 
890 
893 
900  static void keepBestPeptideHits(std::vector<PeptideIdentification>& peptides, bool strict = false);
901 
910  static void filterPeptidesByLength(std::vector<PeptideIdentification>& peptides, Size min_length, Size max_length = UINT_MAX);
911 
920  static void filterPeptidesByCharge(std::vector<PeptideIdentification>& peptides, Int min_charge, Int max_charge);
921 
923  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides, double min_rt, double max_rt);
924 
926  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides, double min_mz, double max_mz);
927 
939  static void filterPeptidesByMZError(std::vector<PeptideIdentification>& peptides, double mass_error, bool unit_ppm);
940 
941 
948  template<class Filter>
949  static void filterPeptideEvidences(Filter& filter, std::vector<PeptideIdentification>& peptides);
950 
962  static void filterPeptidesByRTPredictPValue(std::vector<PeptideIdentification>& peptides, const String& metavalue_key, double threshold = 0.05);
963 
965  static void removePeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
966 
967  static void removePeptidesWithMatchingRegEx(std::vector<PeptideIdentification>& peptides, const String& regex);
968 
970  static void keepPeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
971 
979  static void removePeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& bad_peptides, bool ignore_mods = false);
980 
988  static void keepPeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& good_peptides, bool ignore_mods = false);
989 
991  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>& peptides);
992 
999  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>& peptides, bool seq_only = false);
1000 
1002 
1003 
1006 
1008  static void filterHitsByScore(PeakMap& experiment, double peptide_threshold_score, double protein_threshold_score)
1009  {
1010  // filter protein hits:
1011  filterHitsByScore(experiment.getProteinIdentifications(), protein_threshold_score);
1012  // don't remove empty protein IDs - they contain search metadata and may
1013  // be referenced by peptide IDs (via run ID)
1014 
1015  // filter peptide hits:
1016  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1017  {
1018  filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1019  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1020  // TODO super-duper inefficient.
1021  updateProteinReferences(exp_it->getPeptideIdentifications(), experiment.getProteinIdentifications());
1022  }
1023  // @TODO: remove proteins that aren't referenced by peptides any more?
1024  }
1025 
1027  static void keepNBestHits(PeakMap& experiment, Size n)
1028  {
1029  // don't filter the protein hits by "N best" here - filter the peptides
1030  // and update the protein hits!
1031  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1032 
1033  // filter peptide hits:
1034  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1035  {
1036  std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1037  keepNBestHits(peptides, n);
1038  removeEmptyIdentifications(peptides);
1039  updateProteinReferences(peptides, experiment.getProteinIdentifications());
1040  all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1041  }
1042  // update protein hits:
1043  removeUnreferencedProteins(experiment.getProteinIdentifications(), all_peptides);
1044  }
1045 
1048  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1049 
1051  template<class MapType>
1052  static void keepNBestPeptideHits(MapType& map, Size n)
1053  {
1054  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1055  // which sorts Hits first.
1056  for (auto& feat : map)
1057  {
1058  keepNBestHits(feat.getPeptideIdentifications(), n);
1059  }
1060  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1061  }
1062 
1063  template<class MapType>
1064  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1065  {
1066  const auto pred = HasNoHits<PeptideIdentification>();
1067  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1068  }
1069 
1071  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1072  {
1073  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1074  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1075  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1076  }
1077 
1078  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1079  {
1080  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1081  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1082  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1083  }
1084 
1085  // TODO allow skipping unassigned?
1086  template<class MapType>
1087  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1088  {
1089  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1090 
1091  RunToSequenceToChargeToPepHitP best_peps_per_run;
1092  for (const auto& idrun : prot_ids)
1093  {
1094  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1095  }
1096 
1097  for (auto& feat : prot_and_pep_ids)
1098  {
1099  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1100  }
1101 
1102  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1103  }
1104 
1105  template<class MapType>
1106  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1107  {
1108  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1109  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1110  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1111  }
1112 
1115  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1116  Size nr_best_spectrum)
1117  {
1118  RunToSequenceToChargeToPepHitP best_peps_per_run;
1119  for (const auto& id : prot_ids)
1120  {
1121  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1122  }
1123  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1124  }
1125 
1129  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1130  Size nr_best_spectrum)
1131  {
1132  for (auto& pep : pep_ids)
1133  {
1134  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1135  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1136  }
1137  }
1138 
1142  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1143  {
1144  SequenceToChargeToPepHitP best_pep;
1145  for (auto& pep : pep_ids)
1146  {
1147  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1148  }
1149  }
1150 
1155  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1156  {
1157  bool higher_score_better = pep.isHigherScoreBetter();
1158  // make sure that first = best hit
1159  pep.sort();
1160 
1161  auto pepIt = pep.getHits().begin();
1162  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1163  for (; pepIt != pepItEnd; ++pepIt)
1164  {
1165  PeptideHit& hit = *pepIt;
1166 
1167  String lookup_seq;
1168  if (ignore_mods)
1169  {
1170  lookup_seq = hit.getSequence().toUnmodifiedString();
1171  }
1172  else
1173  {
1174  lookup_seq = hit.getSequence().toString();
1175  }
1176 
1177  int lookup_charge = 0;
1178  if (!ignore_charges)
1179  {
1180  lookup_charge = hit.getCharge();
1181  }
1182 
1183  // try to insert
1184  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1185  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1186 
1187  PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1188  if (!it_inserted_chg.second) // was already present -> possibly update
1189  {
1190  if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1191  {
1192  p->setMetaValue("best_per_peptide", 0);
1193  hit.setMetaValue("best_per_peptide", 1);
1194  p = &hit;
1195  }
1196  else // note that this was def. not the best
1197  {
1198  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1199  hit.setMetaValue("best_per_peptide", 0);
1200  }
1201  }
1202  else // newly inserted -> first for that sequence (and optionally charge)
1203  {
1204  hit.setMetaValue("best_per_peptide", 1);
1205  }
1206  }
1207  }
1208 
1210  static void keepHitsMatchingProteins(PeakMap& experiment, const std::vector<FASTAFile::FASTAEntry>& proteins)
1211  {
1212  std::set<String> accessions;
1213  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1214  {
1215  accessions.insert(it->identifier);
1216  }
1217 
1218  // filter protein hits:
1219  keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1220  updateHitRanks(experiment.getProteinIdentifications());
1221 
1222  // filter peptide hits:
1223  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1224  {
1225  if (exp_it->getMSLevel() == 2)
1226  {
1227  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1228  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1229  updateHitRanks(exp_it->getPeptideIdentifications());
1230  }
1231  }
1232  }
1233 
1235 
1236 
1239 
1250 
1263 
1269  static void removeDecoys(IdentificationData& id_data);
1271  };
1272 
1273 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications or (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:624
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:315
Invalid value exception.
Definition: Exception.h:303
Not all required information provided.
Definition: Exception.h:162
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:356
Int max_cleavages_
Definition: IDFilter.h:360
EnzymaticDigestion & digestion_
Definition: IDFilter.h:358
PeptideHit argument_type
Definition: IDFilter.h:363
Int min_cleavages_
Definition: IDFilter.h:359
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:375
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:385
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:364
static Int disabledValue()
Definition: IDFilter.h:368
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:53
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:756
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:499
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1027
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1087
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:64
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:527
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1064
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1115
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:485
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1129
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:63
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:744
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:518
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:845
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:492
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1155
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:537
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1071
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:654
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:816
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1052
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:508
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:547
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:564
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:588
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:675
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1106
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1210
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:862
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1078
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:879
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1142
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1008
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:62
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
Iterator begin()
Definition: MSExperiment.h:156
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:77
Iterator end()
Definition: MSExperiment.h:166
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:31
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:76
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:397
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:406
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:401
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:438
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:411
bool ignore_missed_cleavages_
Definition: IDFilter.h:403
PeptideEvidence argument_type
Definition: IDFilter.h:398
ProteaseDigestion & digestion_
Definition: IDFilter.h:402
bool methionine_cleavage_
Definition: IDFilter.h:404
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:271
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:273
GetMatchingItems()
Definition: IDFilter.h:284
ItemMap items
Definition: IDFilter.h:274
HitType argument_type
Definition: IDFilter.h:272
bool exists(const HitType &hit) const
Definition: IDFilter.h:293
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:303
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:276
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:288
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:298
Is this a decoy hit?
Definition: IDFilter.h:175
bool operator()(const HitType &hit) const
Definition: IDFilter.h:184
HitType argument_type
Definition: IDFilter.h:176
HasDecoyAnnotation()
Definition: IDFilter.h:180
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:75
bool operator()(const HitType &hit) const
Definition: IDFilter.h:85
double score
Definition: IDFilter.h:78
HitType argument_type
Definition: IDFilter.h:76
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:81
bool higher_score_better
Definition: IDFilter.h:79
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:199
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:204
HitType argument_type
Definition: IDFilter.h:200
const std::unordered_set< String > & accessions
Definition: IDFilter.h:202
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:208
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:223
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:218
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:235
HitType argument_type
Definition: IDFilter.h:236
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:244
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:259
const std::set< String > & accessions
Definition: IDFilter.h:238
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:240
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:254
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:154
bool operator()(const HitType &hit) const
Definition: IDFilter.h:164
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:160
HitType argument_type
Definition: IDFilter.h:155
String key
Definition: IDFilter.h:157
double value
Definition: IDFilter.h:158
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:101
bool operator()(const HitType &hit) const
Definition: IDFilter.h:114
HitType argument_type
Definition: IDFilter.h:102
Size rank
Definition: IDFilter.h:104
HasMaxRank(Size rank_)
Definition: IDFilter.h:106
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:131
bool operator()(const HitType &hit) const
Definition: IDFilter.h:141
DataValue value
Definition: IDFilter.h:135
HitType argument_type
Definition: IDFilter.h:132
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:137
String key
Definition: IDFilter.h:134
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:452
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:455
IdentificationType argument_type
Definition: IDFilter.h:453
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20