OpenMS
IDFilter.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
22 #include <OpenMS/config.h>
23 #include <algorithm>
24 #include <climits>
25 #include <functional>
26 #include <map>
27 #include <set>
28 #include <unordered_set>
29 #include <vector>
30 
31 namespace OpenMS
32 {
53  class OPENMS_DLLAPI IDFilter
54  {
55  public:
57  IDFilter() = default;
58 
60  virtual ~IDFilter() = default;
61 
63  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
64  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
65  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
66 
73 
/// Predicate: is the score of a hit at least as good as a given cut-off?
/// The cut-off itself counts as "good" (comparison is inclusive).
template<class HitType>
struct HasGoodScore {
  typedef HitType argument_type; // for use as a predicate

  double score;             ///< score cut-off
  bool higher_score_better; ///< true: larger scores are better; false: smaller scores are better

  HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
  {
  }

  /// @return true if the score of @p hit is on the "good" side of (or equal to) the cut-off
  bool operator()(const HitType& hit) const
  {
    if (higher_score_better)
    {
      return hit.getScore() >= score;
    }
    return hit.getScore() <= score;
  }
};
95 
101  template<class HitType>
102  struct HasMaxRank {
103  typedef HitType argument_type; // for use as a predicate
104 
106 
107  HasMaxRank(Size rank_) : rank(rank_)
108  {
109  if (rank_ == 0)
110  {
111  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
112  }
113  }
114 
115  bool operator()(const HitType& hit) const
116  {
117  Size hit_rank = hit.getRank();
118  if (hit_rank == 0)
119  {
120  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
121  }
122  return hit_rank <= rank;
123  }
124  };
125 
131  template<class HitType>
132  struct HasMetaValue {
133  typedef HitType argument_type; // for use as a predicate
134 
137 
138  HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
139  {
140  }
141 
142  bool operator()(const HitType& hit) const
143  {
144  DataValue found = hit.getMetaValue(key);
145  if (found.isEmpty())
146  return false; // meta value "key" not set
147  if (value.isEmpty())
148  return true; // "key" is set, value doesn't matter
149  return found == value;
150  }
151  };
152 
154  template<class HitType>
156  typedef HitType argument_type; // for use as a predicate
157 
159  double value;
160 
161  HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
162  {
163  }
164 
165  bool operator()(const HitType& hit) const
166  {
167  DataValue found = hit.getMetaValue(key);
168  if (found.isEmpty())
169  return false; // meta value "key" not set
170  return double(found) <= value;
171  }
172  };
173 
181  template<class HitType>
183  {
184  typedef HitType argument_type; // for use as a predicate
185 
187  double value;
188 
195  HasMinMetaValue(const String& key_, const double& value_) :
196  key(key_),
197  value(value_)
198  {
199  }
200 
207  bool operator()(const HitType& hit) const
208  {
209  DataValue found = hit.getMetaValue(key);
210  if (found.isEmpty())
211  {
212  return false; // meta value "key" not set
213  }
214  return static_cast<double>(found) >= value;
215  }
216  };
217 
219 
234  template<class HitType>
236  {
237  typedef HitType argument_type; // for use as a predicate
238 
239  struct HasMetaValue<HitType> target_decoy, is_decoy;
240 
247  target_decoy("target_decoy", "decoy"),
248  is_decoy("isDecoy", "true")
249  {
250  }
251 
260  bool operator()(const HitType& hit) const
261  {
262  // @TODO: this could be done slightly more efficiently by returning
263  // false if the "target_decoy" meta value is "target" or "target+decoy",
264  // without checking for an "isDecoy" meta value in that case
265  return target_decoy(hit) || is_decoy(hit);
266  }
267  };
268 
274  template<class HitType>
276  typedef HitType argument_type; // for use as a predicate
277 
278  const std::unordered_set<String>& accessions;
279 
280  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) :
281  accessions(accessions_)
282  {
283  }
284 
285  bool operator()(const PeptideHit& hit) const
286  {
287  for (const auto& it : hit.extractProteinAccessionsSet())
288  {
289  if (accessions.count(it) > 0)
290  return true;
291  }
292  return false;
293  }
294 
295  bool operator()(const ProteinHit& hit) const
296  {
297  return (accessions.count(hit.getAccession()) > 0);
298  }
299 
300  bool operator()(const PeptideEvidence& evidence) const
301  {
302  return (accessions.count(evidence.getProteinAccession()) > 0);
303  }
304  };
305 
311  template<class HitType>
313  typedef HitType argument_type; // for use as a predicate
314 
315  const std::set<String>& accessions;
316 
317  HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
318  {
319  }
320 
321  bool operator()(const PeptideHit& hit) const
322  {
323  for (const auto& it : hit.extractProteinAccessionsSet())
324  {
325  if (accessions.count(it) > 0)
326  return true;
327  }
328  return false;
329  }
330 
331  bool operator()(const ProteinHit& hit) const
332  {
333  return (accessions.count(hit.getAccession()) > 0);
334  }
335 
336  bool operator()(const PeptideEvidence& evidence) const
337  {
338  return (accessions.count(evidence.getProteinAccession()) > 0);
339  }
340  };
341 
347  template<class HitType, class Entry>
349  typedef HitType argument_type; // for use as a predicate
350  typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
352 
353  GetMatchingItems(std::vector<Entry>& records)
354  {
355  for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
356  {
357  items[getKey(*rec_it)] = &(*rec_it);
358  }
359  }
360 
362  {
363  }
364 
365  const String& getKey(const FASTAFile::FASTAEntry& entry) const
366  {
367  return entry.identifier;
368  }
369 
370  bool exists(const HitType& hit) const
371  {
372  return items.count(getHitKey(hit)) > 0;
373  }
374 
375  const String& getHitKey(const PeptideEvidence& p) const
376  {
377  return p.getProteinAccession();
378  }
379 
380  const Entry& getValue(const PeptideEvidence& evidence) const
381  {
382  if (!exists(evidence))
383  {
384  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
385  }
386  return *(items.find(getHitKey(evidence))->second);
387  }
388  };
389 
391 
392 
399 
401  struct HasMinPeptideLength;
402 
404  struct HasMinCharge;
405 
407  struct HasLowMZError;
408 
414  struct HasMatchingModification;
415 
421  struct HasMatchingSequence;
422 
424  struct HasNoEvidence;
425 
426 
433  {
434  private:
438 
439  public:
441  PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
442  {
443  }
444 
445  static inline Int disabledValue()
446  {
447  return -1;
448  }
449 
452  bool operator()(PeptideHit& p) const
453  {
454  const auto& fun = [&](const Int missed_cleavages) {
455  bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
456  bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
457  return max_filter || min_filter;
458  };
459  return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
460  }
461 
462  void filterPeptideSequences(std::vector<PeptideHit>& hits)
463  {
464  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
465  }
466  };
467 
468 
476 
477  // Build an accession index to avoid the linear search cost
482 
483  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
484  accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
485  {
486  }
487 
488  bool operator()(const PeptideEvidence& evidence) const
489  {
490  if (!evidence.hasValidLimits())
491  {
492  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
493  return true;
494  }
495 
496  if (accession_resolver_.exists(evidence))
497  {
498  return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
499  ignore_missed_cleavages_, methionine_cleavage_);
500  }
501  else
502  {
503  if (evidence.getProteinAccession().empty())
504  {
505  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
506  }
507  else
508  {
509  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
510  }
511  return true;
512  }
513  }
514 
515  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
516  {
517  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
518  }
519  };
520 
522 
523 
526 
/// Predicate: is the list of hits of an identification empty?
template<class IdentificationType>
struct HasNoHits {
  typedef IdentificationType argument_type; // for use as a predicate

  /// @return true if @p id holds no hits at all
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
537 
539 
540 
543 
545  struct HasRTInRange;
546 
548  struct HasMZInRange;
549 
551 
552 
559 
/// Erase every element of @p items for which @p pred returns true (erase-remove idiom).
template<class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_discarded = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_discarded, items.end());
}
566 
/// Erase every element of @p items for which @p pred returns false,
/// i.e. keep only the elements that satisfy the predicate.
template<class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_discarded = std::remove_if(items.begin(), items.end(), std::not_fn(pred));
  items.erase(first_discarded, items.end());
}
573 
/// Move every element of @p items that satisfies @p pred to the end of @p target
/// and erase it from @p items. std::partition is used, so the relative order of
/// the elements is not guaranteed to be preserved.
template<class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // non-matching elements first, matching elements in the tail [matching_begin, end)
  const auto matching_begin = std::partition(items.begin(), items.end(), std::not_fn(pred));
  auto sink = std::back_inserter(target);
  std::move(matching_begin, items.end(), sink);
  items.erase(matching_begin, items.end());
}
582 
/// For every identification in @p items, erase the hits for which @p pred returns true.
template<class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    hits.erase(std::remove_if(hits.begin(), hits.end(), pred), hits.end());
  }
}
592 
/// For every identification in @p items, keep only the hits for which @p pred returns true.
template<class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    hits.erase(std::remove_if(hits.begin(), hits.end(), std::not_fn(pred)), hits.end());
  }
}
602 
/// Keep only the peptide hits that satisfy @p pred, both in the peptide IDs attached
/// to the elements of the map and in its unassigned peptide IDs.
template<class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // unassigned peptide IDs are handled last, after all map elements
  for (auto& element : prot_and_pep_ids)
  {
    auto& assigned_ids = element.getPeptideIdentifications();
    keepMatchingItemsUnroll(assigned_ids, pred);
  }
  auto& unassigned_ids = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  keepMatchingItemsUnroll(unassigned_ids, pred);
}
612 
/// Remove the peptide hits that satisfy @p pred, both from the peptide IDs attached
/// to the elements of the map and from its unassigned peptide IDs.
template<class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // unassigned peptide IDs are handled last, after all map elements
  for (auto& element : prot_and_pep_ids)
  {
    auto& assigned_ids = element.getPeptideIdentifications();
    removeMatchingItemsUnroll(assigned_ids, pred);
  }
  auto& unassigned_ids = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  removeMatchingItemsUnroll(unassigned_ids, pred);
}
622 
/// Remove whole peptide identifications that satisfy @p pred, both from the elements
/// of the map and from its unassigned peptide IDs.
template<class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
{
  // unassigned peptide IDs are handled last, after all map elements
  for (auto& element : prot_and_pep_ids)
  {
    auto& assigned_ids = element.getPeptideIdentifications();
    removeMatchingItems(assigned_ids, pred);
  }
  auto& unassigned_ids = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  removeMatchingItems(unassigned_ids, pred);
}
632 
634 
635 
638 
640  template<class IdentificationType>
641  static Size countHits(const std::vector<IdentificationType>& ids)
642  {
643  Size counter = 0;
644  for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
645  {
646  counter += id_it->getHits().size();
647  }
648  return counter;
649  }
650 
664  template<class IdentificationType>
665  static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
666  {
667  if (identifications.empty())
668  return false;
669 
670  typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
671  typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
672 
673  for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
674  {
675  if (id_it->getHits().empty())
676  continue;
677 
678  if (best_id_it == identifications.end()) // no previous "best" hit
679  {
680  best_id_it = id_it;
681  best_hit_it = id_it->getHits().begin();
682  }
683  else if (best_id_it->getScoreType() != id_it->getScoreType())
684  {
685  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
686  }
687 
688  bool higher_better = best_id_it->isHigherScoreBetter();
689  for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
690  {
691  if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
692  {
693  best_hit_it = hit_it;
694  }
695  if (assume_sorted)
696  break; // only consider the first hit
697  }
698  }
699 
700  if (best_id_it == identifications.end())
701  {
702  return false; // no hits in any IDs
703  }
704 
705  best_hit = *best_hit_it;
706  return true;
707  }
708 
716  static void extractPeptideSequences(const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences, bool ignore_mods = false);
717 
723  static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
724 
730  template<class EvidenceFilter>
731  static void FilterPeptideEvidences(EvidenceFilter& filter, std::vector<PeptideIdentification>& peptides)
732  {
733  for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
734  {
735  for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
736  {
737  std::vector<PeptideEvidence> evidences;
738  remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
739  hit_it->setPeptideEvidences(evidences);
740  }
741  }
742  }
743 
745 
746 
749 
/// Call assignRanks() on every identification in @p ids.
template<class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
759 
762  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
763 
765  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const std::vector<PeptideIdentification>& peptides);
767  static void removeUnreferencedProteins(ProteinIdentification& proteins, const std::vector<PeptideIdentification>& peptides);
768 
776  static void updateProteinReferences(std::vector<PeptideIdentification>& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
777 
785  static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
786 
794  static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
795 
804  static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
805 
812  static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
814 
815 
818 
820  template<class IdentificationType>
821  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
822  {
823  struct HasNoHits<IdentificationType> empty_filter;
824  removeMatchingItems(ids, empty_filter);
825  }
826 
832  template<class IdentificationType>
833  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
834  {
835  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
836  {
837  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
838  keepMatchingItems(id_it->getHits(), score_filter);
839  }
840  }
841 
855  template<class IdentificationType>
856  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
857  {
858  IDScoreSwitcherAlgorithm switcher;
859  bool at_least_one_found = false;
860  for (IdentificationType& id : ids)
861  {
862  if (switcher.isScoreType(id.getScoreType(), score_type))
863  {
864  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
865  keepMatchingItems(id.getHits(), score_filter);
866  }
867  else
868  {
869  // If one assumes they are all the same in the vector, this could be done in the beginning.
870  String metaval = switcher.findScoreType(id, score_type);
871  if (!metaval.empty())
872  {
873  if (switcher.isScoreTypeHigherBetter(score_type))
874  {
875  struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
876  keepMatchingItems(id.getHits(), score_filter);
877  }
878  else
879  {
880  struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
881  keepMatchingItems(id.getHits(), score_filter);
882  }
883  at_least_one_found = true;
884  }
885  }
886  }
887  if (!at_least_one_found) OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
888  }
889 
896  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
897 
903  template<class IdentificationType>
904  static void filterHitsByScore(IdentificationType& id, double threshold_score)
905  {
906  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
907  keepMatchingItems(id.getHits(), score_filter);
908  }
909 
915  template<class IdentificationType>
916  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
917  {
918  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
919  {
920  id_it->sort();
921  if (n < id_it->getHits().size())
922  id_it->getHits().resize(n);
923  }
924  }
925 
940  template<class IdentificationType>
941  static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
942  {
943  updateHitRanks(ids);
944  if (min_rank > 1)
945  {
946  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
947  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
948  {
949  removeMatchingItems(id_it->getHits(), rank_filter);
950  }
951  }
952  if (max_rank >= min_rank)
953  {
954  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
955  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
956  {
957  keepMatchingItems(id_it->getHits(), rank_filter);
958  }
959  }
960  }
961 
969  template<class IdentificationType>
970  static void removeDecoyHits(std::vector<IdentificationType>& ids)
971  {
972  struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
973  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
974  {
975  removeMatchingItems(id_it->getHits(), decoy_filter);
976  }
977  }
978 
986  template<class IdentificationType>
987  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
988  {
989  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
990  for (auto& id_it : ids)
991  {
992  removeMatchingItems(id_it.getHits(), acc_filter);
993  }
994  }
995 
1003  template<class IdentificationType>
1004  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1005  {
1006  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1007  for (auto& id_it : ids)
1008  {
1009  keepMatchingItems(id_it.getHits(), acc_filter);
1010  }
1011  }
1012 
1014 
1015 
1018 
1025  static void keepBestPeptideHits(std::vector<PeptideIdentification>& peptides, bool strict = false);
1026 
1035  static void filterPeptidesByLength(std::vector<PeptideIdentification>& peptides, Size min_length, Size max_length = UINT_MAX);
1036 
1045  static void filterPeptidesByCharge(std::vector<PeptideIdentification>& peptides, Int min_charge, Int max_charge);
1046 
1048  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides, double min_rt, double max_rt);
1049 
1051  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides, double min_mz, double max_mz);
1052 
1064  static void filterPeptidesByMZError(std::vector<PeptideIdentification>& peptides, double mass_error, bool unit_ppm);
1065 
1066 
1073  template<class Filter>
1074  static void filterPeptideEvidences(Filter& filter, std::vector<PeptideIdentification>& peptides);
1075 
1087  static void filterPeptidesByRTPredictPValue(std::vector<PeptideIdentification>& peptides, const String& metavalue_key, double threshold = 0.05);
1088 
1090  static void removePeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
1091 
1092  static void removePeptidesWithMatchingRegEx(std::vector<PeptideIdentification>& peptides, const String& regex);
1093 
1095  static void keepPeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
1096 
1104  static void removePeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& bad_peptides, bool ignore_mods = false);
1105 
1113  static void keepPeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& good_peptides, bool ignore_mods = false);
1114 
1116  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>& peptides);
1117 
1124  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>& peptides, bool seq_only = false);
1125 
1127 
1128 
1131 
1133  static void filterHitsByScore(PeakMap& experiment, double peptide_threshold_score, double protein_threshold_score)
1134  {
1135  // filter protein hits:
1136  filterHitsByScore(experiment.getProteinIdentifications(), protein_threshold_score);
1137  // don't remove empty protein IDs - they contain search metadata and may
1138  // be referenced by peptide IDs (via run ID)
1139 
1140  // filter peptide hits:
1141  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1142  {
1143  filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1144  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1145  // TODO super-duper inefficient.
1146  updateProteinReferences(exp_it->getPeptideIdentifications(), experiment.getProteinIdentifications());
1147  }
1148  // @TODO: remove proteins that aren't referenced by peptides any more?
1149  }
1150 
1152  static void keepNBestHits(PeakMap& experiment, Size n)
1153  {
1154  // don't filter the protein hits by "N best" here - filter the peptides
1155  // and update the protein hits!
1156  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1157 
1158  // filter peptide hits:
1159  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1160  {
1161  std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1162  keepNBestHits(peptides, n);
1163  removeEmptyIdentifications(peptides);
1164  updateProteinReferences(peptides, experiment.getProteinIdentifications());
1165  all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1166  }
1167  // update protein hits:
1168  removeUnreferencedProteins(experiment.getProteinIdentifications(), all_peptides);
1169  }
1170 
1173  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1174 
1176  template<class MapType>
1177  static void keepNBestPeptideHits(MapType& map, Size n)
1178  {
1179  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1180  // which sorts Hits first.
1181  for (auto& feat : map)
1182  {
1183  keepNBestHits(feat.getPeptideIdentifications(), n);
1184  }
1185  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1186  }
1187 
1188  template<class MapType>
1189  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1190  {
1191  const auto pred = HasNoHits<PeptideIdentification>();
1192  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1193  }
1194 
1196  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1197  {
1198  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1199  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1200  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1201  }
1202 
1203  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1204  {
1205  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1206  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1207  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1208  }
1209 
1210  // TODO allow skipping unassigned?
1211  template<class MapType>
1212  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1213  {
1214  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1215 
1216  RunToSequenceToChargeToPepHitP best_peps_per_run;
1217  for (const auto& idrun : prot_ids)
1218  {
1219  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1220  }
1221 
1222  for (auto& feat : prot_and_pep_ids)
1223  {
1224  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1225  }
1226 
1227  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1228  }
1229 
1230  template<class MapType>
1231  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1232  {
1233  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1234  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1235  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1236  }
1237 
1240  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1241  Size nr_best_spectrum)
1242  {
1243  RunToSequenceToChargeToPepHitP best_peps_per_run;
1244  for (const auto& id : prot_ids)
1245  {
1246  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1247  }
1248  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1249  }
1250 
1254  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1255  Size nr_best_spectrum)
1256  {
1257  for (auto& pep : pep_ids)
1258  {
1259  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1260  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1261  }
1262  }
1263 
1267  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1268  {
1269  SequenceToChargeToPepHitP best_pep;
1270  for (auto& pep : pep_ids)
1271  {
1272  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1273  }
1274  }
1275 
1280  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1281  {
1282  bool higher_score_better = pep.isHigherScoreBetter();
1283  // make sure that first = best hit
1284  pep.sort();
1285 
1286  auto pepIt = pep.getHits().begin();
1287  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1288  for (; pepIt != pepItEnd; ++pepIt)
1289  {
1290  PeptideHit& hit = *pepIt;
1291 
1292  String lookup_seq;
1293  if (ignore_mods)
1294  {
1295  lookup_seq = hit.getSequence().toUnmodifiedString();
1296  }
1297  else
1298  {
1299  lookup_seq = hit.getSequence().toString();
1300  }
1301 
1302  int lookup_charge = 0;
1303  if (!ignore_charges)
1304  {
1305  lookup_charge = hit.getCharge();
1306  }
1307 
1308  // try to insert
1309  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1310  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1311 
1312  PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1313  if (!it_inserted_chg.second) // was already present -> possibly update
1314  {
1315  if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1316  {
1317  p->setMetaValue("best_per_peptide", 0);
1318  hit.setMetaValue("best_per_peptide", 1);
1319  p = &hit;
1320  }
1321  else // note that this was def. not the best
1322  {
1323  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1324  hit.setMetaValue("best_per_peptide", 0);
1325  }
1326  }
1327  else // newly inserted -> first for that sequence (and optionally charge)
1328  {
1329  hit.setMetaValue("best_per_peptide", 1);
1330  }
1331  }
1332  }
1333 
1335  static void keepHitsMatchingProteins(PeakMap& experiment, const std::vector<FASTAFile::FASTAEntry>& proteins)
1336  {
1337  std::set<String> accessions;
1338  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1339  {
1340  accessions.insert(it->identifier);
1341  }
1342 
1343  // filter protein hits:
1344  keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1345  updateHitRanks(experiment.getProteinIdentifications());
1346 
1347  // filter peptide hits:
1348  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1349  {
1350  if (exp_it->getMSLevel() == 2)
1351  {
1352  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1353  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1354  updateHitRanks(exp_it->getPeptideIdentifications());
1355  }
1356  }
1357  }
1358 
1360 
1361 
1364 
1375 
1388 
1394  static void removeDecoys(IdentificationData& id_data);
1396  };
1397 
1398 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:616
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
Not all required information provided.
Definition: Exception.h:155
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:433
Int max_cleavages_
Definition: IDFilter.h:437
EnzymaticDigestion & digestion_
Definition: IDFilter.h:435
PeptideHit argument_type
Definition: IDFilter.h:440
Int min_cleavages_
Definition: IDFilter.h:436
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:452
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:462
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:441
static Int disabledValue()
Definition: IDFilter.h:445
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:54
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:833
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:576
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1152
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e.g. spectrum).
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1212
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:65
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1189
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1240
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:562
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1254
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:64
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:821
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:595
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:970
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:569
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1280
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:614
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1196
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:731
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1177
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:585
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:624
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:641
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:665
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:752
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1231
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1335
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:987
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1203
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:1004
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1267
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1133
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:63
Definition: IDScoreSwitcherAlgorithm.h:25
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:310
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:97
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:44
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:77
Iterator begin() noexcept
Definition: MSExperiment.h:156
Iterator end()
Definition: MSExperiment.h:171
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:31
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:474
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:483
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:478
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:515
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:488
bool ignore_missed_cleavages_
Definition: IDFilter.h:480
PeptideEvidence argument_type
Definition: IDFilter.h:475
ProteaseDigestion & digestion_
Definition: IDFilter.h:479
bool methionine_cleavage_
Definition: IDFilter.h:481
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:348
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:350
GetMatchingItems()
Definition: IDFilter.h:361
ItemMap items
Definition: IDFilter.h:351
HitType argument_type
Definition: IDFilter.h:349
bool exists(const HitType &hit) const
Definition: IDFilter.h:370
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:380
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:353
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:365
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:375
Is this a decoy hit?
Definition: IDFilter.h:236
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:237
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:246
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:76
bool operator()(const HitType &hit) const
Definition: IDFilter.h:86
double score
Definition: IDFilter.h:79
HitType argument_type
Definition: IDFilter.h:77
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:82
bool higher_score_better
Definition: IDFilter.h:80
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:275
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:280
HitType argument_type
Definition: IDFilter.h:276
const std::unordered_set< String > & accessions
Definition: IDFilter.h:278
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:285
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:300
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:295
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:312
HitType argument_type
Definition: IDFilter.h:313
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:321
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:336
const std::set< String > & accessions
Definition: IDFilter.h:315
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:317
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:331
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:155
bool operator()(const HitType &hit) const
Definition: IDFilter.h:165
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:161
HitType argument_type
Definition: IDFilter.h:156
String key
Definition: IDFilter.h:158
double value
Definition: IDFilter.h:159
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:115
HitType argument_type
Definition: IDFilter.h:103
Size rank
Definition: IDFilter.h:105
HasMaxRank(Size rank_)
Definition: IDFilter.h:107
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:132
bool operator()(const HitType &hit) const
Definition: IDFilter.h:142
DataValue value
Definition: IDFilter.h:136
HitType argument_type
Definition: IDFilter.h:133
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:138
String key
Definition: IDFilter.h:135
Predicate to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:183
bool operator()(const HitType &hit) const
Operator() function to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:207
HitType argument_type
Definition: IDFilter.h:184
String key
Definition: IDFilter.h:186
HasMinMetaValue(const String &key_, const double &value_)
Constructor for HasMinMetaValue.
Definition: IDFilter.h:195
double value
Definition: IDFilter.h:187
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:529
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:532
IdentificationType argument_type
Definition: IDFilter.h:530
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20