OpenMS  2.7.0
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
47 
48 #include <algorithm>
49 #include <climits>
50 #include <vector>
51 #include <set>
52 #include <map>
53 #include <unordered_set>
54 
55 namespace OpenMS
56 {
77  class OPENMS_DLLAPI IDFilter
78  {
79 public:
80 
82  IDFilter() = default;
83 
85  virtual ~IDFilter() = default;
86 
88  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
89  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
90  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
91 
98 
/// Predicate: is the score of a hit at least as good as a given cut-off?
///
/// "At least as good" means ">= score" if higher scores are better and
/// "<= score" otherwise; the cut-off itself always passes.
/// NOTE(review): the struct header and the declaration of
/// @p higher_score_better were missing from this listing; both were restored
/// from the constructor (name, parameter type) - confirm against upstream.
template <class HitType>
struct HasGoodScore
{
  typedef HitType argument_type; // for use as a predicate

  double score;             ///< cut-off score
  bool higher_score_better; ///< orientation of the score

  HasGoodScore(double score_, bool higher_score_better_) :
    score(score_),
    higher_score_better(higher_score_better_)
  {}

  /// @return true if @p hit passes the cut-off (requires hit.getScore())
  bool operator()(const HitType& hit) const
  {
    if (higher_score_better)
    {
      return hit.getScore() >= score;
    }
    return hit.getScore() <= score;
  }
};
122 
128  template <class HitType>
129  struct HasMaxRank
130  {
131  typedef HitType argument_type; // for use as a predicate
132 
134 
135  HasMaxRank(Size rank_):
136  rank(rank_)
137  {
138  if (rank_ == 0)
139  {
140  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
141  }
142  }
143 
144  bool operator()(const HitType& hit) const
145  {
146  Size hit_rank = hit.getRank();
147  if (hit_rank == 0)
148  {
149  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
150  }
151  return hit_rank <= rank;
152  }
153  };
154 
160  template <class HitType>
162  {
163  typedef HitType argument_type; // for use as a predicate
164 
167 
168  HasMetaValue(const String& key_, const DataValue& value_):
169  key(key_),
170  value(value_)
171  {}
172 
173  bool operator()(const HitType& hit) const
174  {
175  DataValue found = hit.getMetaValue(key);
176  if (found.isEmpty()) return false; // meta value "key" not set
177  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
178  return found == value;
179  }
180  };
181 
183  template <class HitType>
185  {
186  typedef HitType argument_type; // for use as a predicate
187 
189  double value;
190 
191  HasMaxMetaValue(const String& key_, const double& value_):
192  key(key_),
193  value(value_)
194  {}
195 
196  bool operator()(const HitType& hit) const
197  {
198  DataValue found = hit.getMetaValue(key);
199  if (found.isEmpty()) return false; // meta value "key" not set
200  return double(found) <= value;
201  }
202  };
203 
205  template <class HitType>
207  {
208  typedef HitType argument_type; // for use as a predicate
209 
210  struct HasMetaValue<HitType> target_decoy, is_decoy;
211 
213  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
214  {}
215 
216  bool operator()(const HitType& hit) const
217  {
218  // @TODO: this could be done slightly more efficiently by returning
219  // false if the "target_decoy" meta value is "target" or "target+decoy",
220  // without checking for an "isDecoy" meta value in that case
221  return target_decoy(hit) || is_decoy(hit);
222  }
223  };
224 
230  template <class HitType>
232  {
233  typedef HitType argument_type; // for use as a predicate
234 
235  const std::unordered_set<String>& accessions;
236 
237  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_):
238  accessions(accessions_)
239  {}
240 
241  bool operator()(const PeptideHit& hit) const
242  {
243  for (const auto& it : hit.extractProteinAccessionsSet())
244  {
245  if (accessions.count(it) > 0) return true;
246  }
247  return false;
248  }
249 
250  bool operator()(const ProteinHit& hit) const
251  {
252  return (accessions.count(hit.getAccession()) > 0);
253  }
254 
255  bool operator()(const PeptideEvidence& evidence) const
256  {
257  return (accessions.count(evidence.getProteinAccession()) > 0);
258  }
259  };
260 
266  template <class HitType>
268  {
269  typedef HitType argument_type; // for use as a predicate
270 
271  const std::set<String>& accessions;
272 
273  HasMatchingAccession(const std::set<String>& accessions_):
274  accessions(accessions_)
275  {}
276 
277  bool operator()(const PeptideHit& hit) const
278  {
279  for (const auto& it : hit.extractProteinAccessionsSet())
280  {
281  if (accessions.count(it) > 0) return true;
282  }
283  return false;
284  }
285 
286  bool operator()(const ProteinHit& hit) const
287  {
288  return (accessions.count(hit.getAccession()) > 0);
289  }
290 
291  bool operator()(const PeptideEvidence& evidence) const
292  {
293  return (accessions.count(evidence.getProteinAccession()) > 0);
294  }
295  };
296 
302  template <class HitType, class Entry>
304  {
305  typedef HitType argument_type; // for use as a predicate
306  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
308 
309  GetMatchingItems(std::vector<Entry>& records)
310  {
311  for(typename std::vector<Entry>::iterator rec_it = records.begin();
312  rec_it != records.end(); ++rec_it)
313  {
314  items[getKey(*rec_it)] = &(*rec_it);
315  }
316  }
317 
319 
320  const String& getKey(const FASTAFile::FASTAEntry& entry) const
321  {
322  return entry.identifier;
323  }
324 
325  bool exists(const HitType& hit) const
326  {
327  return items.count(getHitKey(hit)) > 0;
328  }
329 
330  const String& getHitKey(const PeptideEvidence& p) const
331  {
332  return p.getProteinAccession();
333  }
334 
335  const Entry& getValue(const PeptideEvidence& evidence) const
336  {
337  if(!exists(evidence)){
338  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
339  }
340  return *(items.find(getHitKey(evidence))->second);
341  }
342 
343  };
344 
346 
347 
354 
356  struct HasMinPeptideLength;
357 
359  struct HasMinCharge;
360 
362  struct HasLowMZError;
363 
369  struct HasMatchingModification;
370 
376  struct HasMatchingSequence;
377 
379  struct HasNoEvidence;
380 
381 
388  {
389  private:
393 
394  public:
397  digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
398  {}
399 
400  static inline Int disabledValue(){ return -1; }
401 
404  bool operator()(PeptideHit& p) const
405  {
406  const auto& fun = [&](const Int missed_cleavages)
407  {
408 
409  bool max_filter = max_cleavages_ != disabledValue() ?
410  missed_cleavages > max_cleavages_ : false;
411  bool min_filter = min_cleavages_ != disabledValue() ?
412  missed_cleavages < min_cleavages_ : false;
413  return max_filter || min_filter;
414  };
415  return digestion_.filterByMissedCleavages(
417  fun);
418  }
419 
420  void filterPeptideSequences(std::vector<PeptideHit>& hits)
421  {
422  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
423  hits.end());
424  }
425 
426  };
427 
428 
435  {
437 
438  // Build an accession index to avoid the linear search cost
443 
444  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
445  ProteaseDigestion& digestion,
446  bool ignore_missed_cleavages,
447  bool methionine_cleavage) :
448  accession_resolver_(entries),
449  digestion_(digestion),
450  ignore_missed_cleavages_(ignore_missed_cleavages),
451  methionine_cleavage_(methionine_cleavage)
452  {}
453 
454  bool operator()(const PeptideEvidence& evidence) const
455  {
456  if(!evidence.hasValidLimits())
457  {
458  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
459  return true;
460  }
461 
462  if (accession_resolver_.exists(evidence))
463  {
464  return digestion_.isValidProduct(
465  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
466  evidence.getStart(), evidence.getEnd() - evidence.getStart(), ignore_missed_cleavages_, methionine_cleavage_);
467  }
468  else
469  {
470  if (evidence.getProteinAccession().empty())
471  {
472  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
473  }
474  else
475  {
476  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession()
477  << "' not found in fasta file!" << std::endl;
478  }
479  return true;
480  }
481  }
482 
483  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
484  {
485  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
486  }
487 
488  };
489 
491 
492 
495 
/// Predicate: is the list of hits of an identification empty?
template <class IdentificationType>
struct HasNoHits
{
  typedef IdentificationType argument_type; // for use as a predicate

  /// @return true if id.getHits() contains no elements
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
507 
509 
510 
513 
515  struct HasRTInRange;
516 
518  struct HasMZInRange;
519 
521 
522 
529 
/// Remove every element of @p items for which @p pred returns true
/// (erase-remove idiom; the relative order of the survivors is kept).
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  auto first_removed = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_removed, items.end());
}
537 
/// Keep only the elements of @p items for which @p pred returns true
/// (erase-remove idiom; the relative order of the survivors is kept).
///
/// The predicate is negated with a lambda rather than std::not1: std::not1 is
/// deprecated in C++17 and removed in C++20, needs an @c argument_type
/// typedef on the predicate, and stores a copy of it.
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  typedef typename Container::value_type Item;
  items.erase(std::remove_if(items.begin(), items.end(),
                             [&pred](const Item& item) { return !pred(item); }),
              items.end());
}
545 
/// Move every element of @p items for which @p pred returns true to the end
/// of @p target and erase it from @p items.
///
/// std::partition is used, so the relative order of elements is not
/// guaranteed to be preserved in either container.
/// The predicate is negated with a lambda rather than std::not1 (deprecated
/// in C++17, removed in C++20, requires an @c argument_type typedef).
template <class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  typedef typename Container::value_type Item;
  // non-matching elements go to the front, matching ones to [part, end)
  auto part = std::partition(items.begin(), items.end(),
                             [&pred](const Item& item) { return !pred(item); });
  std::move(part, items.end(), std::back_inserter(target));
  items.erase(part, items.end());
}
554 
/// For each element of @p items, remove those of its hits (getHits()) for
/// which @p pred returns true.
template <class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto id_it = items.begin(); id_it != items.end(); ++id_it)
  {
    removeMatchingItems(id_it->getHits(), pred);
  }
}
564 
/// For each element of @p items, keep only those of its hits (getHits()) for
/// which @p pred returns true.
template <class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto id_it = items.begin(); id_it != items.end(); ++id_it)
  {
    keepMatchingItems(id_it->getHits(), pred);
  }
}
574 
/// Keep only the peptide hits matching @p pred, both in the peptide IDs of
/// every element of the map and in the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // peptide IDs attached to the individual map elements
  for (auto elem_it = prot_and_pep_ids.begin(); elem_it != prot_and_pep_ids.end(); ++elem_it)
  {
    keepMatchingItemsUnroll(elem_it->getPeptideIdentifications(), pred);
  }
  // peptide IDs not attached to any element
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
584 
/// Remove the peptide hits matching @p pred, both from the peptide IDs of
/// every element of the map and from the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  // peptide IDs attached to the individual map elements
  for (auto elem_it = prot_and_pep_ids.begin(); elem_it != prot_and_pep_ids.end(); ++elem_it)
  {
    removeMatchingItemsUnroll(elem_it->getPeptideIdentifications(), pred);
  }
  // peptide IDs not attached to any element
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
594 
/// Remove whole peptide identifications matching @p pred, both from every
/// element of the map and from the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
{
  // peptide IDs attached to the individual map elements
  for (auto elem_it = prot_and_pep_ids.begin(); elem_it != prot_and_pep_ids.end(); ++elem_it)
  {
    removeMatchingItems(elem_it->getPeptideIdentifications(), pred);
  }
  // peptide IDs not attached to any element
  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
604 
606 
607 
610 
612  template <class IdentificationType>
613  static Size countHits(const std::vector<IdentificationType>& ids)
614  {
615  Size counter = 0;
616  for (typename std::vector<IdentificationType>::const_iterator id_it =
617  ids.begin(); id_it != ids.end(); ++id_it)
618  {
619  counter += id_it->getHits().size();
620  }
621  return counter;
622  }
623 
637  template <class IdentificationType>
638  static bool getBestHit(
639  const std::vector<IdentificationType>& identifications,
640  bool assume_sorted, typename IdentificationType::HitType& best_hit)
641  {
642  if (identifications.empty()) return false;
643 
644  typename std::vector<IdentificationType>::const_iterator best_id_it =
645  identifications.end();
646  typename std::vector<typename IdentificationType::HitType>::const_iterator
647  best_hit_it;
648 
649  for (typename std::vector<IdentificationType>::const_iterator id_it =
650  identifications.begin(); id_it != identifications.end(); ++id_it)
651  {
652  if (id_it->getHits().empty()) continue;
653 
654  if (best_id_it == identifications.end()) // no previous "best" hit
655  {
656  best_id_it = id_it;
657  best_hit_it = id_it->getHits().begin();
658  }
659  else if (best_id_it->getScoreType() != id_it->getScoreType())
660  {
661  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
662  }
663 
664  bool higher_better = best_id_it->isHigherScoreBetter();
665  for (typename std::vector<typename IdentificationType::HitType>::
666  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
667  id_it->getHits().end(); ++hit_it)
668  {
669  if ((higher_better && (hit_it->getScore() >
670  best_hit_it->getScore())) ||
671  (!higher_better && (hit_it->getScore() <
672  best_hit_it->getScore())))
673  {
674  best_hit_it = hit_it;
675  }
676  if (assume_sorted) break; // only consider the first hit
677  }
678  }
679 
680  if (best_id_it == identifications.end())
681  {
682  return false; // no hits in any IDs
683  }
684 
685  best_hit = *best_hit_it;
686  return true;
687  }
688 
697  const std::vector<PeptideIdentification>& peptides,
698  std::set<String>& sequences, bool ignore_mods = false);
699 
705  static std::map<String,std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
706 
712  template<class EvidenceFilter>
714  EvidenceFilter& filter,
715  std::vector<PeptideIdentification>& peptides)
716  {
717  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
718  pep_it != peptides.end(); ++pep_it)
719  {
720  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
721  hit_it != pep_it->getHits().end(); ++hit_it )
722  {
723  std::vector<PeptideEvidence> evidences;
724  remove_copy_if(hit_it->getPeptideEvidences().begin(),
725  hit_it->getPeptideEvidences().end(),
726  back_inserter(evidences),
727  std::not1(filter));
728  hit_it->setPeptideEvidences(evidences);
729  }
730  }
731  }
732 
734 
735 
738 
/// Call assignRanks() on every identification in @p ids.
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
749 
752  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
753 
756  std::vector<ProteinIdentification>& proteins,
757  const std::vector<PeptideIdentification>& peptides);
758 
767  std::vector<PeptideIdentification>& peptides,
768  const std::vector<ProteinIdentification>& proteins,
769  bool remove_peptides_without_reference = false);
770 
779  ConsensusMap& cmap,
780  bool remove_peptides_without_reference = false);
781 
790  static bool updateProteinGroups(
791  std::vector<ProteinIdentification::ProteinGroup>& groups,
792  const std::vector<ProteinHit>& hits);
793 
801  const std::vector<ProteinIdentification::ProteinGroup>& groups,
802  std::vector<ProteinHit>& hits);
804 
805 
808 
810  template <class IdentificationType>
811  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
812  {
813  struct HasNoHits<IdentificationType> empty_filter;
814  removeMatchingItems(ids, empty_filter);
815  }
816 
822  template <class IdentificationType>
823  static void filterHitsByScore(std::vector<IdentificationType>& ids,
824  double threshold_score)
825  {
826  for (typename std::vector<IdentificationType>::iterator id_it =
827  ids.begin(); id_it != ids.end(); ++id_it)
828  {
829  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
830  threshold_score, id_it->isHigherScoreBetter());
831  keepMatchingItems(id_it->getHits(), score_filter);
832  }
833  }
834 
841  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
842  double threshold_score, bool higher_better);
843 
849  template <class IdentificationType>
850  static void filterHitsByScore(IdentificationType& id,
851  double threshold_score)
852  {
853  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
854  threshold_score, id->isHigherScoreBetter());
855  keepMatchingItems(id->getHits(), score_filter);
856  }
857 
863  template <class IdentificationType>
864  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
865  {
866  for (typename std::vector<IdentificationType>::iterator id_it =
867  ids.begin(); id_it != ids.end(); ++id_it)
868  {
869  id_it->sort();
870  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
871  }
872  }
873 
888  template <class IdentificationType>
889  static void filterHitsByRank(std::vector<IdentificationType>& ids,
890  Size min_rank, Size max_rank)
891  {
892  updateHitRanks(ids);
893  if (min_rank > 1)
894  {
895  struct HasMaxRank<typename IdentificationType::HitType>
896  rank_filter(min_rank - 1);
897  for (typename std::vector<IdentificationType>::iterator id_it =
898  ids.begin(); id_it != ids.end(); ++id_it)
899  {
900  removeMatchingItems(id_it->getHits(), rank_filter);
901  }
902  }
903  if (max_rank >= min_rank)
904  {
905  struct HasMaxRank<typename IdentificationType::HitType>
906  rank_filter(max_rank);
907  for (typename std::vector<IdentificationType>::iterator id_it =
908  ids.begin(); id_it != ids.end(); ++id_it)
909  {
910  keepMatchingItems(id_it->getHits(), rank_filter);
911  }
912  }
913  }
914 
922  template <class IdentificationType>
923  static void removeDecoyHits(std::vector<IdentificationType>& ids)
924  {
925  struct HasDecoyAnnotation<typename IdentificationType::HitType>
926  decoy_filter;
927  for (typename std::vector<IdentificationType>::iterator id_it =
928  ids.begin(); id_it != ids.end(); ++id_it)
929  {
930  removeMatchingItems(id_it->getHits(), decoy_filter);
931  }
932  }
933 
941  template <class IdentificationType>
942  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
943  const std::set<String> accessions)
944  {
945  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
946  for (auto& id_it : ids)
947  {
948  removeMatchingItems(id_it.getHits(), acc_filter);
949  }
950  }
951 
959  template <class IdentificationType>
960  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
961  const std::set<String>& accessions)
962  {
963  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
964  for (auto& id_it : ids)
965  {
966  keepMatchingItems(id_it.getHits(), acc_filter);
967  }
968  }
969 
971 
972 
975 
982  static void keepBestPeptideHits(
983  std::vector<PeptideIdentification>& peptides, bool strict = false);
984 
994  std::vector<PeptideIdentification>& peptides, Size min_length,
995  Size max_length = UINT_MAX);
996 
1006  std::vector<PeptideIdentification>& peptides, Int min_charge,
1007  Int max_charge);
1008 
1010  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
1011  double min_rt, double max_rt);
1012 
1014  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
1015  double min_mz, double max_mz);
1016 
1029  std::vector<PeptideIdentification>& peptides, double mass_error,
1030  bool unit_ppm);
1031 
1032 
1039  template <class Filter>
1041  Filter& filter,
1042  std::vector<PeptideIdentification>& peptides);
1043 
1056  std::vector<PeptideIdentification>& peptides,
1057  const String& metavalue_key, double threshold = 0.05);
1058 
1061  std::vector<PeptideIdentification>& peptides,
1062  const std::set<String>& modifications);
1063 
1065  std::vector<PeptideIdentification>& peptides,
1066  const String& regex);
1067 
1070  std::vector<PeptideIdentification>& peptides,
1071  const std::set<String>& modifications);
1072 
1081  std::vector<PeptideIdentification>& peptides,
1082  const std::vector<PeptideIdentification>& bad_peptides,
1083  bool ignore_mods = false);
1084 
1093  std::vector<PeptideIdentification>& peptides,
1094  const std::vector<PeptideIdentification>& good_peptides,
1095  bool ignore_mods = false);
1096 
1098  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1099  peptides);
1100 
1106  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1107  peptides, bool seq_only = false);
1108 
1110 
1111 
1114 
1116  static void filterHitsByScore(PeakMap& experiment,
1117  double peptide_threshold_score,
1118  double protein_threshold_score)
1119  {
1120  // filter protein hits:
1121  filterHitsByScore(experiment.getProteinIdentifications(),
1122  protein_threshold_score);
1123  // don't remove empty protein IDs - they contain search meta data and may
1124  // be referenced by peptide IDs (via run ID)
1125 
1126  // filter peptide hits:
1127  for (PeakMap::Iterator exp_it = experiment.begin();
1128  exp_it != experiment.end(); ++exp_it)
1129  {
1130  filterHitsByScore(exp_it->getPeptideIdentifications(),
1131  peptide_threshold_score);
1132  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1133  updateProteinReferences(exp_it->getPeptideIdentifications(),
1134  experiment.getProteinIdentifications());
1135  }
1136  // @TODO: remove proteins that aren't referenced by peptides any more?
1137  }
1138 
1140  static void keepNBestHits(PeakMap& experiment, Size n)
1141  {
1142  // don't filter the protein hits by "N best" here - filter the peptides
1143  // and update the protein hits!
1144  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1145 
1146  // filter peptide hits:
1147  for (PeakMap::Iterator exp_it = experiment.begin();
1148  exp_it != experiment.end(); ++exp_it)
1149  {
1150  std::vector<PeptideIdentification>& peptides =
1151  exp_it->getPeptideIdentifications();
1152  keepNBestHits(peptides, n);
1153  removeEmptyIdentifications(peptides);
1154  updateProteinReferences(peptides,
1155  experiment.getProteinIdentifications());
1156  all_peptides.insert(all_peptides.end(), peptides.begin(),
1157  peptides.end());
1158  }
1159  // update protein hits:
1160  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1161  all_peptides);
1162  }
1163 
1166  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1167 
1169  template <class MapType>
1170  static void keepNBestPeptideHits(MapType& map, Size n)
1171  {
1172  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1173  // which sorts Hits first.
1174  for (auto& feat : map)
1175  {
1176  keepNBestHits(feat.getPeptideIdentifications(), n);
1177  }
1178  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1179  }
1180 
1181  template <class MapType>
1182  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1183  {
1184  const auto pred = HasNoHits<PeptideIdentification>();
1185  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1186  }
1187 
1189  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1190  {
1191  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1192  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1193  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1194  }
1195 
1196  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1197  {
1198  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1199  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1200  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1201  }
1202 
1203  //TODO allow skipping unassigned?
1204  template <class MapType>
1205  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1206  {
1207  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1208 
1209  RunToSequenceToChargeToPepHitP best_peps_per_run;
1210  for (const auto& idrun : prot_ids)
1211  {
1212  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1213  }
1214 
1215  for (auto& feat : prot_and_pep_ids)
1216  {
1217  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1218  }
1219 
1220  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1221  }
1222 
1223  template <class MapType>
1224  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1225  {
1226  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1227  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1228  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1229  }
1230 
1233  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1234  {
1235  RunToSequenceToChargeToPepHitP best_peps_per_run;
1236  for (const auto& id : prot_ids)
1237  {
1238  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1239  }
1240  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1241  }
1242 
1246  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1247  {
1248  for (auto &pep : pep_ids)
1249  {
1250  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1251  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1252  }
1253  }
1254 
1258  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1259  {
1260  SequenceToChargeToPepHitP best_pep;
1261  for (auto& pep : pep_ids)
1262  {
1263  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1264  }
1265  }
1266 
1271  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1272  {
1273  bool higher_score_better = pep.isHigherScoreBetter();
1274  //make sure that first = best hit
1275  pep.sort();
1276 
1277  auto pepIt = pep.getHits().begin();
1278  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1279  for (; pepIt != pepItEnd; ++pepIt)
1280  {
1281  PeptideHit &hit = *pepIt;
1282 
1283  String lookup_seq;
1284  if (ignore_mods)
1285  {
1286  lookup_seq = hit.getSequence().toUnmodifiedString();
1287  }
1288  else
1289  {
1290  lookup_seq = hit.getSequence().toString();
1291  }
1292 
1293  int lookup_charge = 0;
1294  if (!ignore_charges)
1295  {
1296  lookup_charge = hit.getCharge();
1297  }
1298 
1299  // try to insert
1300  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1301  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1302 
1303  PeptideHit* &p = it_inserted_chg.first->second; //now this gets either the old one if already present, or this
1304  if (!it_inserted_chg.second) //was already present -> possibly update
1305  {
1306  if (
1307  (higher_score_better && (hit.getScore() > p->getScore())) ||
1308  (!higher_score_better && (hit.getScore() < p->getScore()))
1309  )
1310  {
1311  p->setMetaValue("best_per_peptide", 0);
1312  hit.setMetaValue("best_per_peptide", 1);
1313  p = &hit;
1314  }
1315  else //note that this was def. not the best
1316  {
1317  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1318  hit.setMetaValue("best_per_peptide", 0);
1319  }
1320  }
1321  else //newly inserted -> first for that sequence (and optionally charge)
1322  {
1323  hit.setMetaValue("best_per_peptide", 1);
1324  }
1325  }
1326  }
1327 
1330  PeakMap& experiment,
1331  const std::vector<FASTAFile::FASTAEntry>& proteins)
1332  {
1333  std::set<String> accessions;
1334  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1335  proteins.begin(); it != proteins.end(); ++it)
1336  {
1337  accessions.insert(it->identifier);
1338  }
1339 
1340  // filter protein hits:
1341  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1342  accessions);
1343  updateHitRanks(experiment.getProteinIdentifications());
1344 
1345  // filter peptide hits:
1346  for (PeakMap::Iterator exp_it = experiment.begin();
1347  exp_it != experiment.end(); ++exp_it)
1348  {
1349  if (exp_it->getMSLevel() == 2)
1350  {
1351  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1352  accessions);
1353  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1354  updateHitRanks(exp_it->getPeptideIdentifications());
1355  }
1356  }
1357  }
1358 
1360 
1361 
1365  IdentificationData& id_data,
1367 
1369  IdentificationData& id_data,
1370  IdentificationData::ScoreTypeRef score_ref, double cutoff);
1371 
1372  static void removeDecoys(IdentificationData& id_data);
1374 
1375  };
1376 
1377 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:88
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:379
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:63
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:656
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
Not all required information provided.
Definition: Exception.h:189
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:388
Int max_cleavages_
Definition: IDFilter.h:392
EnzymaticDigestion & digestion_
Definition: IDFilter.h:390
PeptideHit argument_type
Definition: IDFilter.h:395
Int min_cleavages_
Definition: IDFilter.h:391
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:404
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:420
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:396
static Int disabledValue()
Definition: IDFilter.h:400
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:78
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:823
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:548
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1140
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1205
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:576
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1182
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1233
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:532
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1246
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:811
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:567
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:923
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:540
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1271
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:586
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1189
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:713
static void keepBestMatchPerQuery(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:889
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1170
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:557
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:596
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void filterQueryMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:613
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:638
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:741
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1224
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1329
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:942
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1196
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:960
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1258
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1116
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:90
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
Iterator begin()
Definition: MSExperiment.h:157
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
Iterator end()
Definition: MSExperiment.h:167
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:61
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:435
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:444
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:439
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:483
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:454
bool ignore_missed_cleavages_
Definition: IDFilter.h:441
PeptideEvidence argument_type
Definition: IDFilter.h:436
ProteaseDigestion & digestion_
Definition: IDFilter.h:440
bool methionine_cleavage_
Definition: IDFilter.h:442
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:304
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:306
GetMatchingItems()
Definition: IDFilter.h:318
ItemMap items
Definition: IDFilter.h:307
HitType argument_type
Definition: IDFilter.h:305
bool exists(const HitType &hit) const
Definition: IDFilter.h:325
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:335
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:309
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:320
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:330
Is this a decoy hit?
Definition: IDFilter.h:207
bool operator()(const HitType &hit) const
Definition: IDFilter.h:216
HitType argument_type
Definition: IDFilter.h:208
HasDecoyAnnotation()
Definition: IDFilter.h:212
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:113
double score
Definition: IDFilter.h:105
HitType argument_type
Definition: IDFilter.h:103
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:108
bool higher_score_better
Definition: IDFilter.h:106
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:232
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:237
HitType argument_type
Definition: IDFilter.h:233
const std::unordered_set< String > & accessions
Definition: IDFilter.h:235
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:241
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:255
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:250
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:268
HitType argument_type
Definition: IDFilter.h:269
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:277
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:291
const std::set< String > & accessions
Definition: IDFilter.h:271
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:273
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:286
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:185
bool operator()(const HitType &hit) const
Definition: IDFilter.h:196
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:191
HitType argument_type
Definition: IDFilter.h:186
String key
Definition: IDFilter.h:188
double value
Definition: IDFilter.h:189
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:130
bool operator()(const HitType &hit) const
Definition: IDFilter.h:144
HitType argument_type
Definition: IDFilter.h:131
Size rank
Definition: IDFilter.h:133
HasMaxRank(Size rank_)
Definition: IDFilter.h:135
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:162
bool operator()(const HitType &hit) const
Definition: IDFilter.h:173
DataValue value
Definition: IDFilter.h:166
HitType argument_type
Definition: IDFilter.h:163
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:168
String key
Definition: IDFilter.h:165
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:499
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:502
IdentificationType argument_type
Definition: IDFilter.h:500
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:44