OpenMS  3.0.0
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
48 
49 #include <algorithm>
50 #include <climits>
51 #include <vector>
52 #include <set>
53 #include <map>
54 #include <unordered_set>
55 
56 namespace OpenMS
57 {
78  class OPENMS_DLLAPI IDFilter
79  {
80 public:
81 
83  IDFilter() = default;
84 
86  virtual ~IDFilter() = default;
87 
89  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
90  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
91  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
92 
98 
101  template <class HitType>
103  {
104  typedef HitType argument_type; // for use as a predicate
105 
106  double score;
108 
109  HasGoodScore(double score_, bool higher_score_better_) :
110  score(score_),
111  higher_score_better(higher_score_better_)
112  {}
113 
114  bool operator()(const HitType& hit) const
115  {
116  if (higher_score_better)
117  {
118  return hit.getScore() >= score;
119  }
120  return hit.getScore() <= score;
121  }
122  };
123 
129  template <class HitType>
130  struct HasMaxRank
131  {
132  typedef HitType argument_type; // for use as a predicate
133 
135 
136  HasMaxRank(Size rank_):
137  rank(rank_)
138  {
139  if (rank_ == 0)
140  {
141  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
142  }
143  }
144 
145  bool operator()(const HitType& hit) const
146  {
147  Size hit_rank = hit.getRank();
148  if (hit_rank == 0)
149  {
150  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
151  }
152  return hit_rank <= rank;
153  }
154  };
155 
161  template <class HitType>
163  {
164  typedef HitType argument_type; // for use as a predicate
165 
168 
169  HasMetaValue(const String& key_, const DataValue& value_):
170  key(key_),
171  value(value_)
172  {}
173 
174  bool operator()(const HitType& hit) const
175  {
176  DataValue found = hit.getMetaValue(key);
177  if (found.isEmpty()) return false; // meta value "key" not set
178  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
179  return found == value;
180  }
181  };
182 
184  template <class HitType>
186  {
187  typedef HitType argument_type; // for use as a predicate
188 
190  double value;
191 
192  HasMaxMetaValue(const String& key_, const double& value_):
193  key(key_),
194  value(value_)
195  {}
196 
197  bool operator()(const HitType& hit) const
198  {
199  DataValue found = hit.getMetaValue(key);
200  if (found.isEmpty()) return false; // meta value "key" not set
201  return double(found) <= value;
202  }
203  };
204 
206  template <class HitType>
208  {
209  typedef HitType argument_type; // for use as a predicate
210 
211  struct HasMetaValue<HitType> target_decoy, is_decoy;
212 
214  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
215  {}
216 
217  bool operator()(const HitType& hit) const
218  {
219  // @TODO: this could be done slightly more efficiently by returning
220  // false if the "target_decoy" meta value is "target" or "target+decoy",
221  // without checking for an "isDecoy" meta value in that case
222  return target_decoy(hit) || is_decoy(hit);
223  }
224  };
225 
231  template <class HitType>
233  {
234  typedef HitType argument_type; // for use as a predicate
235 
236  const std::unordered_set<String>& accessions;
237 
238  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_):
239  accessions(accessions_)
240  {}
241 
242  bool operator()(const PeptideHit& hit) const
243  {
244  for (const auto& it : hit.extractProteinAccessionsSet())
245  {
246  if (accessions.count(it) > 0) return true;
247  }
248  return false;
249  }
250 
251  bool operator()(const ProteinHit& hit) const
252  {
253  return (accessions.count(hit.getAccession()) > 0);
254  }
255 
256  bool operator()(const PeptideEvidence& evidence) const
257  {
258  return (accessions.count(evidence.getProteinAccession()) > 0);
259  }
260  };
261 
267  template <class HitType>
269  {
270  typedef HitType argument_type; // for use as a predicate
271 
272  const std::set<String>& accessions;
273 
274  HasMatchingAccession(const std::set<String>& accessions_):
275  accessions(accessions_)
276  {}
277 
278  bool operator()(const PeptideHit& hit) const
279  {
280  for (const auto& it : hit.extractProteinAccessionsSet())
281  {
282  if (accessions.count(it) > 0) return true;
283  }
284  return false;
285  }
286 
287  bool operator()(const ProteinHit& hit) const
288  {
289  return (accessions.count(hit.getAccession()) > 0);
290  }
291 
292  bool operator()(const PeptideEvidence& evidence) const
293  {
294  return (accessions.count(evidence.getProteinAccession()) > 0);
295  }
296  };
297 
303  template <class HitType, class Entry>
305  {
306  typedef HitType argument_type; // for use as a predicate
307  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
309 
310  GetMatchingItems(std::vector<Entry>& records)
311  {
312  for(typename std::vector<Entry>::iterator rec_it = records.begin();
313  rec_it != records.end(); ++rec_it)
314  {
315  items[getKey(*rec_it)] = &(*rec_it);
316  }
317  }
318 
320 
321  const String& getKey(const FASTAFile::FASTAEntry& entry) const
322  {
323  return entry.identifier;
324  }
325 
326  bool exists(const HitType& hit) const
327  {
328  return items.count(getHitKey(hit)) > 0;
329  }
330 
331  const String& getHitKey(const PeptideEvidence& p) const
332  {
333  return p.getProteinAccession();
334  }
335 
336  const Entry& getValue(const PeptideEvidence& evidence) const
337  {
338  if(!exists(evidence)){
339  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
340  }
341  return *(items.find(getHitKey(evidence))->second);
342  }
343 
344  };
345 
347 
348 
354 
357  struct HasMinPeptideLength;
358 
360  struct HasMinCharge;
361 
363  struct HasLowMZError;
364 
370  struct HasMatchingModification;
371 
377  struct HasMatchingSequence;
378 
380  struct HasNoEvidence;
381 
382 
389  {
390  private:
394 
395  public:
398  digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
399  {}
400 
401  static inline Int disabledValue(){ return -1; }
402 
405  bool operator()(PeptideHit& p) const
406  {
407  const auto& fun = [&](const Int missed_cleavages)
408  {
409 
410  bool max_filter = max_cleavages_ != disabledValue() ?
411  missed_cleavages > max_cleavages_ : false;
412  bool min_filter = min_cleavages_ != disabledValue() ?
413  missed_cleavages < min_cleavages_ : false;
414  return max_filter || min_filter;
415  };
416  return digestion_.filterByMissedCleavages(
418  fun);
419  }
420 
421  void filterPeptideSequences(std::vector<PeptideHit>& hits)
422  {
423  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
424  hits.end());
425  }
426 
427  };
428 
429 
436  {
438 
439  // Build an accession index to avoid the linear search cost
444 
445  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
446  ProteaseDigestion& digestion,
447  bool ignore_missed_cleavages,
448  bool methionine_cleavage) :
449  accession_resolver_(entries),
450  digestion_(digestion),
451  ignore_missed_cleavages_(ignore_missed_cleavages),
452  methionine_cleavage_(methionine_cleavage)
453  {}
454 
455  bool operator()(const PeptideEvidence& evidence) const
456  {
457  if(!evidence.hasValidLimits())
458  {
459  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
460  return true;
461  }
462 
463  if (accession_resolver_.exists(evidence))
464  {
465  return digestion_.isValidProduct(
466  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
467  evidence.getStart(), evidence.getEnd() - evidence.getStart(), ignore_missed_cleavages_, methionine_cleavage_);
468  }
469  else
470  {
471  if (evidence.getProteinAccession().empty())
472  {
473  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
474  }
475  else
476  {
477  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession()
478  << "' not found in fasta file!" << std::endl;
479  }
480  return true;
481  }
482  }
483 
484  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
485  {
486  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
487  }
488 
489  };
490 
492 
493 
496 
/// Predicate that is true for identifications whose hit list is empty.
template <class IdentificationType>
struct HasNoHits
{
  using argument_type = IdentificationType; // for use as a predicate

  /// Returns true if @p id.getHits() contains no elements.
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
508 
510 
511 
514 
516  struct HasRTInRange;
517 
519  struct HasMZInRange;
520 
522 
523 
529 
/// Erases all elements of @p items for which @p pred returns true.
/// The relative order of the remaining elements is preserved.
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_discarded = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_discarded, items.end());
}
538 
/// Erases all elements of @p items for which @p pred returns false.
/// The relative order of the remaining elements is preserved.
/// The predicate is negated with a lambda instead of std::not1, so it is not
/// copied and does not need an "argument_type" typedef (lambdas work too).
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  typedef typename Container::value_type Item;
  items.erase(std::remove_if(items.begin(), items.end(),
                             [&pred](const Item& item) { return !pred(item); }),
              items.end());
}
546 
/// Moves all elements of @p items for which @p pred returns true to the end
/// of @p target and erases them from @p items.
/// std::partition is used, so the relative order of elements is not guaranteed.
/// The predicate is negated with a lambda instead of std::not1, so it is not
/// copied and does not need an "argument_type" typedef (lambdas work too).
template <class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  typedef typename Container::value_type Item;
  // non-matching items first, matching items from "part" onwards
  auto part = std::partition(items.begin(), items.end(),
                             [&pred](const Item& item) { return !pred(item); });
  std::move(part, items.end(), std::back_inserter(target));
  items.erase(part, items.end());
}
555 
/// For every element of @p items, erases the hits (element.getHits()) for
/// which @p pred returns true.
template <class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto it = items.begin(); it != items.end(); ++it)
  {
    removeMatchingItems(it->getHits(), pred);
  }
}
565 
/// For every element of @p items, keeps only the hits (element.getHits())
/// for which @p pred returns true.
template <class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto it = items.begin(); it != items.end(); ++it)
  {
    keepMatchingItems(it->getHits(), pred);
  }
}
575 
/// Keeps only the peptide hits for which @p pred returns true, in the peptide
/// identifications of every element of @p prot_and_pep_ids and in its
/// unassigned peptide identifications.
/// The predicate is taken by const reference (like the other filter helpers),
/// so temporaries can be passed as well.
template <class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
585 
/// Removes the peptide hits for which @p pred returns true, in the peptide
/// identifications of every element of @p prot_and_pep_ids and in its
/// unassigned peptide identifications.
/// The predicate is taken by const reference (like the other filter helpers),
/// so temporaries can be passed as well.
template <class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
595 
/// Removes the peptide identifications for which @p pred returns true, from
/// every element of @p prot_and_pep_ids and from its unassigned peptide
/// identifications.
/// The predicate is taken by const reference (like the other filter helpers),
/// so temporaries can be passed as well.
template <class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItems(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
605 
607 
608 
611 
613  template <class IdentificationType>
614  static Size countHits(const std::vector<IdentificationType>& ids)
615  {
616  Size counter = 0;
617  for (typename std::vector<IdentificationType>::const_iterator id_it =
618  ids.begin(); id_it != ids.end(); ++id_it)
619  {
620  counter += id_it->getHits().size();
621  }
622  return counter;
623  }
624 
638  template <class IdentificationType>
639  static bool getBestHit(
640  const std::vector<IdentificationType>& identifications,
641  bool assume_sorted, typename IdentificationType::HitType& best_hit)
642  {
643  if (identifications.empty()) return false;
644 
645  typename std::vector<IdentificationType>::const_iterator best_id_it =
646  identifications.end();
647  typename std::vector<typename IdentificationType::HitType>::const_iterator
648  best_hit_it;
649 
650  for (typename std::vector<IdentificationType>::const_iterator id_it =
651  identifications.begin(); id_it != identifications.end(); ++id_it)
652  {
653  if (id_it->getHits().empty()) continue;
654 
655  if (best_id_it == identifications.end()) // no previous "best" hit
656  {
657  best_id_it = id_it;
658  best_hit_it = id_it->getHits().begin();
659  }
660  else if (best_id_it->getScoreType() != id_it->getScoreType())
661  {
662  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
663  }
664 
665  bool higher_better = best_id_it->isHigherScoreBetter();
666  for (typename std::vector<typename IdentificationType::HitType>::
667  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
668  id_it->getHits().end(); ++hit_it)
669  {
670  if ((higher_better && (hit_it->getScore() >
671  best_hit_it->getScore())) ||
672  (!higher_better && (hit_it->getScore() <
673  best_hit_it->getScore())))
674  {
675  best_hit_it = hit_it;
676  }
677  if (assume_sorted) break; // only consider the first hit
678  }
679  }
680 
681  if (best_id_it == identifications.end())
682  {
683  return false; // no hits in any IDs
684  }
685 
686  best_hit = *best_hit_it;
687  return true;
688  }
689 
697  static void extractPeptideSequences(
698  const std::vector<PeptideIdentification>& peptides,
699  std::set<String>& sequences, bool ignore_mods = false);
700 
706  static std::map<String,std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
707 
713  template<class EvidenceFilter>
715  EvidenceFilter& filter,
716  std::vector<PeptideIdentification>& peptides)
717  {
718  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
719  pep_it != peptides.end(); ++pep_it)
720  {
721  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
722  hit_it != pep_it->getHits().end(); ++hit_it )
723  {
724  std::vector<PeptideEvidence> evidences;
725  remove_copy_if(hit_it->getPeptideEvidences().begin(),
726  hit_it->getPeptideEvidences().end(),
727  back_inserter(evidences),
728  std::not1(filter));
729  hit_it->setPeptideEvidences(evidences);
730  }
731  }
732  }
733 
735 
736 
739 
/// Calls assignRanks() on every identification in @p ids.
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
750 
753  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
754 
756  static void removeUnreferencedProteins(
757  std::vector<ProteinIdentification>& proteins,
758  const std::vector<PeptideIdentification>& peptides);
760  static void removeUnreferencedProteins(
761  ProteinIdentification& proteins,
762  const std::vector<PeptideIdentification>& peptides);
763 
771  static void updateProteinReferences(
772  std::vector<PeptideIdentification>& peptides,
773  const std::vector<ProteinIdentification>& proteins,
774  bool remove_peptides_without_reference = false);
775 
783  static void updateProteinReferences(
784  ConsensusMap& cmap,
785  bool remove_peptides_without_reference = false);
786 
794  static void updateProteinReferences(
795  ConsensusMap& cmap,
796  const ProteinIdentification& ref_run,
797  bool remove_peptides_without_reference = false);
798 
807  static bool updateProteinGroups(
808  std::vector<ProteinIdentification::ProteinGroup>& groups,
809  const std::vector<ProteinHit>& hits);
810 
817  static void removeUngroupedProteins(
818  const std::vector<ProteinIdentification::ProteinGroup>& groups,
819  std::vector<ProteinHit>& hits);
821 
822 
825 
827  template <class IdentificationType>
828  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
829  {
830  struct HasNoHits<IdentificationType> empty_filter;
831  removeMatchingItems(ids, empty_filter);
832  }
833 
839  template <class IdentificationType>
840  static void filterHitsByScore(std::vector<IdentificationType>& ids,
841  double threshold_score)
842  {
843  for (typename std::vector<IdentificationType>::iterator id_it =
844  ids.begin(); id_it != ids.end(); ++id_it)
845  {
846  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
847  threshold_score, id_it->isHigherScoreBetter());
848  keepMatchingItems(id_it->getHits(), score_filter);
849  }
850  }
851 
858  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
859  double threshold_score, bool higher_better);
860 
866  template <class IdentificationType>
867  static void filterHitsByScore(IdentificationType& id,
868  double threshold_score)
869  {
870  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
871  threshold_score, id.isHigherScoreBetter());
872  keepMatchingItems(id.getHits(), score_filter);
873  }
874 
880  template <class IdentificationType>
881  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
882  {
883  for (typename std::vector<IdentificationType>::iterator id_it =
884  ids.begin(); id_it != ids.end(); ++id_it)
885  {
886  id_it->sort();
887  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
888  }
889  }
890 
905  template <class IdentificationType>
906  static void filterHitsByRank(std::vector<IdentificationType>& ids,
907  Size min_rank, Size max_rank)
908  {
909  updateHitRanks(ids);
910  if (min_rank > 1)
911  {
912  struct HasMaxRank<typename IdentificationType::HitType>
913  rank_filter(min_rank - 1);
914  for (typename std::vector<IdentificationType>::iterator id_it =
915  ids.begin(); id_it != ids.end(); ++id_it)
916  {
917  removeMatchingItems(id_it->getHits(), rank_filter);
918  }
919  }
920  if (max_rank >= min_rank)
921  {
922  struct HasMaxRank<typename IdentificationType::HitType>
923  rank_filter(max_rank);
924  for (typename std::vector<IdentificationType>::iterator id_it =
925  ids.begin(); id_it != ids.end(); ++id_it)
926  {
927  keepMatchingItems(id_it->getHits(), rank_filter);
928  }
929  }
930  }
931 
939  template <class IdentificationType>
940  static void removeDecoyHits(std::vector<IdentificationType>& ids)
941  {
942  struct HasDecoyAnnotation<typename IdentificationType::HitType>
943  decoy_filter;
944  for (typename std::vector<IdentificationType>::iterator id_it =
945  ids.begin(); id_it != ids.end(); ++id_it)
946  {
947  removeMatchingItems(id_it->getHits(), decoy_filter);
948  }
949  }
950 
958  template <class IdentificationType>
959  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
960  const std::set<String> accessions)
961  {
962  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
963  for (auto& id_it : ids)
964  {
965  removeMatchingItems(id_it.getHits(), acc_filter);
966  }
967  }
968 
976  template <class IdentificationType>
977  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
978  const std::set<String>& accessions)
979  {
980  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
981  for (auto& id_it : ids)
982  {
983  keepMatchingItems(id_it.getHits(), acc_filter);
984  }
985  }
986 
988 
989 
992 
999  static void keepBestPeptideHits(
1000  std::vector<PeptideIdentification>& peptides, bool strict = false);
1001 
1010  static void filterPeptidesByLength(
1011  std::vector<PeptideIdentification>& peptides, Size min_length,
1012  Size max_length = UINT_MAX);
1013 
1022  static void filterPeptidesByCharge(
1023  std::vector<PeptideIdentification>& peptides, Int min_charge,
1024  Int max_charge);
1025 
1027  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
1028  double min_rt, double max_rt);
1029 
1031  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
1032  double min_mz, double max_mz);
1033 
1045  static void filterPeptidesByMZError(
1046  std::vector<PeptideIdentification>& peptides, double mass_error,
1047  bool unit_ppm);
1048 
1049 
1056  template <class Filter>
1057  static void filterPeptideEvidences(
1058  Filter& filter,
1059  std::vector<PeptideIdentification>& peptides);
1060 
1072  static void filterPeptidesByRTPredictPValue(
1073  std::vector<PeptideIdentification>& peptides,
1074  const String& metavalue_key, double threshold = 0.05);
1075 
1077  static void removePeptidesWithMatchingModifications(
1078  std::vector<PeptideIdentification>& peptides,
1079  const std::set<String>& modifications);
1080 
1081  static void removePeptidesWithMatchingRegEx(
1082  std::vector<PeptideIdentification>& peptides,
1083  const String& regex);
1084 
1086  static void keepPeptidesWithMatchingModifications(
1087  std::vector<PeptideIdentification>& peptides,
1088  const std::set<String>& modifications);
1089 
1097  static void removePeptidesWithMatchingSequences(
1098  std::vector<PeptideIdentification>& peptides,
1099  const std::vector<PeptideIdentification>& bad_peptides,
1100  bool ignore_mods = false);
1101 
1109  static void keepPeptidesWithMatchingSequences(
1110  std::vector<PeptideIdentification>& peptides,
1111  const std::vector<PeptideIdentification>& good_peptides,
1112  bool ignore_mods = false);
1113 
1115  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1116  peptides);
1117 
1123  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1124  peptides, bool seq_only = false);
1125 
1127 
1128 
1131 
1133  static void filterHitsByScore(PeakMap& experiment,
1134  double peptide_threshold_score,
1135  double protein_threshold_score)
1136  {
1137  // filter protein hits:
1138  filterHitsByScore(experiment.getProteinIdentifications(),
1139  protein_threshold_score);
1140  // don't remove empty protein IDs - they contain search metadata and may
1141  // be referenced by peptide IDs (via run ID)
1142 
1143  // filter peptide hits:
1144  for (PeakMap::Iterator exp_it = experiment.begin();
1145  exp_it != experiment.end(); ++exp_it)
1146  {
1147  filterHitsByScore(exp_it->getPeptideIdentifications(),
1148  peptide_threshold_score);
1149  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1150  // TODO super-duper inefficient.
1151  updateProteinReferences(exp_it->getPeptideIdentifications(),
1152  experiment.getProteinIdentifications());
1153  }
1154  // @TODO: remove proteins that aren't referenced by peptides any more?
1155  }
1156 
1158  static void keepNBestHits(PeakMap& experiment, Size n)
1159  {
1160  // don't filter the protein hits by "N best" here - filter the peptides
1161  // and update the protein hits!
1162  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1163 
1164  // filter peptide hits:
1165  for (PeakMap::Iterator exp_it = experiment.begin();
1166  exp_it != experiment.end(); ++exp_it)
1167  {
1168  std::vector<PeptideIdentification>& peptides =
1169  exp_it->getPeptideIdentifications();
1170  keepNBestHits(peptides, n);
1171  removeEmptyIdentifications(peptides);
1172  updateProteinReferences(peptides,
1173  experiment.getProteinIdentifications());
1174  all_peptides.insert(all_peptides.end(), peptides.begin(),
1175  peptides.end());
1176  }
1177  // update protein hits:
1178  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1179  all_peptides);
1180  }
1181 
1184  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1185 
1187  template <class MapType>
1188  static void keepNBestPeptideHits(MapType& map, Size n)
1189  {
1190  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1191  // which sorts Hits first.
1192  for (auto& feat : map)
1193  {
1194  keepNBestHits(feat.getPeptideIdentifications(), n);
1195  }
1196  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1197  }
1198 
1199  template <class MapType>
1200  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1201  {
1202  const auto pred = HasNoHits<PeptideIdentification>();
1203  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1204  }
1205 
1207  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1208  {
1209  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1210  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1211  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1212  }
1213 
1214  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1215  {
1216  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1217  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1218  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1219  }
1220 
1221  //TODO allow skipping unassigned?
1222  template <class MapType>
1223  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1224  {
1225  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1226 
1227  RunToSequenceToChargeToPepHitP best_peps_per_run;
1228  for (const auto& idrun : prot_ids)
1229  {
1230  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1231  }
1232 
1233  for (auto& feat : prot_and_pep_ids)
1234  {
1235  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1236  }
1237 
1238  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1239  }
1240 
1241  template <class MapType>
1242  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1243  {
1244  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1245  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1246  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1247  }
1248 
1251  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1252  {
1253  RunToSequenceToChargeToPepHitP best_peps_per_run;
1254  for (const auto& id : prot_ids)
1255  {
1256  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1257  }
1258  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1259  }
1260 
1264  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1265  {
1266  for (auto &pep : pep_ids)
1267  {
1268  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1269  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1270  }
1271  }
1272 
1276  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1277  {
1278  SequenceToChargeToPepHitP best_pep;
1279  for (auto& pep : pep_ids)
1280  {
1281  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1282  }
1283  }
1284 
1289  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1290  {
1291  bool higher_score_better = pep.isHigherScoreBetter();
1292  //make sure that first = best hit
1293  pep.sort();
1294 
1295  auto pepIt = pep.getHits().begin();
1296  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1297  for (; pepIt != pepItEnd; ++pepIt)
1298  {
1299  PeptideHit &hit = *pepIt;
1300 
1301  String lookup_seq;
1302  if (ignore_mods)
1303  {
1304  lookup_seq = hit.getSequence().toUnmodifiedString();
1305  }
1306  else
1307  {
1308  lookup_seq = hit.getSequence().toString();
1309  }
1310 
1311  int lookup_charge = 0;
1312  if (!ignore_charges)
1313  {
1314  lookup_charge = hit.getCharge();
1315  }
1316 
1317  // try to insert
1318  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1319  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1320 
1321  PeptideHit* &p = it_inserted_chg.first->second; //now this gets either the old one if already present, or this
1322  if (!it_inserted_chg.second) //was already present -> possibly update
1323  {
1324  if (
1325  (higher_score_better && (hit.getScore() > p->getScore())) ||
1326  (!higher_score_better && (hit.getScore() < p->getScore()))
1327  )
1328  {
1329  p->setMetaValue("best_per_peptide", 0);
1330  hit.setMetaValue("best_per_peptide", 1);
1331  p = &hit;
1332  }
1333  else //note that this was def. not the best
1334  {
1335  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1336  hit.setMetaValue("best_per_peptide", 0);
1337  }
1338  }
1339  else //newly inserted -> first for that sequence (and optionally charge)
1340  {
1341  hit.setMetaValue("best_per_peptide", 1);
1342  }
1343  }
1344  }
1345 
1348  PeakMap& experiment,
1349  const std::vector<FASTAFile::FASTAEntry>& proteins)
1350  {
1351  std::set<String> accessions;
1352  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1353  proteins.begin(); it != proteins.end(); ++it)
1354  {
1355  accessions.insert(it->identifier);
1356  }
1357 
1358  // filter protein hits:
1359  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1360  accessions);
1361  updateHitRanks(experiment.getProteinIdentifications());
1362 
1363  // filter peptide hits:
1364  for (PeakMap::Iterator exp_it = experiment.begin();
1365  exp_it != experiment.end(); ++exp_it)
1366  {
1367  if (exp_it->getMSLevel() == 2)
1368  {
1369  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1370  accessions);
1371  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1372  updateHitRanks(exp_it->getPeptideIdentifications());
1373  }
1374  }
1375  }
1376 
1378 
1379 
1382 
1392  template <typename PredicateType>
1394  IdentificationData& id_data, PredicateType&& func, bool cleanup_affected = false)
1395  {
1396  id_data.removeFromSetIf_(id_data.observation_matches_, func);
1397  if (cleanup_affected) id_data.cleanup();
1398  }
1399 
1410  static void keepBestMatchPerObservation(
1411  IdentificationData& id_data,
1413 
1425  static void filterObservationMatchesByScore(
1426  IdentificationData& id_data,
1427  IdentificationData::ScoreTypeRef score_ref, double cutoff);
1428 
1434  static void removeDecoys(IdentificationData& id_data);
1436 
1437  };
1438 
1439 } // namespace OpenMS
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:130
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:421
bool ignore_missed_cleavages_
Definition: IDFilter.h:442
ItemMap items
Definition: IDFilter.h:308
const String & getAccession() const
returns the accession of the protein
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1200
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1264
ProteaseDigestion & digestion_
Definition: IDFilter.h:441
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1276
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:307
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:549
bool exists(const HitType &hit) const
Definition: IDFilter.h:326
A more convenient string class.
Definition: String.h:58
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:959
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:614
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:292
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:232
static void removeFromSetIf_(ContainerType &container, PredicateType predicate)
Remove elements from a set (or ordered multi_index_container) if they fulfill a predicate.
Definition: IdentificationData.h:837
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:103
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:251
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:287
GetMatchingItems()
Definition: IDFilter.h:319
HitType argument_type
Definition: IDFilter.h:234
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:397
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:162
double score
Definition: IDFilter.h:106
Int max_cleavages_
Definition: IDFilter.h:393
Is this a decoy hit?
Definition: IDFilter.h:207
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1251
PeptideEvidence argument_type
Definition: IDFilter.h:437
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:499
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:568
HasMaxRank(Size rank_)
Definition: IDFilter.h:136
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
bool operator()(const HitType &hit) const
Definition: IDFilter.h:217
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:109
bool operator()(const HitType &hit) const
Definition: IDFilter.h:114
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:977
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:558
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
Iterator begin()
Definition: MSExperiment.h:182
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:94
A container for consensus elements.
Definition: ConsensusMap.h:82
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:435
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:278
const std::set< String > & accessions
Definition: IDFilter.h:272
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:192
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:63
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1207
double value
Definition: IDFilter.h:190
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:169
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:597
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:336
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:940
static void filterObservationMatchesByFunctor(IdentificationData &id_data, PredicateType &&func, bool cleanup_affected=false)
Helper function for filtering observation matches (e.g. PSMs) in IdentificationData.
Definition: IDFilter.h:1393
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:840
static Int disabledValue()
Definition: IDFilter.h:401
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:503
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:90
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:58
HitType argument_type
Definition: IDFilter.h:187
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:587
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:388
Iterator end()
Definition: MSExperiment.h:192
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:484
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1347
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:714
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1289
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:242
String toString() const
returns the peptide as string with modifications embedded in brackets
String key
Definition: IDFilter.h:189
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:742
bool operator()(const HitType &hit) const
Definition: IDFilter.h:145
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:331
HitType argument_type
Definition: IDFilter.h:164
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
Size rank
Definition: IDFilter.h:134
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:455
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:59
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1214
void cleanup(bool require_observation_match=true, bool require_identified_sequence=true, bool require_parent_match=true, bool require_parent_group=false, bool require_match_group=false)
Clean up the data structure after filtering parts of it.
ObservationMatches observation_matches_
Definition: IdentificationData.h:656
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:533
const std::unordered_set< String > & accessions
Definition: IDFilter.h:236
bool isHigherScoreBetter() const
returns the peptide score orientation
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:310
Representation of a peptide hit.
Definition: PeptideHit.h:55
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:440
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available, the empty string is returned...
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:577
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1158
IdentificationType argument_type
Definition: IDFilter.h:501
bool operator()(const HitType &hit) const
Definition: IDFilter.h:197
HasDecoyAnnotation()
Definition: IDFilter.h:213
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:102
DataValue value
Definition: IDFilter.h:167
Int min_cleavages_
Definition: IDFilter.h:392
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:268
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:238
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:321
HitType argument_type
Definition: IDFilter.h:209
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:339
HitType argument_type
Definition: IDFilter.h:104
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1242
bool methionine_cleavage_
Definition: IDFilter.h:443
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:185
Representation of a protein hit.
Definition: ProteinHit.h:58
Invalid value exception.
Definition: Exception.h:327
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:256
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:89
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:445
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:828
bool higher_score_better
Definition: IDFilter.h:107
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:91
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:388
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1223
Builds a map index of data that have a String index to find matches and return the objects...
Definition: IDFilter.h:304
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:274
bool operator()(const HitType &hit) const
Definition: IDFilter.h:174
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:906
String key
Definition: IDFilter.h:166
HitType argument_type
Definition: IDFilter.h:270
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1133
HitType argument_type
Definition: IDFilter.h:306
HitType argument_type
Definition: IDFilter.h:132
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:71
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
double getScore() const
returns the PSM score
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:405
String identifier
Definition: FASTAFile.h:73
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:639
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:78
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1188
int Int
Signed integer type.
Definition: Types.h:102
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
EnzymaticDigestion & digestion_
Definition: IDFilter.h:391
PeptideHit argument_type
Definition: IDFilter.h:396
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:541
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Not all required information provided.
Definition: Exception.h:186
void sort()
Sorts the hits by score.
#define OPENMS_LOG_WARN
Macro to use if a warning (a piece of information which should be read by the user) should be logged...
Definition: LogStream.h:465
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:45
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
Int getCharge() const
returns the charge of the peptide