|
OpenMS
2.5.0
|
Go to the documentation of this file.
37 #include <OpenMS/config.h>
53 #include <unordered_set>
100 template <
class HitType>
110 higher_score_better(higher_score_better_)
115 if (higher_score_better)
117 return hit.getScore() >= score;
119 return hit.getScore() <= score;
128 template <
class HitType>
140 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
146 Size hit_rank = hit.getRank();
151 return hit_rank <= rank;
160 template <
class HitType>
176 if (found.
isEmpty())
return false;
177 if (value.
isEmpty())
return true;
178 return found == value;
183 template <
class HitType>
199 if (found.
isEmpty())
return false;
200 return double(found) <= value;
205 template <
class HitType>
213 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
221 return target_decoy(hit) || is_decoy(hit);
230 template <
class HitType>
238 accessions(accessions_)
245 if (accessions.count(it) > 0)
return true;
266 template <
class HitType>
274 accessions(accessions_)
281 if (accessions.count(it) > 0)
return true;
302 template <
class HitType,
class Entry>
311 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
312 rec_it != records.end(); ++rec_it)
314 items[getKey(*rec_it)] = &(*rec_it);
327 return items.count(getHitKey(hit)) > 0;
337 if(!exists(evidence)){
338 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
340 return *(items.find(getHitKey(evidence))->second);
356 struct HasMinPeptideLength;
362 struct HasLowMZError;
369 struct HasMatchingModification;
376 struct HasMatchingSequence;
379 struct HasNoEvidence;
397 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
408 [&](
const Int missed_cleavages)
411 bool max_filter = max_cleavages_ != disabledValue() ?
412 missed_cleavages > max_cleavages_ :
false;
413 bool min_filter = min_cleavages_ != disabledValue() ?
414 missed_cleavages < min_cleavages_ :
false;
415 return max_filter || min_filter;
421 hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
445 bool ignore_missed_cleavages,
446 bool methionine_cleavage) :
447 accession_resolver_(entries),
448 digestion_(digestion),
449 ignore_missed_cleavages_(ignore_missed_cleavages),
450 methionine_cleavage_(methionine_cleavage)
461 if (accession_resolver_.
exists(evidence))
465 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
471 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
476 <<
"' not found in fasta file!" << std::endl;
484 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
496 template <
class IdentificationType>
503 return id.getHits().empty();
530 template <
class Container,
class Predicate>
533 items.erase(std::remove_if(items.begin(), items.end(), pred),
538 template <
class Container,
class Predicate>
541 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
546 template <
class IDContainer,
class Predicate>
549 for (
auto& item : items)
551 removeMatchingItems(item.getHits(), pred);
556 template <
class IDContainer,
class Predicate>
559 for (
auto& item : items)
561 keepMatchingItems(item.getHits(), pred);
565 template <
class MapType,
class Predicate>
568 for (
auto& feat : prot_and_pep_ids)
570 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
572 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
575 template <
class MapType,
class Predicate>
578 for (
auto& feat : prot_and_pep_ids)
580 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
582 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
592 template <
class IdentificationType>
596 for (
typename std::vector<IdentificationType>::const_iterator id_it =
597 ids.begin(); id_it != ids.end(); ++id_it)
599 counter += id_it->getHits().size();
616 template <
class IdentificationType>
618 const std::vector<IdentificationType>& identifications,
619 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
621 if (identifications.empty())
return false;
623 typename std::vector<IdentificationType>::const_iterator best_id_it =
624 identifications.end();
625 typename std::vector<typename IdentificationType::HitType>::const_iterator
628 for (
typename std::vector<IdentificationType>::const_iterator id_it =
629 identifications.begin(); id_it != identifications.end(); ++id_it)
631 if (id_it->getHits().empty())
continue;
633 if (best_id_it == identifications.end())
636 best_hit_it = id_it->getHits().begin();
638 else if (best_id_it->getScoreType() != id_it->getScoreType())
640 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
643 bool higher_better = best_id_it->isHigherScoreBetter();
644 for (
typename std::vector<typename IdentificationType::HitType>::
645 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
646 id_it->getHits().end(); ++hit_it)
648 if ((higher_better && (hit_it->getScore() >
649 best_hit_it->getScore())) ||
650 (!higher_better && (hit_it->getScore() <
651 best_hit_it->getScore())))
653 best_hit_it = hit_it;
655 if (assume_sorted)
break;
659 if (best_id_it == identifications.end())
664 best_hit = *best_hit_it;
675 static void extractPeptideSequences(
676 const std::vector<PeptideIdentification>& peptides,
677 std::set<String>& sequences,
bool ignore_mods =
false);
685 template<
class EvidenceFilter>
687 EvidenceFilter& filter,
688 std::vector<PeptideIdentification>& peptides)
690 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
691 pep_it != peptides.end(); ++pep_it)
693 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
694 hit_it != pep_it->getHits().end(); ++hit_it )
696 std::vector<PeptideEvidence> evidences;
697 remove_copy_if(hit_it->getPeptideEvidences().begin(),
698 hit_it->getPeptideEvidences().end(),
699 back_inserter(evidences),
701 hit_it->setPeptideEvidences(evidences);
713 template <
class IdentificationType>
716 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
717 it != ids.end(); ++it)
724 static void removeUnreferencedProteins(
725 std::vector<ProteinIdentification>& proteins,
726 const std::vector<PeptideIdentification>& peptides);
735 static void updateProteinReferences(
736 std::vector<PeptideIdentification>& peptides,
737 const std::vector<ProteinIdentification>& proteins,
738 bool remove_peptides_without_reference =
false);
747 static void updateProteinReferences(
749 bool remove_peptides_without_reference =
false);
759 static bool updateProteinGroups(
760 std::vector<ProteinIdentification::ProteinGroup>& groups,
761 const std::vector<ProteinHit>& hits);
770 template <
class IdentificationType>
773 struct HasNoHits<IdentificationType> empty_filter;
774 removeMatchingItems(ids, empty_filter);
782 template <
class IdentificationType>
784 double threshold_score)
786 for (
typename std::vector<IdentificationType>::iterator id_it =
787 ids.begin(); id_it != ids.end(); ++id_it)
789 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
790 threshold_score, id_it->isHigherScoreBetter());
791 keepMatchingItems(id_it->getHits(), score_filter);
800 template <class IdentificationType>
801 static void filterHitsByScore(IdentificationType& id,
802 double threshold_score)
804 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
805 threshold_score, id->isHigherScoreBetter());
806 keepMatchingItems(id->getHits(), score_filter);
814 template <class IdentificationType>
815 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
817 for (
typename std::vector<IdentificationType>::iterator id_it =
818 ids.begin(); id_it != ids.end(); ++id_it)
821 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
839 template <
class IdentificationType>
846 struct HasMaxRank<typename IdentificationType::HitType>
847 rank_filter(min_rank - 1);
848 for (typename std::vector<IdentificationType>::iterator id_it =
849 ids.begin(); id_it != ids.end(); ++id_it)
851 removeMatchingItems(id_it->getHits(), rank_filter);
854 if (max_rank >= min_rank)
856 struct HasMaxRank<typename IdentificationType::HitType>
857 rank_filter(max_rank);
858 for (typename std::vector<IdentificationType>::iterator id_it =
859 ids.begin(); id_it != ids.end(); ++id_it)
861 keepMatchingItems(id_it->getHits(), rank_filter);
873 template <
class IdentificationType>
878 for (typename std::vector<IdentificationType>::iterator id_it =
879 ids.begin(); id_it != ids.end(); ++id_it)
881 removeMatchingItems(id_it->getHits(), decoy_filter);
892 template <
class IdentificationType>
894 const std::set<String> accessions)
897 for (auto& id_it : ids)
899 removeMatchingItems(id_it.getHits(), acc_filter);
910 template <
class IdentificationType>
912 const std::set<String>& accessions)
915 for (auto& id_it : ids)
917 keepMatchingItems(id_it.getHits(), acc_filter);
933 static void keepBestPeptideHits(
934 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
944 static void filterPeptidesByLength(
945 std::vector<PeptideIdentification>& peptides,
Size min_length,
946 Size max_length = UINT_MAX);
956 static void filterPeptidesByCharge(
957 std::vector<PeptideIdentification>& peptides,
Int min_charge,
961 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
962 double min_rt,
double max_rt);
965 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
966 double min_mz,
double max_mz);
979 static void filterPeptidesByMZError(
980 std::vector<PeptideIdentification>& peptides,
double mass_error,
990 template <
class Filter>
991 static void filterPeptideEvidences(
993 std::vector<PeptideIdentification>& peptides);
1006 static void filterPeptidesByRTPredictPValue(
1007 std::vector<PeptideIdentification>& peptides,
1008 const String& metavalue_key,
double threshold = 0.05);
1011 static void removePeptidesWithMatchingModifications(
1012 std::vector<PeptideIdentification>& peptides,
1013 const std::set<String>& modifications);
1016 static void keepPeptidesWithMatchingModifications(
1017 std::vector<PeptideIdentification>& peptides,
1018 const std::set<String>& modifications);
1027 static void removePeptidesWithMatchingSequences(
1028 std::vector<PeptideIdentification>& peptides,
1029 const std::vector<PeptideIdentification>& bad_peptides,
1030 bool ignore_mods =
false);
1039 static void keepPeptidesWithMatchingSequences(
1040 std::vector<PeptideIdentification>& peptides,
1041 const std::vector<PeptideIdentification>& good_peptides,
1042 bool ignore_mods =
false);
1045 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1053 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1054 peptides,
bool seq_only =
false);
1064 double peptide_threshold_score,
1065 double protein_threshold_score)
1069 protein_threshold_score);
1075 exp_it != experiment.
end(); ++exp_it)
1077 filterHitsByScore(exp_it->getPeptideIdentifications(),
1078 peptide_threshold_score);
1079 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1080 updateProteinReferences(exp_it->getPeptideIdentifications(),
1091 std::vector<PeptideIdentification> all_peptides;
1095 exp_it != experiment.
end(); ++exp_it)
1097 std::vector<PeptideIdentification>& peptides =
1098 exp_it->getPeptideIdentifications();
1099 keepNBestHits(peptides, n);
1100 removeEmptyIdentifications(peptides);
1101 updateProteinReferences(peptides,
1103 all_peptides.insert(all_peptides.end(), peptides.begin(),
1112 template <
class MapType>
1117 for (
auto& feat : map)
1119 keepNBestHits(feat.getPeptideIdentifications(), n);
1121 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1124 template <
class MapType>
1131 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1133 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1135 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1138 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1140 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1142 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1145 template <
class MapType>
1151 for (
const auto& idrun : prot_ids)
1156 for (
auto& feat : prot_and_pep_ids)
1158 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1161 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1164 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1169 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1172 for (
const auto&
id : prot_ids)
1176 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1184 for (
auto &pep : pep_ids)
1187 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1197 for (
auto& pep : pep_ids)
1199 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1213 auto pepIt = pep.
getHits().begin();
1214 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1215 for (; pepIt != pepItEnd; ++pepIt)
1229 int lookup_charge = 0;
1230 if (!ignore_charges)
1236 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1237 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1239 PeptideHit* &p = it_inserted_chg.first->second;
1240 if (!it_inserted_chg.second)
1267 const std::vector<FASTAFile::FASTAEntry>& proteins)
1269 std::set<String> accessions;
1270 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1271 proteins.begin(); it != proteins.end(); ++it)
1273 accessions.insert(it->identifier);
1283 exp_it != experiment.
end(); ++exp_it)
1285 if (exp_it->getMSLevel() == 2)
1287 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1289 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1290 updateHitRanks(exp_it->getPeptideIdentifications());
1300 static void keepBestMatchPerQuery(
1304 static void filterQueryMatchesByScore(
PeptideEvidence argument_type
Definition: IDFilter.h:435
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:184
void addMSLevel(int level)
adds a desired MS level for peaks to load
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:911
static bool addMissingSpectrumReferences(std::vector< PeptideIdentification > &peptides, const String &filename, bool stop_on_error=false, bool override_spectra_data=false, bool override_spectra_references=false, std::vector< ProteinIdentification > proteins=std::vector< ProteinIdentification >())
Add missing "spectrum_reference"s to peptide identifications based on raw data.
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:67
String key
Definition: IDFilter.h:165
Management and storage of parameters / INI files.
Definition: Param.h:73
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:482
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:433
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:840
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:576
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
String getEnzymeName() const
Returns the enzyme for the digestion.
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
Class for reading Percolator tab-delimited output files.
Definition: PercolatorOutfile.h:52
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:714
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
static String absolutePath(const String &file)
Replaces the relative path in the argument with the absolute path.
Int getCharge() const
returns the charge of the peptide
Annotates spectra from identifications and theoretical spectra or identifications from spectra and th...
Definition: SpectrumAnnotator.h:60
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:43
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
static void initializeLookup(SpectrumMetaDataLookup &lookup, const PeakMap &experiment, const String &scan_regex="")
Initializes a helper object for looking up spectrum meta data (RT, m/z)
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1182
void addReferenceFormat(const String &regexp)
Register a possible format for a spectrum reference.
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
Helper class for looking up spectra based on different attributes.
Definition: SpectrumLookup.h:67
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:237
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:241
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a xQuest XML file.
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
Representation of a Sequest output file.
Definition: SequestOutfile.h:61
HitType argument_type
Definition: IDFilter.h:305
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
Used to load OMSSAXML files.
Definition: OMSSAXMLFile.h:60
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:686
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:85
double value
Definition: DataValue.h:69
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
static ModificationsDB * getInstance()
Returns a pointer to the modifications DB (singleton)
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
static void keepNBestHits(std::vector< IdentificationType > &ids, Size n)
Filters peptide or protein identifications according to the score of the hits, keeping the n best hit...
Definition: IDFilter.h:815
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:108
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
loads data from an X! Tandem XML file
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:303
const String & getAccession() const
returns the accession of the protein
HitType argument_type
Definition: IDFilter.h:269
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1087
HitType argument_type
Definition: IDFilter.h:233
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
HitType argument_type
Definition: IDFilter.h:163
string value
Definition: DataValue.h:67
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp, bool get_precursor_rt=false)
Read spectra and store their meta data.
Definition: SpectrumMetaDataLookup.h:213
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
Size size() const
Definition: MSExperiment.h:127
ConstIterator begin() const
Gives access to the underlying text buffer.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:557
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1113
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
static bool addMissingRTsToPeptideIDs(std::vector< PeptideIdentification > &peptides, const String &filename, bool stop_on_error=false)
Add missing retention time values to peptide identifications based on raw data.
empty value
Definition: DataValue.h:73
void setIdentifier(const String &id)
Sets the identifier.
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:501
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:593
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
int Int
Signed integer type.
Definition: Types.h:102
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
PeptideHit argument_type
Definition: IDFilter.h:395
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:74
const char * getMessage() const noexcept
Returns the message.
Definition: PercolatorOutfile.h:58
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:231
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:438
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:255
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:547
any TSV file, for example msInspect file or OpenSWATH transition file (see TransitionTSVFile)
Definition: FileTypes.h:87
String toString() const
returns the peptide as string with modifications embedded in brackets
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
bool operator()(const HitType &hit) const
Definition: IDFilter.h:113
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:566
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:161
void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids)
Loads the identifications of a ProtXML file without identifier.
HitType argument_type
Definition: IDFilter.h:131
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
HitType argument_type
Definition: IDFilter.h:103
const std::unordered_set< String > & accessions
Definition: IDFilter.h:235
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1169
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
Representation of a protein hit.
Definition: ProteinHit.h:57
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
MzML file (.mzML)
Definition: FileTypes.h:72
integer list
Definition: DataValue.h:71
int exception
(Used by various macros. Indicates a rough category of the exception being caught....
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:453
static const std::string score_type_names[SIZE_OF_SCORETYPE]
Names of Percolator scores (to match ScoreType)
Definition: PercolatorOutfile.h:61
Iterator begin()
Definition: MSExperiment.h:157
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static std::vector< PeptideHit > getReferencingHits(const std::vector< PeptideHit > &, const std::set< String > &accession)
returns all peptide hits which reference to a given protein accession (i.e. filter by protein accessi...
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:101
void addIonMatchStatistics(PeptideIdentification &pi, MSSpectrum &spec, const TheoreticalSpectrumGenerator &tg, const SpectrumAlignment &sa) const
Adds ion match statistics to the PeptideIdentification pi.
Base class for TOPP applications.
Definition: TOPPBase.h:144
String & ensureLastChar(char end)
Makes sure the string ends with the character end.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
static Specificity getSpecificityByName(const String &name)
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
DataValue value
Definition: IDFilter.h:166
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
string list
Definition: DataValue.h:70
ConstIterator end() const
Gives access to the underlying text buffer.
static Int disabledValue()
Definition: IDFilter.h:400
Is this a decoy hit?
Definition: IDFilter.h:206
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:375
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
Not all required information provided.
Definition: Exception.h:195
const std::set< String > & accessions
Definition: IDFilter.h:271
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:335
HasDecoyAnnotation()
Definition: IDFilter.h:212
FASTA file (.fasta)
Definition: FileTypes.h:92
any XML format
Definition: FileTypes.h:98
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
GetMatchingItems()
Definition: IDFilter.h:318
Iterator end()
Definition: MSExperiment.h:167
Unknown file extension.
Definition: FileTypes.h:60
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:277
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:617
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:250
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:286
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:539
void endProgress() const
Ends the progress display.
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:783
static bool isDirectory(const String &path)
Return true if the given path specifies a directory.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
std::vector< String >::iterator Iterator
Mutable iterator.
Definition: TextFile.h:54
bool exists(const HitType &hit) const
Definition: IDFilter.h:325
Command line progress.
Definition: ProgressLogger.h:72
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:320
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:89
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
String key
Definition: IDFilter.h:188
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
Search parameters of the DB search.
Definition: ProteinIdentification.h:221
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Element could not be found exception.
Definition: Exception.h:662
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:77
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:443
TPP pepXML file (.pepXML)
Definition: FileTypes.h:75
Used to load and store PepXML files.
Definition: PepXMLFile.h:62
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1138
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
ItemMap items
Definition: IDFilter.h:307
A container for consensus elements.
Definition: ConsensusMap.h:79
Used to load and store xQuest result files.
Definition: XQuestResultXMLFile.h:55
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
bool methionine_cleavage_
Definition: IDFilter.h:441
IdentificationType argument_type
Definition: IDFilter.h:499
bool higher_score_better
Definition: IDFilter.h:106
void setParameters(const Param &param)
Sets the parameters.
bool operator()(PeptideHit &p)
Definition: IDFilter.h:404
xQuest XML file format for protein-protein cross-link identifications (.xquest.xml)
Definition: FileTypes.h:112
Size findByNativeID(const String &native_id) const
Look up spectrum by native ID.
void load(const String &filename, bool trim_lines=false, Int first_n=-1, bool skip_empty_lines=false)
Loads data from a text file.
bool ignore_missed_cleavages_
Definition: IDFilter.h:440
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
double value
Definition: IDFilter.h:189
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
void setSequence(const AASequence &sequence)
sets the peptide sequence
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:230
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:330
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:387
Int toInt() const
Conversion to int.
Specificity getSpecificity() const
Returns the specificity for the digestion.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
void setSearchEngine(const String &search_engine)
Sets the search engine type.
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
HasMaxRank(Size rank_)
Definition: IDFilter.h:135
bool operator()(const HitType &hit) const
Definition: IDFilter.h:196
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, const SpectrumMetaDataLookup &lookup)
Loads data from a Mascot XML file.
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, bool load_proteins=true, bool load_empty_hits=true)
loads data from an OMSSAXML file
const Param & getDefaults() const
Non-mutable access to the default parameters.
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:396
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:531
Type
Actual file types enum.
Definition: FileTypes.h:58
EnzymaticDigestion & digestion_
Definition: IDFilter.h:390
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1131
static bool fileList(const String &dir, const String &file_pattern, StringList &output, bool full_path=false)
Retrieves a list of files matching file_pattern in directory dir (returns filenames without paths unl...
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:309
Used to load and store idXML files.
Definition: IdXMLFile.h:63
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1125
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
OMSSA XML file format for peptide identifications (.xml)
Definition: FileTypes.h:83
double list
Definition: DataValue.h:72
Int max_cleavages_
Definition: IDFilter.h:392
bool operator()(const HitType &hit) const
Definition: IDFilter.h:216
double getScore() const
returns the PSM score
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:419
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:168
void load(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Load the content of the xquest.xml file into the provided data structures.
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
Generates theoretical spectra for peptides with various options.
Definition: TheoreticalSpectrumGenerator.h:67
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1063
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1265
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:273
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:267
bool filterByMissedCleavages(const String &sequence, std::function< bool(const Int)> filter) const
Filter based on the number of missed cleavages.
Int min_cleavages_
Definition: IDFilter.h:391
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1194
A more convenient string class.
Definition: String.h:58
void load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, std::vector< double > &pvalues, const String &database="", const bool ignore_proteins_per_peptide=false)
loads data from a Sequest outfile
Mascot XML file format for peptide identifications (.xml)
Definition: FileTypes.h:84
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:142
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Representation of a peptide hit.
Definition: PeptideHit.h:54
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1146
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
DataType valueType() const
returns the type of value stored
Definition: DataValue.h:365
double score
Definition: IDFilter.h:105
static enum ScoreType getScoreType(String score_type_name)
Return a score type given its name.
Size rank
Definition: IDFilter.h:133
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:56
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:893
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1207
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp)
Read and index spectra for later look-up.
Definition: SpectrumLookup.h:103
bool operator()(const HitType &hit) const
Definition: IDFilter.h:144
Used to load Mascot XML files.
Definition: MascotXMLFile.h:57
HitType argument_type
Definition: IDFilter.h:208
bool isHigherScoreBetter() const
returns the peptide score orientation
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:191
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:306
ScoreType
Types of Percolator scores.
Definition: PercolatorOutfile.h:58
Invalid value exception.
Definition: Exception.h:335
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:129
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:68
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:497
void sort()
Sorts the hits by score.
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:291
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
Used to load (storing not supported, yet) ProtXML files.
Definition: ProtXMLFile.h:70
bool operator()(const HitType &hit) const
Definition: IDFilter.h:173
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
void load(const String &filename, ProteinIdentification &proteins, std::vector< PeptideIdentification > &peptides, SpectrumMetaDataLookup &lookup, enum ScoreType output_score=QVALUE)
Loads a Percolator output file.
HitType argument_type
Definition: IDFilter.h:186
double toDouble() const
Conversion to double.
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
static Type nameToType(const String &name)
Converts a file type name into a Type.
integer value
Definition: DataValue.h:68
Definition: EnzymaticDigestion.h:71
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:874
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Percolator tab-delimited output (PSM level)
Definition: FileTypes.h:107
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
Parse Error exception.
Definition: Exception.h:622
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
ProteaseDigestion & digestion_
Definition: IDFilter.h:439
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:771
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:58
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
TPP protXML file (.protXML)
Definition: FileTypes.h:76
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
String identifier
Definition: FASTAFile.h:78
Invalid conversion exception.
Definition: Exception.h:362