152 static const std::array<std::string, (
Size)Unmatched::SIZE_OF_UNMATCHED> names_of_unmatched;
159 SIZE_OF_MISSING_DECOY
161 static const std::array<std::string, (
Size)MissingDecoy::SIZE_OF_MISSING_DECOY> names_of_missing_decoy;
171 inline ExitCodes run(std::vector<FASTAFile::FASTAEntry>& proteins, std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids)
174 return run<TFI_Vector>(protein_container, prot_ids, pep_ids);
215 if ((enzyme_name_ ==
"Chymotrypsin" || enzyme_name_ ==
"Chymotrypsin/P" || enzyme_name_ ==
"TrypChymo")
219 "The used enzyme " + enzyme_name_ +
"differentiates between I and L, therefore the IL_equivalent option cannot be used.");
222 if (decoy_string_.empty())
230 OPENMS_LOG_WARN <<
"Unable to determine decoy string automatically (not enough decoys were detected)! Using default " << (r.is_prefix ?
"prefix" :
"suffix") <<
" decoy string '" << r.name <<
"'\n"
231 <<
"If you think that this is incorrect, please provide a decoy_string and its position manually!" << std::endl;
233 prefix_ = r.is_prefix;
234 decoy_string_ = r.name;
236 OPENMS_LOG_INFO <<
"Using " << (prefix_ ?
"prefix" :
"suffix") <<
" decoy string '" << decoy_string_ <<
"'" << std::endl;
243 if (!enzyme_name_.empty() && (enzyme_name_.compare(AUTO_MODE) != 0))
247 else if (!prot_ids.empty() && prot_ids[0].getSearchParameters().digestion_enzyme.getName() !=
"unknown_enzyme")
249 OPENMS_LOG_INFO <<
"Info: using '" << prot_ids[0].getSearchParameters().digestion_enzyme.getName() <<
"' as enzyme (obtained from idXML) for digestion." << std::endl;
250 enzyme.
setEnzyme(&prot_ids[0].getSearchParameters().digestion_enzyme);
254 OPENMS_LOG_WARN <<
"Warning: Enzyme name neither given nor deduceable from input. Defaulting to Trypsin!" << std::endl;
258 bool xtandem_fix_parameters =
false;
259 bool msgfplus_fix_parameters =
false;
262 for (
const auto& prot_id : prot_ids)
264 String search_engine = prot_id.getOriginalSearchEngineName();
266 OPENMS_LOG_INFO <<
"Peptide identification engine: " << search_engine << std::endl;
267 if (search_engine ==
"XTANDEM" || prot_id.getSearchParameters().metaValueExists(
"SE:XTandem")) { xtandem_fix_parameters =
true; }
268 if (search_engine ==
"MS-GF+" || search_engine ==
"MSGFPLUS" || prot_id.getSearchParameters().metaValueExists(
"SE:MS-GF+")) { msgfplus_fix_parameters =
true; }
272 if (msgfplus_fix_parameters && enzyme.
getEnzymeName() ==
"Trypsin")
274 OPENMS_LOG_WARN <<
"MSGFPlus detected but enzyme cutting rules were set to Trypsin. Correcting to Trypsin/P to cope with special cutting rule in MSGFPlus." << std::endl;
280 if (!enzyme_specificity_.empty() && (enzyme_specificity_.compare(AUTO_MODE) != 0))
286 enzyme.
setSpecificity(prot_ids[0].getSearchParameters().enzyme_term_specificity);
291 OPENMS_LOG_WARN <<
"Warning: Enzyme specificity neither given nor present in the input file. Defaulting to 'full'!" << std::endl;
299 const size_t PROTEIN_CACHE_SIZE = 4e5;
301 this->startProgress(0, 1,
"Load first DB chunk");
302 proteins.cacheChunk(PROTEIN_CACHE_SIZE);
305 if (proteins.empty())
307 OPENMS_LOG_ERROR <<
"Error: An empty database was provided. Mapping makes no sense. Aborting..." << std::endl;
308 return DATABASE_EMPTY;
313 OPENMS_LOG_WARN <<
"Warning: An empty set of peptide identifications was provided. Output will be empty as well." << std::endl;
314 if (!keep_unreferenced_proteins_)
317 for (std::vector<ProteinIdentification>::iterator it = prot_ids.begin();
318 it != prot_ids.end(); ++it)
320 it->getHits().clear();
323 return PEPTIDE_IDS_EMPTY;
328 std::vector<bool> protein_is_decoy;
329 std::vector<std::string> protein_accessions;
331 bool invalid_protein_sequence =
false;
338 bool has_illegal_AAs(
false);
340 for (std::vector<PeptideIdentification>::const_iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
343 const std::vector<PeptideHit>& hits = it1->getHits();
344 for (std::vector<PeptideHit>::const_iterator it2 = hits.begin(); it2 != hits.end(); ++it2)
350 String seq = it2->getSequence().toUnmodifiedString().
remove(
'*');
353 OPENMS_LOG_ERROR <<
"Peptide sequence '" << it2->getSequence() <<
"' contains one or more ambiguous amino acids (B|J|Z|X).\n";
354 has_illegal_AAs =
true;
360 appendValue(pep_DB, seq.c_str());
365 OPENMS_LOG_ERROR <<
"One or more peptides contained illegal amino acids. This is not allowed!"
366 <<
"\nPlease either remove the peptide or replace it with one of the unambiguous ones (while allowing for ambiguous AA's to match the protein)." << std::endl;;
369 OPENMS_LOG_INFO <<
"Mapping " << length(pep_DB) <<
" peptides to " << (proteins.size() == PROTEIN_CACHE_SIZE ?
"? (unknown number of)" :
String(proteins.size())) <<
" proteins." << std::endl;
371 if (length(pep_DB) == 0)
373 OPENMS_LOG_WARN <<
"Warning: Peptide identifications have no hits inside! Output will be empty as well." << std::endl;
374 return PEPTIDE_IDS_EMPTY;
380 OPENMS_LOG_INFO <<
"Searching with up to " << aaa_max_ <<
" ambiguous amino acid(s) and " << mm_max_ <<
" mismatch(es)!" << std::endl;
391 uint16_t count_j_proteins(0);
392 bool has_active_data =
true;
393 const std::string jumpX(aaa_max_ + mm_max_ + 1,
'X');
395 this->startProgress(0, proteins.size() == PROTEIN_CACHE_SIZE ? std::numeric_limits<SignedSize>::max() : proteins.size(),
"Aho-Corasick");
396 std::atomic<int> progress_prots(0);
411 DEBUG_ONLY std::cerr <<
" activating cache ...\n";
412 has_active_data = proteins.activateCache();
413 protein_accessions.resize(proteins.getChunkOffset() + proteins.chunkSize());
416 if (!has_active_data)
break;
421 DEBUG_ONLY std::cerr <<
"Filling Protein Cache ...";
422 proteins.cacheChunk(PROTEIN_CACHE_SIZE);
423 protein_is_decoy.resize(proteins.getChunkOffset() + prot_count);
426 const String& seq = proteins.chunkAt(i).identifier;
427 protein_is_decoy[i + proteins.getChunkOffset()] = (prefix_ ? seq.
hasPrefix(decoy_string_) : seq.
hasSuffix(decoy_string_));
431 DEBUG_ONLY std::cerr <<
" starting for loop \n";
433 #pragma omp for schedule(dynamic, 100) nowait
437 if (omp_get_thread_num() == 0)
439 this->setProgress(progress_prots);
442 prot = proteins.chunkAt(i).sequence;
446 if (prot.
has(
'[') || prot.
has(
'('))
448 invalid_protein_sequence =
true;
467 Size prot_idx = i + proteins.getChunkOffset();
476 size_t offset = -1, start = 0;
477 while ((offset = prot.find(jumpX, offset + 1)) != std::string::npos)
480 addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start, offset + jumpX.size() - start), prot, prot_idx, (
int)start, func_threads);
482 while (offset + jumpX.size() < prot.size() && prot[offset + jumpX.size()] ==
'X') ++offset;
487 if (start < prot.size())
489 addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start), prot, prot_idx, (
int)start, func_threads);
494 addHits_(fuzzyAC, pattern, pep_DB, prot, prot, prot_idx, 0, func_threads);
499 protein_accessions[prot_idx] = proteins.chunkAt(i).identifier;
500 acc_to_prot_thread[protein_accessions[prot_idx]] = prot_idx;
507 #pragma omp critical(PeptideIndexer_joinAC)
512 func.
merge(func_threads);
514 acc_to_prot.insert(acc_to_prot_thread.begin(), acc_to_prot_thread.end());
515 acc_to_prot_thread.clear();
521 std::cout <<
"Merge took: " << s.
toString() <<
"\n";
523 std::cout << mu.
delta(
"Aho-Corasick") <<
"\n\n";
529 <<
" ... rejected by enzyme filter: " << func.
filter_rejected << std::endl;
531 if (count_j_proteins)
533 OPENMS_LOG_WARN <<
"PeptideIndexer found " << count_j_proteins <<
" protein sequences in your database containing the amino acid 'J'."
534 <<
"To match 'J' in a protein, an ambiguous amino acid placeholder for I/L will be used.\n"
535 <<
"This costs runtime and eats into the 'aaa_max' limit, leaving less opportunity for B/Z/X matches.\n"
536 <<
"If you want 'J' to be treated as unambiguous, enable '-IL_equivalent'!" << std::endl;
546 for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
548 runid_to_runidx[prot_ids[run_idx].getIdentifier()] = run_idx;
552 Size stats_matched_unique(0);
553 Size stats_matched_multi(0);
554 Size stats_unmatched(0);
555 Size stats_count_m_t(0);
556 Size stats_count_m_d(0);
557 Size stats_count_m_td(0);
562 for (std::vector<PeptideIdentification>::iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
565 Size run_idx = runid_to_runidx[it1->getIdentifier()];
567 std::vector<PeptideHit>& hits = it1->getHits();
569 for (std::vector<PeptideHit>::iterator it_hit = hits.begin(); it_hit != hits.end(); )
572 it_hit->setPeptideEvidences(std::vector<PeptideEvidence>());
577 bool matches_target(
false);
578 bool matches_decoy(
false);
580 std::set<Size> prot_indices;
582 for (std::set<PeptideProteinMatchInformation>::const_iterator it_i = func.
pep_to_prot[pep_idx].begin();
585 prot_indices.insert(it_i->protein_index);
586 const String& accession = protein_accessions[it_i->protein_index];
587 PeptideEvidence pe(accession, it_i->position, it_i->position + (
int)it_hit->getSequence().size() - 1, it_i->AABefore, it_i->AAAfter);
588 it_hit->addPeptideEvidence(pe);
590 runidx_to_protidx[run_idx].insert(it_i->protein_index);
592 if (protein_is_decoy[it_i->protein_index])
594 matches_decoy =
true;
598 matches_target =
true;
603 if (matches_decoy && matches_target)
605 it_hit->setMetaValue(
"target_decoy",
"target+decoy");
608 else if (matches_target)
610 it_hit->setMetaValue(
"target_decoy",
"target");
613 else if (matches_decoy)
615 it_hit->setMetaValue(
"target_decoy",
"decoy");
620 if (prot_indices.size() == 1)
622 it_hit->setMetaValue(
"protein_references",
"unique");
623 ++stats_matched_unique;
625 else if (prot_indices.size() > 1)
627 it_hit->setMetaValue(
"protein_references",
"non-unique");
628 ++stats_matched_multi;
633 if (stats_unmatched < 15)
OPENMS_LOG_INFO <<
"Unmatched peptide: " << it_hit->getSequence() <<
"\n";
634 else if (stats_unmatched == 15)
OPENMS_LOG_INFO <<
"Unmatched peptide: ...\n";
635 if (unmatched_action_ == Unmatched::REMOVE)
637 it_hit = hits.erase(it_hit);
642 it_hit->setMetaValue(
"protein_references",
"unmatched");
651 Size total_peptides = stats_count_m_t + stats_count_m_d + stats_count_m_td + stats_unmatched;
655 OPENMS_LOG_INFO <<
" unmatched : " << stats_unmatched <<
" (" << stats_unmatched * 100 / total_peptides <<
" %)\n";
657 OPENMS_LOG_INFO <<
" match to target DB only: " << stats_count_m_t <<
" (" << stats_count_m_t * 100 / total_peptides <<
" %)\n";
658 OPENMS_LOG_INFO <<
" match to decoy DB only : " << stats_count_m_d <<
" (" << stats_count_m_d * 100 / total_peptides <<
" %)\n";
659 OPENMS_LOG_INFO <<
" match to both : " << stats_count_m_td <<
" (" << stats_count_m_td * 100 / total_peptides <<
" %)\n";
662 OPENMS_LOG_INFO <<
" no match (to 0 protein) : " << stats_unmatched <<
"\n";
663 OPENMS_LOG_INFO <<
" unique match (to 1 protein) : " << stats_matched_unique <<
"\n";
664 OPENMS_LOG_INFO <<
" non-unique match (to >1 protein): " << stats_matched_multi << std::endl;
667 Size stats_matched_proteins(0), stats_matched_new_proteins(0), stats_orphaned_proteins(0), stats_proteins_target(0), stats_proteins_decoy(0);
670 for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
672 std::set<Size> masterset = runidx_to_protidx[run_idx];
674 std::vector<ProteinHit>& phits = prot_ids[run_idx].getHits();
677 std::vector<ProteinHit> orphaned_hits;
678 for (std::vector<ProteinHit>::iterator p_hit = phits.begin(); p_hit != phits.end(); ++p_hit)
680 const String& acc = p_hit->getAccession();
681 if (!acc_to_prot.
has(acc))
683 ++stats_orphaned_proteins;
684 if (keep_unreferenced_proteins_)
686 p_hit->setMetaValue(
"target_decoy",
"");
687 orphaned_hits.push_back(*p_hit);
692 phits = orphaned_hits;
697 phits.reserve(phits.size() + masterset.size());
698 for (std::set<Size>::const_iterator it = masterset.begin(); it != masterset.end(); ++it)
703 if (write_protein_sequence_ || write_protein_description_)
705 proteins.readAt(fe, *it);
706 if (write_protein_sequence_)
710 if (write_protein_description_)
715 if (protein_is_decoy[*it])
718 ++stats_proteins_decoy;
723 ++stats_proteins_target;
725 phits.push_back(hit);
726 ++stats_matched_new_proteins;
728 stats_matched_proteins += phits.size();
735 OPENMS_LOG_INFO <<
" total proteins searched: " << proteins.size() <<
"\n";
736 OPENMS_LOG_INFO <<
" matched proteins : " << stats_matched_proteins <<
" (" << stats_matched_new_proteins <<
" new)\n";
737 if (stats_matched_proteins)
739 OPENMS_LOG_INFO <<
" matched target proteins: " << stats_proteins_target <<
" (" << stats_proteins_target * 100 / stats_matched_proteins <<
" %)\n";
740 OPENMS_LOG_INFO <<
" matched decoy proteins : " << stats_proteins_decoy <<
" (" << stats_proteins_decoy * 100 / stats_matched_proteins <<
" %)\n";
742 OPENMS_LOG_INFO <<
" orphaned proteins : " << stats_orphaned_proteins << (keep_unreferenced_proteins_ ?
" (all kept)" :
" (all removed)\n");
747 bool has_error =
false;
749 if (invalid_protein_sequence)
751 OPENMS_LOG_ERROR <<
"Error: One or more protein sequences contained the characters '[' or '(', which are illegal in protein sequences."
752 <<
"\nPeptide hits might be masked by these characters (which usually indicate presence of modifications).\n";
756 if ((stats_count_m_d + stats_count_m_td) == 0)
758 String msg(
"No peptides were matched to the decoy portion of the database! Did you provide the correct concatenated database? Are your 'decoy_string' (=" + decoy_string_ +
") and 'decoy_string_position' (=" + std::string(param_.getValue(
"decoy_string_position")) +
") settings correct?");
759 if (missing_decoy_action_ == MissingDecoy::IS_ERROR)
761 OPENMS_LOG_ERROR <<
"Error: " << msg <<
"\nSet 'missing_decoy_action' to 'warn' if you are sure this is ok!\nAborting ..." << std::endl;
764 else if (missing_decoy_action_ == MissingDecoy::WARN)
766 OPENMS_LOG_WARN <<
"Warn: " << msg <<
"\nSet 'missing_decoy_action' to 'error' if you want to elevate this to an error!" << std::endl;
773 if (stats_unmatched > 0)
775 OPENMS_LOG_ERROR <<
"PeptideIndexer found unmatched peptides, which could not be associated to a protein.\n";
776 if (unmatched_action_ == Unmatched::IS_ERROR)
779 <<
"Potential solutions:\n"
780 <<
" - check your FASTA database is identical to the search DB (or use 'auto')\n"
781 <<
" - set 'enzyme:specificity' and 'enzyme:name' to 'auto' to match the parameters of the search engine\n"
782 <<
" - increase 'aaa_max' to allow more ambiguous amino acids\n"
783 <<
" - as a last resort: use the 'unmatched_action' option to accept or even remove unmatched peptides\n"
784 <<
" (note that unmatched peptides cannot be used for FDR calculation or quantification)\n";
787 else if (unmatched_action_ == Unmatched::WARN)
789 OPENMS_LOG_ERROR <<
" Warning: " << stats_unmatched <<
" unmatched hits have been found, but were not removed!\n"
790 <<
"These are not annotated with target/decoy information and might lead to issues with downstream tools (such as FDR).\n"
791 <<
"Switch to '" << names_of_unmatched[(
Size)Unmatched::REMOVE] <<
"' if you want to avoid these problems.\n";
793 else if (unmatched_action_ == Unmatched::REMOVE)
795 OPENMS_LOG_ERROR <<
" Warning: " << stats_unmatched <<
" unmatched hits have been removed!\n"
796 <<
"Make sure that these hits are actually a violation of the cutting rules by inspecting the database!\n";
797 if (xtandem_fix_parameters)
OPENMS_LOG_ERROR <<
"Since the results are from X!Tandem, this is probably ok (check anyways).\n";
808 OPENMS_LOG_ERROR <<
"Result files will be written, but PeptideIndexer will exit with an error code." << std::endl;
809 return UNEXPECTED_RESULT;
827 const std::tuple<const Size&, const Int&, const char&, const char&>
tie()
const
829 return std::tie(protein_index,
position, AABefore, AAAfter);
833 return tie() < other.
tie();
837 return tie() == other.
tie();
844 typedef std::map<OpenMS::Size, std::set<PeptideProteinMatchInformation> >
MapType;
855 pep_to_prot(), filter_passed(0), filter_rejected(0), enzyme_(enzyme), xtandem_(xtandem)
861 if (pep_to_prot.empty())
867 for (FoundProteinFunctor::MapType::const_iterator it = other.
pep_to_prot.begin(); it != other.
pep_to_prot.end(); ++it)
869 this->pep_to_prot[it->first].insert(other.
pep_to_prot[it->first].begin(), other.
pep_to_prot[it->first].end());
894 (
position + len_pep >= seq_prot.size()) ?
898 pep_to_prot[idx_pep].insert(match);
916 const seqan::Peptide& tmp_pep = pep_DB[fuzzyAC.
getHitDBIndex()];
924 bool prefix_{
false };
929 bool write_protein_sequence_{
false };
930 bool write_protein_description_{
false };
931 bool keep_unreferenced_proteins_{
false };
933 bool IL_equivalent_{
false };
#define DEBUG_ONLY
Definition: AhoCorasickAmbiguous.h:46
#define OPENMS_LOG_WARN
Macro to be used if a warning (a piece of information which should be read by the user) should be logged.
Definition: LogStream.h:460
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
Extended Aho-Corasick algorithm capable of matching ambiguous amino acids in the pattern (i....
Definition: AhoCorasickAmbiguous.h:973
::seqan::StringSet<::seqan::AAString > PeptideDB
Definition: AhoCorasickAmbiguous.h:975
void setProtein(const String &protein_sequence)
Reset to new protein sequence. All previous data is forgotten.
Definition: AhoCorasickAmbiguous.h:1026
Int getHitProteinPosition()
Offset into protein sequence where hit was found.
Definition: AhoCorasickAmbiguous.h:1059
static void initPattern(const PeptideDB &pep_db, const int aaa_max, const int mm_max, FuzzyACPattern &pattern)
Construct a trie from a set of peptide sequences (which are to be found in a protein).
Definition: AhoCorasickAmbiguous.h:993
bool findNext(const FuzzyACPattern &pattern)
Enumerate hits.
Definition: AhoCorasickAmbiguous.h:1039
Size getHitDBIndex()
Get index of hit into peptide database of the pattern.
Definition: AhoCorasickAmbiguous.h:1049
::seqan::Pattern< PeptideDB, ::seqan::FuzzyAC > FuzzyACPattern
Definition: AhoCorasickAmbiguous.h:976
static Result findDecoyString(FASTAContainer< T > &proteins)
Heuristic to determine the decoy string given a set of protein names.
Definition: FASTAContainer.h:361
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
@ SPEC_FULL
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:70
@ SPEC_UNKNOWN
Definition: EnzymaticDigestion.h:71
static Specificity getSpecificityByName(const String &name)
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:77
String getEnzymeName() const
Returns the enzyme for the digestion.
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Not implemented exception.
Definition: Exception.h:430
FASTAContainer<TFI_Vector> simply takes an existing vector of FASTAEntries and provides the same inte...
Definition: FASTAContainer.h:246
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
bool has(const Key &key) const
Test whether the map contains the given key.
Definition: Map.h:108
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
static const char C_TERMINAL_AA
Definition: PeptideEvidence.h:61
static const char N_TERMINAL_AA
Definition: PeptideEvidence.h:60
Refreshes the protein references for all peptide hits in a vector of PeptideIdentifications and adds ...
Definition: PeptideIndexing.h:128
const String & getDecoyString() const
Unmatched
Action to take when peptide hits could not be matched.
Definition: PeptideIndexing.h:146
MissingDecoy
Definition: PeptideIndexing.h:155
static char const *const AUTO_MODE
name of enzyme/specificity which signals that the enzyme/specificity should be taken from meta inform...
Definition: PeptideIndexing.h:132
ExitCodes run(std::vector< FASTAFile::FASTAEntry > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
forward for old interface and pyOpenMS; use run<T>() for more control
Definition: PeptideIndexing.h:171
ExitCodes run(FASTAContainer< T > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
Re-index peptide identifications honoring enzyme cutting rules, ambiguous amino acids and target/deco...
Definition: PeptideIndexing.h:213
~PeptideIndexing() override
Default destructor.
PeptideIndexing()
Default constructor.
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
ExitCodes
Exit codes.
Definition: PeptideIndexing.h:136
@ PEPTIDE_IDS_EMPTY
Definition: PeptideIndexing.h:139
@ ILLEGAL_PARAMETERS
Definition: PeptideIndexing.h:140
@ DATABASE_EMPTY
Definition: PeptideIndexing.h:138
@ EXECUTION_OK
Definition: PeptideIndexing.h:137
void addHits_(AhoCorasickAmbiguous &fuzzyAC, const AhoCorasickAmbiguous::FuzzyACPattern &pattern, const AhoCorasickAmbiguous::PeptideDB &pep_DB, const String &prot, const String &full_prot, SignedSize idx_prot, Int offset, FoundProteinFunctor &func_threads) const
Definition: PeptideIndexing.h:911
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:61
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
Representation of a protein hit.
Definition: ProteinHit.h:60
void setSequence(const String &sequence)
sets the protein sequence
void setDescription(const String &description)
sets the description of the protein
void setAccession(const String &accession)
sets the accession of the protein
This class is used to determine the current process' CPU (user and/or kernel) and wall time.
Definition: StopWatch.h:66
String toString() const
get a compact representation of the current time status.
void start()
Start the stop watch.
void stop()
Stop the stop watch (can be resumed later). If the stop watch was not running an exception is thrown.
double getClockTime() const
void reset()
Clear the stop watch but keep running.
static String & toUpper(String &this_s)
Definition: StringUtils.h:874
A more convenient string class.
Definition: String.h:61
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
String & remove(char what)
Remove all occurrences of the character what.
bool has(Byte byte) const
true if String contains the byte, false otherwise
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
int Int
Signed integer type.
Definition: Types.h:102
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
template parameter for vector-based FASTA access
Definition: FASTAContainer.h:82
bool isAmbiguous(AAcid c)
Definition: AhoCorasickAmbiguous.h:580
String< AAcid, Alloc< void > > AAString
Definition: AhoCorasickAmbiguous.h:206
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:563
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String sequence
Definition: FASTAFile.h:75
String description
Definition: FASTAFile.h:74
Definition: PeptideIndexing.h:842
ProteaseDigestion enzyme_
Definition: PeptideIndexing.h:850
OpenMS::Size filter_rejected
Definition: PeptideIndexing.h:847
void addHit(const OpenMS::Size idx_pep, const OpenMS::Size idx_prot, const OpenMS::Size len_pep, const OpenMS::String &seq_prot, OpenMS::Int position)
Definition: PeptideIndexing.h:880
std::map< OpenMS::Size, std::set< PeptideProteinMatchInformation > > MapType
Definition: PeptideIndexing.h:844
FoundProteinFunctor(const ProteaseDigestion &enzyme, bool xtandem)
Definition: PeptideIndexing.h:854
MapType pep_to_prot
Definition: PeptideIndexing.h:845
bool xtandem_
Definition: PeptideIndexing.h:851
void merge(FoundProteinFunctor &other)
Definition: PeptideIndexing.h:859
OpenMS::Size filter_passed
Definition: PeptideIndexing.h:846
A convenience class to report either absolute or delta (between two timepoints) RAM usage.
Definition: SysInfo.h:84
String delta(const String &event="delta")
void after()
record data for the second timepoint