|
OpenMS
2.5.0
|
Go to the documentation of this file.
94 void getMultipleSpectra(std::map<Int, MSSpectrum>& spectra,
const NASequence& oligo,
const std::set<Int>& charges,
Int base_charge = 1)
const;
97 void updateMembers_()
override;
103 void addFragmentPeaks_(
MSSpectrum& spectrum,
const std::vector<double>& fragment_masses,
const String& ion_type,
double offset,
double intensity,
Size start = 0)
const;
106 void addAMinusBPeaks_(
MSSpectrum& spectrum,
const std::vector<double>& fragment_masses,
const NASequence& oligo,
Size start = 0)
const;
112 void addChargedSpectrum_(
MSSpectrum& spectrum,
const MSSpectrum& uncharged_spectrum,
Int charge,
bool add_precursor)
const;
static DateTime now()
Returns the current date and time.
void addMSLevel(int level)
adds a desired MS level for peaks to load
Normalizes the peak intensities spectrum-wise.
Definition: Normalizer.h:57
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:54
Generates theoretical spectra for nucleic acid sequences.
Definition: NucleicAcidSpectrumGenerator.h:53
double d_intensity_
Definition: NucleicAcidSpectrumGenerator.h:130
IdentificationData::IdentifiedOligoRef oligo_ref
Definition: NucleicAcidSearchEngine.cpp:273
void getMultipleSpectra(std::map< Int, MSSpectrum > &spectra, const NASequence &oligo, const std::set< Int > &charges, Int base_charge=1) const
Generates spectra in multiple charge states for an oligonucleotide sequence.
Management and storage of parameters / INI files.
Definition: Param.h:73
std::set< Int > charges
Definition: DBSearchParam.h:55
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:61
bool add_aB_ions_
Definition: NucleicAcidSpectrumGenerator.h:122
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
const MzTabOSMSectionRows & getOSMSectionRows() const
bool operator()(const NASequence &s)
Definition: NucleicAcidSearchEngine.cpp:334
Size findNearest(CoordinateType mz) const
Binary search for the peak nearest to a specific m/z.
bool add_first_prefix_ion_
Definition: NucleicAcidSpectrumGenerator.h:123
Representation of an empirical formula.
Definition: EmpiricalFormula.h:82
void setProgress(SignedSize value) const
Sets the current progress.
bool add_x_ions_
Definition: NucleicAcidSpectrumGenerator.h:119
const Param & getParameters() const
Non-mutable access to the parameters.
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:43
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
SeqType sequence
Definition: IdentifiedSequence.h:54
Size scan_index
Definition: NucleicAcidSearchEngine.cpp:257
Information about software used for data processing.
Definition: DataProcessingSoftware.h:49
void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool single_charge_spectra, bool negative_mode, Int min_charge, Int max_charge, bool include_unknown_charge)
Definition: NucleicAcidSearchEngine.cpp:531
const InputFiles & getInputFiles() const
Return the registered input files (immutable)
Definition: IdentificationData.h:300
static void applyVariableModifications(const std::set< ConstRibonucleotidePtr > &var_mods, const NASequence &seq, Size max_variable_mods_per_NASequence, std::vector< NASequence > &all_modified_NASequences, bool keep_original=true)
Applies variable modifications to a single NASequence. If keep_original is set the original (e....
ExitCodes
Exit codes.
Definition: TOPPBase.h:149
double a_intensity_
Definition: NucleicAcidSpectrumGenerator.h:127
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
NLargest removes all but the n largest peaks.
Definition: NLargest.h:54
std::vector< ScoreTypeRef > assigned_scores
Definition: DataProcessingSoftware.h:57
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
ProcessingStepRef getCurrentProcessingStep()
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:121
IdentifiedOligoRef registerIdentifiedOligo(const IdentifiedOligo &oligo)
Register an identified RNA oligonucleotide.
Size max_size_
Definition: NucleicAcidSearchEngine.cpp:328
bool add_d_ions_
Definition: NucleicAcidSpectrumGenerator.h:117
bool add_z_ions_
Definition: NucleicAcidSpectrumGenerator.h:121
Meta data for a search hit (e.g. peptide-spectrum match).
Definition: MoleculeQueryMatch.h:61
double calculatePrecursorMass_(double mz, Int charge, Int isotope, double adduct_mass, bool negative_mode)
Definition: NucleicAcidSearchEngine.cpp:656
Precursor meta information.
Definition: Precursor.h:57
bool add_all_precursor_charges_
Definition: NucleicAcidSpectrumGenerator.h:126
Search query, e.g. spectrum or feature.
Definition: DataQuery.h:47
void addScore(ScoreTypeRef score_type, double score, const boost::optional< ProcessingStepRef > &processing_step_opt=boost::none)
Add a score (possibly connected to a processing step)
Definition: ScoredProcessingResult.h:97
Size size() const
Definition: MSExperiment.h:127
bool add_precursor_peaks_
Definition: NucleicAcidSpectrumGenerator.h:125
Size getNrSpectra() const
get the total number of spectra available
void setScore(double score)
sets the PSM score
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
void setCharge(Int charge)
Mutable access to the charge.
String adduct
Definition: NucleicAcidSearchEngine.cpp:260
void resolveAmbiguousMods_(HitsByScore &hits)
Definition: NucleicAcidSearchEngine.cpp:677
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
Definition: ProteinIdentification.h:213
bool resolve_ambiguous_mods_
Definition: NucleicAcidSearchEngine.cpp:156
void clearMSLevels()
clears the MS levels
bool add_y_ions_
Definition: NucleicAcidSpectrumGenerator.h:120
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
void setCharge(Int charge)
sets the charge of the peptide
const DataQueries & getDataQueries() const
Return the registered data queries (immutable)
Definition: IdentificationData.h:336
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:233
void filterPeakMap(PeakMap &exp)
void calculateCoverages(bool check_molecule_length=false)
Calculate sequence coverages of parent molecules.
const MoleculeQueryMatches & getMoleculeQueryMatches() const
Return the registered molecule-query matches (immutable)
Definition: IdentificationData.h:372
void setEnzyme(const DigestionEnzyme *enzyme) override
Sets the enzyme for the digestion.
void postProcessHits_(const PeakMap &exp, vector< HitsByScore > &annotated_hits, IdentificationData &id_data, bool negative_mode)
Definition: NucleicAcidSearchEngine.cpp:738
Size missed_cleavages
Definition: DBSearchParam.h:67
void deisotopeAndSingleChargeMSSpectrum_(MSSpectrum &in, Int min_charge, Int max_charge, double fragment_tolerance, bool fragment_unit_ppm, bool keep_only_deisotoped=false, Size min_isopeaks=3, Size max_isopeaks=10, bool make_single_charged=true)
Definition: NucleicAcidSearchEngine.cpp:375
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:228
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Iterator begin()
Definition: MSExperiment.h:157
Definition: MetaData.h:74
void generateLFQInput_(IdentificationData &id_data, const String &out_file)
Definition: NucleicAcidSearchEngine.cpp:869
const double C13C12_MASSDIFF_U
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void removeDecoys(IdentificationData &id_data)
Base class for TOPP applications.
Definition: TOPPBase.h:144
Size min_size_
Definition: NucleicAcidSearchEngine.cpp:327
double y_intensity_
Definition: NucleicAcidSpectrumGenerator.h:133
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
double b_intensity_
Definition: NucleicAcidSpectrumGenerator.h:128
void setMSLevels(const std::vector< Int > &levels)
sets the desired MS levels for peaks to load
PeakAnnotationSteps peak_annotations
Definition: MoleculeQueryMatch.h:71
Options for loading files containing peak data.
Definition: PeakFileOptions.h:47
const DigestionEnzyme * digestion_enzyme
Definition: DBSearchParam.h:66
Class for the enzymatic digestion of RNAs.
Definition: RNaseDigestion.h:52
static MzTab exportMzTab(const IdentificationData &id_data)
Export to mzTab format.
void setName(const String &name)
Sets the name.
void cleanup(bool require_query_match=true, bool require_identified_sequence=true, bool require_parent_match=true, bool require_parent_group=false, bool require_match_group=false)
Clean up the data structure after filtering parts of it.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
HasInvalidLength(Size min_size, Size max_size)
Definition: NucleicAcidSearchEngine.cpp:330
bool modifyStrings(bool modify)
Switch modification of strings (quoting/replacing of separators) on/off.
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:234
void sortByPosition()
Lexicographically sorts the peaks by their position.
const PrecursorInfo * precursor_ref
Definition: NucleicAcidSearchEngine.cpp:277
void setCurrentProcessingStep(ProcessingStepRef step_ref)
Set a data processing step that will apply to all subsequent "register..." calls.
CoordinateType getMZ() const
Non-mutable access to m/z.
Definition: Peak1D.h:115
void setVersion(const String &version)
Sets the software version.
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
SearchParamRef registerDBSearchParam(const DBSearchParam &param)
Register database search parameters.
Iterator end()
Definition: MSExperiment.h:167
vector< PeptideHit::PeakAnnotation > annotations
Definition: NucleicAcidSearchEngine.cpp:276
void setHigherScoreBetter(bool value)
sets the peptide score orientation
bool add_metainfo_
Definition: NucleicAcidSpectrumGenerator.h:124
void endProgress() const
Ends the progress display.
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:136
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:89
const ScoreTypes & getScoreTypes() const
Return the registered score types (immutable)
Definition: IdentificationData.h:330
Search parameters of the DB search.
Definition: ProteinIdentification.h:221
enum MoleculeType molecule_type
Definition: DBSearchParam.h:48
File adapter for MzTab files.
Definition: MzTabFile.h:58
Element could not be found exception.
Definition: Exception.h:662
ExitCodes main_(int, const char **) override
The actual "main" method. main_() is invoked by main().
Definition: NucleicAcidSearchEngine.cpp:922
Data processing step that is applied to the data (e.g. database search, PEP calculation,...
Definition: DataProcessingStep.h:46
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
int main(int argc, const char **argv)
Definition: NucleicAcidSearchEngine.cpp:1378
void registerOptionsAndFlags_()
Sets the valid command line options (with argument) and flags (without argument).
Definition: NucleicAcidSearchEngine.cpp:159
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:236
ProcessingStepRef registerDataProcessingStep(const DataProcessingStep &step)
Register a data processing step.
bool fragment_tolerance_ppm
Definition: DBSearchParam.h:63
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
const std::string PRECURSOR_ERROR_PPM_USERPARAM
bool add_w_ions_
Definition: NucleicAcidSpectrumGenerator.h:118
void setScoreType(const String &type)
sets the peptide score type
const double PROTON_MASS_U
set< ConstRibonucleotidePtr > getModifications_(const set< String > &mod_names)
Definition: NucleicAcidSearchEngine.cpp:283
static RibonucleotideDB * getInstance()
replacement for constructor (singleton pattern)
bool add_b_ions_
Definition: NucleicAcidSpectrumGenerator.h:115
void setParameters(const Param &param)
Sets the parameters.
bool isAmbiguous() const
Return whether this is an "ambiguous" modification (representing isobaric modifications on the base/r...
double c_intensity_
Definition: NucleicAcidSpectrumGenerator.h:129
bool precursor_tolerance_ppm
Definition: DBSearchParam.h:62
static String path(const String &file)
Returns the path of the file (without the file name).
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
Size min_length
Definition: DBSearchParam.h:68
void calculateAndFilterFDR_(IdentificationData &id_data, bool only_top_hits)
Definition: NucleicAcidSearchEngine.cpp:841
String database
Definition: DBSearchParam.h:51
std::set< String > variable_mods
Definition: DBSearchParam.h:58
const MzTabNucleicAcidSectionRows & getNucleicAcidSectionRows() const
static void filterQueryMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
void setSequence(const AASequence &sequence)
sets the peptide sequence
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:230
bool add_a_ions_
Definition: NucleicAcidSpectrumGenerator.h:114
Parameters specific to a database search step.
Definition: DBSearchParam.h:46
double precursor_mass_tolerance
Definition: DBSearchParam.h:60
map< String, String > ambiguous_mods_
Definition: NucleicAcidSearchEngine.cpp:155
NucleicAcidSearchEngine()
Definition: NucleicAcidSearchEngine.cpp:146
double x_intensity_
Definition: NucleicAcidSpectrumGenerator.h:132
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:235
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:855
double getAverageWeight() const
returns the average weight of the formula (includes proton charges)
static void exportIDs(const IdentificationData &id_data, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, bool export_oligonucleotides=false)
Export to legacy peptide/protein identifications.
void store(const String &filename, const PeakMap &map) const
Stores a map in an MzML file.
void digest(const NASequence &rna, std::vector< NASequence > &output, Size min_length=0, Size max_length=0) const
Performs the enzymatic digestion of a (potentially modified) RNA.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
double fragment_mass_tolerance
Definition: DBSearchParam.h:61
Identified sequence (peptide or oligonucleotide)
Definition: IdentifiedSequence.h:52
vector< String > fragment_ion_codes_
Definition: NucleicAcidSearchEngine.cpp:154
const IdentifiedOligos & getIdentifiedOligos() const
Return the registered identified oligonucleotides (immutable)
Definition: IdentificationData.h:366
const Param & getDefaults() const
Non-mutable access to the default parameters.
Size isotope
Definition: NucleicAcidSearchEngine.cpp:259
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Information about a score type.
Definition: ScoreType.h:45
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
multimap< double, AnnotatedHit, greater< double > > HitsByScore
Definition: NucleicAcidSearchEngine.cpp:280
QString toQString() const
Conversion to Qt QString.
void load(const String &filename, PeakMap &map)
Loads a map from an MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
std::string toString() const
Size max_length
Definition: DBSearchParam.h:69
Definition: NucleicAcidSearchEngine.cpp:271
Used to load and store idXML files.
Definition: IdXMLFile.h:63
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:232
This class handles csv files. Currently only loading is implemented.
Definition: CsvFile.h:49
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
static double computeHyperScore(double fragment_mass_error, bool fragment_mass_tolerance_unit_ppm, const MSSpectrum &exp_spectrum, const MSSpectrum &db_spectrum, double mz_lower_bound=0.0)
hyperscore computation
File adapter for MzML files.
Definition: MzMLFile.h:55
Definition: NucleicAcidSearchEngine.cpp:325
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
Int charge
Definition: NucleicAcidSearchEngine.cpp:258
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Representation of a nucleic acid sequence.
Definition: NASequence.h:62
Definition: MetaData.h:75
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
Representation of a ribonucleotide (modified or unmodified)
Definition: Ribonucleotide.h:51
EmpiricalFormula parseAdduct_(const String &adduct)
Definition: NucleicAcidSearchEngine.cpp:338
ExitCodes main(int argc, const char **argv)
Main routine of all TOPP applications.
A more convenient string class.
Definition: String.h:58
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void filterPeakSpectrum(PeakSpectrum &spectrum)
std::pair< ScoreTypeRef, bool > findScoreType(const String &score_name) const
Look up a score type by name.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Representation of a peptide hit.
Definition: PeptideHit.h:54
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
double getMonoWeight() const
returns the mono isotopic weight of the formula (includes proton charges)
Definition: NucleicAcidSearchEngine.cpp:255
const MzTabOligonucleotideSectionRows & getOligonucleotideSectionRows() const
ScoreTypeRef registerScoreType(const ScoreType &score)
Register a score type.
String & remove(char what)
Remove all occurrences of the character what.
ThresholdMower removes all peaks below a threshold.
Definition: ThresholdMower.h:51
QueryMatchRef registerMoleculeQueryMatch(const MoleculeQueryMatch &match)
Register a molecule-query match (e.g. peptide-spectrum match)
double precursor_error_ppm
Definition: NucleicAcidSearchEngine.cpp:275
String db
The used database.
Definition: ProteinIdentification.h:224
Calculates false discovery rates (FDR) from identifications.
Definition: FalseDiscoveryRate.h:77
static void importSequences(IdentificationData &id_data, const std::vector< FASTAFile::FASTAEntry > &fasta, IdentificationData::MoleculeType type=IdentificationData::MoleculeType::PROTEIN, const String &decoy_pattern="")
Import FASTA sequences as parent molecules.
IdentificationData::ScoreTypeRef applyToQueryMatches(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref) const
Calculate FDR on the level of molecule-query matches (e.g. peptide-spectrum matches) for "general" id...
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
enum MassType mass_type
Definition: DBSearchParam.h:49
#define OPENMS_LOG_WARN
Macro to be used if a warning, i.e. a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
double precursor_intensity_
Definition: NucleicAcidSpectrumGenerator.h:136
Invalid value exception.
Definition: Exception.h:335
DataQueryRef registerDataQuery(const DataQuery &query)
Register a data query (e.g. MS2 spectrum or feature)
void clear(bool clear_meta_data)
Clears all data and meta data.
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
double aB_intensity_
Definition: NucleicAcidSpectrumGenerator.h:135
double w_intensity_
Definition: NucleicAcidSpectrumGenerator.h:131
NASequence sequence
Definition: NucleicAcidSearchEngine.cpp:274
static String basename(const String &file)
Returns the basename of the file (without the path).
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:98
InputFileRef registerInputFile(const String &file)
Register an input file.
bool add_c_ions_
Definition: NucleicAcidSpectrumGenerator.h:116
double z_intensity_
Definition: NucleicAcidSpectrumGenerator.h:134
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
void filterPeakMap(PeakMap &exp) const
WindowMower augments the highest peaks in a sliding or jumping window.
Definition: WindowMower.h:54
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
Definition: MetaData.h:67
Definition: NucleicAcidSearchEngine.cpp:140
void registerIDMetaData_(IdentificationData &id_data, const String &in_mzml, const vector< String > &primary_files, const IdentificationData::DBSearchParam &search_param)
Definition: NucleicAcidSearchEngine.cpp:807
PrecursorInfo(Size scan_index, Int charge, Size isotope, const String &adduct)
Definition: NucleicAcidSearchEngine.cpp:262
void setMZ(double mz)
sets the MZ of the MS2 spectrum
void store(const String &filename, const MzTab &mz_tab) const
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:151
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
ProcessingSoftwareRef registerDataProcessingSoftware(const DataProcessingSoftware &software)
Register data processing software.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void filterPeakSpectrum(PeakSpectrum &spectrum)
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:229