|
OpenMS
2.5.0
|
Go to the documentation of this file.
95 bool isRTColumnOn()
const;
98 void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
107 void setDefaultParams_();
162 void updateMembers_()
override;
bool load(const String &filename)
void saveModel(std::string modelFilename) const
saves the svm model
the nu parameter for nu-SVR
Definition: SVMWrapper.h:96
void store(const String &filename, const Param &param) const
Write XML file.
Management and storage of parameters / INI files.
Definition: Param.h:73
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:379
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
the C parameter of the svm
Definition: SVMWrapper.h:95
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:85
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
with N-terminus and C-terminus
Definition: Residue.h:152
bool exists(const String &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
void loadModel(std::string modelFilename)
loads the model
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
Definition: SVMWrapper.h:101
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:135
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:92
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
Definition: SVMWrapper.h:107
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:153
the degree for the polynomial kernel
Definition: SVMWrapper.h:94
any text format, which has only loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:95
OpenMS::String rt_model_file_
Definition: RTSimulation.h:132
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:56
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
A container for features.
Definition: FeatureMap.h:95
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
The file counterpart of the Param class used to load and store the param data structure as paramXML.
Definition: ParamXMLFile.h:49
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Base class for TOPP applications.
Definition: TOPPBase.h:144
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
the kernel type
Definition: SVMWrapper.h:93
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
static void destroyProblem(svm_problem *problem)
frees all the memory of the svm_problem instance
double egh_tau_scale_
EGH tau scale parameter of the Lorentzian variation.
Definition: RTSimulation.h:150
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:783
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:159
Int getIntParameter(SVM_parameter_type type)
You can get the actual int parameters of the svm.
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'borders'
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:505
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:145
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:97
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:55
Definition: SVMWrapper.h:100
bool toBool() const
Conversion to bool.
double getDoubleParameter(SVM_parameter_type type)
You can get the actual double parameters of the svm.
std::vector< double > labels
Definition: SVMWrapper.h:57
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:140
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:148
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:54
Used to load and store idXML files.
Definition: IdXMLFile.h:63
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:142
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
A more convenient string class.
Definition: String.h:58
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Representation of a peptide hit.
Definition: PeptideHit.h:54
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
double egh_variance_scale_
EGH sigma scale parameter of the Lorentzian variation.
Definition: RTSimulation.h:155
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
void load(const String &filename, Param &param)
Read XML file.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:54
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
Int train(struct svm_problem *problem)
trains the svm
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
bool store(const String &filename) const
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:874
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
void setMZ(double mz)
sets the MZ of the MS2 spectrum
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.