86 void encodeCompositionVectors(
const std::vector<String> & sequences,
const String & allowed_characters, std::vector<std::vector<std::pair<Int, double> > > & composition_vectors);
91 void encodeLibSVMVectors(
const std::vector<std::vector<std::pair<Int, double> > > & feature_vectors, std::vector<svm_node *> & libsvm_vectors);
95 std::vector<double> & labels);
99 std::vector<double> & labels,
100 const String & allowed_characters);
108 std::vector<double> & labels,
109 const String & allowed_characters,
110 UInt maximum_sequence_length);
118 std::vector<double> & labels,
119 const String & allowed_characters);
130 const String & allowed_characters,
132 std::vector<std::pair<Int, double> > & libsvm_vector,
134 bool unpaired =
false,
135 bool length_encoding =
false);
139 std::vector<double> & labels,
141 const String & allowed_characters,
144 bool unpaired =
false,
145 bool length_encoding =
false);
150 const String & allowed_characters,
152 std::vector<std::vector<std::pair<Int, double> > > & vectors);
178 const String & allowed_characters,
179 std::vector<std::pair<Int, double> > & values,
180 bool is_right_border =
false);
191 const String & allowed_characters =
"ACDEFGHIKLMNPQRSTVWY",
192 UInt maximum_sequence_length = 50)
194 std::vector<double> predicted_retention_times;
197 std::vector<double> temp_rts;
198 temp_rts.resize(sequences.size(), 0);
199 svm_problem * prediction_data =
203 maximum_sequence_length);
204 svm.
predict(prediction_data, predicted_retention_times);
206 return predicted_retention_times;
212 std::pair<Int, double> b);
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:59
svm_problem * loadLibSVMProblem(const String &filename)
loads the LibSVM-encoded data stored in 'filename'
void encodeOligoBorders(String sequence, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::pair< Int, double > > &libsvm_vector, bool strict=false, bool unpaired=false, bool length_encoding=false)
encodes the borders of the sequence as k_mer oligos and stores them in 'libsvm_vector'
void encodeCompositionVectors(const std::vector< String > &sequences, const String &allowed_characters, std::vector< std::vector< std::pair< Int, double > > > &composition_vectors)
stores composition vectors of the sequences given by 'sequences' in 'composition_vectors'
svm_problem * encodeLibSVMProblemWithCompositionVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters)
creates composition vectors for 'sequences' and stores them in LibSVM compliant format
svm_node * encodeLibSVMVector(const std::vector< std::pair< Int, double > > &feature_vector)
encodes the feature vector in LibSVM compliant format
svm_problem * encodeLibSVMProblemWithCompositionLengthAndWeightVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters)
creates composition vectors with additional length and average weight information for 'sequences' and...
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
void libSVMVectorsToString(svm_problem *vector, String &output)
stores a string representation of the encoded sequences in 'vector' in 'output'
svm_problem * encodeLibSVMProblemWithOligoBorderVectors(const std::vector< String > &sequences, std::vector< double > &labels, UInt k_mer_length, const String &allowed_characters, UInt border_length, bool strict=false, bool unpaired=false, bool length_encoding=false)
creates oligo border vectors for 'sequences' and stores them in LibSVM compliant format
void encodeCompositionVector(const String &sequence, std::vector< std::pair< Int, double > > &encoded_vector, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY")
stores a composition vector of 'sequence' in 'encoded_vector'
static std::vector< double > predictPeptideRT(const std::vector< String > &sequences, SVMWrapper &svm, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY", UInt maximum_sequence_length=50)
Definition: LibSVMEncoder.h:189
bool storeLibSVMProblem(const String &filename, const svm_problem *problem) const
stores the LibSVM-encoded data in a text file that can be used by the LibSVM applications (svm-scale,...
void encodeOligo(const AASequence &sequence, UInt k_mer_length, const String &allowed_characters, std::vector< std::pair< Int, double > > &values, bool is_right_border=false)
encodes an AASequence instance in oligo encoding
LibSVMEncoder()=default
Constructor.
svm_problem * encodeLibSVMProblem(const std::vector< svm_node * > &vectors, std::vector< double > &labels)
encodes the LibSVM compliant vectors into a LibSVM compliant structure
~LibSVMEncoder()=default
Destructor.
static bool cmpOligos_(std::pair< Int, double > a, std::pair< Int, double > b)
comparator for oligos encoded by encodeOligo
void encodeLibSVMVectors(const std::vector< std::vector< std::pair< Int, double > > > &feature_vectors, std::vector< svm_node * > &libsvm_vectors)
encodes the feature vectors in LibSVM compliant format
static void destroyProblem(svm_problem *&problem, bool free_nodes=true)
frees all the memory of the svm_problem instance
void libSVMVectorToString(svm_node *vector, String &output)
stores a string representation of the encoded sequence 'vector' in 'output'
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:85
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
A more convenient string class.
Definition: String.h:60
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48