OpenMS
|
Serves for encoding sequences into feature vectors. More...
#include <OpenMS/FORMAT/LibSVMEncoder.h>
Public Member Functions | |
LibSVMEncoder ()=default | |
Constructor. More... | |
~LibSVMEncoder ()=default | |
Destructor. More... | |
void | encodeCompositionVector (const String &sequence, std::vector< std::pair< Int, double > > &encoded_vector, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY") |
stores a composition vector of 'sequence' in 'encoded_vector' More... | |
void | encodeCompositionVectors (const std::vector< String > &sequences, const String &allowed_characters, std::vector< std::vector< std::pair< Int, double > > > &composition_vectors) |
stores composition vectors of the sequences given by 'sequences' in 'composition_vectors' More... | |
svm_node * | encodeLibSVMVector (const std::vector< std::pair< Int, double > > &feature_vector) |
encodes the feature vector in LibSVM compliant format More... | |
void | encodeLibSVMVectors (const std::vector< std::vector< std::pair< Int, double > > > &feature_vectors, std::vector< svm_node * > &libsvm_vectors) |
encodes the feature vectors in LibSVM compliant format More... | |
svm_problem * | encodeLibSVMProblem (const std::vector< svm_node * > &vectors, std::vector< double > &labels) |
encodes the LibSVM compliant vectors into a LibSVM compliant structure More... | |
svm_problem * | encodeLibSVMProblemWithCompositionVectors (const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters) |
creates composition vectors for 'sequences' and stores them in LibSVM compliant format More... | |
svm_problem * | encodeLibSVMProblemWithCompositionAndLengthVectors (const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length) |
creates composition vectors with additional length information for 'sequences' and stores them in LibSVM compliant format More... | |
svm_problem * | encodeLibSVMProblemWithCompositionLengthAndWeightVectors (const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters) |
creates composition vectors with additional length and average weight information for 'sequences' and stores them in LibSVM compliant format More... | |
bool | storeLibSVMProblem (const String &filename, const svm_problem *problem) const |
stores the LibSVM-encoded data in a text file that can be used by the LibSVM applications (svm-scale, svm-train,...) More... | |
svm_problem * | loadLibSVMProblem (const String &filename) |
loads the LibSVM-encoded data stored in 'filename' More... | |
void | encodeOligoBorders (String sequence, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::pair< Int, double > > &libsvm_vector, bool strict=false, bool unpaired=false, bool length_encoding=false) |
encodes the borders of the sequence as k_mer oligos and stores them in 'libsvm_vector' More... | |
svm_problem * | encodeLibSVMProblemWithOligoBorderVectors (const std::vector< String > &sequences, std::vector< double > &labels, UInt k_mer_length, const String &allowed_characters, UInt border_length, bool strict=false, bool unpaired=false, bool length_encoding=false) |
creates oligo border vectors for 'sequences' and stores them in LibSVM compliant format More... | |
void | encodeProblemWithOligoBorderVectors (const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors) |
creates oligo border vectors for 'sequences' and stores them in 'vectors' More... | |
void | libSVMVectorToString (svm_node *vector, String &output) |
stores a string representation of the encoded sequence 'vector' in 'output' More... | |
void | libSVMVectorsToString (svm_problem *vector, String &output) |
stores a string representation of the encoded sequences in 'vector' in 'output' More... | |
void | encodeOligo (const AASequence &sequence, UInt k_mer_length, const String &allowed_characters, std::vector< std::pair< Int, double > > &values, bool is_right_border=false) |
encodes an AASequence instance in oligo encoding More... | |
Static Public Member Functions | |
static void | destroyProblem (svm_problem *&problem, bool free_nodes=true) |
frees all the memory of the svm_problem instance More... | |
static std::vector< double > | predictPeptideRT (const std::vector< String > &sequences, SVMWrapper &svm, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY", UInt maximum_sequence_length=50) |
Static Private Member Functions | |
static bool | cmpOligos_ (std::pair< Int, double > a, std::pair< Int, double > b) |
comparator for oligos encoded by encodeOligo More... | |
Serves for encoding sequences into feature vectors.
The class can be used to construct composition vectors for sequences. Additionally the vectors can be encoded into the libsvm format.
|
default |
Constructor.
|
default |
Destructor.
comparator for oligos encoded by encodeOligo
|
static |
frees all the memory of the svm_problem instance
This function is used to free all the memory used by 'problem'
Referenced by LibSVMEncoder::predictPeptideRT().
void encodeCompositionVector | ( | const String & | sequence, |
std::vector< std::pair< Int, double > > & | encoded_vector, | ||
const String & | allowed_characters = "ACDEFGHIKLMNPQRSTVWY" |
||
) |
stores a composition vector of 'sequence' in 'encoded_vector'
The allowed characters given by 'allowed_characters' are counted in the sequence 'sequence' and the relative frequencies of the letters are stored in the composition vector. The first entry of the vector (<Int, double>) corresponds to the first letter of 'allowed_characters' that has a nonzero frequency in 'sequence' and its corresponding relative frequency...
void encodeCompositionVectors | ( | const std::vector< String > & | sequences, |
const String & | allowed_characters, | ||
std::vector< std::vector< std::pair< Int, double > > > & | composition_vectors | ||
) |
stores composition vectors of the sequences given by 'sequences' in 'composition_vectors'
The allowed characters given by 'allowed_characters' are counted in the sequences 'sequences' and the relative frequencies of the letters are stored in the composition vectors. The first entry of the first vector (<Int, double>) corresponds to the first letter of 'allowed_characters' that has a nonzero frequency in the first sequence of 'sequences' and its corresponding relative frequency...
svm_problem* encodeLibSVMProblem | ( | const std::vector< svm_node * > & | vectors, |
std::vector< double > & | labels | ||
) |
encodes the LibSVM compliant vectors into a LibSVM compliant structure
svm_problem* encodeLibSVMProblemWithCompositionAndLengthVectors | ( | const std::vector< String > & | sequences, |
std::vector< double > & | labels, | ||
const String & | allowed_characters, | ||
UInt | maximum_sequence_length | ||
) |
creates composition vectors with additional length information for 'sequences' and stores them in LibSVM compliant format
Referenced by LibSVMEncoder::predictPeptideRT().
svm_problem* encodeLibSVMProblemWithCompositionLengthAndWeightVectors | ( | const std::vector< String > & | sequences, |
std::vector< double > & | labels, | ||
const String & | allowed_characters | ||
) |
creates composition vectors with additional length and average weight information for 'sequences' and stores them in LibSVM compliant format
svm_problem* encodeLibSVMProblemWithCompositionVectors | ( | const std::vector< String > & | sequences, |
std::vector< double > & | labels, | ||
const String & | allowed_characters | ||
) |
creates composition vectors for 'sequences' and stores them in LibSVM compliant format
svm_problem* encodeLibSVMProblemWithOligoBorderVectors | ( | const std::vector< String > & | sequences, |
std::vector< double > & | labels, | ||
UInt | k_mer_length, | ||
const String & | allowed_characters, | ||
UInt | border_length, | ||
bool | strict = false , |
||
bool | unpaired = false , |
||
bool | length_encoding = false |
||
) |
creates oligo border vectors for 'sequences' and stores them in LibSVM compliant format
svm_node* encodeLibSVMVector | ( | const std::vector< std::pair< Int, double > > & | feature_vector | ) |
encodes the feature vector in LibSVM compliant format
void encodeLibSVMVectors | ( | const std::vector< std::vector< std::pair< Int, double > > > & | feature_vectors, |
std::vector< svm_node * > & | libsvm_vectors | ||
) |
encodes the feature vectors in LibSVM compliant format
void encodeOligo | ( | const AASequence & | sequence, |
UInt | k_mer_length, | ||
const String & | allowed_characters, | ||
std::vector< std::pair< Int, double > > & | values, | ||
bool | is_right_border = false |
||
) |
encodes an AASequence instance in oligo encoding
This function is used to get the oligo encoding for AASequence 'sequence'. If a residue is modified, it gets an extra oligo function.
void encodeOligoBorders | ( | String | sequence, |
UInt | k_mer_length, | ||
const String & | allowed_characters, | ||
UInt | border_length, | ||
std::vector< std::pair< Int, double > > & | libsvm_vector, | ||
bool | strict = false , |
||
bool | unpaired = false , |
||
bool | length_encoding = false |
||
) |
encodes the borders of the sequence as k_mer oligos and stores them in 'libsvm_vector'
void encodeProblemWithOligoBorderVectors | ( | const std::vector< AASequence > & | sequences, |
UInt | k_mer_length, | ||
const String & | allowed_characters, | ||
UInt | border_length, | ||
std::vector< std::vector< std::pair< Int, double > > > & | vectors | ||
) |
creates oligo border vectors for 'sequences' and stores them in 'vectors'
void libSVMVectorsToString | ( | svm_problem * | vector, |
String & | output | ||
) |
stores a string representation of the encoded sequences in 'vector' in 'output'
This function can be used if one wants to print the feature vectors that are used in the libsvm.
void libSVMVectorToString | ( | svm_node * | vector, |
String & | output | ||
) |
stores a string representation of the encoded sequence 'vector' in 'output'
This function can be used if one wants to print one feature vector that is used in the libsvm.
svm_problem* loadLibSVMProblem | ( | const String & | filename | ) |
loads the LibSVM-encoded data stored in 'filename'
|
inline static |
bool storeLibSVMProblem | ( | const String & | filename, |
const svm_problem * | problem | ||
) | const |
stores the LibSVM-encoded data in a text file that can be used by the LibSVM applications (svm-scale, svm-train,...)