#include <BALL/QSAR/Model.h>
Public Member Functions | |
Constructors and Destructors | |
Model (const QSARData &q) | |
virtual | ~Model () |
virtual void | operator= (const Model &m) |
Protected Member Functions | |
Input and Output. The following methods can be used to implement the functions saveToFile() and readFromFile() in final classes derived from this base-class | |
void | readMatrix (Matrix< double > &mat, std::ifstream &in, uint lines, uint col) |
void | readVector (Vector< double > &vec, std::ifstream &in, uint no_cells, bool column_vector) |
void | readModelParametersFromFile (std::ifstream &in) |
void | saveModelParametersToFile (std::ofstream &out) |
virtual void | saveDescriptorInformationToFile (std::ofstream &out) |
virtual void | readDescriptorInformationFromFile (std::ifstream &in, int no_descriptors, bool transformation) |
void | readResponseTransformationFromFile (std::ifstream &in, int no_y) |
void | saveResponseTransformationToFile (std::ofstream &out) |
Protected Attributes | |
int | default_no_opt_steps_ |
Friends | |
class | Validation |
class | RegressionValidation |
class | ClassificationValidation |
Attributes | |
| |
const QSARData * | data |
Validation * | model_val |
Matrix< double > | descriptor_matrix_ |
vector< string > | substance_names_ |
vector< string > | descriptor_names_ |
Matrix< double > | descriptor_transformations_ |
Matrix< double > | y_transformations_ |
Matrix< double > | Y_ |
String | type_ |
std::multiset< unsigned int > | descriptor_IDs_ |
Accessors | |
| |
void | copyData (const Model &m) |
void | copyDescriptorIDs (const Model &m) |
void | readTrainingData () |
virtual Vector< double > | predict (const vector< double > &substance, bool transform)=0 |
void | deleteDescriptorIDs () |
virtual void | train ()=0 |
virtual bool | optimizeParameters (int, int) |
bool | optimizeParameters (int k) |
virtual double | calculateStdErr () |
virtual void | setParameters (vector< double > &) |
virtual vector< double > | getParameters () const |
std::multiset< unsigned int > * | getDescriptorIDs () |
void | setDataSource (const QSARData *q) |
virtual void | saveToFile (string filename)=0 |
virtual void | readFromFile (string filename)=0 |
const Matrix< double > * | getDescriptorMatrix () |
const vector< string > * | getSubstanceNames () |
const vector< string > * | getDescriptorNames () |
const Matrix< double > * | getY () |
void | setDescriptorIDs (const std::multiset< unsigned int > &sl) |
const string * | getType () |
void | getUnnormalizedFeatureValue (int compound, int feature, double &return_value) |
void | getUnnormalizedResponseValue (int compound, int response, double &return_value) |
Vector< double > | getSubstanceVector (const vector< double > &substance, bool transform) |
Vector< double > | getSubstanceVector (const Vector< double > &substance, bool transform) |
void | backTransformPrediction (Vector< double > &pred) |
void | addLambda (Matrix< double > &matrix, double &lambda) |
void | readDescriptorInformation () |
Definition at line 52 of file Model.h.
BALL::QSAR::Model::Model | ( | const QSARData & | q | ) |
constructor
q | QSARData object, from which the data for this model should be taken |
virtual BALL::QSAR::Model::~Model | ( | ) | [virtual] |
adds offset lambda to the diagonal of the given matrix
void BALL::QSAR::Model::backTransformPrediction | ( | Vector< double > & | pred | ) | [protected] |
transforms a prediction (obtained by Model.train()) according to the inverse of the transformation(s) of the activity values of the training data
virtual double BALL::QSAR::Model::calculateStdErr | ( | ) | [inline, virtual] |
Reimplemented in BALL::QSAR::GPModel.
void BALL::QSAR::Model::copyData | ( | const Model & | m | ) |
copies the data (descriptor matrix, names of substances and descriptors) and the IDs of the selected descriptors from m
void BALL::QSAR::Model::copyDescriptorIDs | ( | const Model & | m | ) |
copies the IDs of the selected descriptors from m
void BALL::QSAR::Model::deleteDescriptorIDs | ( | ) |
removes all entries from descriptor_IDs
std::multiset<unsigned int>* BALL::QSAR::Model::getDescriptorIDs | ( | ) |
returns a pointer to the descriptor IDs of this model
const Matrix<double>* BALL::QSAR::Model::getDescriptorMatrix | ( | ) |
returns a const pointer to the descriptor matrix of this model
const vector<string>* BALL::QSAR::Model::getDescriptorNames | ( | ) |
returns a const pointer to the names of the descriptors of this model
virtual vector<double> BALL::QSAR::Model::getParameters | ( | ) | const [virtual] |
Reimplemented in BALL::QSAR::ALLModel, BALL::QSAR::GPModel, BALL::QSAR::KNNModel, BALL::QSAR::KPCRModel, BALL::QSAR::KPLSModel, BALL::QSAR::LDAModel, BALL::QSAR::LibsvmModel, BALL::QSAR::NBModel, BALL::QSAR::OPLSModel, BALL::QSAR::PCRModel, BALL::QSAR::PLSModel, BALL::QSAR::RRModel, and BALL::QSAR::SNBModel.
const vector<string>* BALL::QSAR::Model::getSubstanceNames | ( | ) |
returns a const pointer to the names of the substances of this model
Vector<double> BALL::QSAR::Model::getSubstanceVector | ( | const Vector< double > & | substance, | |
bool | transform | |||
) | [protected] |
Vector<double> BALL::QSAR::Model::getSubstanceVector | ( | const vector< double > & | substance, | |
bool | transform | |||
) | [protected] |
returns a row vector containing only the values of those descriptors that have been selected for this model
substance | a vector of *all* descriptor values for the substance to be predicted |
const string* BALL::QSAR::Model::getType | ( | ) |
returns the type of the current model, e.g. "MLR", "PLS", ...
void BALL::QSAR::Model::getUnnormalizedFeatureValue | ( | int | compound, | |
int | feature, | |||
double & | return_value | |||
) |
Fetches the un-normalized value for the specified feature of the desired compound (instance) from the data that this Model currently contains. This method is needed for visualization purposes only.
void BALL::QSAR::Model::getUnnormalizedResponseValue | ( | int | compound, | |
int | response, | |||
double & | return_value | |||
) |
Fetches the un-normalized value for the specified response of the desired compound (instance) from the data that this Model currently contains. This method is needed for visualization purposes only.
const Matrix<double>* BALL::QSAR::Model::getY | ( | ) |
returns a const pointer to the activity values of this model
virtual void BALL::QSAR::Model::operator= | ( | const Model & | m | ) | [virtual] |
copy constructor; creates a model with the same specifications as the given one (same model and kernel parameters). If the given model has been trained, the training result is copied as well.
Note that the input data that has been read by m into m.descriptor_matrix_ and m.Y_ is NOT copied to the new model, since the input data is not part of the specification of a model. If copying the input data is nevertheless desired, use the function copyData() afterwards.
Reimplemented in BALL::QSAR::KernelModel.
bool BALL::QSAR::Model::optimizeParameters | ( | int | k | ) |
virtual bool BALL::QSAR::Model::optimizeParameters | ( | int | , | |
int | ||||
) | [inline, virtual] |
optimizes parameters (!= number of features) of the model, e.g. the number of latent variables in the case of a PLS model or the kernel width in the case of an automated lazy learning model.
The number of selected features (=descriptors) is NOT changed by this method. Use class FeatureSelection in order to do this.
Reimplemented in BALL::QSAR::ALLModel, BALL::QSAR::KNNModel, BALL::QSAR::KPLSModel, BALL::QSAR::OPLSModel, and BALL::QSAR::PLSModel.
virtual Vector<double> BALL::QSAR::Model::predict | ( | const vector< double > & | substance, | |
bool | transform | |||
) | [pure virtual] |
Predicts the activities of a given substance
substance | the substance whose activity is to be predicted, in the form of a vector containing the values for *all* descriptors (if necessary, relevant descriptors will be selected automatically) | |
transform | determines whether the values for each descriptor of the given substance should be transformed before prediction of activity. If (transform==1): each descriptor value is transformed according to the centering of the respective column of QSARData.descriptor_matrix used to train this model. If the substance to be predicted is part of the same input data (e.g. same SD-file) as the training data (as is the case during cross validation), transform should therefore be set to 0. |
Implemented in BALL::QSAR::ALLModel, BALL::QSAR::FitModel, BALL::QSAR::GPModel, BALL::QSAR::KernelModel, BALL::QSAR::LDAModel, BALL::QSAR::LinearModel, BALL::QSAR::LogitModel, BALL::QSAR::NBModel, and BALL::QSAR::SNBModel.
void BALL::QSAR::Model::readDescriptorInformation | ( | ) | [protected] |
reads selected descriptors, their names and the information about their transformations (mean and stddev of each descriptor). This function is used after feature selection to read information about the selected features
virtual void BALL::QSAR::Model::readDescriptorInformationFromFile | ( | std::ifstream & | in, | |
int | no_descriptors, | |||
bool | transformation | |||
) | [protected, virtual] |
virtual void BALL::QSAR::Model::readFromFile | ( | string | filename | ) | [pure virtual] |
reconstruct a saved Model from a file
Implemented in BALL::QSAR::ALLModel, BALL::QSAR::KernelModel, BALL::QSAR::LDAModel, BALL::QSAR::LogitModel, BALL::QSAR::NBModel, BALL::QSAR::RegressionModel, and BALL::QSAR::SNBModel.
void BALL::QSAR::Model::readMatrix | ( | Matrix< double > & | mat, | |
std::ifstream & | in, | |||
uint | lines, | |||
uint | col | |||
) | [protected] |
reconstructs a Matrix<double> from a given input stream after resizing the given Matrix<double> as specified
void BALL::QSAR::Model::readModelParametersFromFile | ( | std::ifstream & | in | ) | [protected] |
void BALL::QSAR::Model::readResponseTransformationFromFile | ( | std::ifstream & | in, | |
int | no_y | |||
) | [protected] |
void BALL::QSAR::Model::readTrainingData | ( | ) |
copies the data for the relevant descriptors from the bound QSARData object into this model and updates Model.descriptor_transformations and Model.y_transformations .
If no explicit feature selection was done, i.e. if descriptor_IDs is empty, all data is fetched.
If feature selection was done, i.e. if descriptor_IDs is not empty, only the columns of the relevant descriptors are fetched.
void BALL::QSAR::Model::readVector | ( | Vector< double > & | vec, | |
std::ifstream & | in, | |||
uint | no_cells, | |||
bool | column_vector | |||
) | [protected] |
virtual void BALL::QSAR::Model::saveDescriptorInformationToFile | ( | std::ofstream & | out | ) | [protected, virtual] |
overloaded by class RegressionModel, whose member function can also save coefficients and coefficient-errors
Reimplemented in BALL::QSAR::RegressionModel.
void BALL::QSAR::Model::saveModelParametersToFile | ( | std::ofstream & | out | ) | [protected] |
void BALL::QSAR::Model::saveResponseTransformationToFile | ( | std::ofstream & | out | ) | [protected] |
virtual void BALL::QSAR::Model::saveToFile | ( | string | filename | ) | [pure virtual] |
save Model to a file
Implemented in BALL::QSAR::ALLModel, BALL::QSAR::KernelModel, BALL::QSAR::LDAModel, BALL::QSAR::LogitModel, BALL::QSAR::NBModel, BALL::QSAR::RegressionModel, and BALL::QSAR::SNBModel.
void BALL::QSAR::Model::setDataSource | ( | const QSARData * | q | ) |
void BALL::QSAR::Model::setDescriptorIDs | ( | const std::multiset< unsigned int > & | sl | ) |
manually specify a set of descriptors
virtual void BALL::QSAR::Model::setParameters | ( | vector< double > & | ) | [inline, virtual] |
sets the model parameters according to the given values.
Reimplemented in BALL::QSAR::ALLModel, BALL::QSAR::GPModel, BALL::QSAR::KNNModel, BALL::QSAR::KPCRModel, BALL::QSAR::KPLSModel, BALL::QSAR::LDAModel, BALL::QSAR::LibsvmModel, BALL::QSAR::NBModel, BALL::QSAR::OPLSModel, BALL::QSAR::PCRModel, BALL::QSAR::PLSModel, BALL::QSAR::RRModel, and BALL::QSAR::SNBModel.
virtual void BALL::QSAR::Model::train | ( | ) | [pure virtual] |
Starts training the model.
Implemented in BALL::QSAR::ALLModel, BALL::QSAR::FitModel, BALL::QSAR::GPModel, BALL::QSAR::KPCRModel, BALL::QSAR::KPLSModel, BALL::QSAR::LDAModel, BALL::QSAR::LibsvmModel, BALL::QSAR::LogitModel, BALL::QSAR::MLRModel, BALL::QSAR::NBModel, BALL::QSAR::OPLSModel, BALL::QSAR::PCRModel, BALL::QSAR::PLSModel, BALL::QSAR::RRModel, and BALL::QSAR::SNBModel.
friend class ClassificationValidation [friend] |
Reimplemented in BALL::QSAR::ClassificationModel.
friend class RegressionValidation [friend] |
Reimplemented in BALL::QSAR::KernelModel, and BALL::QSAR::RegressionModel.
friend class Validation [friend] |
const QSARData* BALL::QSAR::Model::data |
int BALL::QSAR::Model::default_no_opt_steps_ [protected] |
std::multiset<unsigned int> BALL::QSAR::Model::descriptor_IDs_ [protected] |
list containing the IDs of the selected descriptors (=features); with IDs >= 0
If this list is empty, it is assumed that no feature selection was done, i.e. that all descriptors are to be considered for cross-validation and prediction of activity.
If it is not empty, only the descriptors in this list are used for cross-validation and prediction of activity.
Matrix<double> BALL::QSAR::Model::descriptor_matrix_ [protected] |
vector<string> BALL::QSAR::Model::descriptor_names_ [protected] |
Matrix<double> BALL::QSAR::Model::descriptor_transformations_ [protected] |
2xm dimensional matrix (m=no of descriptors) containing mean and stddev of each selected descriptor.
The content of this matrix is updated only by Model.readTrainingData()
vector<string> BALL::QSAR::Model::substance_names_ [protected] |
String BALL::QSAR::Model::type_ [protected] |
Matrix<double> BALL::QSAR::Model::Y_ [protected] |
Matrix<double> BALL::QSAR::Model::y_transformations_ [protected] |
2xc dimensional matrix (c=no of activities) containing mean and stddev of each activity.
The content of this matrix is updated only by Model.readTrainingData()