00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef MODEL
00026 #define MODEL
00027
00028 #include <vector>
00029 #include <set>
00030
00031 #include <BALL/MATHS/LINALG/matrix.h>
00032
00033 #ifndef VALIDATION
00034 #include <BALL/QSAR/validation.h>
00035 #endif
00036
00037 #ifndef QSARH
00038 #include <BALL/QSAR/QSARData.h>
00039 #endif
00040
00041 #ifndef QSAR_EXCEPTION
00042 #include <BALL/QSAR/exception.h>
00043 #endif
00044
00045 #include <BALL/MATHS/parsedFunction.h>
00046
00047 namespace BALL
00048 {
00049
00050 namespace QSAR
00051 {
/// Abstract model class of the BALL::QSAR namespace. It cannot be instantiated
/// directly because predict(), train(), saveToFile() and readFromFile() are
/// pure virtual; presumably it is the common base of the concrete QSAR model
/// types (confirm against the derived classes).
///
/// NOTE(review): every line of this listing starts with a line-number token
/// (for example 00052). These numbers look like an artifact of how the listing
/// was produced; they are not C++ and have to be removed before the header can
/// be compiled.
00052 class BALL_EXPORT Model
00053 {
00054 public:
/// Constructs a model from a QSARData object.
/// @param q the data object; presumably remembered in the public member
///          data (confirm in the out-of-line definition).
00060 Model(const QSARData& q);
00061
/// Virtual destructor, so that objects of derived classes can be destroyed
/// through a pointer to Model.
00062 virtual ~Model();
00063
/// Virtual assignment from another model.
/// NOTE(review): returns void instead of Model&, so assignments cannot be
/// chained and the result cannot be used in an expression.
/// @param m the model to assign from.
00066 virtual void operator=(const Model& m);
00068
00069
/// Copies data from another model into this one. Which members are copied
/// is decided by the out-of-line definition (not visible here).
/// @param m the model to copy from.
00074 void copyData(const Model& m);
00075
/// Copies the descriptor IDs of another model, presumably into
/// descriptor_IDs_ (confirm in the definition).
/// @param m the model to copy from.
00077 void copyDescriptorIDs(const Model& m);
00078
00079
/// Reads the training data. Presumably fills descriptor_matrix_ and Y_ from
/// the object that data points to (TODO confirm in the definition).
00083 void readTrainingData();
00084
00085
/// Pure virtual prediction hook that every concrete model has to implement.
/// @param substance the values describing one substance.
/// @param transform flag; presumably selects whether the input is transformed
///        before prediction (see descriptor_transformations_) - confirm.
/// @return the prediction as a Vector<double>.
00092 virtual Vector<double> predict(const vector<double>& substance, bool transform) =0;
00093
/// Presumably empties descriptor_IDs_ (confirm in the definition).
00095 void deleteDescriptorIDs();
00096
/// Pure virtual training hook that every concrete model has to implement.
00098 virtual void train() =0;
00099
00100
/// Virtual optimization hook taking two unnamed int arguments. This default
/// implementation ignores both arguments and returns 0, which converts to
/// false.
/// NOTE(review): the meaning of the two ints and of the return value is not
/// visible here. The integer literal 0 is returned from a bool function and
/// the ; after the body is redundant.
00105 virtual bool optimizeParameters(int , int ){return 0;};
00106
/// Non-virtual one-argument overload.
/// NOTE(review): presumably forwards to the two-argument overload using
/// default_no_opt_steps_ - confirm in the definition.
/// @param k an int whose meaning is defined by the out-of-line definition.
00107 bool optimizeParameters(int k);
00108
/// Virtual hook for a standard-error computation. This default implementation
/// returns -1.0.
/// NOTE(review): -1.0 presumably signals that no value is available - confirm.
/// The ; after the body is redundant.
00109 virtual double calculateStdErr()
00110 {return -1.0;};
00111
/// Virtual setter for model parameters. This default implementation has an
/// empty body and an unnamed parameter, so it does nothing.
/// NOTE(review): the argument is a non-const reference although the default
/// implementation never touches it. The ; after the body is redundant.
00113 virtual void setParameters(vector<double>& ){};
00114
/// Virtual const getter for the model parameters.
/// @return the parameters by value.
00115 virtual vector<double> getParameters() const;
00116
/// @return a non-const pointer to a multiset of descriptor IDs, presumably the
///         address of descriptor_IDs_, which would give callers write access
///         (confirm in the definition).
00118 std::multiset<unsigned int>* getDescriptorIDs();
00119
/// Sets the data source, presumably by assigning the member data (confirm).
/// @param q pointer to the QSARData object to use.
00120 void setDataSource(const QSARData* q);
00121
/// Pure virtual: saves the model to a file.
/// @param filename name of the file, passed by value.
00123 virtual void saveToFile(string filename) = 0;
00124
/// Pure virtual: restores the model from a file.
/// @param filename name of the file, passed by value.
00126 virtual void readFromFile(string filename) = 0;
00127
/// The following accessors return pointers to const objects, presumably the
/// addresses of the correspondingly named protected members (confirm).
/// NOTE(review): none of them is a const member function, so they cannot be
/// called on a const Model.
///
/// @return pointer to a const descriptor matrix.
00129 const Matrix<double>* getDescriptorMatrix();
00130
/// @return pointer to a const list of substance names.
00132 const vector<string>* getSubstanceNames();
00133
/// @return pointer to a const list of descriptor names.
00135 const vector<string>* getDescriptorNames();
00136
/// @return pointer to a const matrix Y (see the member Y_).
00138 const Matrix<double>* getY();
00139
/// Sets the descriptor IDs, presumably by copying sl into descriptor_IDs_
/// (confirm in the definition).
/// @param sl the descriptor IDs to use.
00141 void setDescriptorIDs(const std::multiset<unsigned int>& sl);
00142
/// @return pointer to a const string naming the model type.
/// NOTE(review): the return type is string while the member type_ is declared
/// as String; this presumably relies on String being usable as a string -
/// confirm.
00144 const string* getType();
00145
/// Writes a feature value into return_value (output parameter).
/// NOTE(review): going by the name, the value is presumably the one before
/// normalization - confirm in the definition.
/// @param compound index of the compound.
/// @param feature index of the feature.
/// @param return_value receives the result.
00147 void getUnnormalizedFeatureValue(int compound, int feature, double& return_value);
00148
/// Writes a response value into return_value (output parameter).
/// NOTE(review): going by the name, the value is presumably the one before
/// normalization - confirm in the definition.
/// @param compound index of the compound.
/// @param response index of the response variable.
/// @param return_value receives the result.
00150 void getUnnormalizedResponseValue(int compound, int response, double& return_value);
00152
00153
/// Public pointer to the const QSARData object used as data source.
/// Ownership is not visible from this declaration.
00158 const QSARData* data;
00159
/// Public pointer to a Validation object. Ownership is not visible from this
/// declaration.
00161 Validation* model_val;
00163
00164
00165 protected:
00166
/// Presumably the default number of optimization steps (TODO confirm where it
/// is read).
00170 int default_no_opt_steps_;
00171
/// Builds a Vector<double> from the values of one substance.
/// @param substance the input values as a vector<double>.
/// @param transform flag; presumably selects whether the values are
///        transformed (confirm in the definition).
00177 Vector<double> getSubstanceVector(const vector<double>& substance, bool transform);
00178
/// Overload of getSubstanceVector() taking the input as a Vector<double>.
00179 Vector<double> getSubstanceVector(const Vector<double>& substance, bool transform);
00180
/// Modifies a prediction in place (pred is a non-const reference), presumably
/// undoing the response transformation stored in y_transformations_ - confirm.
00182 void backTransformPrediction(Vector<double>& pred);
00183
/// Takes a matrix and a value lambda, both by non-const reference.
/// NOTE(review): presumably adds lambda to the diagonal of the matrix -
/// confirm in the definition.
00185 void addLambda(Matrix<double>& matrix, double& lambda);
00186
/// Reads descriptor information; source and destination are determined by the
/// out-of-line definition (not visible here).
00188 void readDescriptorInformation();
00190
00191
/// The helpers below read from or write to std::ifstream / std::ofstream.
/// NOTE(review): no include of <fstream> is visible in this header, so these
/// types presumably arrive through another include - confirm.
/// NOTE(review): uint is not a standard C++ type name; it is presumably a
/// typedef provided by a platform or BALL header - confirm.
///
/// Reads a matrix from a stream.
/// @param mat receives the matrix.
/// @param in the input stream.
/// @param lines presumably the number of rows to read.
/// @param col presumably the number of columns to read.
00196 void readMatrix(Matrix<double>& mat, std::ifstream& in, uint lines, uint col);
00197
/// Reads a vector from a stream.
/// @param vec receives the vector.
/// @param in the input stream.
/// @param no_cells presumably the number of entries to read.
/// @param column_vector flag; presumably tells whether the vector is stored
///        as a column (confirm in the definition).
00198 void readVector(Vector<double>& vec, std::ifstream& in, uint no_cells, bool column_vector);
00199
/// Read and write counterparts for the model parameters.
00200 void readModelParametersFromFile(std::ifstream& in);
00201 void saveModelParametersToFile(std::ofstream& out);
00202
00203
/// Virtual read and write counterparts for the descriptor information;
/// derived classes may override them.
00205 virtual void saveDescriptorInformationToFile(std::ofstream& out);
00206 virtual void readDescriptorInformationFromFile(std::ifstream& in, int no_descriptors, bool transformation);
00207
/// Read and write counterparts for the response transformation.
00208 void readResponseTransformationFromFile(std::ifstream& in, int no_y);
00209 void saveResponseTransformationToFile(std::ofstream& out);
00211
00212
/// Matrix of descriptor values (exposed read-only by getDescriptorMatrix(),
/// presumably). Row and column meaning is not visible here.
00217 Matrix<double> descriptor_matrix_;
00218
/// Names of the substances.
00220 vector<string> substance_names_;
00221
/// Names of the descriptors.
00223 vector<string> descriptor_names_;
00224
/// Transformation values for the descriptors; the layout is not visible here.
00227 Matrix<double> descriptor_transformations_;
00228
/// Transformation values for the response variables; the layout is not
/// visible here.
00231 Matrix<double> y_transformations_;
00232
/// Matrix Y, presumably the response values (see getY()) - confirm.
00235 Matrix<double> Y_;
00236
/// Type of the model as a String (see getType()).
00238 String type_;
00239
/// IDs of the descriptors; a multiset, so the container itself permits
/// duplicate IDs.
00243 std::multiset<unsigned int> descriptor_IDs_;
00245
/// Friend declarations: these classes may access the protected members.
00246 friend class Validation;
00247 friend class RegressionValidation;
00248 friend class ClassificationValidation;
/// The following friends are only declared when BALL_HAS_LAPACK is defined.
00249 #ifdef BALL_HAS_LAPACK
00250 friend class PCRModel;
00251 friend class KPCRModel;
00252 friend class FeatureSelection;
00253 #endif //BALL_HAS_LAPACK
00254 };
00255
/// Free function in namespace BALL::QSAR that returns a pointer to a Model for
/// the given model file and data object.
/// NOTE(review): a raw pointer is returned; presumably the object is newly
/// allocated and owned by the caller - confirm in the definition.
/// NOTE(review): model_file is passed by value, which copies the String.
/// @param model_file name of the model file.
/// @param q the QSARData object handed to the model.
/// @return pointer to the created Model.
00257 Model* createNewModelFromFile(String model_file, const QSARData& q);
00258 }
00259 }
00260
00261 #endif // MODEL