55 #ifndef QSAR_EXCEPTION
59 #include <gsl/gsl_randist.h>
60 #include <gsl/gsl_cdf.h>
/**
 * Const query that returns a boolean flag.
 * Presumably reports whether the descriptor data has been centered
 * (inferred from the name; the definition is not visible here).
 */
87 bool isDataCentered()
const;
/**
 * Const query that returns a boolean flag.
 * Presumably reports whether the response variables have been centered
 * (inferred from the name; the definition is not visible here).
 */
90 bool isResponseCentered()
const;
/**
 * Takes a String argument by value and returns a pointer to a vector of String.
 * Presumably the result holds the property names found in the given SD file
 * -- TODO confirm against the definition.
 * NOTE(review): ownership of the returned pointer cannot be determined from
 * this declaration; confirm whether the caller has to delete it.
 * @param sd_file presumably the path of the SD file to inspect
 */
96 vector<String>* readPropertyNames(
String sd_file);
/**
 * Single-argument overload of readSDFile.
 * Presumably reads input data from an SD file -- confirm against the
 * definition, which is not visible here.
 * @param file presumably the path of the SD file to read
 */
101 void readSDFile(
const char* file);
/**
 * Overload of readSDFile that additionally takes a multiset of ints and
 * three optional boolean flags.
 * @param file presumably the path of the SD file to read
 * @param act passed by non-const reference, so it may be modified;
 *        presumably the IDs of the properties to use as activities -- TODO confirm
 * @param useExDesc defaults to 1 (true); meaning not visible here, presumably
 *        "use external descriptors" -- confirm
 * @param append defaults to 0 (false); presumably appends to already loaded
 *        data instead of replacing it -- confirm
 * @param translate_class_labels defaults to 0 (false); semantics not visible here
 */
108 void readSDFile(
const char* file, std::multiset<int>& act,
bool useExDesc=1,
bool append=0,
bool translate_class_labels=0);
/**
 * Takes a Molecule by non-const reference and returns nothing.
 * Presumably computes descriptors for the given molecule -- confirm against
 * the definition. NOTE(review): the non-const reference means the molecule
 * may be modified by the call.
 * @param m the molecule to process
 */
113 void calculateBALLDescriptors(
Molecule& m);
/**
 * No parameters, no return value, not declared const.
 * Presumably prints the stored data matrix for inspection (inferred from the
 * name; output destination is not visible here).
 */
116 void displayMatrix();
/**
 * Presumably centers the stored data -- confirm against the definition.
 * @param center_Y defaults to 0 (false); presumably selects whether the
 *        response variables (Y) are centered as well -- TODO confirm
 */
120 void centerData(
bool center_Y=0);
/**
 * No parameters, no return value, not declared const.
 * Presumably rescales every descriptor; the scaling scheme is not visible
 * from this declaration.
 */
123 void scaleAllDescriptors();
/**
 * Const query returning an unsigned count.
 * @return presumably the number of substances currently held -- confirm
 */
126 unsigned int getNoSubstances()
const;
/**
 * Const query returning an unsigned count.
 * @return presumably the number of descriptors currently held -- confirm
 */
129 unsigned int getNoDescriptors()
const;
/**
 * Presumably reads input data from a character-separated text file --
 * confirm against the definition, which is not visible here.
 * @param file presumably the path of the file to read
 * @param no_y presumably the number of response (Y) columns in the file -- TODO confirm
 * @param xlabels presumably whether the file carries descriptor labels -- confirm
 * @param ylabels presumably whether the file carries labels for the other axis
 *        (e.g. substance names) -- confirm
 * @param sep separator string; defaults to ","
 * @param appendDescriptors defaults to 0 (false); presumably appends the read
 *        columns as extra descriptors to existing data -- confirm
 * @param translate_class_labels defaults to 0 (false); semantics not visible here
 */
138 void readCSVFile(
const char* file,
int no_y,
bool xlabels,
bool ylabels,
const char* sep=
",",
bool appendDescriptors=0,
bool translate_class_labels=0);
/**
 * Overload of manipulateY taking a vector of String by value.
 * Presumably applies one transformation per entry to the response variables
 * (Y); the expected format of the strings is not visible here -- TODO confirm.
 * @param v the strings to apply
 */
141 void manipulateY(vector<String> v);
/**
 * Overload of manipulateY taking a single String by value.
 * Presumably applies the same transformation to the response variables (Y);
 * the expected format of the string is not visible here -- TODO confirm.
 * @param v the string to apply
 */
145 void manipulateY(
String v);
/**
 * Takes a vector of double thresholds by value.
 * Presumably converts the response values into discrete classes using these
 * thresholds; ordering requirements and boundary handling are not visible
 * here -- TODO confirm.
 * @param thresholds the threshold values
 */
149 void discretizeY(vector<double> thresholds);
/**
 * Takes a vector of String by value.
 * Presumably applies transformations to the descriptor values (X); the
 * expected format of the strings is not visible here -- TODO confirm.
 * @param v the strings to apply
 */
151 void transformX(vector<String> v);
/**
 * Returns a vector of QSARData pointers; not declared const.
 * Presumably splits the held data into parts -- confirm against the definition.
 * NOTE(review): ownership of the returned pointers is not visible here;
 * confirm whether the caller must delete them.
 * @param p presumably the number of partitions -- TODO confirm
 */
154 vector<QSARData*> partitionInputData(
int p);
/**
 * Const member taking a file name by value.
 * Presumably writes the held data to that file; the file format is not
 * visible from this declaration.
 * @param filename name of the target file
 */
157 void saveToFile(
string filename)
const;
/**
 * Takes a file name by value; not declared const.
 * Presumably loads data from that file (likely the counterpart of
 * saveToFile -- confirm against the definition).
 * @param filename name of the file to read
 */
160 void readFromFile(
string filename);
/**
 * Const member returning a vector of QSARData pointers.
 * Presumably splits off an external validation set from the held data --
 * confirm. How substances are selected is not visible here.
 * NOTE(review): ownership of the returned pointers is not visible here.
 * @param fraction presumably the share of substances to place into the
 *        external set; valid range not visible -- TODO confirm
 */
164 vector<QSARData*> generateExternalSet(
double fraction)
const;
/**
 * Const member returning a vector of QSARData pointers.
 * Presumably produces a split of the held data into training and test parts --
 * confirm against the definition.
 * NOTE(review): ownership of the returned pointers is not visible here.
 * @param no_test_splits presumably the total number of test splits
 * @param current_test_split_id presumably selects which split acts as the
 *        test set; whether it is 0- or 1-based is not visible -- TODO confirm
 * @param response_id defaults to 0; presumably the index of the response
 *        variable the split is based on -- confirm
 */
170 vector<QSARData*> evenSplit(
int no_test_splits,
int current_test_split_id,
int response_id=0)
const;
/**
 * Const member returning a pointer to a vector of double.
 * Presumably the descriptor values of one substance -- confirm.
 * NOTE(review): ownership of the returned pointer and any scaling applied to
 * the values are not visible from this declaration.
 * @param s presumably the index of the substance -- TODO confirm range
 */
173 vector<double>* getSubstance(
int s)
const;
/**
 * Const member returning a pointer to a vector of double.
 * Presumably the response (activity) values of one substance -- confirm.
 * NOTE(review): ownership of the returned pointer and any scaling applied to
 * the values are not visible from this declaration.
 * @param s presumably the index of the substance -- TODO confirm range
 */
176 vector<double>* getActivity(
int s)
const;
/**
 * Const query returning an unsigned count.
 * @return presumably the number of response variables currently held -- confirm
 */
179 unsigned int getNoResponseVariables()
const;
/**
 * Const accessor returning a pointer to a const vector of string.
 * @return presumably the names of the held substances; the pointee cannot be
 *         modified through the returned pointer. Lifetime of the pointee is
 *         not visible here -- confirm.
 */
181 const vector<string>* getSubstanceNames()
const;
/**
 * Parameterless const overload of checkforDiscreteY returning a bool.
 * Presumably tells whether the held response values are discrete
 * (inferred from the name; the exact criterion is not visible here).
 */
184 bool checkforDiscreteY()
const;
/**
 * Const overload of checkforDiscreteY that takes a file and a set of IDs.
 * Presumably checks whether the selected response values in the given file
 * are discrete -- confirm against the definition.
 * @param file presumably the path of the input file to inspect
 * @param activity_IDs passed by non-const reference, so it may be modified;
 *        presumably the IDs of the properties treated as activities -- TODO confirm
 */
188 bool checkforDiscreteY(
const char* file, std::multiset<int>& activity_IDs)
const;
/**
 * Takes a C string and returns nothing.
 * Presumably stores the folder used for data files; what is looked up in
 * that folder is not visible from this declaration.
 * @param folder presumably a directory path
 */
191 void setDataFolder(
const char* folder);
/**
 * Presumably removes compounds that are highly correlated with one another --
 * confirm against the definition; the correlation measure is not visible here.
 * NOTE(review): both thresholds are taken by non-const reference, so the
 * call may modify them; confirm whether they are in/out parameters.
 * @param compound_cor_threshold presumably the correlation threshold between compounds
 * @param feature_cor_threshold presumably the correlation threshold between features
 */
195 void removeHighlyCorrelatedCompounds(
double& compound_cor_threshold,
double& feature_cor_threshold);
/**
 * Const member that reports its result through an output list.
 * Presumably collects the descriptors whose correlation to the given
 * descriptor reaches the given value -- TODO confirm the exact comparison.
 * @param descriptor_ID presumably the index of the reference descriptor
 * @param correlation presumably the correlation threshold
 * @param similar_descriptor_IDs non-const reference to a list of
 *        (uint, String) pairs, presumably filled with ID and name of each
 *        match; whether existing entries are kept is not visible here
 */
202 void getSimilarDescriptors(
int descriptor_ID,
double correlation, std::list<std::pair<uint,String> >& similar_descriptor_IDs)
const;
/**
 * Presumably derives the descriptor names from the given molecule --
 * confirm against the definition.
 * @param m molecule passed by const reference
 * @param activity_IDs passed by non-const reference, so it may be modified;
 *        presumably the IDs of the properties treated as activities -- TODO confirm
 * @param useExDesc defaults to 1 (true); meaning not visible here, presumably
 *        "use external descriptors" -- confirm
 */
212 void setDescriptorNames(
const Molecule& m, std::multiset<int>& activity_IDs,
bool useExDesc=1);
/**
 * Presumably removes the descriptors identified by the given IDs -- confirm.
 * @param invalidDescriptors multiset of ints passed by non-const reference
 *        (may be modified); presumably the IDs of the descriptors to drop
 */
216 void removeInvalidDescriptors(std::multiset<int>& invalidDescriptors);
/**
 * Presumably removes the substances identified by the given IDs -- confirm.
 * @param inv multiset of ints passed by non-const reference (may be
 *        modified); presumably the IDs of the substances to drop
 */
218 void removeInvalidSubstances(std::multiset<int>& inv);
/**
 * Presumably reads a matrix of the given dimensions from an input stream
 * into mat -- confirm against the definition.
 * @param mat destination matrix, passed by non-const reference
 * @param in input file stream, passed by non-const reference
 * @param seperator single character, presumably the delimiter between values
 *        (parameter name spelled as in the declaration)
 * @param lines presumably the number of rows to read
 * @param col presumably the number of columns per row
 */
221 void readMatrix(
VMatrix& mat, std::ifstream& in,
char seperator,
unsigned int lines,
unsigned int col);
/**
 * Presumably validates the given activity IDs against the number of
 * available properties; how invalid IDs are handled is not visible here --
 * TODO confirm.
 * @param act multiset of ints passed by non-const reference (may be modified)
 * @param no_properties presumably the number of properties available
 */
225 void checkActivityIDs(std::multiset<int>& act,
int no_properties);
/**
 * Presumably copies one substance from another QSARData object into this
 * one -- confirm against the definition.
 * @param source object to read from; taken as pointer to const
 * @param s presumably the index of the substance within source
 * @param backtransformation defaults to 0 (false); presumably undoes
 *        centering/scaling of the copied values -- TODO confirm
 */
229 void insertSubstance(
const QSARData* source,
int s,
bool backtransformation=0);
/**
 * Const member that takes a matrix and an output stream.
 * Presumably writes the matrix to the stream; the output format is not
 * visible from this declaration.
 * @param mat matrix to write, passed by const reference
 * @param out destination stream, passed by non-const reference
 */
232 void printMatrix(
const VMatrix& mat, std::ostream& out)
const;