35 #ifndef QSAR_EXCEPTION
47 class MolecularSimilarity;
// Const query returning a bool; only the declaration is visible here.
// NOTE(review): presumably reports whether the descriptor data has been
// centered (cf. centerData) -- confirm against the implementation.
66 bool isDataCentered()
const;
// Const query returning a bool; only the declaration is visible here.
// NOTE(review): presumably reports whether the response (Y) values have been
// centered (cf. the center_Y flag of centerData) -- confirm against the
// implementation.
69 bool isResponseCentered()
const;
// Takes a String (by value) named sd_file and returns a raw pointer to a
// vector<String>.
// NOTE(review): ownership of the returned pointer is not expressed by the
// type; presumably the caller must delete it -- confirm against the
// implementation before relying on either assumption.
75 vector<String>* readPropertyNames(
String sd_file);
// Overload of readSDFile that takes only a C-string file argument.
// NOTE(review): presumably reads an SD (structure-data) file without
// selecting any activity columns -- confirm; the body is not visible here.
80 void readSDFile(
const char* file);
// Overload of readSDFile that additionally takes a multiset<int> by non-const
// reference plus three bool flags.
//   file                   - C-string file argument.
//   act                    - presumably the IDs of the properties to use as
//                            activities; passed by non-const reference, so it
//                            may be modified -- TODO confirm.
//   useExDesc              - defaults to 1 (true).
//   append                 - defaults to 0 (false).
//   translate_class_labels - defaults to 0 (false).
// The bool defaults are spelled as integer literals in the declaration.
87 void readSDFile(
const char* file, std::multiset<int>& act,
bool useExDesc=1,
bool append=0,
bool translate_class_labels=0);
// Overload of readSDFile that identifies activities by name (set<String>)
// rather than by integer ID, and adds two further bool flags.
//   file                        - C-string file argument.
//   activity_names              - passed by non-const reference; whether it is
//                                 modified is not visible here.
//   useExDesc                   - defaults to 1 (true).
//   append                      - defaults to 0 (false).
//   translate_class_labels      - defaults to 0 (false).
//   calc_phychem_properties     - defaults to 1 (true).
//   calc_topological_properties - defaults to 1 (true).
// NOTE(review): the last two flags presumably toggle computation of the
// respective descriptor groups -- confirm against the implementation.
89 void readSDFile(
const char* file, std::set<String>& activity_names,
bool useExDesc=1,
bool append=0,
bool translate_class_labels=0,
bool calc_phychem_properties=1,
bool calc_topological_properties=1);
// Non-const member returning void, with one bool flag.
//   center_Y - defaults to 0 (false).
// NOTE(review): presumably centers the descriptor data and, when center_Y is
// true, the response values as well -- confirm against the implementation.
96 void centerData(
bool center_Y=0);
// Non-const member taking no arguments and returning void.
// NOTE(review): presumably rescales every descriptor column; the scaling
// scheme is not visible here -- confirm against the implementation.
99 void scaleAllDescriptors();
// Const accessor returning an unsigned int.
// NOTE(review): presumably the number of substances currently stored --
// confirm against the implementation.
102 unsigned int getNoSubstances()
const;
// Const accessor returning an unsigned int.
// NOTE(review): presumably the number of descriptors (features) currently
// stored -- confirm against the implementation.
105 unsigned int getNoDescriptors()
const;
// Non-const member returning void; declaration only.
//   file                   - C-string file argument.
//   no_y                   - int; presumably the number of response columns
//                            in the file -- TODO confirm.
//   xlabels, ylabels       - bools without defaults; presumably indicate
//                            whether the file carries column/row labels --
//                            TODO confirm which is which.
//   sep                    - separator as a C-string, defaults to ",".
//   appendDescriptors      - defaults to 0 (false).
//   translate_class_labels - defaults to 0 (false).
114 void readCSVFile(
const char* file,
int no_y,
bool xlabels,
bool ylabels,
const char* sep=
",",
bool appendDescriptors=0,
bool translate_class_labels=0);
// Overload taking a vector<String> by value (the vector is copied at the call).
// NOTE(review): presumably applies one transformation expression per response
// variable -- confirm against the implementation.
117 void manipulateY(std::vector<String> v);
// Overload taking a single String by value.
// NOTE(review): presumably applies the same transformation expression to all
// response variables -- confirm against the implementation.
121 void manipulateY(
String v);
// Non-const member taking a vector<double> of thresholds by value.
// NOTE(review): presumably maps response values to discrete classes using the
// thresholds as class boundaries -- confirm against the implementation.
125 void discretizeY(std::vector<double> thresholds);
// Non-const member taking a vector<String> by value.
// NOTE(review): presumably the descriptor-side counterpart of manipulateY
// (one transformation expression per descriptor) -- confirm.
127 void transformX(std::vector<String> v);
// Non-const member taking an int p and returning a vector of raw QSARData
// pointers.
// NOTE(review): presumably splits the data into p partitions. Ownership of
// the returned pointers is not expressed by the type; presumably the caller
// is responsible for deleting them -- confirm against the implementation.
130 std::vector<QSARData*> partitionInputData(
int p);
// Const member taking a filename (string, by value) and returning void.
// NOTE(review): presumably the counterpart of readFromFile; the on-disk
// format is not visible here.
133 void saveToFile(
string filename)
const;
// Non-const member taking a filename (string, by value) and returning void.
// NOTE(review): presumably restores state previously written by saveToFile --
// confirm against the implementation.
136 void readFromFile(
string filename);
// Const member taking a double fraction and returning a vector of raw
// QSARData pointers.
// NOTE(review): presumably splits off `fraction` of the substances as an
// external validation set; the number and order of the returned objects, and
// who owns them, are not visible here -- confirm against the implementation.
140 std::vector<QSARData*> generateExternalSet(
double fraction)
const;
// Const member returning a vector of raw QSARData pointers.
//   no_test_splits        - int, no default.
//   current_test_split_id - int, no default.
//   response_id           - int, defaults to 0.
// NOTE(review): presumably produces a train/test split for fold
// current_test_split_id out of no_test_splits, balanced with respect to the
// response selected by response_id -- confirm. Ownership of the returned
// pointers is not expressed by the type.
146 std::vector<QSARData*> evenSplit(
int no_test_splits,
int current_test_split_id,
int response_id=0)
const;
// Const member taking an int s and returning a raw pointer to vector<double>.
// NOTE(review): presumably the descriptor values of substance s; whether the
// result is newly allocated (caller deletes) or points into internal storage
// is not visible here -- confirm before use.
149 std::vector<double>* getSubstance(
int s)
const;
// Const member taking an int s and returning a raw pointer to vector<double>.
// NOTE(review): presumably the response value(s) of substance s; ownership of
// the returned pointer is not expressed by the type -- confirm before use.
152 std::vector<double>* getActivity(
int s)
const;
// Const accessor returning an unsigned int.
// NOTE(review): presumably the number of response (Y) variables -- confirm
// against the implementation.
155 unsigned int getNoResponseVariables()
const;
// Const accessor returning a pointer-to-const vector<string>; callers cannot
// modify the vector through the returned pointer.
// NOTE(review): presumably points at the substance_names_ member -- confirm.
157 const std::vector<string>* getSubstanceNames()
const;
// Const, argument-free overload returning a bool.
// NOTE(review): presumably tests whether the stored response values are all
// discrete (class labels) -- confirm against the implementation.
160 bool checkforDiscreteY()
const;
// Const overload taking a C-string file argument and a multiset<int> of
// activity IDs by non-const reference; returns a bool.
// NOTE(review): presumably performs the discreteness check on the given file
// rather than on stored data -- confirm. Whether activity_IDs is modified is
// not visible here.
164 bool checkforDiscreteY(
const char* file, std::multiset<int>& activity_IDs)
const;
// Non-const member taking a C-string folder argument and returning void.
// NOTE(review): what the folder is used for (and whether the string is copied
// or the pointer retained) is not visible here -- confirm.
167 void setDataFolder(
const char* folder);
// Non-const member taking two thresholds and returning void.
// Both thresholds are non-const double references, so callers must pass
// lvalues (a literal such as 0.95 will not bind).
// NOTE(review): whether the function writes to either threshold is not
// visible here; if it does not, const double& or pass-by-value would be the
// more conventional signature -- confirm before changing.
171 void removeHighlyCorrelatedCompounds(
double& compound_cor_threshold,
double& feature_cor_threshold);
// Const member returning void.
//   descriptor_ID          - int.
//   correlation            - double.
//   similar_descriptor_IDs - list of (uint, String) pairs passed by non-const
//                            reference.
// NOTE(review): since the function returns void and is const,
// similar_descriptor_IDs is presumably the output parameter, filled with
// descriptors whose correlation to descriptor_ID reaches `correlation` --
// confirm. `uint` is not a standard C++ type; it must come from a platform or
// project header.
178 void getSimilarDescriptors(
int descriptor_ID,
double correlation, std::list<std::pair<uint,String> >& similar_descriptor_IDs)
const;
// Non-const member taking a Molecule by non-const reference; returns void.
// NOTE(review): presumably computes the built-in descriptors for m; the
// non-const reference suggests m may be modified -- confirm.
191 void calculateBALLDescriptors(
Molecule& m);
// Non-const member returning void.
//   m            - Molecule by const reference.
//   activity_IDs - multiset<int> by non-const reference.
//   useExDesc    - defaults to 1 (true).
//   resize       - defaults to 1 (true).
// NOTE(review): presumably derives the descriptor (column) names from the
// properties of m, skipping those listed in activity_IDs -- confirm against
// the implementation.
197 void setDescriptorNames(
const Molecule& m, std::multiset<int>& activity_IDs,
bool useExDesc=1,
bool resize=1);
// Non-const member taking a multiset<int> by non-const reference.
// NOTE(review): presumably removes the descriptor columns whose indices are
// listed; note the parameter shares its base name with the member
// invalidDescriptors_ -- confirm how the two relate.
201 void removeInvalidDescriptors(std::multiset<int>& invalidDescriptors);
// Non-const member taking a multiset<int> by non-const reference.
// NOTE(review): presumably removes the substances whose indices are listed in
// inv -- confirm against the implementation.
203 void removeInvalidSubstances(std::multiset<int>& inv);
// Non-const member returning void.
//   mat       - VMatrix by non-const reference (presumably the destination).
//   in        - input file stream by non-const reference.
//   seperator - single char (sic: the parameter name is spelled "seperator").
//   lines     - unsigned int; presumably the number of rows to read.
//   col       - unsigned int; presumably the number of columns to read.
// NOTE(review): the row/column interpretation is inferred from the names --
// confirm against the implementation.
206 void readMatrix(
VMatrix& mat, std::ifstream& in,
char seperator,
unsigned int lines,
unsigned int col);
// Non-const member taking a multiset<int> by non-const reference and an int.
// NOTE(review): presumably validates the IDs in act against the range
// [0, no_properties); how violations are reported (exception, removal, ...)
// is not visible here -- confirm.
210 void checkActivityIDs(std::multiset<int>& act,
int no_properties);
// Non-const member returning void.
//   source             - pointer to a const QSARData.
//   s                  - int; presumably the index of the substance in
//                        *source to copy -- TODO confirm.
//   backtransformation - defaults to 0 (false).
// NOTE(review): behavior for a null source is not visible here.
214 void insertSubstance(
const QSARData* source,
int s,
bool backtransformation=0);
// Const member taking a VMatrix by const reference and an output stream by
// non-const reference; returns void.
// NOTE(review): the output format is not visible here.
217 void printMatrix(
const VMatrix& mat, std::ostream& out)
const;
// Grants class FitModel access to this class's non-public members.
258 friend class FitModel;
// Data members. NOTE(review): as they appear here, none of these declarations
// has a terminating ';' -- this looks like an artifact of how the listing was
// extracted; verify against the real header.
// The per-member descriptions below are inferred from names and types only.

// Map from String to int; presumably class label -> numeric class ID.
std::map< String, int > class_names_
// Presumably one name per substance (cf. getSubstanceNames).
vector< string > substance_names_
// Presumably one name per descriptor column.
vector< string > column_names_
// Presumably the parameters of the transformations applied to Y.
VMatrix y_transformations_
// Presumably the parameters of the transformations applied to descriptors.
VMatrix descriptor_transformations_
// Presumably indices of descriptors flagged as invalid.
std::multiset< int > invalidDescriptors_
// Presumably the substance-by-descriptor data matrix; orientation not visible.
VMatrix descriptor_matrix_
// Presumably indices of substances flagged as invalid.
std::multiset< int > invalidSubstances_