GAFFCESParser.h

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 
00004 #ifndef BALL_MOLMEC_AMBER_GAFFCESPARSER_H
00005 #define BALL_MOLMEC_AMBER_GAFFCESPARSER_H
00006 
00007 #ifndef BALL_KERNEL_ATOM_H
00008 #include <BALL/KERNEL/atom.h>
00009 #endif
00010 
00011 #ifndef BALL_DATATYPE_STRING_H
00012 #include <BALL/DATATYPE/string.h>
00013 #endif
00014 
00015 #include <set>
00016 #include <map>
00017 #include <vector>
00018 
00019 namespace BALL
00020 {
00021   
00022   class GAFFCESParser
00023   {
00024     public:
00025 
00026       // Matches an atom against a parsed atomic property string (APS)
00027       class APSMatcher
00028       {
00029         public:
00030             // property kinds an APS term can name (ring membership, ring size, bond types)
00031             enum APSType
00032             {
00033               IS_RING_ATOM,
00034               IS_NON_RING_ATOM,
00035               IS_PLANAR,
00036               IS_PLANAR_WITH_DB_TO_NR,
00037               IS_PURELY_AROMATIC,
00038               IS_PURELY_ALIPHATIC,
00039               IS_OTHER_RING,
00040               IS_3_RING_ATOM,
00041               IS_4_RING_ATOM,
00042               IS_5_RING_ATOM,
00043               IS_6_RING_ATOM,
00044               IS_7_RING_ATOM,
00045               IS_8_RING_ATOM,
00046               IS_9_RING_ATOM,
00047               PURE_SINGLE_BOND,
00048               PURE_SINGLE_BOND_TO_PARENT,
00049               NO_PURE_SINGLE_BOND_TO_PARENT,
00050               SINGLE_BOND,
00051               SINGLE_BOND_TO_PARENT,
00052               NO_SINGLE_BOND_TO_PARENT,
00053               PURE_DOUBLE_BOND,
00054               PURE_DOUBLE_BOND_TO_PARENT,
00055               NO_PURE_DOUBLE_BOND_TO_PARENT,
00056               DOUBLE_BOND,
00057               DOUBLE_BOND_TO_PARENT,
00058               NO_DOUBLE_BOND_TO_PARENT,
00059               TRIPLE_BOND,
00060               TRIPLE_BOND_TO_PARENT,
00061               NO_TRIPLE_BOND_TO_PARENT,
00062               DELOCALIZED_BOND,
00063               DELOCALIZED_BOND_TO_PARENT,
00064               NO_DELOCALIZED_BOND_TO_PARENT,
00065               AROMATIC_BOND,
00066               AROMATIC_BOND_TO_PARENT,
00067               NO_AROMATIC_BOND_TO_PARENT,
00068               APS_TRUE // NOTE(review): presumably "always matches" - confirm in the implementation
00069             };
00070             // one APS term: a property type together with a feature count
00071             class APSTerm
00072             {
00073               public:
00074                 APSTerm(APSType new_type, int new_feature_number)
00075                   : type(new_type),
00076                     feature_number(new_feature_number)
00077                 {}
00078 
00079                 APSType type;
00080                 // number of occurrences of the given feature
00081                 int feature_number;
00082             };
00083 
00084             // check whether the given atom is in a ring
00085             bool isRingAtom(Atom& atom);
00086             // check whether the given atom is in a ring of the given size
00087             // and return the number of occurrences
00088             int isNRingAtom(Size size, Atom& atom);
00089             // check whether the given atom is not a ring atom, or is
00090             // in a ten-membered or larger ring
00091             bool isNonRingAtom(Atom& atom); 
00092             // NOTE(review): presumably tests a single APS term for atom relative to predecessor - confirm
00093             bool checkGAFFProperties(Atom& atom, Atom& predecessor, APSTerm aps);
00094 
00095 
00096             APSMatcher();
00097             ~APSMatcher();
00098             // NOTE(review): presumably renders aps_terms as text - confirm in the implementation
00099             String printAPS();
00100 
00101             // check whether atom matches the atomic property string
00102             bool operator() (Atom& atom, Atom& predecessor);
00103 
00104             // the stored atomic property string
00105             // outer vector: terms combined with AND (",")
00106             // inner vector: terms combined with OR (".")
00107             std::vector < std::vector< APSTerm> > aps_terms;
00108             // NOTE(review): meaning of bond_type and feature_number is not visible in this header
00109           protected:
00110             bool hasBond_(Atom* atom, Atom* predecessor, int bond_type, int feature_number);
00111       };
00112       // Parser state bundle; GAFFCESParser declares one static instance of it (`static State state`)
00113       struct State
00114       {
00115         // parser currently in use; a single shared slot, hence not "thread-safe"
00116         GAFFCESParser* current_parser;
00117         // NOTE(review): presumably the APS type and feature count currently being parsed - confirm
00118         APSMatcher::APSType current_aps_type;
00119         int feature_number;
00120       };
00121   
00122 
00123 
00124       // Node of the "predicate tree" built for a chemical environment string (CES)
00125       class CESPredicate
00126       {
00127         public:
00128           // wildcard elements that may appear in a CES
00129           enum CESwildcards
00130           {
00131             XA, XB, XC, XD, XX    
00132           };
00133           // create a node without parent and without atom under test, remember the parser, fill the wildcard map
00134           CESPredicate(GAFFCESParser* parser) 
00135             : parent(0), atom_to_test(0), // atom_to_test used to be left indeterminate
00136               parser_(parser)
00137           {
00138             initStringToWildcard();
00139           }
00140 
00141           virtual ~CESPredicate();
00142 
00143           // fill the stringToWildcard map
00144           void initStringToWildcard();
00145           std::map<String, CESwildcards> getStringToWildcard(); // returns the map by value (a copy)
00146 
00147           // add a CESwildcardsConnectionPredicate to the "predicate tree"
00148           void addCESwildcardsConnectionPredicate(String wildcard, Size partners);
00149           // add a CESwildcardsPredicate to the "predicate tree"
00150           void addCESwildcardsPredicate(String wildcard);
00151           // add a CESelementPredicate to the "predicate tree"
00152           void addCESelementPredicate(String name);
00153           // add a CESelementConnectionPredicate to the "predicate tree"
00154           void addCESelementConnectionPredicate(Size partners, String name);
00155           // add a TruePredicate to the "predicate tree"
00156           void addTruePredicate();
00157 
00158           // check whether atom and its environment match the predicates
00159           virtual bool operator () (Atom& atom);
00160           // check whether atom matches this predicate; the base implementation always returns false
00161           virtual bool match(Atom&){return false;}
00162           // delete children 
00163           void clear();
00164 
00165           // check whether this atom is contained on a path to the root
00166           bool alreadySeenThisAtom(Atom* atom);
00167 
00168           // extend the aps_terms of the aps_matcher member
00169           void addNewAND();
00170           void addNewOR(APSMatcher::APSType aps, int feature_number);
00171 
00172           // matcher holding the atomic property string of this predicate
00173           APSMatcher aps_matcher; 
00174         
00175           // child predicates of this predicate
00176           std::vector<CESPredicate*> children;
00177           // the predicate that has this predicate in its children vector (0 if none was set)
00178           CESPredicate* parent;
00179 
00180           // the atom we are trying to match to this predicate (0 until one is assigned)
00181           Atom* atom_to_test;
00182           protected:
00183             // map converting a String into a wildcard element
00184             std::map<String, CESwildcards > stringToWildcard_;    
00185             GAFFCESParser* parser_; // parser handed to the constructor
00186       };
00187   
00188       // Predicate on the element name of the partner atom and the number of its connected atoms
00189       class CESelementConnectionPredicate : public CESPredicate
00190       {
00191         public:
00192           CESelementConnectionPredicate(GAFFCESParser* parser) 
00193             : CESPredicate(parser),
00194               numberOfPartners_(0), 
00195               elementName_("NoName") 
00196           {};
00197 
00198           ~CESelementConnectionPredicate();
00199           // setters/getters for the partner count and the element name
00200           void setNumberOfPartners(Size number);
00201           void setElementName(String name);
00202           Size getNumberOfPartners();
00203           String getElementName();
00204           // check whether atom matches this predicate
00205           bool match(Atom& atom);
00206         protected:
00207           Size numberOfPartners_; // starts at 0
00208           String elementName_; // starts as "NoName"
00209       };
00210   
00211       // Predicate on the element name of the partner atom
00212       class CESelementPredicate : public CESPredicate
00213       {
00214         public:
00215           CESelementPredicate(GAFFCESParser* parser) 
00216             : CESPredicate(parser),
00217               elementName_("NoName") 
00218           {
00219           };
00220 
00221           ~CESelementPredicate();     
00222           // setter/getter for the element name
00223           void setElementName(String name);
00224           String getElementName();
00225           // check whether atom matches this predicate
00226           bool match(Atom& atom);
00227         protected:
00228           String elementName_; // starts as "NoName"
00229       };
00230 
00231       // Predicate on a wildcard element (XA, XB, XC, XD, XX)
00232       class CESwildcardsPredicate : public CESPredicate
00233       {
00234         public:
00235           CESwildcardsPredicate(GAFFCESParser* parser)
00236             : CESPredicate(parser)
00237           {};
00238 
00239           ~CESwildcardsPredicate();
00240           // setter (from the wildcard's String form) and getter for the wildcard element
00241           void setWildcards(String new_wildcard);
00242           CESwildcards getWildcards();
00243           // check whether atom matches the given wildcard element (XA, XB, XC, XD, XX) 
00244           bool matchWildcards(Atom& atom);
00245           // check whether atom matches this predicate
00246           bool match(Atom& atom);
00247         protected:
00248           CESwildcards wildcards_; // NOTE(review): not initialized by the inline constructor above
00249       };
00250 
00251       // Predicate on a wildcard element (XA, XB, XC, XD, XX) and the number of partner atoms
00252       class CESwildcardsConnectionPredicate : public CESPredicate
00253       {
00254         public:
00255           CESwildcardsConnectionPredicate(GAFFCESParser* parser) 
00256             : CESPredicate(parser),
00257               numberOfPartners_(0) 
00258           {};
00259           ~CESwildcardsConnectionPredicate();
00260           // setters/getters for the partner count and the wildcard element
00261           void setNumberOfPartners(Size number);
00262           void setWildcards(String new_wildcard);
00263           Size getNumberOfPartners();
00264           CESwildcards getWildcards();
00265           // check whether atom matches the given wildcard element (XA, XB, XC, XD, XX) 
00266           bool matchWildcards(Atom& atom);
00267           // check whether atom matches this predicate
00268           bool match(Atom& atom);
00269         protected:
00270           CESwildcards wildcards_; // NOTE(review): not initialized by the inline constructor above
00271           Size numberOfPartners_; // starts at 0
00272       };
00273 
00274       // Predicate for the string "*", which means always true
00275       class TruePredicate : public CESPredicate
00276       {
00277         public:
00278           TruePredicate(GAFFCESParser* parser)
00279             : CESPredicate(parser)
00280           {};
00281           ~TruePredicate() {};
00282           // check whether atom matches this predicate (always true, the atom is ignored)
00283           bool match(Atom&) { return true; }
00284       };
00285 
00286       // parser-level match function: check whether the atom's environment matches the "predicate tree"
00287       bool match(Atom& atom);
00288 
00289       // initialize the set of element symbols
00290       void initElementSymbols();
00291       const std::set<String>& getElementSymbols();
00292 
00293       GAFFCESParser();  
00294       GAFFCESParser(const String& cesstring);
00295       ~GAFFCESParser();
00296 
00297       // input hook for the lexer/parser
00298       Size read(char* buf, Size max_size);
00299       // one State shared by all parser instances (static)
00300       static State state;
00301       // NOTE(review): presumably the node that root_predicate refers to - confirm in the implementation
00303       TruePredicate root;
00304       // fixed root of the "predicate tree"
00305       CESPredicate* root_predicate;
00306       // parent predicate of current_predicate
00307       CESPredicate* current_root_predicate;
00308       // the "predicate node" in the "predicate tree" currently being considered
00309       CESPredicate* current_predicate;
00310     
00311       
00312       // parse a chemical environment string
00313       bool parse(const String& cesstring);
00314       // check whether the atom matches the given chemical environment string
00315       bool GAFFCESatomMatcher(Atom& atom, const String& cesstring);
00316       // start filling the children vector of the current predicate
00317       void startChildPredicates();
00318       // finish filling the children vector of the current predicate 
00319       void endChildPredicates();
00320 
00321     protected:
00322       // current chemical environment string
00323       String cesstring_;
00324       // set of all valid element symbols
00325       std::set<String> element_symbols_;
00326       // read position used by the parser/lexer input function YYINPUT
00327       Position read_start_;
00328   };
00329   
00330 }
00331 
00332 #endif