smartsParser.h

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // $Id: smartsParser.h,v 1.10.8.3 2007/03/28 16:07:35 bertsch Exp $
00005 //
00006 
00007 #ifndef BALL_STRUCTURE_SMARTES_PARSER_H
00008 #define BALL_STRUCTURE_SMARTES_PARSER_H
00009 
00010 #ifndef BALL_COMMON_H
00011   # include <BALL/common.h>
00012 #endif
00013 
00014 #include <map>
00015 #include <set>
00016 
00017 // needed for MSVC:
00018 #undef CW_DEFAULT
00019 
00020 namespace BALL 
00021 {
00022   // forward declarations
00023   class Bond;
00024   class Atom;
00025   class Element;
00026 
00037   class BALL_EXPORT SmartsParser
00038   {
00039     public:
00040 
/** Z/E isomer type of a bond pattern.
 *
 *  Stored per bond in SPBond (see SPBond::getZEType() / setZEType()).
 *  Enumerators are numbered consecutively starting at 1.
 *  NOTE(review): the per-value meanings below are inferred from the
 *  enumerator names only; confirm against the matching code.
 */
enum ZEIsomerType
{
  ANY_ZE = 1, ///< presumably: any Z/E configuration is accepted
  NONE,       ///< presumably: no Z/E configuration
  Z,          ///< Z isomer
  E           ///< E isomer
};
00048 
/** Chirality classes an atom pattern can be tagged with.
 *
 *  Enumerators are numbered consecutively starting at 1. After the three
 *  generic values, every geometry contributes four enumerators:
 *  CW_x, CW_x_OR_UNSPECIFIED, CCW_x and CCW_x_OR_UNSPECIFIED.
 *  NOTE(review): CW/CCW presumably stand for clockwise/counter-clockwise;
 *  confirm against the grammar that produces these values.
 *
 *  The file undefines the macro CW_DEFAULT before this point ("needed for
 *  MSVC") so that it can be used as an enumerator name here.
 */
enum ChiralClass
{
  CHIRAL_CLASS_UNSPECIFIED = 1,
  NONCHIRAL,
  NONCHIRAL_OR_UNSPECIFIED,

  // default class (TH)
  CW_DEFAULT,
  CW_DEFAULT_OR_UNSPECIFIED,
  CCW_DEFAULT,
  CCW_DEFAULT_OR_UNSPECIFIED,

  // TH: tetrahedral
  CW_TH,
  CW_TH_OR_UNSPECIFIED,
  CCW_TH,
  CCW_TH_OR_UNSPECIFIED,

  // AL: allene-like
  CW_AL,
  CW_AL_OR_UNSPECIFIED,
  CCW_AL,
  CCW_AL_OR_UNSPECIFIED,

  // SP: square planar
  CW_SP,
  CW_SP_OR_UNSPECIFIED,
  CCW_SP,
  CCW_SP_OR_UNSPECIFIED,

  // TB: trigonal bipyramidal
  CW_TB,
  CW_TB_OR_UNSPECIFIED,
  CCW_TB,
  CCW_TB_OR_UNSPECIFIED,

  // OH: octahedral
  CW_OH,
  CW_OH_OR_UNSPECIFIED,
  CCW_OH,
  CCW_OH_OR_UNSPECIFIED
};
00080 
/** Logical operators used to combine nodes and edges of the pattern tree.
 *
 *  Stored in SPNode and SPEdge (see their getLogicalOperator() /
 *  setLogicalOperator()). Enumerators are numbered from 0.
 *  NOTE(review): AND_LOW is presumably the low-precedence "and" of the
 *  SMARTS syntax and NOOP "no operator"; confirm against the grammar.
 */
enum LogicalOperator
{
  AND,
  OR,
  AND_LOW,
  NOOP
};
00095 
00096 
00098     class SPAtom;
00099 
00107     class BALL_EXPORT SPBond 
00108     {
00109       public:
00110 
00112         enum SPBondOrder
00113         {
00114           SINGLE = 1,
00115           SINGLE_UP,
00116           SINGLE_UP_OR_ANY,
00117           SINGLE_DOWN,
00118           SINGLE_DOWN_OR_ANY,
00119           SINGLE_OR_AROMATIC,
00120           AROMATIC,
00121           DOUBLE,
00122           TRIPLE,
00123           NOT_NECESSARILY_CONNECTED,
00124           IN_RING,
00125           ANY
00126         };
00127     
00131 
00132         SPBond();
00133 
00135         SPBond(SPBondOrder bond_order);
00136 
00138         SPBond(SPAtom* first, SPAtom* second, SPBondOrder bond_order);
00139 
00141         virtual ~SPBond() ;
00143 
00147 
00148         ZEIsomerType getZEType() const { return ze_type_; }
00149 
00151         void setZEType(ZEIsomerType type) { ze_type_ = type; }
00152 
00154         void setBondOrder(SPBondOrder bond_order);
00155 
00157         SPBondOrder getBondOrder() const { return bond_order_; }
00158 
00160         bool isNot() const { return not_; }
00161 
00163         void setNot(bool is_not) { not_ = is_not; }
00164 
00165         // returns true if the SPBond matches the given bond
00166         bool equals(const Bond* bond) const;
00168 
00169       protected:
00170 
00172         ZEIsomerType  ze_type_;
00173 
00175         SPBondOrder bond_order_;
00176 
00178         bool not_;
00179     };
00180     
00190     class BALL_EXPORT SPAtom
00191     {
00192       public:
00193 
00195         enum PropertyType
00196         {
00197           ISOTOPE = 1,
00198           CHARGE,
00199           AROMATIC,
00200           ALIPHATIC,
00201           IN_NUM_RINGS,
00202           IN_RING_SIZE,
00203           IN_BRACKETS,
00204           CONNECTED,
00205           EXPLICIT_HYDROGENS,
00206           VALENCE,
00207           IMPLICIT_HYDROGENS,
00208           DEGREE,
00209           RING_CONNECTED,
00210           CHIRALITY,
00211           SYMBOL
00212         };
00213 
00215         union PropertyValue
00216         {
00217           int int_value;
00218           bool bool_value;
00219           const Element* element_value;
00220           ChiralClass chiral_class_value;
00221         };
00222 
00224         struct Property
00225         {
00226           public:
00227         
00231 
00232             Property(PropertyType type, int value);
00233             
00235             Property(PropertyType type, bool value);
00236             
00238             Property(PropertyType type, const Element* value);
00239             
00241             Property(PropertyType type, ChiralClass value);
00242 
00244             virtual ~Property();
00246 
00248             void operator = (const Property& rhs);
00249 
00253 
00254             PropertyType getType() const { return type_; }
00255 
00257             PropertyValue getValue() const { return value_; }
00259 
00260           private:
00261             
00263             Property();
00264             
00266             PropertyType type_;
00267 
00269             PropertyValue value_;
00270         };
00271         
00309 
00310         SPAtom();
00311 
00313         SPAtom(const String& symbol);
00314 
00316         virtual ~SPAtom() ;
00318 
00319 
00323 
00324         void setProperty(PropertyType type, int int_value);
00325         
00327         void setProperty(PropertyType type, bool  flag);
00328 
00330         void setProperty(PropertyType type, const Element* element);
00331 
00333         void setProperty(PropertyType type, ChiralClass chirality);
00334 
00336         void setProperty(Property property);
00337 
00339         void addPropertiesFromSPAtom(SPAtom* sp_atom);
00340 
00342         void setNotProperty(PropertyType type);
00343 
00345         bool hasProperty(PropertyType type) const;
00346 
00348         PropertyValue getProperty(PropertyType type);
00349 
00351         Size countProperties() const;
00352 
00354         Size getDefaultValence(const Atom* atom) const;
00355         
00357         Size countRealValences(const Atom* atom) const;
00358         
00360         Size getNumberOfImplicitHydrogens(const Atom* atom) const;
00362 
00366 
00367         bool equals(const Atom* atom) const;
00369         
00370       protected:
00371 
00373         Atom* atom_;
00374 
00376         std::map<PropertyType, PropertyValue> properties_;
00377 
00379         std::set<PropertyType> not_properties_;
00380     };
00381 
00383     class SPNode;
00384 
00387     class BALL_EXPORT SPEdge
00388     {
00389       public:
00390 
00394 
00395         SPEdge();
00396 
00398         SPEdge(const SPEdge& sp_edge);
00399 
00401         virtual ~SPEdge();
00403         
00407 
00408         bool isInternal() const { return internal_; }
00409 
00411         void setInternal(bool internal) { internal_ = internal; }
00412         
00414         void setSPBond(SPBond* sp_bond) { bond_ = sp_bond; }
00415 
00417         SPBond* getSPBond() const { return bond_; }
00418         
00420         void setFirstSPNode(SPNode* first) { first_ = first; }
00421 
00423         SPNode* getFirstSPNode() const { return first_; } 
00424         
00426         void setSecondSPNode(SPNode* second) { second_ = second; }
00427 
00429         SPNode* getSecondSPNode() const { return second_; }
00430         
00432         SPNode* getPartnerSPNode(SPNode* node) { return node == first_ ? second_ : first_; }
00433         
00435         bool isNot() const { return is_not_; }
00436 
00438         void setNot(bool is_not) { is_not_ = is_not; }
00439       
00441         void setFirstSPEdge(SPEdge* first) { first_edge_ = first; }
00442 
00444         SPEdge* getFirstSPEdge() const { return first_edge_; }
00445 
00447         void setSecondSPEdge(SPEdge* second) { second_edge_ = second; }
00448 
00450         SPEdge* getSecondSPEdge() const { return second_edge_; }
00451     
00453         void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; }
00454 
00456         LogicalOperator getLogicalOperator() const { return log_op_; }
00458       
00459       protected:
00460 
00462         bool internal_;
00463 
00465         bool is_not_;
00466 
00468         SPNode* first_;
00469 
00471         SPNode* second_;
00472 
00474         SPBond* bond_;
00475 
00477         SPEdge* first_edge_;
00478 
00480         SPEdge* second_edge_;
00481 
00483         LogicalOperator log_op_;
00484     };
00485 
00488     class BALL_EXPORT SPNode
00489     {
00490       public:
00491     
00495 
00496         typedef std::vector<SPEdge*>::iterator EdgeIterator;
00497 
00499         typedef std::vector<SPEdge*>::const_iterator EdgeConstIterator;
00501   
00502 
00506 
00507         SPNode();
00508 
00510         SPNode(SPAtom* atom);
00511 
00513         SPNode(SPNode* first, LogicalOperator log_op, SPNode* second);
00514 
00516         SPNode(const SPNode& sp_node);
00517 
00519         virtual ~SPNode();
00521         
00522 
00526 
00527         bool isInternal() const { return internal_; }
00528 
00530         void setInternal(bool internal) { internal_ = internal; }
00531         
00533         bool isRecursive() const { return recursive_; }
00534 
00536         void setRecursive(bool recursive); 
00537       
00539         void setComponentNumber(int no) { component_no_ = no; }
00540 
00542         Size getComponentNumber() { return component_no_; }
00543       
00545         SPAtom* getSPAtom() const { return sp_atom_; }
00546 
00548         void setSPAtom(SPAtom* sp_atom) { sp_atom_ = sp_atom; }
00549     
00551         SPEdge* getFirstEdge() const { return first_edge_; }
00552 
00554         void setFirstEdge(SPEdge* first) { first_edge_ = first; }
00555 
00557         SPEdge* getSecondEdge() const { return second_edge_; }
00558 
00560         void setSecondEdge(SPEdge* second) { second_edge_ = second; }
00561     
00563         bool getNot() const { return is_not_; }
00564 
00566         void setNot(bool is_not) { is_not_ = is_not; }
00567   
00568 
00570         //void setInBrackets() { in_brackets_ = true; }
00571 
00573         void addSPEdge(SPEdge* sp_edge) { edges_.push_back(sp_edge); }
00574 
00576         void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; }
00577 
00579         LogicalOperator getLogicalOperator() const { return log_op_; }  
00580 
00582         Size countEdges() const { return edges_.size(); }
00584 
00588 
00589         EdgeIterator begin() { return edges_.begin(); }
00590 
00592         EdgeIterator end() { return edges_.end(); }
00593 
00595         EdgeConstIterator begin() const { return edges_.begin(); }
00596 
00598         EdgeConstIterator end() const { return edges_.end(); }
00600 
00601       protected:
00602         
00604         bool internal_;
00605 
00607         bool is_not_;
00608 
00610         bool recursive_;
00611 
00613         //bool in_brackets_;
00614 
00616         LogicalOperator log_op_;
00617 
00619         std::vector<SPEdge*> edges_;
00620 
00622         SPEdge* first_edge_;
00623 
00625         SPEdge* second_edge_;
00626 
00628         SPAtom* sp_atom_;
00629 
00631         int component_no_;
00632     };
00633 
00634   
00638 
00639     SmartsParser();
00640       
00642     SmartsParser(const SmartsParser& parser);
00643 
00645     virtual ~SmartsParser();
00647     
00653     void parse(const String& s)
00654       throw(Exception::ParseError);
00655 
00659 
00660     SPAtom* createAtom(const String& symbol, bool in_bracket = false);
00661 
00663     void setRoot(SPNode* root) { root_ = root; }
00664 
00666     SPNode* getRoot() const { return root_; }
00667 
00669     void dumpTree();
00670     
00672     void clear();
00673     
00675     void addRingConnection(SPNode* spnode, Size index);
00676     
00678     std::map<Size, std::vector<SPNode*> > getRingConnections() const;
00679   
00681     void setSSSR(const std::vector<std::vector<Atom*> >& sssr);
00682 
00684     void setNeedsSSSR(bool needs_sssr) { needs_SSSR_ = needs_sssr; }
00685 
00687     bool getNeedsSSSR() const { return needs_SSSR_; }
00688 
00690     void setRecursive(bool recursive) { recursive_ = recursive; }
00691 
00693     bool isRecursive() const { return recursive_; }
00694 
00696     void setComponentGrouping(bool component_grouping) { component_grouping_ = component_grouping; }
00697 
00699     bool hasComponentGrouping() const { return component_grouping_; }
00700 
00702     struct State
00703     {
00704       Size          char_count;
00705       SmartsParser* current_parser;
00706       const char*   buffer;
00707     };
00708     
00710     static State state;
00711 
00713     const std::set<SPNode*>& getNodes() const { return nodes_; }
00714 
00716     const std::set<SPEdge*>& getEdges() const { return edges_; }
00717 
00719     void addEdge(SPEdge* edge) { edges_.insert(edge); }
00720 
00722     void addNode(SPNode* node) { nodes_.insert(node); }
00723 
00725     bool hasRecursiveEdge(SPEdge* edge) const { return rec_edges_.find(edge) != rec_edges_.end(); }
00726 
00728     void addRecursiveEdge(SPEdge* edge) { rec_edges_.insert(edge); }
00729 
00731     void setNextComponentNumberToSubTree(SPNode* spnode);
00733 
00734     protected:
00735 
00737       bool needs_SSSR_;
00738 
00740       bool recursive_;
00741 
00743       bool component_grouping_;
00744 
00746       static vector<std::set<const Atom*> >* sssr_;
00747 
00749       void dumpTreeRecursive_(SPNode* node, Size depth);
00750 
00752       void dumpTreeRecursive_(SPEdge* edge, Size depth);
00753       
00755       std::map<Size, std::vector<SPNode*> > ring_connections_;
00756     
00758       static SmartsParser* current_parser_;
00759       
00761       std::set<SPEdge*> edges_;
00762       
00764       std::set<SPNode*> nodes_;
00765 
00767       std::set<SPEdge*> rec_edges_;
00768 
00770       SPNode* root_;
00771 
00773       int component_no_;
00774   };
00775   
00776 } // namespace BALL
00777 
00778 #endif // BALL_STRUCTURE_SMARTS_PARSER_H
00779