assignBondOrderProcessor.h

Go to the documentation of this file.
00001 #ifndef BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H
00002 #define BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H
00003 
00004 #ifndef BALL_CONCEPT_PROCESSOR_H
00005   #include <BALL/CONCEPT/processor.h>
00006 #endif
00007 
00008 #ifndef BALL_KERNEL_ATOMCONTAINER_H
00009   #include <BALL/KERNEL/atomContainer.h>
00010 #endif
00011 
00012 #ifndef BALL_DATATYPE_HASHMAP_H
00013   #include <BALL/DATATYPE/hashMap.h>
00014 #endif
00015 
00016 #ifndef BALL_DATATYPE_HASHSET_H
00017   #include <BALL/DATATYPE/hashSet.h>
00018 #endif
00019 
00020 
00021 #ifndef BALL_KERNEL_BOND_H
00022   #include <BALL/KERNEL/bond.h>
00023 #endif
00024 
00025 #ifndef BALL_DATATYPE_OPTIONS_H
00026 # include <BALL/DATATYPE/options.h>
00027 #endif
00028 
00029 #ifndef BALL_COMMON_LIMITS_H
00030 # include <BALL/COMMON/limits.h>
00031 #endif
00032 
00033 #ifndef BALL_COMMON_EXCEPTION_H
00034 # include <BALL/COMMON/exception.h>
00035 #endif
00036 
00037 #ifndef BALL_SYSTEM_TIMER_H
00038 #include <BALL/SYSTEM/timer.h>
00039 #endif
00040 
00041 #include <map>
00042 #include <vector>
00043 #include <queue>
00044 
00045 #ifdef BALL_HAS_LPSOLVE
00046     struct _lprec;
00047     typedef struct _lprec lprec;
00048 #endif
00049 
00050 namespace BALL 
00051 {
00085   class BALL_EXPORT AssignBondOrderProcessor 
00086     : public UnaryProcessor<AtomContainer> 
00087   {
00088     protected:
00089       class Solution_;
00090       friend class Solution;
00091       class PQ_Entry_;
00092       friend class PQ_Entry_;
00093 
00094     public:
00095 
00099 
00100       struct BALL_EXPORT Option
00101       { 
00105         static const char* OVERWRITE_SINGLE_BOND_ORDERS;  
00106 
00110         static const char* OVERWRITE_DOUBLE_BOND_ORDERS;  
00111         
00115         static const char* OVERWRITE_TRIPLE_BOND_ORDERS;  
00116         
00125         static const char* OVERWRITE_SELECTED_BONDS;  
00126         
00130         static const char* ADD_HYDROGENS;             //TODO
00131 
00136         static const char* COMPUTE_ALSO_CONNECTIVITY; //TODO
00137 
00142         static const char* CONNECTIVITY_CUTOFF;       //TODO 
00143         
00146         static const char* USE_FINE_PENALTY;
00147 
00150         static const char* KEKULIZE_RINGS;
00151 
00154         static const char* ALGORITHM; 
00155         
00161         static const char* HEURISTIC;  
00162 
00165         static const char* INIFile; 
00166         
00169         static const char* MAX_BOND_ORDER;
00170 
00175         static const char* MAX_NUMBER_OF_SOLUTIONS;
00176       
00181         static const char* COMPUTE_ALSO_NON_OPTIMAL_SOLUTIONS;
00182 
00188         static const char* BOND_LENGTH_WEIGHTING;
00189         
00193         static const char* APPLY_FIRST_SOLUTION;
00194         
00201         static const char* GREEDY_K_SIZE;  
00202 
00209         static const char* BRANCH_AND_BOUND_CUTOFF; 
00210 
00211 
00212       };
00213 
00215       struct BALL_EXPORT Default
00216       {
00217         static const bool OVERWRITE_SINGLE_BOND_ORDERS;
00218         static const bool OVERWRITE_DOUBLE_BOND_ORDERS;
00219         static const bool OVERWRITE_TRIPLE_BOND_ORDERS;
00220         static const bool OVERWRITE_SELECTED_BONDS;
00221         static const bool ADD_HYDROGENS;
00222         static const bool COMPUTE_ALSO_CONNECTIVITY; 
00223         static const float CONNECTIVITY_CUTOFF; 
00224         static const bool USE_FINE_PENALTY;
00225         static const bool KEKULIZE_RINGS;
00226         static const String ALGORITHM;
00227         static const String HEURISTIC;
00228         static const String INIFile;
00229         static const int MAX_BOND_ORDER;  
00230         static const int MAX_NUMBER_OF_SOLUTIONS;
00231         static const bool COMPUTE_ALSO_NON_OPTIMAL_SOLUTIONS;
00232         static const float BOND_LENGTH_WEIGHTING;
00233         static const bool APPLY_FIRST_SOLUTION;
00234         static const int GREEDY_K_SIZE;
00235         static const float BRANCH_AND_BOUND_CUTOFF;
00236       };
00237 
00238       struct BALL_EXPORT Algorithm
00239       {
00240         static const String A_STAR;
00241         static const String ILP; 
00242         static const String K_GREEDY;
00243         static const String BRANCH_AND_BOUND;
00244       };
00245       
00246       struct BALL_EXPORT Heuristic
00247       {
00248         static const String SIMPLE;
00249         static const String MEDIUM; 
00250         static const String TIGHT;
00251       };
00252 
00254     
00255       BALL_CREATE(AssignBondOrderProcessor);
00256 
00260           
00262       AssignBondOrderProcessor();
00263     
00265       AssignBondOrderProcessor(const AssignBondOrderProcessor& abop);
00266     
00267       // constructor with parameter filename //TODO
00268       //AssignBondOrderProcessor(const String& file_name) throw(Exception::FileNotFound);
00269       
00271       virtual ~AssignBondOrderProcessor();
00273 
00277 
00279       virtual bool start();
00280   
00285       void clear();
00286 
00299       virtual Processor::Result operator ()(AtomContainer& ac);
00300 
00302       virtual bool finish();
00303 
00305     
00309     
00316       Size getNumberOfAddedHydrogens(Position i)
00317       { 
00318         if (i >= solutions_.size())
00319         {
00320           Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00321           return 0;
00322         }
00323         int num_hydrogens = 0;
00324         
00325         HashMap<Atom*, int>::Iterator it = solutions_[i].number_of_virtual_hydrogens.begin(); 
00326         for (; it != solutions_[i].number_of_virtual_hydrogens.end(); it++)
00327           num_hydrogens += it->second;
00328         return num_hydrogens;
00329       }
00330 
00340       Size getNumberOfComputedSolutions() {return solutions_.size();}
00341 
00342 
00345       AtomContainer* getAtomContainer() {return ac_;}
00346 
00349       AtomContainer const* getAtomContainer() const {return ac_;}
00350 
00359       const System& getSolution(Position i) throw(Exception::IndexOverflow);
00360 
00366       float getTotalCharge(Position i)
00367       {
00368         if (i >= solutions_.size())
00369         {
00370           Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00371 
00372           return Limits<float>::max();
00373         }
00374         else
00375           return getTotalCharge_(solutions_[i]);
00376 
00377       }
00378       
00385       float getTotalPenalty(Position i)
00386       {
00387         if (i >= solutions_.size())
00388         {
00389           Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00390 
00391           return Limits<float>::max();
00392         }
00393         else
00394           return getTotalPenalty_(solutions_[i]);
00395       }
00396      
00397       /* Returns the number of node expansions before solution i was found.
00398        *
00399        * param    i  index of the solution, whose number of node expansions should be returned.
00400        * return  int -   number of node expansions before solution i was found.   
00401        */
00402       int getNumberOfNodeExpansions(Position i)
00403       {
00404         if (i >= solutions_.size())
00405         {
00406           Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00407 
00408           return -1;
00409         }
00410         else
00411           return getNumberOfNodeExpansions_(solutions_[i]);
00412       }
00413 
00414       /* Returns the number of node expansions before solution i was found.
00415        *
00416        * param    i  index of the solution, whose  queue size should be returned. 
00417        * return  int -  queue size when solution i was found.  
00418        */
00419       int getQueueSize(Position i)
00420       { 
00421         if (i >= solutions_.size())
00422         {
00423           Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00424 
00425           return -1;
00426         }
00427         else
00428           return getQueueSize_(solutions_[i]);
00429       }
00430     
00443       bool apply(Position i);
00444 
00450       void resetBondOrders();
00451 
00458       bool computeNextSolution(bool apply_solution = true);
00459 
00462       void setDefaultOptions();
00463 
00470       float evaluatePenalty(AtomContainer* ac);
00472       
00476 
00477       AssignBondOrderProcessor& operator = (const AssignBondOrderProcessor& abop);
00479 
00483 
00484       Options options;
00485 
00487     
00488     protected:
00489       
00490       // Nested class storing the parameters of a solution to our ILP
00491       class Solution_
00492       { 
00493         friend class AssignBondOrderProcessor;
00494 
00495         public:
00496           // Default constructor
00497           Solution_();
00498           
00499           // Detailed constructor 
00500           Solution_(PQ_Entry_& entry ,AssignBondOrderProcessor* abop,
00501                     int number_of_node_expansions, int search_queue_size);
00502 
00503           // Destructor
00504           virtual ~Solution_();
00505           
00506           // 
00507           void clear();
00508         
00509           // 
00510           int getNumberOfNodeExpansions() const {return node_expansions;}
00511           
00512           //
00513           int getQueueSize() const {return queue_size;}
00514 
00515           // denotes whether the problem could be solved or not  
00516           bool valid;
00517           
00518           // the result : the set of bond orders for _ALL_ original bonds
00519           HashMap<Bond*, int> bond_orders;
00520           
00521           // the result part2: the atoms with n additional hydrogens
00522           HashMap<Atom*, int> number_of_virtual_hydrogens;
00523         
00524           // the virtual atoms and bonds that should be deleted when the next 
00525           // solution is applied
00526           vector<Atom*> atoms_to_delete;
00527           //vector<Bond*> bonds_to_delete;
00528 
00529           // the values of the objective function
00530           float atom_type_penalty;
00531           float bond_length_penalty;
00532           float total_charge;
00533           int node_expansions;
00534           int queue_size;
00535           AssignBondOrderProcessor* parent;
00536       };
00537       
00538       // Nested class storing a priority queue entry for the A-STAR-Option
00539       class PQ_Entry_
00540       { 
00541         friend class AssignBondOrderProcessor;
00542 
00543         public:
00544         
00545           // Default constructor
00546           PQ_Entry_();
00547                 
00548           // Copy constructor
00549           PQ_Entry_(const PQ_Entry_& entry);
00550 
00551           // Destructor
00552           ~PQ_Entry_();
00553           
00554           // 
00555           void clear();
00556           
00557           // the less operator.
00558           // NOTE: we want a reverse sort, hence we actually return a "greater" 
00559           bool operator < (const PQ_Entry_& b) const;  
00560             // the penalty
00561     
00562           float coarsePenalty(float atom_type_penalty, float bond_length_penalty) const 
00563           {
00564             return ( ( (    (atom_type_normalization_factor < 0.0001)
00565                          || (bond_length_normalization_factor < 0.0001)
00566                          || (alpha < 0.0001)) ? 
00567                        atom_type_penalty :
00568                        ((1.-alpha) * (atom_type_penalty / atom_type_normalization_factor)
00569                         + (alpha* bond_length_penalty / bond_length_normalization_factor))));
00570           }
00571                   
00572           // the penalty
00573           float coarsePenalty() const 
00574           { 
00575             return coarsePenalty(estimated_atom_type_penalty, estimated_bond_length_penalty);
00576           }
00577           
00578           // the bond length penalty 
00579           float finePenalty() const {return estimated_bond_length_penalty;}
00580 
00581           // the estimated atom type penalty
00582           float estimated_atom_type_penalty;   
00583           // the estimated bond length penalty
00584           float estimated_bond_length_penalty;
00585 
00586           // the bond orders 
00587           // the i-th entry denotes the bondorder of the i-th bond
00588           // unset bonds get the order 0
00589           vector<short> bond_orders;
00590       
00591           // the index of the bond last considered 
00592           Position last_bond;
00593 
00594           // NOTE: these variables are here to speed up
00595           //       a number of decisions, but we do not
00596           //       want to store them in each PQ_Entry_
00597           //       object since they are the same in the
00598           //       whole queue
00599           //
00600           //       But this approach is not thread-safe!
00601           //       Two simultaneously running bond order threads
00602           //       might overwrite their alpha, ... values!
00603           static float alpha;
00604           static float atom_type_normalization_factor;
00605           static float bond_length_normalization_factor;
00606           static bool  use_fine_penalty;
00607         };
00608       
00614       bool readOptions_();
00615       
00616 
00621       bool readAtomPenalties_() throw(Exception::FileNotFound());
00622       
00631       bool preassignPenaltyClasses_();
00632     
00639       int getPenaltyClass_(Atom* atom);
00640 
00641 
00657       bool precomputeBondLengthPenalties_();
00658 
00670       float computeVirtualHydrogens_(Atom* atom);
00671 
00674       bool apply_(Solution_& solution);
00675 
00678       void storeOriginalConfiguration_();
00679   
00680       /* Returns the queue's size at the moment the given solution was found.
00681        *
00682        * param   sol  solution, whose queue size should be returned. 
00683        * return  int -  queue size when the given solution was found.  
00684        */ 
00685       int getQueueSize_(const Solution_& sol){return sol.getQueueSize();}
00686   
00693       float getTotalCharge_(const Solution_& sol) 
00694       {
00695         if (sol.valid) 
00696         {
00697           return sol.total_charge;
00698         }
00699         else
00700         {
00701           return 0;
00702         } 
00703       }
00704 
00711       float getTotalPenalty_(const Solution_& sol) 
00712       {
00713         if (   (atom_type_normalization_factor_   < 0.00001) 
00714             || (bond_length_normalization_factor_ < 0.00001)
00715             || (alpha_ < 0.0001)) 
00716         {
00717           return sol.atom_type_penalty;
00718         }
00719         else
00720         {
00721           return (  (1.-alpha_) * (sol.atom_type_penalty/atom_type_normalization_factor_) 
00722                   + (alpha_*sol.bond_length_penalty/bond_length_normalization_factor_));
00723         } 
00724       }
00725       
00726       /* Returns the number of node expansions before the given solution was found.
00727        *
00728        * param   sol  solution, whose number of node expansions should be returned. 
00729        * return  int -  number of node expansions before solution i was found.  
00730        */
00731       int getNumberOfNodeExpansions_(const Solution_& sol){return sol.getNumberOfNodeExpansions();}
00732   
00733 
00734 #ifdef BALL_HAS_LPSOLVE
00735 
00737       bool createILP_();
00738 
00741       bool solveILP_();
00742 #endif
00743       
00745       bool valid_;
00746 
00748       bool evaluation_mode_;
00749 
00750       // Map for storing the bonds fixed orders
00751       // if a bond is free, the map returns 0
00752       std::map<Bond*, short> bond_fixed_;
00753 
00754       // all free bonds in the atom container
00755       std::vector<Bond*> free_bonds_;
00756 
00757       // Map for storing the bonds associated index (all bonds)
00758       HashMap<Bond*, Index> bond_to_index_;
00759       
00760       // Vector for mapping from variable indices onto bonds (all bonds)
00761       std::vector<Bond*> index_to_bond_;
00762   
00763 
00764 
00765       // ***************** datastructures for virtual hydrogen bonds ****************** 
00766       //
00767       //  NOTE: a single virtual bond represents ALL possible hydrogen 
00768       //        bonds for a given atom
00769       //
00770       // the atoms with upto n possible additional hydrogens
00771       HashMap<Atom*, int> number_of_virtual_hydrogens_;  
00772       //
00773       // the max number of virtual hydrogens per virtual bond index
00774       std::vector<int> virtual_bond_index_to_number_of_virtual_hydrogens_;  
00775       //  
00776       // the number of virtual bonds
00777       Size num_of_virtual_bonds_;
00778       //
00779       // the virtual bond index assigned to this atom!
00780       vector<Atom*> virtual_bond_index_to_atom_;
00781       HashMap<Atom*, int> atom_to_virtual_bond_index_;
00782       //
00783       //
00784       // a virtual dummy bond
00785       Bond* virtual_bond_;
00786       //
00787       //
00788       
00789       // ********************* ILP stuff ***********************
00790       //
00791       // Vector for mapping from variable indices onto free bonds in the
00792       // order used by the ILP
00793       std::vector<Bond*> ilp_index_to_free_bond_;
00794 
00795       // number of bond variables in the ILP
00796       Position ilp_number_of_free_bonds_;
00797       
00798       // Constant penalty (fixed bonds)
00799       float ilp_const_penalty_;
00800       
00801       // ******************* general datastructures *********************
00802 
00803       // the number of bonds given (free + fixed!)
00804       Position total_num_of_bonds_; 
00805 
00806       // num of free bonds without virtual bonds!
00807       int num_of_free_bonds_;
00808 
00809       // store for all atom-indices the atoms fixed valences 
00810       std::vector<Position> fixed_val_;
00811 
00812       // storing the solutions
00813       vector<Solution_> solutions_;
00814 
00815       // the original conformation before we computed anything
00816       Solution_ starting_configuration_; 
00817       
00818       // the inverse of the atom type penalty normalization factor
00819       float atom_type_normalization_factor_;
00820 
00821       // the inverse of the bond length penalty normalization factor
00822       float bond_length_normalization_factor_; 
00823 
00824       // denotes the index of the last applied solution
00825       // -1 if there was no valid solution applied
00826       int last_applied_solution_;
00827       
00828       // the AtomContainer, the processor is operating on
00829       AtomContainer* ac_;
00830 
00831       // max bond order to consider
00832       int max_bond_order_;
00833 
00834       // balance parameter between atom type and bond length penalty
00835       float alpha_; 
00836 
00837       // the max number of solutions to compute 
00838       int max_number_of_solutions_;
00839 
00840       // flag to indicate, whether also non-optimal solutions should be computed 
00841       bool compute_also_non_optimal_solutions_;
00842 
00843       // flag for adding missing hydrogens
00844       bool add_missing_hydrogens_;
00845 
00846       // flag for computing also the bond connectivity
00847       bool compute_also_connectivity_;
00848 
00849       // flag for using fine penalties derived from 3d information
00850       bool use_fine_penalty_;
00851       
00852       // this enum allows faster access to the type of the chosen heuristic than a string compare
00853       enum HEURISTIC_INDEX
00854       {
00855         SIMPLE,
00856         MEDIUM,
00857         TIGHT
00858       };
00859 
00860       HEURISTIC_INDEX heuristic_index_;
00861 
00862       // //////// ************ for Algorithm::BRANCH_AND_BOUND ************ /////////
00863       bool performBranchAndBound_();
00864       float greedy_atom_type_penalty_; 
00865       float greedy_bond_length_penalty_; 
00866 
00867       // //////// ************ for Algorithm::K_GREEDY ************ /////////
00868       vector<PQ_Entry_> performGreedy_(PQ_Entry_& entry, Size greedy_k  = 10);
00869       int greedy_node_expansions_;
00870       
00871       // //////// ************ for Algorithm::A_STAR ************ /////////
00873       bool  performAStarStep_();
00874       
00875       
00876       // ////////              general stuff                      /////////
00877 
00879       std::priority_queue<PQ_Entry_> queue_;
00880       
00888       bool estimatePenalty_(PQ_Entry_& entry, bool include_heuristic_term = true);
00889     
00891       float estimateAtomTypePenalty_(Atom* atom, 
00892                                      Index atom_index,    // the atom index
00893                                      int fixed_valence,   // its so far fixed valence (incl. virtual H's)
00894                                      int fixed_virtual_order, // its so far fixed virtual H's
00895                                      int num_free_bonds,  // its number of unfixed original bonds
00896                                      PQ_Entry_& entry);   
00898       //  NOTE: virtual bonds are excluded!
00899       float estimateBondLengthPenalty_(Index atom_index, // the atom index
00900                                        const vector<Bond*>& free_bonds, 
00901                                        int fixed_virtual_order,  
00902                                        int fixed_valence, 
00903                                        int num_free_bonds);
00904     
00905       // The penalty administration datastructures.
00906       //  filled by readAtomPenalties_
00907       //  organized in imaginarey blocks of length  
00908       //  block_to_length_[i], starting from 
00909       //  block_to_start_idx_[i] associating 
00910       //  block_to_start_valence_[i] to the start_idx
00911       vector<int> penalties_;
00912       vector<Position> block_to_start_idx_;
00913       vector<Size> block_to_length_;
00914       vector<int> block_to_start_valence_;
00915       // stores the defining element and the SMART-string of each block
00916       vector<std::pair<String, String> > block_definition_; 
00917 
00918 
00919       // Stores which atom belongs to which penalty block.
00920       // The first vector element of each atom block denotes the penalty block 
00921       // assigned to the atom without any additional VIRTUAL Hydrogens,
00922       // the second element with one additional Hydrogen and so on. 
00923       vector< vector<int> > atom_to_block_;
00924           
00925       // Stores the possible bond lengths penalties per order.
00926       HashMap<Bond*, vector<float> > bond_lengths_penalties_;
00927 
00928       // The current number of node expansions. 
00929       // step_ + queue_.size() gives the number of touched nodes.
00930       int step_;
00931 
00932       Timer timer_;
00933 #ifdef BALL_HAS_LPSOLVE
00934       lprec* ilp_;
00935 #endif
00936     };
00937 
00938 } // namespace BALL 
00939 
00940 
00941 #endif // BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H