00001 #ifndef BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H
00002 #define BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H
00003
00004 #ifndef BALL_CONCEPT_PROCESSOR_H
00005 #include <BALL/CONCEPT/processor.h>
00006 #endif
00007
00008 #ifndef BALL_KERNEL_ATOMCONTAINER_H
00009 #include <BALL/KERNEL/atomContainer.h>
00010 #endif
00011
00012 #ifndef BALL_DATATYPE_HASHMAP_H
00013 #include <BALL/DATATYPE/hashMap.h>
00014 #endif
00015
00016 #ifndef BALL_DATATYPE_HASHSET_H
00017 #include <BALL/DATATYPE/hashSet.h>
00018 #endif
00019
00020
00021 #ifndef BALL_KERNEL_BOND_H
00022 #include <BALL/KERNEL/bond.h>
00023 #endif
00024
00025 #ifndef BALL_DATATYPE_OPTIONS_H
00026 # include <BALL/DATATYPE/options.h>
00027 #endif
00028
00029 #ifndef BALL_COMMON_LIMITS_H
00030 # include <BALL/COMMON/limits.h>
00031 #endif
00032
00033 #ifndef BALL_COMMON_EXCEPTION_H
00034 # include <BALL/COMMON/exception.h>
00035 #endif
00036
00037 #ifndef BALL_SYSTEM_TIMER_H
00038 #include <BALL/SYSTEM/timer.h>
00039 #endif
00040
00041 #include <map>
00042 #include <vector>
00043 #include <queue>
00044
00045 #ifdef BALL_HAS_LPSOLVE
00046 struct _lprec;
00047 typedef struct _lprec lprec;
00048 #endif
00049
00050 namespace BALL
00051 {
00085 class BALL_EXPORT AssignBondOrderProcessor
00086 : public UnaryProcessor<AtomContainer>
00087 {
00088 protected:
00089 class Solution_;
00090 friend class Solution;
00091 class PQ_Entry_;
00092 friend class PQ_Entry_;
00093
00094 public:
00095
00099
00100 struct BALL_EXPORT Option
00101 {
00105 static const char* OVERWRITE_SINGLE_BOND_ORDERS;
00106
00110 static const char* OVERWRITE_DOUBLE_BOND_ORDERS;
00111
00115 static const char* OVERWRITE_TRIPLE_BOND_ORDERS;
00116
00125 static const char* OVERWRITE_SELECTED_BONDS;
00126
00130 static const char* ADD_HYDROGENS;
00131
00136 static const char* COMPUTE_ALSO_CONNECTIVITY;
00137
00142 static const char* CONNECTIVITY_CUTOFF;
00143
00146 static const char* USE_FINE_PENALTY;
00147
00150 static const char* KEKULIZE_RINGS;
00151
00154 static const char* ALGORITHM;
00155
00161 static const char* HEURISTIC;
00162
00165 static const char* INIFile;
00166
00169 static const char* MAX_BOND_ORDER;
00170
00175 static const char* MAX_NUMBER_OF_SOLUTIONS;
00176
00181 static const char* COMPUTE_ALSO_NON_OPTIMAL_SOLUTIONS;
00182
00188 static const char* BOND_LENGTH_WEIGHTING;
00189
00193 static const char* APPLY_FIRST_SOLUTION;
00194
00201 static const char* GREEDY_K_SIZE;
00202
00209 static const char* BRANCH_AND_BOUND_CUTOFF;
00210
00211
00212 };
00213
00215 struct BALL_EXPORT Default
00216 {
00217 static const bool OVERWRITE_SINGLE_BOND_ORDERS;
00218 static const bool OVERWRITE_DOUBLE_BOND_ORDERS;
00219 static const bool OVERWRITE_TRIPLE_BOND_ORDERS;
00220 static const bool OVERWRITE_SELECTED_BONDS;
00221 static const bool ADD_HYDROGENS;
00222 static const bool COMPUTE_ALSO_CONNECTIVITY;
00223 static const float CONNECTIVITY_CUTOFF;
00224 static const bool USE_FINE_PENALTY;
00225 static const bool KEKULIZE_RINGS;
00226 static const String ALGORITHM;
00227 static const String HEURISTIC;
00228 static const String INIFile;
00229 static const int MAX_BOND_ORDER;
00230 static const int MAX_NUMBER_OF_SOLUTIONS;
00231 static const bool COMPUTE_ALSO_NON_OPTIMAL_SOLUTIONS;
00232 static const float BOND_LENGTH_WEIGHTING;
00233 static const bool APPLY_FIRST_SOLUTION;
00234 static const int GREEDY_K_SIZE;
00235 static const float BRANCH_AND_BOUND_CUTOFF;
00236 };
00237
00238 struct BALL_EXPORT Algorithm
00239 {
00240 static const String A_STAR;
00241 static const String ILP;
00242 static const String K_GREEDY;
00243 static const String BRANCH_AND_BOUND;
00244 };
00245
00246 struct BALL_EXPORT Heuristic
00247 {
00248 static const String SIMPLE;
00249 static const String MEDIUM;
00250 static const String TIGHT;
00251 };
00252
00254
00255 BALL_CREATE(AssignBondOrderProcessor);
00256
00260
00262 AssignBondOrderProcessor();
00263
00265 AssignBondOrderProcessor(const AssignBondOrderProcessor& abop);
00266
00267
00268
00269
00271 virtual ~AssignBondOrderProcessor();
00273
00277
00279 virtual bool start();
00280
00285 void clear();
00286
00299 virtual Processor::Result operator ()(AtomContainer& ac);
00300
00302 virtual bool finish();
00303
00305
00309
00316 Size getNumberOfAddedHydrogens(Position i)
00317 {
00318 if (i >= solutions_.size())
00319 {
00320 Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00321 return 0;
00322 }
00323 int num_hydrogens = 0;
00324
00325 HashMap<Atom*, int>::Iterator it = solutions_[i].number_of_virtual_hydrogens.begin();
00326 for (; it != solutions_[i].number_of_virtual_hydrogens.end(); it++)
00327 num_hydrogens += it->second;
00328 return num_hydrogens;
00329 }
00330
00340 Size getNumberOfComputedSolutions() {return solutions_.size();}
00341
00342
00345 AtomContainer* getAtomContainer() {return ac_;}
00346
00349 AtomContainer const* getAtomContainer() const {return ac_;}
00350
00359 const System& getSolution(Position i) throw(Exception::IndexOverflow);
00360
00366 float getTotalCharge(Position i)
00367 {
00368 if (i >= solutions_.size())
00369 {
00370 Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00371
00372 return Limits<float>::max();
00373 }
00374 else
00375 return getTotalCharge_(solutions_[i]);
00376
00377 }
00378
00385 float getTotalPenalty(Position i)
00386 {
00387 if (i >= solutions_.size())
00388 {
00389 Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00390
00391 return Limits<float>::max();
00392 }
00393 else
00394 return getTotalPenalty_(solutions_[i]);
00395 }
00396
00397
00398
00399
00400
00401
00402 int getNumberOfNodeExpansions(Position i)
00403 {
00404 if (i >= solutions_.size())
00405 {
00406 Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00407
00408 return -1;
00409 }
00410 else
00411 return getNumberOfNodeExpansions_(solutions_[i]);
00412 }
00413
00414
00415
00416
00417
00418
00419 int getQueueSize(Position i)
00420 {
00421 if (i >= solutions_.size())
00422 {
00423 Log.error() << "AssignBondOrderProcessor: No solution with index " << i << std::endl;
00424
00425 return -1;
00426 }
00427 else
00428 return getQueueSize_(solutions_[i]);
00429 }
00430
00443 bool apply(Position i);
00444
00450 void resetBondOrders();
00451
00458 bool computeNextSolution(bool apply_solution = true);
00459
00462 void setDefaultOptions();
00463
00470 float evaluatePenalty(AtomContainer* ac);
00472
00476
00477 AssignBondOrderProcessor& operator = (const AssignBondOrderProcessor& abop);
00479
00483
00484 Options options;
00485
00487
00488 protected:
00489
00490
00491 class Solution_
00492 {
00493 friend class AssignBondOrderProcessor;
00494
00495 public:
00496
00497 Solution_();
00498
00499
00500 Solution_(PQ_Entry_& entry ,AssignBondOrderProcessor* abop,
00501 int number_of_node_expansions, int search_queue_size);
00502
00503
00504 virtual ~Solution_();
00505
00506
00507 void clear();
00508
00509
00510 int getNumberOfNodeExpansions() const {return node_expansions;}
00511
00512
00513 int getQueueSize() const {return queue_size;}
00514
00515
00516 bool valid;
00517
00518
00519 HashMap<Bond*, int> bond_orders;
00520
00521
00522 HashMap<Atom*, int> number_of_virtual_hydrogens;
00523
00524
00525
00526 vector<Atom*> atoms_to_delete;
00527
00528
00529
00530 float atom_type_penalty;
00531 float bond_length_penalty;
00532 float total_charge;
00533 int node_expansions;
00534 int queue_size;
00535 AssignBondOrderProcessor* parent;
00536 };
00537
00538
00539 class PQ_Entry_
00540 {
00541 friend class AssignBondOrderProcessor;
00542
00543 public:
00544
00545
00546 PQ_Entry_();
00547
00548
00549 PQ_Entry_(const PQ_Entry_& entry);
00550
00551
00552 ~PQ_Entry_();
00553
00554
00555 void clear();
00556
00557
00558
00559 bool operator < (const PQ_Entry_& b) const;
00560
00561
00562 float coarsePenalty(float atom_type_penalty, float bond_length_penalty) const
00563 {
00564 return ( ( ( (atom_type_normalization_factor < 0.0001)
00565 || (bond_length_normalization_factor < 0.0001)
00566 || (alpha < 0.0001)) ?
00567 atom_type_penalty :
00568 ((1.-alpha) * (atom_type_penalty / atom_type_normalization_factor)
00569 + (alpha* bond_length_penalty / bond_length_normalization_factor))));
00570 }
00571
00572
00573 float coarsePenalty() const
00574 {
00575 return coarsePenalty(estimated_atom_type_penalty, estimated_bond_length_penalty);
00576 }
00577
00578
00579 float finePenalty() const {return estimated_bond_length_penalty;}
00580
00581
00582 float estimated_atom_type_penalty;
00583
00584 float estimated_bond_length_penalty;
00585
00586
00587
00588
00589 vector<short> bond_orders;
00590
00591
00592 Position last_bond;
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603 static float alpha;
00604 static float atom_type_normalization_factor;
00605 static float bond_length_normalization_factor;
00606 static bool use_fine_penalty;
00607 };
00608
00614 bool readOptions_();
00615
00616
00621 bool readAtomPenalties_() throw(Exception::FileNotFound());
00622
00631 bool preassignPenaltyClasses_();
00632
00639 int getPenaltyClass_(Atom* atom);
00640
00641
00657 bool precomputeBondLengthPenalties_();
00658
00670 float computeVirtualHydrogens_(Atom* atom);
00671
00674 bool apply_(Solution_& solution);
00675
00678 void storeOriginalConfiguration_();
00679
00680
00681
00682
00683
00684
00685 int getQueueSize_(const Solution_& sol){return sol.getQueueSize();}
00686
00693 float getTotalCharge_(const Solution_& sol)
00694 {
00695 if (sol.valid)
00696 {
00697 return sol.total_charge;
00698 }
00699 else
00700 {
00701 return 0;
00702 }
00703 }
00704
00711 float getTotalPenalty_(const Solution_& sol)
00712 {
00713 if ( (atom_type_normalization_factor_ < 0.00001)
00714 || (bond_length_normalization_factor_ < 0.00001)
00715 || (alpha_ < 0.0001))
00716 {
00717 return sol.atom_type_penalty;
00718 }
00719 else
00720 {
00721 return ( (1.-alpha_) * (sol.atom_type_penalty/atom_type_normalization_factor_)
00722 + (alpha_*sol.bond_length_penalty/bond_length_normalization_factor_));
00723 }
00724 }
00725
00726
00727
00728
00729
00730
00731 int getNumberOfNodeExpansions_(const Solution_& sol){return sol.getNumberOfNodeExpansions();}
00732
00733
00734 #ifdef BALL_HAS_LPSOLVE
00735
00737 bool createILP_();
00738
00741 bool solveILP_();
00742 #endif
00743
00745 bool valid_;
00746
00748 bool evaluation_mode_;
00749
00750
00751
00752 std::map<Bond*, short> bond_fixed_;
00753
00754
00755 std::vector<Bond*> free_bonds_;
00756
00757
00758 HashMap<Bond*, Index> bond_to_index_;
00759
00760
00761 std::vector<Bond*> index_to_bond_;
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771 HashMap<Atom*, int> number_of_virtual_hydrogens_;
00772
00773
00774 std::vector<int> virtual_bond_index_to_number_of_virtual_hydrogens_;
00775
00776
00777 Size num_of_virtual_bonds_;
00778
00779
00780 vector<Atom*> virtual_bond_index_to_atom_;
00781 HashMap<Atom*, int> atom_to_virtual_bond_index_;
00782
00783
00784
00785 Bond* virtual_bond_;
00786
00787
00788
00789
00790
00791
00792
00793 std::vector<Bond*> ilp_index_to_free_bond_;
00794
00795
00796 Position ilp_number_of_free_bonds_;
00797
00798
00799 float ilp_const_penalty_;
00800
00801
00802
00803
00804 Position total_num_of_bonds_;
00805
00806
00807 int num_of_free_bonds_;
00808
00809
00810 std::vector<Position> fixed_val_;
00811
00812
00813 vector<Solution_> solutions_;
00814
00815
00816 Solution_ starting_configuration_;
00817
00818
00819 float atom_type_normalization_factor_;
00820
00821
00822 float bond_length_normalization_factor_;
00823
00824
00825
00826 int last_applied_solution_;
00827
00828
00829 AtomContainer* ac_;
00830
00831
00832 int max_bond_order_;
00833
00834
00835 float alpha_;
00836
00837
00838 int max_number_of_solutions_;
00839
00840
00841 bool compute_also_non_optimal_solutions_;
00842
00843
00844 bool add_missing_hydrogens_;
00845
00846
00847 bool compute_also_connectivity_;
00848
00849
00850 bool use_fine_penalty_;
00851
00852
00853 enum HEURISTIC_INDEX
00854 {
00855 SIMPLE,
00856 MEDIUM,
00857 TIGHT
00858 };
00859
00860 HEURISTIC_INDEX heuristic_index_;
00861
00862
00863 bool performBranchAndBound_();
00864 float greedy_atom_type_penalty_;
00865 float greedy_bond_length_penalty_;
00866
00867
00868 vector<PQ_Entry_> performGreedy_(PQ_Entry_& entry, Size greedy_k = 10);
00869 int greedy_node_expansions_;
00870
00871
00873 bool performAStarStep_();
00874
00875
00876
00877
00879 std::priority_queue<PQ_Entry_> queue_;
00880
00888 bool estimatePenalty_(PQ_Entry_& entry, bool include_heuristic_term = true);
00889
00891 float estimateAtomTypePenalty_(Atom* atom,
00892 Index atom_index,
00893 int fixed_valence,
00894 int fixed_virtual_order,
00895 int num_free_bonds,
00896 PQ_Entry_& entry);
00898
00899 float estimateBondLengthPenalty_(Index atom_index,
00900 const vector<Bond*>& free_bonds,
00901 int fixed_virtual_order,
00902 int fixed_valence,
00903 int num_free_bonds);
00904
00905
00906
00907
00908
00909
00910
00911 vector<int> penalties_;
00912 vector<Position> block_to_start_idx_;
00913 vector<Size> block_to_length_;
00914 vector<int> block_to_start_valence_;
00915
00916 vector<std::pair<String, String> > block_definition_;
00917
00918
00919
00920
00921
00922
00923 vector< vector<int> > atom_to_block_;
00924
00925
00926 HashMap<Bond*, vector<float> > bond_lengths_penalties_;
00927
00928
00929
00930 int step_;
00931
00932 Timer timer_;
00933 #ifdef BALL_HAS_LPSOLVE
00934 lprec* ilp_;
00935 #endif
00936 };
00937
00938 }
00939
00940
00941 #endif // BALL_STRUCTURE_ASSIGNBONDORDERPROCESSOR_H