00001 // -*- Mode: C++; tab-width: 2; -*- 00002 // vi: set ts=2: 00003 // 00004 00005 #ifndef BALL_FORMAT_NMRSTARFILE_H 00006 #define BALL_FORMAT_NMRSTARFILE_H 00007 00008 #ifndef BALL_FORMAT_CIFFILE_H 00009 # include <BALL/FORMAT/CIFFile.h> 00010 #endif 00011 00012 #ifndef BALL_COMMON_LIMITS_H 00013 # include <BALL/COMMON/limits.h> 00014 #endif 00015 00016 #ifndef BALL_KERNEL_PROTEIN_H 00017 # include <BALL/KERNEL/protein.h> 00018 #endif 00019 00020 #ifndef BALL_STRUCTURE_PEPTIDES_H 00021 # include <BALL/STRUCTURE/peptides.h> 00022 #endif 00023 00024 #include <vector> 00025 00026 namespace BALL 00027 { 00066 class BALL_EXPORT NMRStarFile 00067 : public CIFFile 00068 { 00069 public: 00070 00071 // constant variables to denote unknown values 00072 static const float FLOAT_VALUE_NA; 00073 static const int INT_VALUE_NA; 00074 00075 static const Position POSITION_VALUE_NA; 00076 00082 00088 class BALL_EXPORT SampleCondition 00089 { 00090 public: 00091 SampleCondition(); 00092 00093 // saveframe name -- referenced in the sample_condition_label 00094 // of the saveframe "assigned_chemical_shifts" 00095 String name; 00096 // Accoring to the NMRStarFile 2.1 documentation 00097 // the first entry MUST BE "_Variable_type". 00098 // This is why we are allowed to map per type :-) 00099 vector<String> types; 00100 StringHashMap<float> values; 00101 StringHashMap<float> errors; 00102 StringHashMap<String> units; 00103 00104 std::ostream& operator >> (std::ostream& s); 00105 }; 00106 00110 class BALL_EXPORT Sample 00111 { 00112 public: 00113 00118 class BALL_EXPORT Component 00119 { 00120 public: 00121 Component(); 00122 void clear(); 00123 00124 String label; 00125 float concentration_value; 00126 String value_unit; 00127 float concentration_min; 00128 float concentration_max; 00129 String isotopic_labeling; 00130 00131 std::ostream& operator >> (std::ostream& s); 00132 }; 00133 00134 Sample(); 00135 void clear(); 00136 00137 String label; 00138 String type; 00139 String details; 00140 vector <Component> components; 00141 00142 std::ostream& operator >> (std::ostream& s); 00143 }; 00144 00150 class BALL_EXPORT ShiftReferenceElement 00151 { 00152 public: 00153 ShiftReferenceElement(); 00154 00155 String mol_common_name; 00156 String atom_type; 00157 Position isotope_number; 00158 String atom_group; 00159 String shift_units; 00160 float shift_value; 00161 String reference_method; 00162 String reference_type; 00163 float indirect_shift_ratio; 00164 00165 std::ostream& operator >> (std::ostream& s); 00166 }; 00167 00173 class BALL_EXPORT ShiftReferenceSet 00174 { 00175 public: 00176 ShiftReferenceSet(); 00177 00178 // The saveframe's name = set name 00179 // can be referenced in the saveframe "assigned_chemical_shifts" 00180 String name; 00181 std::vector<ShiftReferenceElement> elements; 00182 00183 std::ostream& operator >> (std::ostream& s); 00184 }; 00185 00191 class BALL_EXPORT NMRAtomData 00192 { 00193 public: 00194 NMRAtomData(); 00195 00196 Position atom_ID; 00197 Position residue_seq_code; 00198 String residue_label; 00199 String atom_name; 00200 char atom_type; 00201 float shift_value; 00202 float error_value; 00203 Position ambiguity_code; 00204 00205 bool operator == (const NMRAtomData& atom) const; 00206 std::ostream& operator >> (std::ostream& s); 00207 }; 00208 00209 00219 class BALL_EXPORT NMRAtomDataSet 00220 { 00221 public: 00222 NMRAtomDataSet(NMRStarFile* parent); 00223 00224 String name; 00225 std::vector<NMRAtomData> atom_data; 00226 String condition; 00227 String reference; 00228 std::vector<String> samples; 00229 00230 std::ostream& operator >> (std::ostream& s); 00231 00232 protected: 00233 NMRStarFile* parent_; 00234 }; 00235 00241 class BALL_EXPORT EntryInformation 00242 { 00243 public: 00244 EntryInformation(); 00245 ~EntryInformation(); 00246 00247 std::ostream& operator >> (std::ostream& s); 00248 void clear(); 00249 00250 String entry_type; 00251 String BMRB_accession_code; 00252 String NMR_STAR_version; 00253 String experimental_method; 00254 }; 00255 00260 class BALL_EXPORT MonomericPolymer 00261 { 00262 public: 00264 class BALL_EXPORT HomologDB 00265 { 00266 public: 00267 HomologDB(); 00268 00269 std::ostream& operator >> (std::ostream& s); 00270 void clear(); 00271 00272 String name; 00273 String accession_code; 00274 String entry_mol_name; 00275 float seq_to_submitted_percentage; 00276 float subject_length; 00277 float seq_identity; 00278 float seq_positive; 00279 float homology_expectation_value; 00280 }; 00281 00282 00283 MonomericPolymer(); 00284 00285 String label_name; 00286 String type; 00287 String polymer_class; 00288 String common_name; 00289 String name_variant; 00290 float molecular_mass; 00291 String details; 00292 // polymer residue sequence information 00293 int number_of_residues; 00294 String residue_sequence; 00295 // we want to allow things like resid 137A, so we cannot use Index 00296 // key: index -- value: aminoacidname 00297 StringHashMap<String> residues_by_index; 00298 vector<HomologDB> homolog_database_entries; 00299 00300 std::ostream& operator >> (std::ostream& s); 00301 void clear(); 00302 }; 00303 00304 00309 class BALL_EXPORT MolecularSystem 00310 { 00311 // System related information 00312 public: 00313 class BALL_EXPORT RelatedDB 00314 { 00315 public: 00316 RelatedDB(); 00317 00318 std::ostream& operator >> (std::ostream& s); 00319 void clear(); 00320 00321 String name; 00322 String accession_code; 00323 String entry_mol_name; 00324 String relation_type; 00325 String details; 00326 }; 00327 00328 00329 // Central class for convenience 00330 class BALL_EXPORT ChemicalUnit 00331 { 00332 public: 00333 ChemicalUnit(); 00334 std::ostream& operator >> (std::ostream& s); 00335 void clear(); 00336 00337 String component_name; 00338 String label; 00339 MonomericPolymer* monomeric_polymer; 00340 NMRAtomDataSet* shifts; 00341 }; 00342 00343 00344 MolecularSystem(); 00345 ~MolecularSystem(); 00346 00347 // Name of the molecular system 00348 String system_name; 00349 String abbreviation_common; 00350 vector<ChemicalUnit> chemical_units; 00351 String system_physical_state; 00352 String system_oligomer_state; 00353 String system_paramagnetic; 00354 String system_thiol_state; 00356 float system_molecular_weight; 00357 // related entries in various DB's 00358 vector<RelatedDB> related_database_entries; 00359 00361 00362 std::ostream& operator >> (std::ostream& s); 00363 void clear(); 00364 }; 00365 00366 00371 class BALL_EXPORT NMRSpectrometer 00372 { 00373 public: 00374 String name; 00375 String manufacturer; 00376 String model; 00377 float field_strength; 00378 00379 std::ostream& operator >> (std::ostream& s); 00380 }; 00381 00382 00388 class BALL_EXPORT BALLToBMRBMapper 00389 { 00390 public: 00391 00398 //<saveframe_id, atom_id_in_nmr_atom_data_set> 00399 typedef std::pair<Position, Position> BMRBIndex; 00400 typedef std::map<Atom const* , BMRBIndex> BALLToBMRBMapping; 00401 typedef std::map<const NMRAtomData*, Atom const*> BMRBToBALLMapping; 00402 //TODO: Dont use pointer but something more sophisticated! 00403 00405 00409 BALLToBMRBMapper(); 00410 00412 BALLToBMRBMapper(Chain const& chain, const NMRStarFile& nmr_data); 00413 00415 virtual ~BALLToBMRBMapper() {} 00416 00418 00421 00423 const Chain* getChain() const {return chain_;} 00424 00426 void setChain(Chain const& chain) { chain_ = &chain; 00427 num_mismatches_ = -1; 00428 num_gabs_ = -1;} 00429 00431 void setNMRStarFile(NMRStarFile const& nmrfile) {nmr_data_ = &nmrfile; 00432 num_mismatches_ = -1; 00433 num_gabs_ = -1;} 00434 00436 const NMRStarFile* getNMRStarFile() const {return nmr_data_;} 00437 00439 BALLToBMRBMapping& getBALLToBMRBMapping() {return ball_to_bmrb_map_;} 00440 00442 const BALLToBMRBMapping& getBALLToBMRBMapping() const {return ball_to_bmrb_map_;} 00443 00445 BMRBToBALLMapping& getBMRBToBALLMapping() {return bmrb_to_ball_map_;} 00446 00448 const BMRBToBALLMapping& getBMRBToBALLMapping() const {return bmrb_to_ball_map_;} 00449 00451 int getNumberOfMismatches(){return num_mismatches_;} 00452 00454 int getNumberOfGabs(){return num_gabs_;} 00455 00457 bool isMapped(const NMRAtomData& nmr_atom) const; 00458 00464 const Atom* getBALLAtom(const NMRAtomData& nmr_atom) const; 00465 00470 bool isMapped(Atom const* atom) const; 00471 00473 BMRBIndex operator () (const Atom* atom); 00474 00481 bool createTrivialMapping(); 00482 00491 bool createMapping(const String& aligned_ball_sequence, 00492 const String& aligned_nmrstar_sequence); 00493 00496 void clear(); 00497 00499 00500 protected: 00501 00502 Peptides::NameConverter name_converter_; 00503 00507 00509 BALLToBMRBMapping ball_to_bmrb_map_; 00510 00512 BMRBToBALLMapping bmrb_to_ball_map_; 00513 00514 // NOTE: do *not* attempt to delete these pointers! 00515 const Chain* chain_; 00516 const NMRStarFile* nmr_data_; 00517 int num_mismatches_; 00518 int num_gabs_; 00519 00521 00522 private: 00523 const Atom* findNMRAtom_(const NMRAtomData& atom) const; 00524 00525 }; 00526 00528 00531 00534 NMRStarFile(); 00535 00539 NMRStarFile(const String& file_name, File::OpenMode open_mode = std::ios::in) 00540 throw(Exception::FileNotFound); 00541 00543 ~NMRStarFile(); 00545 00546 00550 00553 bool read() 00554 throw(Exception::ParseError); 00555 00556 /* Read an NMRStarFile and assign the shifts to the 00557 given AtomContainer using a trivial standard mapping. 00558 If the AtomContainer is a system, the first chain in chosen. 00559 00560 @param ac AtomContainer to which the NMRStarfile's shift should be assigned. 00561 @return bool - <tt>true</tt> if reading the file was successful 00562 */ 00563 //TODO to be able to use this function, further functions getMapping() and assign() are needed. 00564 bool read(AtomContainer& ac); 00565 00572 bool assignShifts(BALLToBMRBMapper& ball_to_bmrb_mapping); 00573 00584 bool assignShifts(AtomContainer& ac, const String& aligned_ball_sequence, 00585 const String& aligned_nmrstar_sequence); 00586 00589 Size getNumberOfAtoms() const; 00590 00593 Size getNumberOfShiftsAssigned() const {return number_of_assigned_shifts_;}; 00594 00597 const std::vector<NMRAtomDataSet>& getNMRData() const; 00598 00601 const EntryInformation& getEntryInformation() const {return entry_information_;}; 00602 00605 const MolecularSystem& getMolecularInformation() const {return molecular_system_;}; 00606 00609 SampleCondition& getSampleConditionByName(String name); 00610 00613 const SampleCondition& getSampleConditionByName(String name) const; 00614 00616 SampleCondition& getSampleCondition(Position i) {return sample_conditions_[i];}; 00617 00619 const SampleCondition& getSampleCondition(Position i) const {return sample_conditions_[i];}; 00620 00622 Size getNumberOfSampleConditions() const {return sample_conditions_.size();}; 00623 00625 const std::vector<SampleCondition>& getSampleConditions() const {return sample_conditions_;}; 00626 00628 std::vector<SampleCondition>& getSampleConditions() {return sample_conditions_;}; 00629 00630 // addSampleCondition TODO!! 00631 00633 std::vector<Sample> getSamples() const {return samples_;}; 00634 //const std::vector<Sample>& getSamples() const {return samples_;}; 00635 00637 Size getNumberOfSamples() const {return samples_.size();}; 00638 00640 bool hasSample(String label) const; 00641 00645 Sample getSample(Position i) const; 00646 00650 Sample getSample(String label) const; 00651 00653 std::vector<ShiftReferenceSet>& getShiftReferenceSets() {return shift_references_;}; 00655 const std::vector<ShiftReferenceSet>& getShiftReferenceSets() const {return shift_references_;}; 00656 00658 Size getNumberOfShiftReferenceSets() const {return shift_references_.size();}; 00659 00661 ShiftReferenceSet& getShiftReferenceSet(Position i) {return shift_references_[i];}; 00663 const ShiftReferenceSet& getShiftReferenceSet(Position i) const {return shift_references_[i];}; 00664 00666 const ShiftReferenceSet& getShiftReferenceSetByName(String name) const; 00668 ShiftReferenceSet& getShiftReferenceSetByName(String name); 00669 00670 00672 std::vector<NMRSpectrometer>& getNMRSpectrometers() {return nmr_spectrometers_;}; 00674 const std::vector<NMRSpectrometer>& getNMRSpectrometers() const {return nmr_spectrometers_;}; 00675 00677 Size getNumberOfNMRSpectrometers() const {return nmr_spectrometers_.size();}; 00678 00680 NMRSpectrometer& getNMRSpectrometer(Position i); 00682 const NMRSpectrometer& getNMRSpectrometer(Position i) const; 00683 00685 NMRSpectrometer& getNMRSpectrometerByName(String name); 00687 const NMRSpectrometer& getNMRSpectrometerByName(String name) const; 00688 00690 String getNMRSpectrometerManufacturer(Position i) const; 00691 00693 float getNMRSpectrometerFieldStrength(Position i) const; 00694 00695 00699 NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name) throw(Exception::OutOfRange); 00700 const NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name) const throw(Exception::OutOfRange); 00701 00703 Size getNumberOfMonomericPolymers() const {return monomeric_polymers_.size();}; 00704 00706 vector<MonomericPolymer> getMonomericPolymers() const {return monomeric_polymers_;}; 00707 00709 bool hasMonomericPolymer(String name) const; 00710 00716 bool isMonomericPolymer(String chemical_unit_label); 00717 00721 //TODO: Store changes/additions as Saveframes also in CIFFile 00722 void addMonomericPolymer(MonomericPolymer mp); 00723 00724 00733 String getResidueSequence(Position i=0) const; 00734 00736 bool hasHshifts() const {return has_H_shifts_;}; 00737 00739 bool hasCshifts() const {return has_C_shifts_;}; 00740 00742 bool hasNshifts() const {return has_N_shifts_;}; 00743 00745 00746 00750 00754 bool operator == (const NMRStarFile& f); 00755 00759 bool operator != (const NMRStarFile& f); 00760 00763 void clear(); 00764 00766 00767 private: 00768 00769 /*_ @name NMRStar file specific Help-Methods 00770 */ 00771 //_@{ 00772 00773 00775 void readEntryInformation_(); 00776 00778 void readMolSystem_(); 00779 00781 void readMonomericPolymers_(); 00782 00784 void readSampleConditions_(); 00785 00787 void readShiftReferences_(); 00788 00790 void readShifts_(); 00791 00793 void readSamples_(); 00794 00796 void readNMRSpectrometer_(); 00797 00799 void findDependiencies_(); 00800 00802 void setSpecialCharacters_(String characters); 00803 00805 bool isValidSingleValue_(String value); 00806 00812 bool assignShifts_(BALLToBMRBMapper& pdb_to_bmrb_mapping); 00813 00814 //_@} 00815 /*_ @name NMRStar file specific attributes 00816 */ 00817 //_@{ 00818 00819 /*_ A flag indicating validity of this instance. A sole NMRStarFile 00820 instance cannot be valid, because it does not have any information. 00821 */ 00822 bool valid_; 00823 00825 Size number_of_shift_sets_; 00826 00828 Size number_of_assigned_shifts_; 00829 00831 EntryInformation entry_information_; 00832 00834 MolecularSystem molecular_system_; 00835 00837 std::vector<NMRAtomDataSet> atom_data_sets_; 00838 00840 std::vector<SampleCondition> sample_conditions_; 00841 00843 std::vector<Sample> samples_; 00844 00846 std::vector<ShiftReferenceSet> shift_references_; 00847 00849 std::vector<NMRSpectrometer> nmr_spectrometers_; 00850 00852 vector<MonomericPolymer> monomeric_polymers_; 00853 00855 bool has_H_shifts_; 00856 bool has_C_shifts_; 00857 bool has_N_shifts_; 00858 00859 // a dummy saveframe 00860 SaveFrame dummy_saveframe_; 00861 00862 // a dummy sample condition 00863 SampleCondition dummy_sample_condition_; 00864 00865 // a dummy sample 00866 Sample dummy_sample_; 00867 00868 // a dummy shift reference set 00869 ShiftReferenceSet dummy_shift_reference_set_; 00870 00871 // a dummy nmr spectrometer 00872 NMRSpectrometer dummy_NMR_spectrometer_; 00873 00874 // a dummy nmr spectrometer 00875 MonomericPolymer dummy_monomeric_polymer_; 00876 00878 String special_characters_; 00879 //_@} 00880 }; 00881 00883 } // Namespace BALL 00884 00885 #endif // BALL_FORMAT_NMRSTARFILE_H