00001
00002
00003
00004
00005 #ifndef BALL_FORMAT_NMRSTARFILE_H
00006 #define BALL_FORMAT_NMRSTARFILE_H
00007
00008 #ifndef BALL_FORMAT_CIFFILE_H
00009 # include <BALL/FORMAT/CIFFile.h>
00010 #endif
00011
00012 #ifndef BALL_COMMON_LIMITS_H
00013 # include <BALL/COMMON/limits.h>
00014 #endif
00015
00016 #ifndef BALL_KERNEL_PROTEIN_H
00017 # include <BALL/KERNEL/protein.h>
00018 #endif
00019
00020 #ifndef BALL_STRUCTURE_PEPTIDES_H
00021 # include <BALL/STRUCTURE/peptides.h>
00022 #endif
00023
00024 #include <vector>
00025
00026 namespace BALL
00027 {
00117 class BALL_EXPORT NMRStarFile
00118 : public CIFFile
00119 {
00120 public:
00121
00122
/// Sentinel constants; declared here and defined out of line. Presumably they
/// stand for "value not available" in float, int and Position fields -- confirm
/// against the definitions.
00123 static const float FLOAT_VALUE_NA;
00124 static const int INT_VALUE_NA;
00125
00126 static const Position POSITION_VALUE_NA;
00127
00133
/// One named set of sample conditions: a list of condition types plus value,
/// error and unit tables looked up by type string.
00139 class BALL_EXPORT SampleCondition
00140 {
00141 public:
      /// Default constructor; defined out of line.
00142 SampleCondition();
00143
00144
00145
      /// Name of this condition set.
00146 String name;
00147
00148
00149
      /// Condition type names; presumably the keys used in the three maps below.
00150 std::vector<String> types;
      /// Value stored per type; hasType() consults this map.
00151 StringHashMap<float> values;
      /// Error stored per type (presumably the uncertainty of the value).
00152 StringHashMap<float> errors;
      /// Unit string stored per type.
00153 StringHashMap<String> units;
00154
      /// Returns true if `values` contains an entry for @p type.
      /// Pure query: const-qualified and takes the key by const reference so it
      /// can be used on const objects without copying the string.
00155 bool hasType(const String& type) const { return values.has(type); }
      /// Stream output; declared here, defined out of line.
      /// NOTE(review): this is a member operator>> taking an ostream, so it is
      /// invoked as `condition >> stream`.
00156 std::ostream& operator >> (std::ostream& s);
00157 };
00158
/// Describes one sample: label, type, free-text details and its components.
00162 class BALL_EXPORT Sample
00163 {
00164 public:
00165
      /// One component of a sample with its concentration data.
00170 class BALL_EXPORT Component
00171 {
00172 public:
      /// Default constructor; defined out of line.
00173 Component();
      /// Declared here, defined out of line; presumably resets all fields.
00174 void clear();
00175
00176 String label;
00177 float concentration_value;
      /// Unit string; presumably applies to the concentration fields -- confirm.
00178 String value_unit;
00179 float concentration_min;
00180 float concentration_max;
00181 String isotopic_labeling;
00182
      /// Stream output, invoked as `component >> stream`; defined out of line.
00183 std::ostream& operator >> (std::ostream& s);
00184 };
00185
      /// Default constructor; defined out of line.
00186 Sample();
      /// Declared here, defined out of line; presumably resets all fields.
00187 void clear();
00188
00189 String label;
00190 String type;
00191 String details;
      /// Components of this sample. Spelled std::vector to match the rest of
      /// the header instead of relying on a using-declaration from elsewhere.
00192 std::vector<Component> components;
00193
      /// Stream output, invoked as `sample >> stream`; defined out of line.
00194 std::ostream& operator >> (std::ostream& s);
00195 };
00196
/// One chemical-shift reference record (a single entry of a ShiftReferenceSet).
00202 class BALL_EXPORT ShiftReferenceElement
00203 {
00204 public:
      /// Default constructor; defined out of line.
00205 ShiftReferenceElement();
00206
      /// Common name of the reference molecule.
00207 String mol_common_name;
00208 String atom_type;
00209 Position isotope_number;
00210 String atom_group;
      /// Unit string; presumably the unit of shift_value -- confirm.
00211 String shift_units;
00212 float shift_value;
00213 String reference_method;
00214 String reference_type;
00215 float indirect_shift_ratio;
00216
      /// Stream output, invoked as `element >> stream`; defined out of line.
00217 std::ostream& operator >> (std::ostream& s);
00218 };
00219
/// A named collection of ShiftReferenceElement records.
00225 class BALL_EXPORT ShiftReferenceSet
00226 {
00227 public:
      /// Default constructor; defined out of line.
00228 ShiftReferenceSet();
00229
00230
00231
      /// Name of the set.
00232 String name;
      /// Reference records belonging to this set.
00233 std::vector<ShiftReferenceElement> elements;
00234
      /// Stream output, invoked as `set >> stream`; defined out of line.
00235 std::ostream& operator >> (std::ostream& s);
00236 };
00237
/// Shift record for a single atom: identifiers, names, shift value and error.
00243 class BALL_EXPORT NMRAtomData
00244 {
00245 public:
      /// Default constructor; defined out of line.
00246 NMRAtomData();
00247
00248 Position atom_ID;
00249 Position residue_seq_code;
00250 String residue_label;
00251 String atom_name;
      /// Atom type as a single character (presumably the element letter -- confirm).
00252 char atom_type;
00253 float shift_value;
      /// Presumably the error of shift_value -- confirm.
00254 float error_value;
00255 Position ambiguity_code;
00256
      /// Equality comparison; defined out of line, so which fields are compared
      /// is not visible here.
00257 bool operator == (const NMRAtomData& atom) const;
      /// Stream output, invoked as `atom >> stream`; defined out of line.
00258 std::ostream& operator >> (std::ostream& s);
00259 };
00260
00261
/// A named set of NMRAtomData records together with the names of the
/// condition, reference and samples it refers to.
00271 class BALL_EXPORT NMRAtomDataSet
00272
00273 {
00274 public:
      /// Constructs a set for the given file; defined out of line (presumably
      /// stores @p parent in parent_).
00275 NMRAtomDataSet(NMRStarFile* parent);
00276
00277 String name;
00278 String label;
      /// The per-atom shift records.
00279 std::vector<NMRAtomData> atom_data;
      /// Presumably the name of a SampleCondition of the parent file -- confirm.
00280 String condition;
      /// Presumably the name of a ShiftReferenceSet of the parent file -- confirm.
00281 String reference;
      /// Presumably labels of Sample entries of the parent file -- confirm.
00282 std::vector<String> samples;
00283
      /// Stream output, invoked as `set >> stream`; defined out of line.
00284 std::ostream& operator >> (std::ostream& s);
00285
00286 protected:
      /// Non-owning raw pointer to an NMRStarFile; lifetime is managed elsewhere.
00287 NMRStarFile* parent_;
00288 };
00289
/// General information about a file entry; all fields are plain strings.
00295 class BALL_EXPORT EntryInformation
00296 {
00297 public:
      /// Constructor and destructor are declared here and defined out of line.
00298 EntryInformation();
00299 ~EntryInformation();
00300
      /// Stream output, invoked as `info >> stream`; defined out of line.
00301 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00302 void clear();
00303
00304 String entry_type;
00305 String BMRB_accession_code;
00306 String NMR_STAR_version;
00307 String experimental_method;
00308 String submission_date;
00309
00310 };
00311
/// Description of one monomeric polymer: naming, mass, residue sequence and
/// a list of homologous database entries.
00316 class BALL_EXPORT MonomericPolymer
00317 {
00318 public:
      /// One homologous-database hit recorded for a polymer.
00320 class BALL_EXPORT HomologDB
00321 {
00322 public:
      /// Default constructor; defined out of line.
00323 HomologDB();
00324
      /// Stream output, invoked as `db >> stream`; defined out of line.
00325 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00326 void clear();
00327
00328 String name;
00329 String accession_code;
00330 String entry_mol_name;
00331 float seq_to_submitted_percentage;
00332 float subject_length;
00333 float seq_identity;
00334 float seq_positive;
00335 float homology_expectation_value;
00336 };
00337
00338
      /// Default constructor; defined out of line.
00339 MonomericPolymer();
00340
00341 String label_name;
00342 String type;
00343 String polymer_class;
00344 String common_name;
00345 String name_variant;
00346 float molecular_mass;
00347 String details;
00348
00349 int number_of_residues;
00350 String residue_sequence;
00351
00352
      /// Residue names looked up by a string key (presumably the residue index
      /// as text -- confirm against the code that fills it).
00353 StringHashMap<String> residues_by_index;
      /// Homolog hits. Spelled std::vector to match the rest of the header
      /// instead of relying on a using-declaration from elsewhere.
00354 std::vector<HomologDB> homolog_database_entries;
00355
      /// Stream output, invoked as `polymer >> stream`; defined out of line.
00356 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00357 void clear();
00358 };
00359
00360
/// Description of the molecular system: system-level strings, its chemical
/// units and related database entries.
00365 class BALL_EXPORT MolecularSystem
00366 {
00367
00368 public:
      /// A related database entry referenced by the molecular system.
00369 class BALL_EXPORT RelatedDB
00370 {
00371 public:
      /// Default constructor; defined out of line.
00372 RelatedDB();
00373
      /// Stream output, invoked as `db >> stream`; defined out of line.
00374 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00375 void clear();
00376
00377 String name;
00378 String accession_code;
00379 String entry_mol_name;
00380 String relation_type;
00381 String details;
00382 };
00383
00384
00385
      /// One chemical unit of the system, linking a component name and label to
      /// a polymer description and a shift data set.
00386 class BALL_EXPORT ChemicalUnit
00387 {
00388 public:
      /// Default constructor; defined out of line.
00389 ChemicalUnit();
      /// Stream output, invoked as `unit >> stream`; defined out of line.
00390 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00391 void clear();
00392
00393 String component_name;
00394 String label;
      /// Raw pointers; ownership is not visible in this header (presumably
      /// non-owning references into the enclosing file object -- confirm).
00395 MonomericPolymer* monomeric_polymer;
00396 NMRAtomDataSet* shifts;
00397 };
00398
00399
      /// Constructor and destructor are declared here and defined out of line.
00400 MolecularSystem();
00401 ~MolecularSystem();
00402
      /// Returns chemical unit @p i via unchecked operator[]; the caller must
      /// ensure i < getNumberOfChemicalUnits().
00403 ChemicalUnit const& getChemicalUnit(Position i) const { return chemical_units[i]; }
00404 ChemicalUnit& getChemicalUnit(Position i) { return chemical_units[i]; }
00405
      /// Number of entries in chemical_units.
00406 Size getNumberOfChemicalUnits() const {return chemical_units.size(); }
00407
00408
00409 String system_name;
00410 String abbreviation_common;
      /// Chemical units of the system. Spelled std::vector to match the rest of
      /// the header instead of relying on a using-declaration from elsewhere.
00411 std::vector<ChemicalUnit> chemical_units;
00412 String system_physical_state;
00413 String system_oligomer_state;
00414 String system_paramagnetic;
00415 String system_thiol_state;
00417 float system_molecular_weight;
00418
      /// Related database entries (std::vector, see chemical_units).
00419 std::vector<RelatedDB> related_database_entries;
00420
00422
      /// Stream output, invoked as `system >> stream`; defined out of line.
00423 std::ostream& operator >> (std::ostream& s);
      /// Declared here, defined out of line; presumably resets all fields.
00424 void clear();
00425 };
00426
00427
/// Description of one NMR spectrometer.
/// NOTE(review): no constructor is declared, so field_strength is not
/// initialised by this class when an object is default-initialised.
00432 class BALL_EXPORT NMRSpectrometer
00433 {
00434 public:
00435 String name;
00436 String manufacturer;
00437 String model;
      /// Field strength; the unit is not visible in this header.
00438 float field_strength;
00439
      /// Stream output, invoked as `spectrometer >> stream`; defined out of line.
00440 std::ostream& operator >> (std::ostream& s);
00441 };
00442
00443
/// Maintains a two-way mapping between atoms of a Chain and NMRAtomData
/// records of an NMRStarFile. Chain, file and data set are held as raw
/// pointers to caller-provided objects, which must outlive the mapper.
00449 class BALL_EXPORT BALLToBMRBMapper
00450 {
00451 public:
00452
00459
      /// Pair of positions identifying a record (presumably data-set index and
      /// atom index within that set -- confirm against the implementation).
00460 typedef std::pair<Position, Position> BMRBIndex;
      /// Atom pointer -> BMRBIndex.
00461 typedef std::map<Atom const* , BMRBIndex> BALLToBMRBMapping;
      /// NMRAtomData pointer -> Atom pointer.
00462 typedef std::map<const NMRAtomData*, Atom const*> BMRBToBALLMapping;
00463
00464
00466
      /// Default constructor; defined out of line.
00470 BALLToBMRBMapper();
00471
      /// Constructs a mapper for @p chain, @p nmr_data and the chemical unit
      /// named @p chemical_unit; defined out of line.
00478 BALLToBMRBMapper(Chain const& chain, const NMRStarFile& nmr_data, const String& chemical_unit);
00479
      /// Virtual destructor with an empty body.
00481 virtual ~BALLToBMRBMapper() {}
00482
00484
00487
      /// Returns the stored chain pointer.
00489 const Chain* getChain() const {return chain_;}
00490
      /// Stores the address of @p chain and resets both counters to -1.
00492 void setChain(Chain const& chain) { chain_ = &chain;
00493 num_mismatches_ = -1;
00494 num_gaps_ = -1;}
00495
      /// Stores the address of @p nmrfile and resets both counters to -1.
00497 void setNMRStarFile(NMRStarFile const& nmrfile) {nmr_data_ = &nmrfile;
00498 num_mismatches_ = -1;
00499 num_gaps_ = -1;}
00500
      /// Returns the stored file pointer.
00502 const NMRStarFile* getNMRStarFile() const {return nmr_data_;}
00503
      /// Stores the address of @p nmr_atom_data_set; the counters are left untouched.
00505 void setNMRAtomDataSet(NMRAtomDataSet const& nmr_atom_data_set){nmr_atom_data_set_= &nmr_atom_data_set;}
00506
      /// Selects the data set by chemical unit name; defined out of line
      /// (presumably returns false when no such unit is found).
00508 bool setNMRAtomDataSetByName(String const& chemical_unit_name);
00509
00510
      /// Returns the stored data-set pointer.
00511 const NMRAtomDataSet* getNMRAtomDataSet() const{return nmr_atom_data_set_;}
00512
      /// Mutable / read-only access to the atom -> BMRB index map.
00514 BALLToBMRBMapping& getBALLToBMRBMapping() {return ball_to_bmrb_map_;}
00515
00517 const BALLToBMRBMapping& getBALLToBMRBMapping() const {return ball_to_bmrb_map_;}
00518
      /// Mutable / read-only access to the BMRB record -> atom map.
00520 BMRBToBALLMapping& getBMRBToBALLMapping() {return bmrb_to_ball_map_;}
00521
00523 const BMRBToBALLMapping& getBMRBToBALLMapping() const {return bmrb_to_ball_map_;}
00524
      /// Returns num_mismatches_; it is -1 after setChain()/setNMRStarFile().
      /// Const-qualified so it can be queried on a const mapper.
00526 int getNumberOfMismatches() const { return num_mismatches_; }
00527
      /// Returns num_gaps_; it is -1 after setChain()/setNMRStarFile().
      /// Const-qualified so it can be queried on a const mapper.
00529 int getNumberOfGaps() const { return num_gaps_; }
00530
      /// Defined out of line; presumably tests whether @p nmr_atom has a mapped atom.
00532 bool isMapped(const NMRAtomData& nmr_atom) const;
00533
      /// Defined out of line; presumably returns the atom mapped to @p nmr_atom
      /// (behaviour for unmapped records is not visible here).
00539 const Atom* getBALLAtom(const NMRAtomData& nmr_atom) const;
00540
      /// Defined out of line; presumably tests whether @p atom has a mapped record.
00545 bool isMapped(Atom const* atom) const;
00546
      /// Defined out of line; returns a BMRBIndex for @p atom.
00548 BMRBIndex operator () (const Atom* atom);
00549
      /// Defined out of line; builds a mapping without an explicit alignment.
00556 bool createTrivialMapping();
00557
      /// Defined out of line; builds a mapping from two aligned sequence strings.
00567 bool createMapping(const String& aligned_ball_sequence,
00568 const String& aligned_nmrstar_sequence);
00569
      /// Declared here, defined out of line; presumably resets the mapper.
00572 void clear();
00573
00575
00576 protected:
00577
00578 Peptides::NameConverter name_converter_;
00579
00583
00585 BALLToBMRBMapping ball_to_bmrb_map_;
00586
00588 BMRBToBALLMapping bmrb_to_ball_map_;
00589
00590
      /// Non-owning pointers set through the setters above.
00591 const Chain* chain_;
00592 const NMRStarFile* nmr_data_;
00593 const NMRAtomDataSet* nmr_atom_data_set_;
00594 Position nmr_atom_data_set_index_;
      /// Counters; the inline setters reset them to -1.
00595 int num_mismatches_;
00596 int num_gaps_;
00597 bool valid_;
00599
00600 private:
      /// Defined out of line; looks up the atom for an NMRAtomData record.
00601 const Atom* findNMRAtom_(const NMRAtomData& atom) const;
00602
00603 };
00604
00606
00609
/// Default constructor; defined out of line.
00612 NMRStarFile();
00613
      /// Constructs from a file name and open mode (default: std::ios::in);
      /// defined out of line.
00618 NMRStarFile(const String& file_name, File::OpenMode open_mode = std::ios::in);
00619
      /// Destructor; defined out of line.
00621 ~NMRStarFile();
00623
00624
00628
      /// Reads the file; defined out of line. The meaning of the boolean
      /// result is not visible here (presumably true on success).
00632 bool read();
00633
00634
00635
00636
00637
00638
00639
00640
00641
      /// Reads the file with an AtomContainer argument; defined out of line
      /// (presumably also assigns shifts to atoms of @p ac -- confirm).
00642 bool read(AtomContainer& ac);
00643
      /// Assigns shifts using an existing mapper; defined out of line.
00650 bool assignShifts(BALLToBMRBMapper& ball_to_bmrb_mapping);
00651
      /// Assigns shifts to @p ac for the named chemical unit, given two aligned
      /// sequence strings; defined out of line.
00663 bool assignShifts(AtomContainer& ac,
00664 const String& chemical_unit,
00665 const String& aligned_ball_sequence,
00666 const String& aligned_nmrstar_sequence);
00667
/// Defined out of line; returns a number of atoms (scope not visible here).
00670 Size getNumberOfAtoms() const;
00671
      /// Returns the stored count of assigned shifts.
00674 Size getNumberOfShiftsAssigned() const { return number_of_assigned_shifts_; }
00675
      /// Defined out of line; read-only access to the atom data sets.
00678 const std::vector<NMRAtomDataSet>& getNMRData() const;
00679
      /// Read-only access to the entry information.
00682 const EntryInformation& getEntryInformation() const { return entry_information_; }
00683
      /// Read-only access to the molecular system.
00686 const MolecularSystem& getMolecularInformation() const { return molecular_system_; }
00687
      /// Mutable access to the molecular system.
00690 MolecularSystem& getMolecularInformation() { return molecular_system_; }
00691
00692
      /// Defined out of line; look up a chemical unit by label (behaviour for
      /// an unknown label is not visible here).
00695 const MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label) const;
00696
00699 MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label);
00700
00701
/// Defined out of line; test for a sample condition with the given name.
00704 bool hasSampleCondition(String name);
00705
00708 bool hasSampleCondition(String name) const;
00709
      /// Defined out of line; look up a sample condition by name (behaviour for
      /// an unknown name is not visible here).
00712 SampleCondition& getSampleConditionByName(String name);
00713
00716 const SampleCondition& getSampleConditionByName(String name) const;
00717
      /// Returns sample condition @p i via unchecked operator[]; the caller must
      /// ensure i < getNumberOfSampleConditions().
00719 SampleCondition& getSampleCondition(Position i) { return sample_conditions_[i]; }
00720
00722 const SampleCondition& getSampleCondition(Position i) const { return sample_conditions_[i]; }
00723
      /// Number of stored sample conditions.
00725 Size getNumberOfSampleConditions() const { return sample_conditions_.size(); }
00726
      /// Read-only / mutable access to the whole list of sample conditions.
00728 const std::vector<SampleCondition>& getSampleConditions() const { return sample_conditions_; }
00729
00731 std::vector<SampleCondition>& getSampleConditions() { return sample_conditions_; }
00732
00733
00734
/// Returns a copy of the sample list (returned by value).
00736 std::vector<Sample> getSamples() const { return samples_; }
00737
00738
      /// Number of stored samples.
00740 Size getNumberOfSamples() const { return samples_.size(); }
00741
      /// Defined out of line; test for a sample with the given label.
00743 bool hasSample(String label) const;
00744
      /// Defined out of line; return a copy of a sample by position or label
      /// (behaviour for an invalid argument is not visible here).
00748 Sample getSample(Position i) const;
00749
00753 Sample getSample(String label) const;
00754
/// Mutable / read-only access to the whole list of shift reference sets.
00756 std::vector<ShiftReferenceSet>& getShiftReferenceSets() { return shift_references_; }
00758 const std::vector<ShiftReferenceSet>& getShiftReferenceSets() const { return shift_references_; }
00759
      /// Number of stored shift reference sets.
00761 Size getNumberOfShiftReferenceSets() const { return shift_references_.size(); }
00762
      /// Defined out of line; test for a shift reference set with the given name.
00764 bool hasShiftReferenceSet(String name);
00765
      /// Returns shift reference set @p i via unchecked operator[]; the caller
      /// must ensure i < getNumberOfShiftReferenceSets().
00767 ShiftReferenceSet& getShiftReferenceSet(Position i) { return shift_references_[i]; }
00769 const ShiftReferenceSet& getShiftReferenceSet(Position i) const { return shift_references_[i]; }
00770
      /// Defined out of line; look up a shift reference set by name (behaviour
      /// for an unknown name is not visible here).
00772 const ShiftReferenceSet& getShiftReferenceSetByName(String name) const;
00774 ShiftReferenceSet& getShiftReferenceSetByName(String name);
00775
00776
/// Mutable / read-only access to the whole list of spectrometers.
00778 std::vector<NMRSpectrometer>& getNMRSpectrometers() { return nmr_spectrometers_; }
00780 const std::vector<NMRSpectrometer>& getNMRSpectrometers() const { return nmr_spectrometers_; }
00781
      /// Number of stored spectrometers.
00783 Size getNumberOfNMRSpectrometers() const { return nmr_spectrometers_.size(); }
00784
      /// Defined out of line; access a spectrometer by position (bounds
      /// handling is not visible here).
00786 NMRSpectrometer& getNMRSpectrometer(Position i);
00788 const NMRSpectrometer& getNMRSpectrometer(Position i) const;
00789
      /// Defined out of line; look up a spectrometer by name (behaviour for an
      /// unknown name is not visible here).
00791 NMRSpectrometer& getNMRSpectrometerByName(String name);
00793 const NMRSpectrometer& getNMRSpectrometerByName(String name) const;
00794
      /// Defined out of line; manufacturer of spectrometer @p i.
00796 String getNMRSpectrometerManufacturer(Position i) const;
00797
      /// Defined out of line; field strength of spectrometer @p i.
00799 float getNMRSpectrometerFieldStrength(Position i) const;
00800
00801
/// Defined out of line; access a monomeric polymer by position or by name
/// (behaviour for an invalid argument is not visible here).
00805 NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i);
00806
00810 const NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i) const;
00811
00815 NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name);
00816
00820 const NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name) const;
00821
      /// Number of stored monomeric polymers.
00823 Size getNumberOfMonomericPolymers() const { return monomeric_polymers_.size(); }
00824
      /// Returns a copy of the polymer list (returned by value). The return
      /// type is spelled std::vector to match the other accessors of this class.
00826 std::vector<MonomericPolymer> getMonomericPolymers() const { return monomeric_polymers_; }
00827
      /// Defined out of line; test for a polymer with the given name.
00829 bool hasMonomericPolymer(String name) const;
00830
      /// Defined out of line; presumably tests whether the chemical unit with
      /// the given label refers to a monomeric polymer -- confirm.
00836 bool isMonomericPolymer(String chemical_unit_label);
00837
00841
      /// Defined out of line; adds @p mp (passed by value) to this file's data.
00842 void addMonomericPolymer(MonomericPolymer mp);
00843
00844
      /// Defined out of line; residue sequence selected by @p i (default 0;
      /// presumably the index of a monomeric polymer -- confirm).
00853 String getResidueSequence(Position i=0) const;
00854
/// Return the stored has_H_shifts_ / has_C_shifts_ / has_N_shifts_ flags.
00856 bool hasHshifts() const { return has_H_shifts_; }
00857
00859 bool hasCshifts() const { return has_C_shifts_; }
00860
00862 bool hasNshifts() const { return has_N_shifts_; }
00863
00865
00866
00870
      /// Comparison operators; defined out of line, so the comparison criteria
      /// are not visible here. NOTE(review): both are declared non-const.
00874 bool operator == (const NMRStarFile& f);
00875
00879 bool operator != (const NMRStarFile& f);
00880
      /// Declared here, defined out of line; presumably resets all parsed data.
00883 void clear();
00884
00886
00887 private:
00888
00889
00890
00891
00892
/// Private helpers, all defined out of line. Judging by their names, each
/// read*_ function parses one section of the file into the matching member.
00894 void readEntryInformation_();
00895
00897 void readMolSystem_();
00898
00900 void readMonomericPolymers_();
00901
00903 void readSampleConditions_();
00904
00906 void readShiftReferences_();
00907
00909 void readShifts_();
00910
00912 void readSamples_();
00913
00915 void readNMRSpectrometer_();
00916
      /// NOTE(review): the name is misspelled ("Dependiencies"); it has to stay
      /// as is because the out-of-line definition must match this declaration.
00918 void findDependiencies_();
00919
      /// Presumably stores @p characters in special_characters_ -- confirm.
00921 void setSpecialCharacters_(String characters);
00922
      /// Validation and conversion helpers for a single textual value; the
      /// handling of invalid input is not visible here (presumably the *_NA
      /// constants are involved -- confirm).
00924 bool isValidSingleValue_(String value);
00925
00927 float valueToFloat_(String value);
00928
00930 int valueToInt_(String value);
      /// Internal counterpart of assignShifts() taking a mapper.
00936 bool assignShifts_(BALLToBMRBMapper& pdb_to_bmrb_mapping);
00937
00938
00939
00940
00941
00942
00943
00944
00945
/// Validity flag of this file object (its exact meaning is set out of line).
00946 bool valid_;
00947
      /// Counters; number_of_assigned_shifts_ is returned by getNumberOfShiftsAssigned().
00949 Size number_of_shift_sets_;
00950
00952 Size number_of_assigned_shifts_;
00953
      /// Data exposed through getEntryInformation() and getMolecularInformation().
00955 EntryInformation entry_information_;
00956
00958 MolecularSystem molecular_system_;
00959
      /// Containers backing the public accessors of this class.
00961 std::vector<NMRAtomDataSet> atom_data_sets_;
00962
00964 std::vector<SampleCondition> sample_conditions_;
00965
00967 std::vector<Sample> samples_;
00968
00970 std::vector<ShiftReferenceSet> shift_references_;
00971
00973 std::vector<NMRSpectrometer> nmr_spectrometers_;
00974
      /// Spelled std::vector to match the other containers above instead of
      /// relying on a using-declaration from elsewhere.
00976 std::vector<MonomericPolymer> monomeric_polymers_;
00977
      /// Flags returned by hasHshifts(), hasCshifts() and hasNshifts().
00979 bool has_H_shifts_;
00980 bool has_C_shifts_;
00981 bool has_N_shifts_;
00982
00983
      /// Placeholder objects; presumably returned by the reference-returning
      /// lookups when nothing matches -- confirm against the implementation.
00984 SaveFrame dummy_saveframe_;
00985
00986
00987 SampleCondition dummy_sample_condition_;
00988
00989
00990 Sample dummy_sample_;
00991
00992
00993 ShiftReferenceSet dummy_shift_reference_set_;
00994
00995
00996 NMRSpectrometer dummy_NMR_spectrometer_;
00997
00998
00999 MonomericPolymer dummy_monomeric_polymer_;
01000
      /// Presumably set via setSpecialCharacters_() -- confirm.
01002 String special_characters_;
01003
01004 };
01005
01007 }
01008
01009 #endif // BALL_FORMAT_NMRSTARFILE_H