OpenMS
DigestionEnzymeDB.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Xiao Liang $
6 // $Authors: Xiao Liang, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 #include <OpenMS/SYSTEM/File.h>
16 
17 #include <set>
18 #include <map>
19 
20 namespace OpenMS
21 {
31  template<typename DigestionEnzymeType, typename InstanceType> class DigestionEnzymeDB
32  {
33  public:
34 
38  typedef typename std::set<const DigestionEnzymeType*>::const_iterator ConstEnzymeIterator;
39  typedef typename std::set<const DigestionEnzymeType*>::iterator EnzymeIterator;
41 
43  static InstanceType* getInstance()
44  {
45  static InstanceType* db_ = nullptr;
46  if (db_ == nullptr)
47  {
48  db_ = new InstanceType;
49  }
50  return db_;
51  }
52 
58  {
59  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
60  {
61  delete *it;
62  }
63  }
65 
72  const DigestionEnzymeType* getEnzyme(const String& name) const
73  {
74  auto pos = enzyme_names_.find(name);
75  if (pos == enzyme_names_.end())
76  {
77  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name);
78  }
79  return pos->second;
80  }
81 
84  const DigestionEnzymeType* getEnzymeByRegEx(const String& cleavage_regex) const
85  {
86  if (!hasRegEx(cleavage_regex))
87  {
88  // @TODO: why does this use a different exception than "getEnzyme"?
89  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
90  String("Enzyme with regex " + cleavage_regex + " was not registered in Enzyme DB, register first!").c_str());
91  }
92  return enzyme_regex_.at(cleavage_regex);
93  }
94 
96  void getAllNames(std::vector<String>& all_names) const
97  {
98  all_names.clear();
99  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
100  {
101  all_names.push_back((*it)->getName());
102  }
103  }
105 
110  bool hasEnzyme(const String& name) const
111  {
112  return (enzyme_names_.find(name) != enzyme_names_.end());
113  }
114 
116  bool hasRegEx(const String& cleavage_regex) const
117  {
118  return (enzyme_regex_.find(cleavage_regex) != enzyme_regex_.end());
119  }
120 
122  bool hasEnzyme(const DigestionEnzymeType* enzyme) const
123  {
124  return (const_enzymes_.find(enzyme) != const_enzymes_.end() );
125  }
127 
131  inline ConstEnzymeIterator beginEnzyme() const { return const_enzymes_.begin(); } // we only allow constant iterators -- this DB is not meant to be modifiable
132  inline ConstEnzymeIterator endEnzyme() const { return const_enzymes_.end(); }
133 
135  protected:
136  DigestionEnzymeDB(const String& db_file = "")
137  {
138  if (!db_file.empty())
139  {
140  readEnzymesFromFile_(db_file);
141  }
142  }
143 
145  DigestionEnzymeDB(const DigestionEnzymeDB& enzymes_db) = delete;
147 
152  DigestionEnzymeDB& operator=(const DigestionEnzymeDB& enzymes_db) = delete;
154 
156  void readEnzymesFromFile_(const String& filename)
157  {
158  String file = File::find(filename);
159 
160  Param param;
161  ParamXMLFile().load(file, param);
162  if (param.empty()) return;
163 
164  std::vector<String> split;
165  String(param.begin().getName()).split(':', split);
166  if (split[0] != "Enzymes")
167  {
168  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, split[0], "name 'Enzymes' expected");
169  }
170 
171  try
172  {
173  std::map<String, String> values;
174  String previous_enzyme = split[1];
175  // this iterates over all the "ITEM" elements in the XML file:
176  for (Param::ParamIterator it = param.begin(); it != param.end(); ++it)
177  {
178  String(it.getName()).split(':', split);
179  if (split[0] != "Enzymes") break; // unexpected content in the XML file
180  if (split[1] != previous_enzyme)
181  {
182  // add enzyme and reset:
183  addEnzyme_(parseEnzyme_(values));
184  previous_enzyme = split[1];
185  values.clear();
186  }
187  values[it.getName()] = String(it->value.toString());
188  }
189  // add last enzyme
190  addEnzyme_(parseEnzyme_(values));
191  }
192  catch (Exception::BaseException& e)
193  {
194  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, e.what(), "");
195  }
196  }
197 
199  const DigestionEnzymeType* parseEnzyme_(std::map<String, String>& values) const
200  {
201  DigestionEnzymeType* enzy_ptr = new DigestionEnzymeType();
202 
203  for (std::map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
204  {
205  const String& key = it->first;
206  const String& value = it->second;
207  if (!enzy_ptr->setValueFromFile(key, value))
208  {
209  OPENMS_LOG_ERROR << "Error while parsing enzymes file: unknown key '" << key << "' with value '" << value << "'" << std::endl;
210  }
211  }
212  return enzy_ptr;
213  }
214 
216  void addEnzyme_(const DigestionEnzymeType* enzyme)
217  {
218  // add to internal storage
219  const_enzymes_.insert(enzyme);
220  // add to internal indices (by name and its synonyms)
221  String name = enzyme->getName();
222  enzyme_names_[name] = enzyme;
223  enzyme_names_[name.toLower()] = enzyme;
224  for (std::set<String>::const_iterator it = enzyme->getSynonyms().begin(); it != enzyme->getSynonyms().end(); ++it)
225  {
226  enzyme_names_[*it] = enzyme;
227  }
228  // ... and by regex
229  if (enzyme->getRegEx() != "")
230  {
231  enzyme_regex_[enzyme->getRegEx()] = enzyme;
232  }
233  return;
234  }
235 
236  std::map<String, const DigestionEnzymeType*> enzyme_names_;
237 
238  std::map<String, const DigestionEnzymeType*> enzyme_regex_;
239 
240  std::set<const DigestionEnzymeType*> const_enzymes_;
241 
242  };
243 }
244 
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:439
Digestion enzyme database (base class)
Definition: DigestionEnzymeDB.h:32
std::set< const DigestionEnzymeType * >::iterator EnzymeIterator
Definition: DigestionEnzymeDB.h:39
const DigestionEnzymeType * parseEnzyme_(std::map< String, String > &values) const
parses an enzyme, given the key/value pairs from an XML file
Definition: DigestionEnzymeDB.h:199
DigestionEnzymeDB(const String &db_file="")
Definition: DigestionEnzymeDB.h:136
DigestionEnzymeDB(const DigestionEnzymeDB &enzymes_db)=delete
copy constructor
std::map< String, const DigestionEnzymeType * > enzyme_regex_
index by regex
Definition: DigestionEnzymeDB.h:238
static InstanceType * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:43
const DigestionEnzymeType * getEnzymeByRegEx(const String &cleavage_regex) const
Definition: DigestionEnzymeDB.h:84
ConstEnzymeIterator endEnzyme() const
Definition: DigestionEnzymeDB.h:132
virtual ~DigestionEnzymeDB()
destructor
Definition: DigestionEnzymeDB.h:57
ConstEnzymeIterator beginEnzyme() const
Definition: DigestionEnzymeDB.h:131
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:96
std::set< const DigestionEnzymeType * >::const_iterator ConstEnzymeIterator
Definition: DigestionEnzymeDB.h:38
void readEnzymesFromFile_(const String &filename)
reads enzymes from the given file
Definition: DigestionEnzymeDB.h:156
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:72
bool hasEnzyme(const DigestionEnzymeType *enzyme) const
returns true if the db contains the enzyme of the given pointer
Definition: DigestionEnzymeDB.h:122
bool hasEnzyme(const String &name) const
returns true if the db contains an enzyme with the given name (supports synonym names)
Definition: DigestionEnzymeDB.h:110
std::set< const DigestionEnzymeType * > const_enzymes_
set of enzymes
Definition: DigestionEnzymeDB.h:240
DigestionEnzymeDB & operator=(const DigestionEnzymeDB &enzymes_db)=delete
assignment operator
std::map< String, const DigestionEnzymeType * > enzyme_names_
index by names
Definition: DigestionEnzymeDB.h:236
bool hasRegEx(const String &cleavage_regex) const
returns true if the db contains an enzyme with the given regex
Definition: DigestionEnzymeDB.h:116
void addEnzyme_(const DigestionEnzymeType *enzyme)
add to internal data; also update indices for search by name and regex
Definition: DigestionEnzymeDB.h:216
Exception base class.
Definition: Exception.h:65
Element could not be found exception.
Definition: Exception.h:650
A method or algorithm argument contains illegal values.
Definition: Exception.h:624
Parse Error exception.
Definition: Exception.h:598
static String find(const String &filename, StringList directories=StringList())
Looks up the location of the file filename.
The file pendant of the Param class used to load and store the param datastructure as paramXML.
Definition: ParamXMLFile.h:25
void load(const String &filename, Param &param)
Read XML file.
Forward const iterator for the Param class.
Definition: Param.h:168
std::string getName() const
Returns the absolute path of the current element (including all sections)
Management and storage of parameters / INI files.
Definition: Param.h:44
ParamIterator begin() const
Begin iterator for the internal tree.
bool empty() const
Returns if there are no entries.
ParamIterator end() const
End iterator for the internal tree.
A more convenient string class.
Definition: String.h:34
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & toLower()
Converts the string to lowercase.
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:340
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22