OpenMS
Loading...
Searching...
No Matches
DigestionEnzymeDB.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Xiao Liang $
6// $Authors: Xiao Liang, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
15#include <OpenMS/SYSTEM/File.h>
16
17#include <set>
18#include <map>
19
20namespace OpenMS
21{
31 template<typename DigestionEnzymeType, typename InstanceType> class DigestionEnzymeDB
32 {
33 public:
34
38 typedef typename std::set<const DigestionEnzymeType*>::const_iterator ConstEnzymeIterator;
39 typedef typename std::set<const DigestionEnzymeType*>::iterator EnzymeIterator;
41
43 static InstanceType* getInstance()
44 {
45 static InstanceType* db_ = nullptr;
46 if (db_ == nullptr)
47 {
48 db_ = new InstanceType;
49 }
50 return db_;
51 }
52
58 {
59 for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
60 {
61 delete *it;
62 }
63 }
65
72 const DigestionEnzymeType* getEnzyme(const String& name) const
73 {
74 auto pos = enzyme_names_.find(name);
75 if (pos == enzyme_names_.end())
76 {
77 throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name);
78 }
79 return pos->second;
80 }
81
84 const DigestionEnzymeType* getEnzymeByRegEx(const String& cleavage_regex) const
85 {
86 if (!hasRegEx(cleavage_regex))
87 {
88 // @TODO: why does this use a different exception than "getEnzyme"?
89 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
90 String("Enzyme with regex " + cleavage_regex + " was not registered in Enzyme DB, register first!").c_str());
91 }
92 return enzyme_regex_.at(cleavage_regex);
93 }
94
96 void getAllNames(std::vector<String>& all_names) const
97 {
98 all_names.clear();
99 for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
100 {
101 all_names.push_back((*it)->getName());
102 }
103 }
105
110 bool hasEnzyme(const String& name) const
111 {
112 return (enzyme_names_.find(name) != enzyme_names_.end());
113 }
114
116 bool hasRegEx(const String& cleavage_regex) const
117 {
118 return (enzyme_regex_.find(cleavage_regex) != enzyme_regex_.end());
119 }
120
122 bool hasEnzyme(const DigestionEnzymeType* enzyme) const
123 {
124 return (const_enzymes_.find(enzyme) != const_enzymes_.end() );
125 }
127
131 inline ConstEnzymeIterator beginEnzyme() const { return const_enzymes_.begin(); } // we only allow constant iterators -- this DB is not meant to be modifiable
132 inline ConstEnzymeIterator endEnzyme() const { return const_enzymes_.end(); }
133
135 protected:
136 DigestionEnzymeDB(const String& db_file = "")
137 {
138 if (!db_file.empty())
139 {
140 readEnzymesFromFile_(db_file);
141 }
142 }
143
145 DigestionEnzymeDB(const DigestionEnzymeDB& enzymes_db) = delete;
147
152 DigestionEnzymeDB& operator=(const DigestionEnzymeDB& enzymes_db) = delete;
154
156 void readEnzymesFromFile_(const String& filename)
157 {
158 String file = File::find(filename);
159
160 Param param;
161 ParamXMLFile().load(file, param);
162 if (param.empty()) return;
163
164 std::vector<String> split;
165 String(param.begin().getName()).split(':', split);
166 if (split[0] != "Enzymes")
167 {
168 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, split[0], "name 'Enzymes' expected");
169 }
170
171 try
172 {
173 std::map<String, String> values;
174 String previous_enzyme = split[1];
175 // this iterates over all the "ITEM" elements in the XML file:
176 for (Param::ParamIterator it = param.begin(); it != param.end(); ++it)
177 {
178 String(it.getName()).split(':', split);
179 if (split[0] != "Enzymes") break; // unexpected content in the XML file
180 if (split[1] != previous_enzyme)
181 {
182 // add enzyme and reset:
183 addEnzyme_(parseEnzyme_(values));
184 previous_enzyme = split[1];
185 values.clear();
186 }
187 values[it.getName()] = String(it->value.toString());
188 }
189 // add last enzyme
190 addEnzyme_(parseEnzyme_(values));
191 }
192 catch (Exception::BaseException& e)
193 {
194 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, e.what(), "");
195 }
196 }
197
199 const DigestionEnzymeType* parseEnzyme_(std::map<String, String>& values) const
200 {
201 DigestionEnzymeType* enzy_ptr = new DigestionEnzymeType();
202
203 for (std::map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
204 {
205 const String& key = it->first;
206 const String& value = it->second;
207 if (!enzy_ptr->setValueFromFile(key, value))
208 {
209 OPENMS_LOG_ERROR << "Error while parsing enzymes file: unknown key '" << key << "' with value '" << value << "'" << std::endl;
210 }
211 }
212 return enzy_ptr;
213 }
214
216 void addEnzyme_(const DigestionEnzymeType* enzyme)
217 {
218 // add to internal storage
219 const_enzymes_.insert(enzyme);
220 // add to internal indices (by name and its synonyms)
221 String name = enzyme->getName();
222 enzyme_names_[name] = enzyme;
223 enzyme_names_[name.toLower()] = enzyme;
224 for (std::set<String>::const_iterator it = enzyme->getSynonyms().begin(); it != enzyme->getSynonyms().end(); ++it)
225 {
226 enzyme_names_[*it] = enzyme;
227 }
228 // ... and by regex
229 if (enzyme->getRegEx() != "")
230 {
231 enzyme_regex_[enzyme->getRegEx()] = enzyme;
232 }
233 return;
234 }
235
236 std::map<String, const DigestionEnzymeType*> enzyme_names_;
237
238 std::map<String, const DigestionEnzymeType*> enzyme_regex_;
239
240 std::set<const DigestionEnzymeType*> const_enzymes_;
241
242 };
243}
244
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition LogStream.h:442
subpage TOPP_TargetedFileConverter Converts targeted feature or consensus feature files subpage TOPP_FileInfo Shows basic information about the file
Definition TOPP.doxygen:44
Digestion enzyme database (base class)
Definition DigestionEnzymeDB.h:32
const DigestionEnzymeType * parseEnzyme_(std::map< String, String > &values) const
parses an enzyme, given the key/value pairs from an XML file
Definition DigestionEnzymeDB.h:199
static InstanceType * getInstance()
this member function serves as a replacement of the constructor
Definition DigestionEnzymeDB.h:43
DigestionEnzymeDB(const String &db_file="")
Definition DigestionEnzymeDB.h:136
DigestionEnzymeDB(const DigestionEnzymeDB &enzymes_db)=delete
copy constructor
std::map< String, const DigestionEnzymeType * > enzyme_regex_
index by regex
Definition DigestionEnzymeDB.h:238
ConstEnzymeIterator endEnzyme() const
Definition DigestionEnzymeDB.h:132
virtual ~DigestionEnzymeDB()
destructor
Definition DigestionEnzymeDB.h:57
ConstEnzymeIterator beginEnzyme() const
Definition DigestionEnzymeDB.h:131
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition DigestionEnzymeDB.h:72
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition DigestionEnzymeDB.h:96
void readEnzymesFromFile_(const String &filename)
reads enzymes from the given file
Definition DigestionEnzymeDB.h:156
DigestionEnzymeDB & operator=(const DigestionEnzymeDB &enzymes_db)=delete
assignment operator
std::set< const DigestionEnzymeType * >::const_iterator ConstEnzymeIterator
Definition DigestionEnzymeDB.h:38
std::set< const DigestionEnzymeType * >::iterator EnzymeIterator
Definition DigestionEnzymeDB.h:39
bool hasEnzyme(const DigestionEnzymeType *enzyme) const
returns true if the db contains the enzyme of the given pointer
Definition DigestionEnzymeDB.h:122
bool hasEnzyme(const String &name) const
returns true if the db contains an enzyme with the given name (supports synonym names)
Definition DigestionEnzymeDB.h:110
std::set< const DigestionEnzymeType * > const_enzymes_
set of enzymes
Definition DigestionEnzymeDB.h:240
std::map< String, const DigestionEnzymeType * > enzyme_names_
index by names
Definition DigestionEnzymeDB.h:236
const DigestionEnzymeType * getEnzymeByRegEx(const String &cleavage_regex) const
Definition DigestionEnzymeDB.h:84
bool hasRegEx(const String &cleavage_regex) const
returns true if the db contains an enzyme with the given regex
Definition DigestionEnzymeDB.h:116
void addEnzyme_(const DigestionEnzymeType *enzyme)
add to internal data; also update indices for search by name and regex
Definition DigestionEnzymeDB.h:216
Exception base class.
Definition Exception.h:63
Element could not be found exception.
Definition Exception.h:654
A method or algorithm argument contains illegal values.
Definition Exception.h:630
Parse Error exception.
Definition Exception.h:593
static String find(const String &filename, StringList directories=StringList())
Looks up the location of the file filename.
The file pendant of the Param class used to load and store the param datastructure as paramXML (i....
Definition ParamXMLFile.h:25
void load(const String &filename, Param &param)
Read XML file.
Forward const iterator for the Param class.
Definition Param.h:170
std::string getName() const
Returns the absolute path of the current element (including all sections)
Management and storage of parameters / INI files.
Definition Param.h:46
ParamIterator begin() const
Begin iterator for the internal tree.
bool empty() const
Returns if there are no entries.
ParamIterator end() const
End iterator for the internal tree.
A more convenient string class.
Definition String.h:34
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & toLower()
Converts the string to lowercase.
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19