OpenMS  2.8.0
StringUtilsSimple.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg, Chris Bielow $
32 // $Authors: Marc Sturm, Stephan Aiche, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
#include <OpenMS/CONCEPT/Types.h>

#include <algorithm>
#include <cctype>
#include <cmath>
#include <random>
#include <sstream>
#include <string>
#include <vector>
46 
47 namespace OpenMS
48 {
49  class String;
50 
51  namespace StringUtils
52  {
53 
54  //
56  //
57  static inline String numberLength(double d, UInt n)
58  {
59  std::stringstream s;
60  //reserve one space for the minus sign
61  Int sign = 0;
62  if (d < 0)
63  sign = 1;
64  d = fabs(d);
65 
66  if (d < pow(10.0, Int(n - sign - 2)))
67  {
68  s.precision(writtenDigits(d));
69  if (sign == 1)
70  s << "-";
71  s << d;
72  }
73  else
74  {
75  UInt exp = 0;
76  while (d > pow(10.0, Int(n - sign - 4)))
77  {
78  d /= 10;
79  ++exp;
80  }
81  d = Int(d) / 10.0;
82  exp += 1;
83  if (sign == 1)
84  s << "-";
85  s << d << "e";
86  if (exp < 10)
87  s << "0";
88  s << exp;
89  }
90  return s.str().substr(0, n);
91  }
92 
93  static inline String& fillLeft(String & this_s, char c, UInt size)
94  {
95  if (this_s.size() < size)
96  {
97  this_s.std::string::operator=(String(size - this_s.size(), c) + this_s);
98  }
99  return this_s;
100  }
101 
102  static inline String& fillRight(String & this_s, char c, UInt size)
103  {
104  if (this_s.size() < size)
105  {
106  this_s.std::string::operator=(this_s + String(size - this_s.size(), c));
107  }
108  return this_s;
109  }
110 
111  static inline bool hasPrefix(const String & this_s, const String & string)
112  {
113  if (string.size() > this_s.size())
114  {
115  return false;
116  }
117  if (string.empty())
118  {
119  return true;
120  }
121  return this_s.compare(0, string.size(), string) == 0;
122  }
123 
124  static inline bool hasSuffix(const String & this_s, const String& string)
125  {
126  if (string.size() > this_s.size())
127  {
128  return false;
129  }
130  if (string.empty())
131  {
132  return true;
133  }
134  return this_s.compare(this_s.size() - string.size(), string.size(), string) == 0;
135  }
136 
137  static inline bool hasSubstring(const String & this_s, const String& string)
138  {
139  return this_s.find(string) != std::string::npos;
140  }
141 
142  static inline bool has(const String & this_s, Byte byte)
143  {
144  return this_s.find(char(byte)) != std::string::npos;
145  }
146 
147  static inline String prefix(const String & this_s, size_t length)
148  {
149  if (length > this_s.size())
150  {
151  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
152  }
153  return this_s.substr(0, length);
154  }
155 
156  static inline String suffix(const String & this_s, size_t length)
157  {
158  if (length > this_s.size())
159  {
160  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
161  }
162  return this_s.substr(this_s.size() - length, length);
163  }
164 
165  static inline String prefix(const String & this_s, Int length)
166  {
167  if (length < 0)
168  {
169  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
170  }
171  if (length > Int(this_s.size()))
172  {
173  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
174  }
175  return this_s.substr(0, length);
176  }
177 
178  static inline String suffix(const String & this_s, Int length)
179  {
180  if (length < 0)
181  {
182  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
183  }
184  if (length > Int(this_s.size()))
185  {
186  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
187  }
188  return this_s.substr(this_s.size() - length, length);
189  }
190 
191  static inline String prefix(const String & this_s, char delim)
192  {
193  Size pos = this_s.find(delim);
194  if (pos == std::string::npos) //char not found
195  {
196  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
197  String(delim));
198  }
199  return this_s.substr(0, pos);
200  }
201 
202  static inline String suffix(const String & this_s, char delim)
203  {
204  Size pos = this_s.rfind(delim);
205  if (pos == std::string::npos) //char not found
206  {
207  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
208  String(delim));
209  }
210  return this_s.substr(++pos);
211  }
212 
213  static inline String substr(const String & this_s, size_t pos, size_t n)
214  {
215  Size begin = std::min(pos, this_s.size());
216  return static_cast<String>(this_s.std::string::substr(begin, n));
217  }
218 
219  static inline String chop(const String & this_s, Size n)
220  {
221  Size end = 0;
222  if (n < this_s.size())
223  {
224  end = this_s.size() - n;
225  }
226  return String(this_s.begin(), this_s.begin() + end);
227  }
228 
229  static inline String& trim(String & this_s)
230  {
231  //search for the begin of truncated string
232  std::string::iterator begin = this_s.begin();
233  while (begin != this_s.end() && (*begin == ' ' || *begin == '\t' || *begin == '\n' || *begin == '\r'))
234  {
235  ++begin;
236  }
237 
238  //all characters are whitespaces
239  if (begin == this_s.end())
240  {
241  this_s.clear();
242  return this_s;
243  }
244 
245  //search for the end of truncated string
246  std::string::iterator end = this_s.end();
247  end--;
248  while (end != begin && (*end == ' ' || *end == '\n' || *end == '\t' || *end == '\r'))
249  {
250  --end;
251  }
252  ++end;
253 
254  //no characters are whitespaces
255  if (begin == this_s.begin() && end == this_s.end())
256  {
257  return this_s;
258  }
259 
260  // TODO:
261  // string::operator=(std::string(begin, end));
262  this_s.std::string::operator=(std::string(begin, end));
263 
264  return this_s;
265  }
266 
267  static inline String& quote(String & this_s, char q, String::QuotingMethod method)
268  {
269  if (method == String::ESCAPE)
270  {
271  this_s.substitute(String(R"(\)"), String(R"(\\)"));
272  this_s.substitute(String(q), R"(\)" + String(q));
273  }
274  else if (method == String::DOUBLE)
275  this_s.substitute(String(q), String(q) + String(q));
276  this_s.std::string::operator=(q + this_s + q);
277  return this_s;
278  }
279 
280  static inline String& unquote(String & this_s, char q, String::QuotingMethod method)
281  {
282  // check if input string matches output format of the "quote" method:
283  if ((this_s.size() < 2) || (this_s[0] != q) || (this_s[this_s.size() - 1] != q))
284  {
286  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
287  "'" + this_s + "' does not have the expected format of a quoted string");
288  }
289  this_s.std::string::operator=(this_s.substr(1, this_s.size() - 2)); // remove quotation marks
290  if (method == String::ESCAPE)
291  {
292  this_s.substitute(R"(\)" + String(q), String(q));
293  this_s.substitute(String(R"(\\)"), String(R"(\)"));
294  }
295  else if (method == String::DOUBLE)
296  this_s.substitute(String(q) + String(q), String(q));
297  return this_s;
298  }
299 
300  static inline String& simplify(String & this_s)
301  {
302  String simple;
303 
304  bool last_was_whitespace = false;
305  for (std::string::iterator it = this_s.begin(); it != this_s.end(); ++it)
306  {
307  if (*it == ' ' || *it == '\n' || *it == '\t' || *it == '\r')
308  {
309  if (!last_was_whitespace)
310  {
311  simple += ' ';
312  }
313  last_was_whitespace = true;
314  }
315  else
316  {
317  simple += *it;
318  last_was_whitespace = false;
319  }
320  }
321 
322  this_s.swap(simple);
323  return this_s;
324  }
325 
326  static inline String random(UInt length)
327  {
328  srand(time(nullptr));
329  String tmp(length, '.');
330  size_t random;
331  for (Size i = 0; i < length; ++i)
332  {
333  random = static_cast<size_t>(floor((static_cast<double>(rand()) / (double(RAND_MAX) + 1)) * 62.0));
334  if (random < 10)
335  {
336  tmp[i] = static_cast<char>(random + 48);
337  }
338  else if (random < 36)
339  {
340  tmp[i] = static_cast<char>(random + 55);
341  }
342  else
343  {
344  tmp[i] = static_cast<char>(random + 61);
345  }
346  }
347  return tmp;
348  }
349 
350  static inline String& reverse(String & this_s)
351  {
352  String tmp = this_s;
353  for (Size i = 0; i != this_s.size(); ++i)
354  {
355  this_s[i] = tmp[this_s.size() - 1 - i];
356  }
357  return this_s;
358  }
359 
360  static inline bool split(const String & this_s, const char splitter, std::vector<String>& substrings,
361  bool quote_protect)
362  {
363  substrings.clear();
364  if (this_s.empty())
365  return false;
366 
367  Size nsplits = count(this_s.begin(), this_s.end(), splitter);
368 
369  if (!quote_protect && (nsplits == 0))
370  {
371  substrings.push_back(this_s);
372  return false;
373  }
374 
375  // splitter(s) found
376  substrings.reserve(nsplits + 1);
377 
378  // why is "this_s." needed here?
379  std::string::const_iterator begin = this_s.begin();
380  std::string::const_iterator end = this_s.begin();
381 
382  if (quote_protect)
383  {
384  Int quote_count(0);
385  for (; end != this_s.end(); ++end)
386  {
387  if (*end == '"')
388  {
389  ++quote_count;
390  }
391  if ((quote_count % 2 == 0) && (*end == splitter))
392  {
393  String block = String(begin, end);
394  block.trim();
395  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
396  (block.suffix(1) == String("\""))))
397  { // block has start or end quote, but not both
398  // (one quote is somewhere in the middle)
400  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
401  String("Could not dequote string '") + block +
402  "' due to wrongly placed '\"'.");
403  }
404  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
405  (block.suffix(1) == String("\"")))
406  { // block has start and end quotes --> remove them
407  block = block.substr(1, block.size() - 2);
408  }
409  substrings.push_back(block);
410  begin = end + 1;
411  }
412  }
413  // no valid splitter found - return empty list
414  if (substrings.empty())
415  {
416  substrings.push_back(this_s);
417  return false;
418  }
419 
420  String block = String(begin, end);
421  block.trim();
422  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
423  (block.suffix(1) == String("\""))))
424  { // block has start or end quote but not both
425  // (one quote is somewhere in the middle)
427  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
428  String("Could not dequote string '") + block +
429  "' due to wrongly placed '\"'.");
430  }
431  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
432  (block.suffix(1) == String("\"")))
433  { // block has start and end quotes --> remove them
434  block = block.substr(1, block.size() - 2);
435  }
436  substrings.push_back(block);
437  }
438  else // do not honor quotes
439  {
440  for (; end != this_s.end(); ++end)
441  {
442  if (*end == splitter)
443  {
444  substrings.push_back(String(begin, end));
445  begin = end + 1;
446  }
447  }
448  substrings.push_back(String(begin, end));
449  }
450 
451  // at this point we are sure that there are at least two components
452  return true;
453  }
454 
455  static inline bool split(const String & this_s, const String& splitter, std::vector<String>& substrings)
456  {
457  substrings.clear();
458  if (this_s.empty())
459  return false;
460 
461  if (splitter.empty()) // split after every character:
462  {
463  substrings.resize(this_s.size());
464  for (Size i = 0; i < this_s.size(); ++i)
465  substrings[i] = this_s[i];
466  return true;
467  }
468 
469  Size len = splitter.size(), start = 0, pos = this_s.find(splitter);
470  if (len == 0)
471  len = 1;
472  while (pos != std::string::npos)
473  {
474  substrings.push_back(this_s.substr(start, pos - start));
475  start = pos + len;
476  pos = this_s.find(splitter, start);
477  }
478  substrings.push_back(this_s.substr(start, this_s.size() - start));
479  return substrings.size() > 1;
480  }
481 
482  static inline bool split_quoted(const String & this_s, const String& splitter, std::vector<String>& substrings,
483  char q, String::QuotingMethod method)
484  {
485  substrings.clear();
486  if (this_s.empty() || splitter.empty())
487  return false;
488 
489  bool in_quote = false;
490  char targets[2] = {q, splitter[0]}; // targets for "find_first_of"
491  std::string rest = splitter.substr(1, splitter.size() - 1);
492  Size start = 0;
493  for (Size i = 0; i < this_s.size(); ++i)
494  {
495  if (in_quote) // skip to closing quotation mark
496  {
497  bool embedded = false;
498  if (method == String::ESCAPE)
499  {
500  for (; i < this_s.size(); ++i)
501  {
502  if (this_s[i] == '\\')
503  embedded = !embedded;
504  else if ((this_s[i] == q) && !embedded)
505  break;
506  else
507  embedded = false;
508  }
509  }
510  else // method: NONE or DOUBLE
511  {
512  for (; i < this_s.size(); ++i)
513  {
514  if (this_s[i] == q)
515  {
516  if (method == String::NONE)
517  break; // found
518  // next character is also closing quotation mark:
519  if ((i < this_s.size() - 1) && (this_s[i + 1] == q))
520  embedded = !embedded;
521  // even number of subsequent quotes (doubled) => found
522  else if (!embedded)
523  break;
524  // odd number of subsequent quotes => belongs to a pair
525  else
526  embedded = false;
527  }
528  }
529  }
530  in_quote = false; // end of quote reached
531  }
532  else
533  {
534  i = this_s.find_first_of(targets, i, 2);
535  if (i == std::string::npos)
536  break; // nothing found
537  if (this_s[i] == q)
538  in_quote = true;
539  else if (this_s.compare(i + 1, rest.size(), rest) == 0) // splitter found
540  {
541  substrings.push_back(this_s.substr(start, i - start));
542  start = i + splitter.size();
543  i = start - 1; // increased by loop
544  }
545  }
546  }
547  if (in_quote) // reached end without finding closing quotation mark
548  {
550  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
551  "unbalanced quotation marks in string '" + this_s + "'");
552  }
553  substrings.push_back(this_s.substr(start, this_s.size() - start));
554  return substrings.size() > 1;
555  }
556 
557  static inline String& toUpper(String & this_s)
558  {
559  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))toupper);
560  return this_s;
561  }
562 
563  static inline String& firstToUpper(String & this_s)
564  {
565  if (!this_s.empty())
566  {
567  this_s[0] = toupper(this_s[0]);
568  }
569  return this_s;
570  }
571 
572  static inline String& toLower(String & this_s)
573  {
574  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))tolower);
575  return this_s;
576  }
577 
578  static inline String& substitute(String & this_s, char from, char to)
579  {
580  std::replace(this_s.begin(), this_s.end(), from, to);
581  return this_s;
582  }
583 
584  static inline String& substitute(String & this_s, const String& from, const String& to)
585  {
586  if (!from.empty())
587  {
588  std::vector<String> parts;
589  this_s.split(from, parts);
590  this_s.concatenate(parts.begin(), parts.end(), to);
591  }
592  return this_s;
593  }
594 
595  static inline String& remove(String & this_s, char what)
596  {
597  this_s.erase(std::remove(this_s.begin(), this_s.end(), what), this_s.end());
598  return this_s;
599  }
600 
601  static inline String& ensureLastChar(String & this_s, char end)
602  {
603  if (!this_s.hasSuffix(end))
604  this_s.append(1, end);
605  return this_s;
606  }
607 
608  static inline String& removeWhitespaces(String& this_s)
609  {
610  std::string::const_iterator it = this_s.begin();
611  std::string::iterator dest = this_s.begin();
612  std::string::const_iterator it_end = this_s.end();
613  bool has_spaces(false);
614  while (it != it_end)
615  {
616  const char c = *it;
617  if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
618  {
619  ++it;
620  has_spaces = true;
621  continue; // no need to copy a whitespace
622  }
623  // copy to the left, if we had a whitespace before
624  if (has_spaces) *dest = *it;
625  // advance both
626  ++dest;
627  ++it;
628  }
629 
630  // shorten result
631  if (has_spaces) this_s.resize(dest - this_s.begin());
632 
633  return this_s;
634  }
635 
636  }
637 
638 } // namespace OPENMS
639 
Invalid conversion exception.
Definition: Exception.h:356
Element could not be found exception.
Definition: Exception.h:676
Int overflow exception.
Definition: Exception.h:247
Int underflow exception.
Definition: Exception.h:209
A more convenient string class.
Definition: String.h:60
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
String prefix(SizeType length) const
returns the prefix of length length
QuotingMethod
How to handle embedded quotes when quoting strings.
Definition: String.h:81
@ ESCAPE
Definition: String.h:81
@ DOUBLE
Definition: String.h:81
@ NONE
Definition: String.h:81
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
String suffix(SizeType length) const
returns the suffix of length length
void concatenate(StringIterator first, StringIterator last, const String &glue="")
Concatenates all elements from first to last-1 and inserts glue between the elements.
Definition: String.h:466
OPENMS_BYTE_TYPE Byte
Byte type.
Definition: Types.h:111
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double c
Definition: Constants.h:209
static bool hasPrefix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:111
static String & firstToUpper(String &this_s)
Definition: StringUtilsSimple.h:563
static String & remove(String &this_s, char what)
Definition: StringUtilsSimple.h:595
static String chop(const String &this_s, Size n)
Definition: StringUtilsSimple.h:219
static String & reverse(String &this_s)
Definition: StringUtilsSimple.h:350
static String numberLength(double d, UInt n)
Functions.
Definition: StringUtilsSimple.h:57
static String & substitute(String &this_s, char from, char to)
Definition: StringUtilsSimple.h:578
static bool hasSuffix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:124
static String & toLower(String &this_s)
Definition: StringUtilsSimple.h:572
static String & ensureLastChar(String &this_s, char end)
Definition: StringUtilsSimple.h:601
static String random(UInt length)
Definition: StringUtilsSimple.h:326
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
static String & simplify(String &this_s)
Definition: StringUtilsSimple.h:300
static String & unquote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:280
static String & removeWhitespaces(String &this_s)
Definition: StringUtilsSimple.h:608
static String & toUpper(String &this_s)
Definition: StringUtilsSimple.h:557
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:360
static bool hasSubstring(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:137
static String & fillRight(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:102
static bool has(const String &this_s, Byte byte)
Definition: StringUtilsSimple.h:142
static String & quote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:267
static bool split_quoted(const String &this_s, const String &splitter, std::vector< String > &substrings, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:482
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:156
static String substr(const String &this_s, size_t pos, size_t n)
Definition: StringUtilsSimple.h:213
static String & fillLeft(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:93
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
constexpr Int writtenDigits(const FloatingPointType &=FloatingPointType())
Number of digits commonly used for writing a floating point type (a.k.a. precision)....
Definition: Types.h:294