OpenMS
StringUtilsSimple.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg, Chris Bielow $
6 // $Authors: Marc Sturm, Stephan Aiche, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/CONCEPT/Types.h>
14 
15 #include <algorithm>
16 #include <cmath>
17 #include <sstream>
18 #include <string_view>
19 #include <string>
20 #include <vector>
21 
22 namespace OpenMS
23 {
24  class String;
25 
26  namespace StringUtils
27  {
28 
29  //
31  //
32  static inline String numberLength(double d, UInt n)
33  {
34  std::stringstream s;
35  //reserve one space for the minus sign
36  Int sign = 0;
37  if (d < 0)
38  sign = 1;
39  d = fabs(d);
40 
41  if (d < pow(10.0, Int(n - sign - 2)))
42  {
43  s.precision(writtenDigits(d));
44  if (sign == 1)
45  s << "-";
46  s << d;
47  }
48  else
49  {
50  UInt exp = 0;
51  while (d > pow(10.0, Int(n - sign - 4)))
52  {
53  d /= 10;
54  ++exp;
55  }
56  d = Int(d) / 10.0;
57  exp += 1;
58  if (sign == 1)
59  s << "-";
60  s << d << "e";
61  if (exp < 10)
62  s << "0";
63  s << exp;
64  }
65  return s.str().substr(0, n);
66  }
67 
68  static inline String& fillLeft(String & this_s, char c, UInt size)
69  {
70  if (this_s.size() < size)
71  {
72  this_s.std::string::operator=(String(size - this_s.size(), c) + this_s);
73  }
74  return this_s;
75  }
76 
77  static inline String& fillRight(String & this_s, char c, UInt size)
78  {
79  if (this_s.size() < size)
80  {
81  this_s.std::string::operator=(this_s + String(size - this_s.size(), c));
82  }
83  return this_s;
84  }
85 
86  static inline bool hasPrefix(const String & this_s, const String & string)
87  {
88  if (string.size() > this_s.size())
89  {
90  return false;
91  }
92  if (string.empty())
93  {
94  return true;
95  }
96  return this_s.compare(0, string.size(), string) == 0;
97  }
98 
99  static inline bool hasSuffix(const String & this_s, const String& string)
100  {
101  if (string.size() > this_s.size())
102  {
103  return false;
104  }
105  if (string.empty())
106  {
107  return true;
108  }
109  return this_s.compare(this_s.size() - string.size(), string.size(), string) == 0;
110  }
111 
112  static inline bool hasSubstring(const String & this_s, const String& string)
113  {
114  return this_s.find(string) != std::string::npos;
115  }
116 
117  static inline bool has(const String & this_s, Byte byte)
118  {
119  return this_s.find(char(byte)) != std::string::npos;
120  }
121 
122  static inline String prefix(const String & this_s, size_t length)
123  {
124  if (length > this_s.size())
125  {
126  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
127  }
128  return this_s.substr(0, length);
129  }
130 
131  static inline String suffix(const String & this_s, size_t length)
132  {
133  if (length > this_s.size())
134  {
135  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
136  }
137  return this_s.substr(this_s.size() - length, length);
138  }
139 
140  static inline String prefix(const String & this_s, Int length)
141  {
142  if (length < 0)
143  {
144  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
145  }
146  if (length > Int(this_s.size()))
147  {
148  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
149  }
150  return this_s.substr(0, length);
151  }
152 
153  static inline String suffix(const String & this_s, Int length)
154  {
155  if (length < 0)
156  {
157  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
158  }
159  if (length > Int(this_s.size()))
160  {
161  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
162  }
163  return this_s.substr(this_s.size() - length, length);
164  }
165 
166  static inline String prefix(const String & this_s, char delim)
167  {
168  Size pos = this_s.find(delim);
169  if (pos == std::string::npos) //char not found
170  {
171  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
172  String(delim));
173  }
174  return this_s.substr(0, pos);
175  }
176 
177  static inline String suffix(const String & this_s, char delim)
178  {
179  Size pos = this_s.rfind(delim);
180  if (pos == std::string::npos) //char not found
181  {
182  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
183  String(delim));
184  }
185  return this_s.substr(++pos);
186  }
187 
188  static inline String substr(const String & this_s, size_t pos, size_t n)
189  {
190  Size begin = std::min(pos, this_s.size());
191  return static_cast<String>(this_s.std::string::substr(begin, n));
192  }
193 
194  static inline String chop(const String & this_s, Size n)
195  {
196  Size end = 0;
197  if (n < this_s.size())
198  {
199  end = this_s.size() - n;
200  }
201  return String(this_s.begin(), this_s.begin() + end);
202  }
203 
204  static inline String& trim(String & this_s)
205  {
206  //search for the begin of truncated string
207  std::string::iterator begin = this_s.begin();
208  while (begin != this_s.end() && (*begin == ' ' || *begin == '\t' || *begin == '\n' || *begin == '\r'))
209  {
210  ++begin;
211  }
212 
213  //all characters are whitespaces
214  if (begin == this_s.end())
215  {
216  this_s.clear();
217  return this_s;
218  }
219 
220  //search for the end of truncated string
221  std::string::iterator end = this_s.end();
222  end--;
223  while (end != begin && (*end == ' ' || *end == '\n' || *end == '\t' || *end == '\r'))
224  {
225  --end;
226  }
227  ++end;
228 
229  //no characters are whitespaces
230  if (begin == this_s.begin() && end == this_s.end())
231  {
232  return this_s;
233  }
234 
235  // TODO:
236  // string::operator=(std::string(begin, end));
237  this_s.std::string::operator=(std::string(begin, end));
238 
239  return this_s;
240  }
241 
242  static inline bool isQuoted(const String & this_s, char q)
243  {
244  return (this_s.size() < 2) || (this_s[0] != q) || (this_s[this_s.size() - 1] != q);
245  }
246 
247  static inline String& quote(String & this_s, char q, String::QuotingMethod method)
248  {
249  if (method == String::ESCAPE)
250  {
251  this_s.substitute(String(R"(\)"), String(R"(\\)"));
252  this_s.substitute(String(q), R"(\)" + String(q));
253  }
254  else if (method == String::DOUBLE)
255  this_s.substitute(String(q), String(q) + String(q));
256  this_s.std::string::operator=(q + this_s + q);
257  return this_s;
258  }
259 
260  static inline String& unquote(String & this_s, char q, String::QuotingMethod method)
261  {
262  // check if input string matches output format of the "quote" method:
263  if (isQuoted(this_s, q))
264  {
266  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
267  "'" + this_s + "' does not have the expected format of a quoted string");
268  }
269  this_s.std::string::operator=(this_s.substr(1, this_s.size() - 2)); // remove quotation marks
270  if (method == String::ESCAPE)
271  {
272  this_s.substitute(R"(\)" + String(q), String(q));
273  this_s.substitute(String(R"(\\)"), String(R"(\)"));
274  }
275  else if (method == String::DOUBLE)
276  this_s.substitute(String(q) + String(q), String(q));
277  return this_s;
278  }
279 
280  static inline String& simplify(String & this_s)
281  {
282  String simple;
283 
284  bool last_was_whitespace = false;
285  for (std::string::iterator it = this_s.begin(); it != this_s.end(); ++it)
286  {
287  if (*it == ' ' || *it == '\n' || *it == '\t' || *it == '\r')
288  {
289  if (!last_was_whitespace)
290  {
291  simple += ' ';
292  }
293  last_was_whitespace = true;
294  }
295  else
296  {
297  simple += *it;
298  last_was_whitespace = false;
299  }
300  }
301 
302  this_s.swap(simple);
303  return this_s;
304  }
305 
306  static inline String random(UInt length)
307  {
308  srand(time(nullptr));
309  String tmp(length, '.');
310  size_t random;
311  for (Size i = 0; i < length; ++i)
312  {
313  random = static_cast<size_t>(floor((static_cast<double>(rand()) / (double(RAND_MAX) + 1)) * 62.0));
314  if (random < 10)
315  {
316  tmp[i] = static_cast<char>(random + 48);
317  }
318  else if (random < 36)
319  {
320  tmp[i] = static_cast<char>(random + 55);
321  }
322  else
323  {
324  tmp[i] = static_cast<char>(random + 61);
325  }
326  }
327  return tmp;
328  }
329 
330  static inline String& reverse(String & this_s)
331  {
332  String tmp = this_s;
333  for (Size i = 0; i != this_s.size(); ++i)
334  {
335  this_s[i] = tmp[this_s.size() - 1 - i];
336  }
337  return this_s;
338  }
339 
340  static inline bool split(const String & this_s, const char splitter, std::vector<String>& substrings,
341  bool quote_protect)
342  {
343  substrings.clear();
344  if (this_s.empty())
345  return false;
346 
347  Size nsplits = count(this_s.begin(), this_s.end(), splitter);
348 
349  if (!quote_protect && (nsplits == 0))
350  {
351  substrings.push_back(this_s);
352  return false;
353  }
354 
355  // splitter(s) found
356  substrings.reserve(nsplits + 1);
357 
358  // why is "this_s." needed here?
359  std::string::const_iterator begin = this_s.begin();
360  std::string::const_iterator end = this_s.begin();
361 
362  if (quote_protect)
363  {
364  Int quote_count(0);
365  for (; end != this_s.end(); ++end)
366  {
367  if (*end == '"')
368  {
369  ++quote_count;
370  }
371  if ((quote_count % 2 == 0) && (*end == splitter))
372  {
373  String block = String(begin, end);
374  block.trim();
375  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
376  (block.suffix(1) == String("\""))))
377  { // block has start or end quote, but not both
378  // (one quote is somewhere in the middle)
380  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
381  String("Could not dequote string '") + block +
382  "' due to wrongly placed '\"'.");
383  }
384  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
385  (block.suffix(1) == String("\"")))
386  { // block has start and end quotes --> remove them
387  block = block.substr(1, block.size() - 2);
388  }
389  substrings.push_back(block);
390  begin = end + 1;
391  }
392  }
393  // no valid splitter found - return empty list
394  if (substrings.empty())
395  {
396  substrings.push_back(this_s);
397  return false;
398  }
399 
400  String block = String(begin, end);
401  block.trim();
402  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
403  (block.suffix(1) == String("\""))))
404  { // block has start or end quote but not both
405  // (one quote is somewhere in the middle)
407  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
408  String("Could not dequote string '") + block +
409  "' due to wrongly placed '\"'.");
410  }
411  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
412  (block.suffix(1) == String("\"")))
413  { // block has start and end quotes --> remove them
414  block = block.substr(1, block.size() - 2);
415  }
416  substrings.push_back(block);
417  }
418  else // do not honor quotes
419  {
420  for (; end != this_s.end(); ++end)
421  {
422  if (*end == splitter)
423  {
424  substrings.push_back(String(begin, end));
425  begin = end + 1;
426  }
427  }
428  substrings.push_back(String(begin, end));
429  }
430 
431  // at this point we are sure that there are at least two components
432  return true;
433  }
434 
435  static inline bool split(const String & this_s, const String& splitter, std::vector<String>& substrings)
436  {
437  substrings.clear();
438  if (this_s.empty())
439  return false;
440 
441  if (splitter.empty()) // split after every character:
442  {
443  substrings.resize(this_s.size());
444  for (Size i = 0; i < this_s.size(); ++i)
445  substrings[i] = this_s[i];
446  return true;
447  }
448 
449  Size len = splitter.size(), start = 0, pos = this_s.find(splitter);
450  if (len == 0)
451  len = 1;
452  while (pos != std::string::npos)
453  {
454  substrings.push_back(this_s.substr(start, pos - start));
455  start = pos + len;
456  pos = this_s.find(splitter, start);
457  }
458  substrings.push_back(this_s.substr(start, this_s.size() - start));
459  return substrings.size() > 1;
460  }
461 
462  static inline bool split_quoted(const String & this_s, const String& splitter, std::vector<String>& substrings,
463  char q, String::QuotingMethod method)
464  {
465  substrings.clear();
466  if (this_s.empty() || splitter.empty())
467  return false;
468 
469  bool in_quote = false;
470  char targets[2] = {q, splitter[0]}; // targets for "find_first_of"
471  std::string rest = splitter.substr(1, splitter.size() - 1);
472  Size start = 0;
473  for (Size i = 0; i < this_s.size(); ++i)
474  {
475  if (in_quote) // skip to closing quotation mark
476  {
477  bool embedded = false;
478  if (method == String::ESCAPE)
479  {
480  for (; i < this_s.size(); ++i)
481  {
482  if (this_s[i] == '\\')
483  embedded = !embedded;
484  else if ((this_s[i] == q) && !embedded)
485  break;
486  else
487  embedded = false;
488  }
489  }
490  else // method: NONE or DOUBLE
491  {
492  for (; i < this_s.size(); ++i)
493  {
494  if (this_s[i] == q)
495  {
496  if (method == String::NONE)
497  break; // found
498  // next character is also closing quotation mark:
499  if ((i < this_s.size() - 1) && (this_s[i + 1] == q))
500  embedded = !embedded;
501  // even number of subsequent quotes (doubled) => found
502  else if (!embedded)
503  break;
504  // odd number of subsequent quotes => belongs to a pair
505  else
506  embedded = false;
507  }
508  }
509  }
510  in_quote = false; // end of quote reached
511  }
512  else
513  {
514  i = this_s.find_first_of(targets, i, 2);
515  if (i == std::string::npos)
516  break; // nothing found
517  if (this_s[i] == q)
518  in_quote = true;
519  else if (this_s.compare(i + 1, rest.size(), rest) == 0) // splitter found
520  {
521  substrings.push_back(this_s.substr(start, i - start));
522  start = i + splitter.size();
523  i = start - 1; // increased by loop
524  }
525  }
526  }
527  if (in_quote) // reached end without finding closing quotation mark
528  {
530  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
531  "unbalanced quotation marks in string '" + this_s + "'");
532  }
533  substrings.push_back(this_s.substr(start, this_s.size() - start));
534  return substrings.size() > 1;
535  }
536 
537  static inline String& toUpper(String & this_s)
538  {
539  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))toupper);
540  return this_s;
541  }
542 
543  static inline String& firstToUpper(String & this_s)
544  {
545  if (!this_s.empty())
546  {
547  this_s[0] = toupper(this_s[0]);
548  }
549  return this_s;
550  }
551 
552  static inline String& toLower(String & this_s)
553  {
554  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))tolower);
555  return this_s;
556  }
557 
558  static inline String& substitute(String & this_s, char from, char to)
559  {
560  std::replace(this_s.begin(), this_s.end(), from, to);
561  return this_s;
562  }
563 
564  static inline String& substitute(String & this_s, const String& from, const String& to)
565  {
566  if (!from.empty())
567  {
568  std::vector<String> parts;
569  this_s.split(from, parts);
570  this_s.concatenate(parts.begin(), parts.end(), to);
571  }
572  return this_s;
573  }
574 
575  static inline String& remove(String & this_s, char what)
576  {
577  this_s.erase(std::remove(this_s.begin(), this_s.end(), what), this_s.end());
578  return this_s;
579  }
580 
581  static inline String& ensureLastChar(String & this_s, char end)
582  {
583  if (!this_s.hasSuffix(end))
584  this_s.append(1, end);
585  return this_s;
586  }
587 
593  OPENMS_DLLAPI const char* skipWhitespace(const char* p, const char* p_end);
594 
598  inline int skipWhitespace(const std::string_view& data)
599  {
600  auto pos = skipWhitespace(data.data(), data.data() + data.size());
601  return pos - data.data();
602  }
603 
609  OPENMS_DLLAPI const char* skipNonWhitespace(const char* p, const char* p_end);
610 
614  inline int skipNonWhitespace(const std::string_view& data)
615  {
616  auto pos = skipNonWhitespace(data.data(), data.data() + data.size());
617  return pos - data.data();
618  }
619 
620  static inline String& removeWhitespaces(String& this_s)
621  {
622  auto start = skipNonWhitespace(std::string_view(this_s.data()));
623  std::string::const_iterator it = this_s.begin() + start;
624  std::string::iterator dest = this_s.begin() + start;
625  std::string::const_iterator it_end = this_s.end();
626  bool has_spaces(false);
627  while (it != it_end)
628  {
629  const char c = *it;
630  if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
631  {
632  ++it;
633  has_spaces = true;
634  continue; // no need to copy a whitespace
635  }
636  // copy to the left, if we had a whitespace before
637  if (has_spaces) *dest = *it;
638  // advance both
639  ++dest;
640  ++it;
641  }
642 
643  // shorten result
644  if (has_spaces) this_s.resize(dest - this_s.begin());
645 
646  return this_s;
647  }
648 
649  }
650 
651 } // namespace OPENMS
652 
Invalid conversion exception.
Definition: Exception.h:330
Element could not be found exception.
Definition: Exception.h:650
Int overflow exception.
Definition: Exception.h:221
Int underflow exception.
Definition: Exception.h:183
A more convenient string class.
Definition: String.h:34
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
String prefix(SizeType length) const
returns the prefix of length length
QuotingMethod
How to handle embedded quotes when quoting strings.
Definition: String.h:55
@ ESCAPE
Definition: String.h:55
@ DOUBLE
Definition: String.h:55
@ NONE
Definition: String.h:55
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
String suffix(SizeType length) const
returns the suffix of length length
void concatenate(StringIterator first, StringIterator last, const String &glue="")
Concatenates all elements from first to last-1 and inserts glue between the elements.
Definition: String.h:472
OPENMS_BYTE_TYPE Byte
Byte type.
Definition: Types.h:85
int Int
Signed integer type.
Definition: Types.h:76
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
const double c
Definition: Constants.h:188
static bool hasPrefix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:86
static String & firstToUpper(String &this_s)
Definition: StringUtilsSimple.h:543
static String & remove(String &this_s, char what)
Definition: StringUtilsSimple.h:575
static String chop(const String &this_s, Size n)
Definition: StringUtilsSimple.h:194
static String & reverse(String &this_s)
Definition: StringUtilsSimple.h:330
static bool isQuoted(const String &this_s, char q)
Definition: StringUtilsSimple.h:242
static String numberLength(double d, UInt n)
Functions.
Definition: StringUtilsSimple.h:32
static String & substitute(String &this_s, char from, char to)
Definition: StringUtilsSimple.h:558
static bool hasSuffix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:99
static String & toLower(String &this_s)
Definition: StringUtilsSimple.h:552
const char * skipWhitespace(const char *p, const char *p_end)
Get the first non-whitespace character (anything but \n, \t, \r, ' ') in the string pointed to by p (w...
static String & ensureLastChar(String &this_s, char end)
Definition: StringUtilsSimple.h:581
const char * skipNonWhitespace(const char *p, const char *p_end)
Get the first whitespace character (\n, \t, \r, ' ') in the string pointed to by p (where p_end is pas...
static String random(UInt length)
Definition: StringUtilsSimple.h:306
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:204
static String & simplify(String &this_s)
Definition: StringUtilsSimple.h:280
static String & unquote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:260
static String & removeWhitespaces(String &this_s)
Definition: StringUtilsSimple.h:620
static String & toUpper(String &this_s)
Definition: StringUtilsSimple.h:537
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:340
static bool hasSubstring(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:112
static String & fillRight(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:77
static bool has(const String &this_s, Byte byte)
Definition: StringUtilsSimple.h:117
static String & quote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:247
static bool split_quoted(const String &this_s, const String &splitter, std::vector< String > &substrings, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:462
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:131
static String substr(const String &this_s, size_t pos, size_t n)
Definition: StringUtilsSimple.h:188
static String & fillLeft(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:68
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:122
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
constexpr Int writtenDigits(const FloatingPointType &=FloatingPointType())
Number of digits commonly used for writing a floating point type (a.k.a. precision)....
Definition: Types.h:268