OpenMS
Loading...
Searching...
No Matches
StringUtilsSimple.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg, Chris Bielow $
6// $Authors: Marc Sturm, Stephan Aiche, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
#include <algorithm>
#include <cctype>
#include <cmath>
#include <random>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
21
22namespace OpenMS
23{
24 class String;
25
26 namespace StringUtils
27 {
28
29 //
31 //
32 static inline String numberLength(double d, UInt n)
33 {
34 std::stringstream s;
35 //reserve one space for the minus sign
36 Int sign = 0;
37 if (d < 0)
38 sign = 1;
39 d = fabs(d);
40
41 if (d < pow(10.0, Int(n - sign - 2)))
42 {
43 s.precision(writtenDigits(d));
44 if (sign == 1)
45 s << "-";
46 s << d;
47 }
48 else
49 {
50 UInt exp = 0;
51 while (d > pow(10.0, Int(n - sign - 4)))
52 {
53 d /= 10;
54 ++exp;
55 }
56 d = Int(d) / 10.0;
57 exp += 1;
58 if (sign == 1)
59 s << "-";
60 s << d << "e";
61 if (exp < 10)
62 s << "0";
63 s << exp;
64 }
65 return s.str().substr(0, n);
66 }
67
68 static inline String& fillLeft(String & this_s, char c, UInt size)
69 {
70 if (this_s.size() < size)
71 {
72 this_s.std::string::operator=(String(size - this_s.size(), c) + this_s);
73 }
74 return this_s;
75 }
76
77 static inline String& fillRight(String & this_s, char c, UInt size)
78 {
79 if (this_s.size() < size)
80 {
81 this_s.std::string::operator=(this_s + String(size - this_s.size(), c));
82 }
83 return this_s;
84 }
85
86 static inline bool hasPrefix(const String & this_s, const String & string)
87 {
88 if (string.size() > this_s.size())
89 {
90 return false;
91 }
92 if (string.empty())
93 {
94 return true;
95 }
96 return this_s.compare(0, string.size(), string) == 0;
97 }
98
99 static inline bool hasSuffix(const String & this_s, const String& string)
100 {
101 if (string.size() > this_s.size())
102 {
103 return false;
104 }
105 if (string.empty())
106 {
107 return true;
108 }
109 return this_s.compare(this_s.size() - string.size(), string.size(), string) == 0;
110 }
111
112 static inline bool hasSubstring(const String & this_s, const String& string)
113 {
114 return this_s.find(string) != std::string::npos;
115 }
116
117 static inline bool has(const String & this_s, Byte byte)
118 {
119 return this_s.find(char(byte)) != std::string::npos;
120 }
121
122 static inline String prefix(const String & this_s, size_t length)
123 {
124 if (length > this_s.size())
125 {
126 throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
127 }
128 return this_s.substr(0, length);
129 }
130
131 static inline String suffix(const String & this_s, size_t length)
132 {
133 if (length > this_s.size())
134 {
135 throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
136 }
137 return this_s.substr(this_s.size() - length, length);
138 }
139
140 static inline String prefix(const String & this_s, Int length)
141 {
142 if (length < 0)
143 {
144 throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
145 }
146 if (length > Int(this_s.size()))
147 {
148 throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
149 }
150 return this_s.substr(0, length);
151 }
152
153 static inline String suffix(const String & this_s, Int length)
154 {
155 if (length < 0)
156 {
157 throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
158 }
159 if (length > Int(this_s.size()))
160 {
161 throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
162 }
163 return this_s.substr(this_s.size() - length, length);
164 }
165
166 static inline String prefix(const String & this_s, char delim)
167 {
168 Size pos = this_s.find(delim);
169 if (pos == std::string::npos) //char not found
170 {
171 throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
172 String(delim));
173 }
174 return this_s.substr(0, pos);
175 }
176
177 static inline String suffix(const String & this_s, char delim)
178 {
179 Size pos = this_s.rfind(delim);
180 if (pos == std::string::npos) //char not found
181 {
182 throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
183 String(delim));
184 }
185 return this_s.substr(++pos);
186 }
187
188 static inline String substr(const String & this_s, size_t pos, size_t n)
189 {
190 Size begin = std::min(pos, this_s.size());
191 return static_cast<String>(this_s.std::string::substr(begin, n));
192 }
193
194 static inline String chop(const String & this_s, Size n)
195 {
196 Size end = 0;
197 if (n < this_s.size())
198 {
199 end = this_s.size() - n;
200 }
201 return String(this_s.begin(), this_s.begin() + end);
202 }
203
204 static inline String& trim(String & this_s)
205 {
206 //search for the begin of truncated string
207 std::string::iterator begin = this_s.begin();
208 while (begin != this_s.end() && (*begin == ' ' || *begin == '\t' || *begin == '\n' || *begin == '\r'))
209 {
210 ++begin;
211 }
212
213 //all characters are whitespaces
214 if (begin == this_s.end())
215 {
216 this_s.clear();
217 return this_s;
218 }
219
220 //search for the end of truncated string
221 std::string::iterator end = this_s.end();
222 end--;
223 while (end != begin && (*end == ' ' || *end == '\n' || *end == '\t' || *end == '\r'))
224 {
225 --end;
226 }
227 ++end;
228
229 //no characters are whitespaces
230 if (begin == this_s.begin() && end == this_s.end())
231 {
232 return this_s;
233 }
234
235 // TODO:
236 // string::operator=(std::string(begin, end));
237 this_s.std::string::operator=(std::string(begin, end));
238
239 return this_s;
240 }
241
242 static inline bool isQuoted(const String & this_s, char q)
243 {
244 return (this_s.size() < 2) || (this_s[0] != q) || (this_s[this_s.size() - 1] != q);
245 }
246
247 static inline String& quote(String & this_s, char q, String::QuotingMethod method)
248 {
249 if (method == String::ESCAPE)
250 {
251 this_s.substitute(String(R"(\)"), String(R"(\\)"));
252 this_s.substitute(String(q), R"(\)" + String(q));
253 }
254 else if (method == String::DOUBLE)
255 this_s.substitute(String(q), String(q) + String(q));
256 this_s.std::string::operator=(q + this_s + q);
257 return this_s;
258 }
259
260 static inline String& unquote(String & this_s, char q, String::QuotingMethod method)
261 {
262 // check if input string matches output format of the "quote" method:
263 if (isQuoted(this_s, q))
264 {
266 __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
267 "'" + this_s + "' does not have the expected format of a quoted string");
268 }
269 this_s.std::string::operator=(this_s.substr(1, this_s.size() - 2)); // remove quotation marks
270 if (method == String::ESCAPE)
271 {
272 this_s.substitute(R"(\)" + String(q), String(q));
273 this_s.substitute(String(R"(\\)"), String(R"(\)"));
274 }
275 else if (method == String::DOUBLE)
276 this_s.substitute(String(q) + String(q), String(q));
277 return this_s;
278 }
279
280 static inline String& simplify(String & this_s)
281 {
282 String simple;
283
284 bool last_was_whitespace = false;
285 for (std::string::iterator it = this_s.begin(); it != this_s.end(); ++it)
286 {
287 if (*it == ' ' || *it == '\n' || *it == '\t' || *it == '\r')
288 {
289 if (!last_was_whitespace)
290 {
291 simple += ' ';
292 }
293 last_was_whitespace = true;
294 }
295 else
296 {
297 simple += *it;
298 last_was_whitespace = false;
299 }
300 }
301
302 this_s.swap(simple);
303 return this_s;
304 }
305
306 static inline String random(UInt length)
307 {
308 srand(time(nullptr));
309 String tmp(length, '.');
310 size_t random;
311 for (Size i = 0; i < length; ++i)
312 {
313 random = static_cast<size_t>(floor((static_cast<double>(rand()) / (double(RAND_MAX) + 1)) * 62.0));
314 if (random < 10)
315 {
316 tmp[i] = static_cast<char>(random + 48);
317 }
318 else if (random < 36)
319 {
320 tmp[i] = static_cast<char>(random + 55);
321 }
322 else
323 {
324 tmp[i] = static_cast<char>(random + 61);
325 }
326 }
327 return tmp;
328 }
329
330 static inline String& reverse(String & this_s)
331 {
332 String tmp = this_s;
333 for (Size i = 0; i != this_s.size(); ++i)
334 {
335 this_s[i] = tmp[this_s.size() - 1 - i];
336 }
337 return this_s;
338 }
339
340 static inline bool split(const String & this_s, const char splitter, std::vector<String>& substrings,
341 bool quote_protect)
342 {
343 substrings.clear();
344 if (this_s.empty())
345 return false;
346
347 Size nsplits = count(this_s.begin(), this_s.end(), splitter);
348
349 if (!quote_protect && (nsplits == 0))
350 {
351 substrings.push_back(this_s);
352 return false;
353 }
354
355 // splitter(s) found
356 substrings.reserve(nsplits + 1);
357
358 // why is "this_s." needed here?
359 std::string::const_iterator begin = this_s.begin();
360 std::string::const_iterator end = this_s.begin();
361
362 if (quote_protect)
363 {
364 Int quote_count(0);
365 for (; end != this_s.end(); ++end)
366 {
367 if (*end == '"')
368 {
369 ++quote_count;
370 }
371 if ((quote_count % 2 == 0) && (*end == splitter))
372 {
373 String block = String(begin, end);
374 block.trim();
375 if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
376 (block.suffix(1) == String("\""))))
377 { // block has start or end quote, but not both
378 // (one quote is somewhere in the middle)
380 __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
381 String("Could not dequote string '") + block +
382 "' due to wrongly placed '\"'.");
383 }
384 else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
385 (block.suffix(1) == String("\"")))
386 { // block has start and end quotes --> remove them
387 block = block.substr(1, block.size() - 2);
388 }
389 substrings.push_back(block);
390 begin = end + 1;
391 }
392 }
393 // no valid splitter found - return empty list
394 if (substrings.empty())
395 {
396 substrings.push_back(this_s);
397 return false;
398 }
399
400 String block = String(begin, end);
401 block.trim();
402 if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
403 (block.suffix(1) == String("\""))))
404 { // block has start or end quote but not both
405 // (one quote is somewhere in the middle)
407 __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
408 String("Could not dequote string '") + block +
409 "' due to wrongly placed '\"'.");
410 }
411 else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
412 (block.suffix(1) == String("\"")))
413 { // block has start and end quotes --> remove them
414 block = block.substr(1, block.size() - 2);
415 }
416 substrings.push_back(block);
417 }
418 else // do not honor quotes
419 {
420 for (; end != this_s.end(); ++end)
421 {
422 if (*end == splitter)
423 {
424 substrings.push_back(String(begin, end));
425 begin = end + 1;
426 }
427 }
428 substrings.push_back(String(begin, end));
429 }
430
431 // at this point we are sure that there are at least two components
432 return true;
433 }
434
435 static inline bool split(const String & this_s, const String& splitter, std::vector<String>& substrings)
436 {
437 substrings.clear();
438 if (this_s.empty())
439 return false;
440
441 if (splitter.empty()) // split after every character:
442 {
443 substrings.resize(this_s.size());
444 for (Size i = 0; i < this_s.size(); ++i)
445 substrings[i] = this_s[i];
446 return true;
447 }
448
449 Size len = splitter.size(), start = 0, pos = this_s.find(splitter);
450 if (len == 0)
451 len = 1;
452 while (pos != std::string::npos)
453 {
454 substrings.push_back(this_s.substr(start, pos - start));
455 start = pos + len;
456 pos = this_s.find(splitter, start);
457 }
458 substrings.push_back(this_s.substr(start, this_s.size() - start));
459 return substrings.size() > 1;
460 }
461
462 static inline bool split_quoted(const String & this_s, const String& splitter, std::vector<String>& substrings,
463 char q, String::QuotingMethod method)
464 {
465 substrings.clear();
466 if (this_s.empty() || splitter.empty())
467 return false;
468
469 bool in_quote = false;
470 char targets[2] = {q, splitter[0]}; // targets for "find_first_of"
471 std::string rest = splitter.substr(1, splitter.size() - 1);
472 Size start = 0;
473 for (Size i = 0; i < this_s.size(); ++i)
474 {
475 if (in_quote) // skip to closing quotation mark
476 {
477 bool embedded = false;
478 if (method == String::ESCAPE)
479 {
480 for (; i < this_s.size(); ++i)
481 {
482 if (this_s[i] == '\\')
483 embedded = !embedded;
484 else if ((this_s[i] == q) && !embedded)
485 break;
486 else
487 embedded = false;
488 }
489 }
490 else // method: NONE or DOUBLE
491 {
492 for (; i < this_s.size(); ++i)
493 {
494 if (this_s[i] == q)
495 {
496 if (method == String::NONE)
497 break; // found
498 // next character is also closing quotation mark:
499 if ((i < this_s.size() - 1) && (this_s[i + 1] == q))
500 embedded = !embedded;
501 // even number of subsequent quotes (doubled) => found
502 else if (!embedded)
503 break;
504 // odd number of subsequent quotes => belongs to a pair
505 else
506 embedded = false;
507 }
508 }
509 }
510 in_quote = false; // end of quote reached
511 }
512 else
513 {
514 i = this_s.find_first_of(targets, i, 2);
515 if (i == std::string::npos)
516 break; // nothing found
517 if (this_s[i] == q)
518 in_quote = true;
519 else if (this_s.compare(i + 1, rest.size(), rest) == 0) // splitter found
520 {
521 substrings.push_back(this_s.substr(start, i - start));
522 start = i + splitter.size();
523 i = start - 1; // increased by loop
524 }
525 }
526 }
527 if (in_quote) // reached end without finding closing quotation mark
528 {
530 __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
531 "unbalanced quotation marks in string '" + this_s + "'");
532 }
533 substrings.push_back(this_s.substr(start, this_s.size() - start));
534 return substrings.size() > 1;
535 }
536
537 static inline String& toUpper(String & this_s)
538 {
539 std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))toupper);
540 return this_s;
541 }
542
543 static inline String& firstToUpper(String & this_s)
544 {
545 if (!this_s.empty())
546 {
547 this_s[0] = toupper(this_s[0]);
548 }
549 return this_s;
550 }
551
552 static inline String& toLower(String & this_s)
553 {
554 std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))tolower);
555 return this_s;
556 }
557
558 static inline String& substitute(String & this_s, char from, char to)
559 {
560 std::replace(this_s.begin(), this_s.end(), from, to);
561 return this_s;
562 }
563
564 static inline String& substitute(String & this_s, const String& from, const String& to)
565 {
566 if (!from.empty())
567 {
568 std::vector<String> parts;
569 this_s.split(from, parts);
570 this_s.concatenate(parts.begin(), parts.end(), to);
571 }
572 return this_s;
573 }
574
575 static inline String& remove(String & this_s, char what)
576 {
577 this_s.erase(std::remove(this_s.begin(), this_s.end(), what), this_s.end());
578 return this_s;
579 }
580
581 static inline String& ensureLastChar(String & this_s, char end)
582 {
583 if (!this_s.hasSuffix(end))
584 this_s.append(1, end);
585 return this_s;
586 }
587
593 OPENMS_DLLAPI const char* skipWhitespace(const char* p, const char* p_end);
594
598 inline int skipWhitespace(const std::string_view& data)
599 {
600 auto pos = skipWhitespace(data.data(), data.data() + data.size());
601 return pos - data.data();
602 }
603
609 OPENMS_DLLAPI const char* skipNonWhitespace(const char* p, const char* p_end);
610
614 inline int skipNonWhitespace(const std::string_view& data)
615 {
616 auto pos = skipNonWhitespace(data.data(), data.data() + data.size());
617 return pos - data.data();
618 }
619
620 static inline String& removeWhitespaces(String& this_s)
621 {
622 auto start = skipNonWhitespace(std::string_view(this_s.data()));
623 std::string::const_iterator it = this_s.begin() + start;
624 std::string::iterator dest = this_s.begin() + start;
625 std::string::const_iterator it_end = this_s.end();
626 bool has_spaces(false);
627 while (it != it_end)
628 {
629 const char c = *it;
630 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
631 {
632 ++it;
633 has_spaces = true;
634 continue; // no need to copy a whitespace
635 }
636 // copy to the left, if we had a whitespace before
637 if (has_spaces) *dest = *it;
638 // advance both
639 ++dest;
640 ++it;
641 }
642
643 // shorten result
644 if (has_spaces) this_s.resize(dest - this_s.begin());
645
646 return this_s;
647 }
648
649 }
650
651} // namespace OPENMS
652
Invalid conversion exception.
Definition Exception.h:331
Element could not be found exception.
Definition Exception.h:654
Int overflow exception.
Definition Exception.h:211
Int underflow exception.
Definition Exception.h:175
A more convenient string class.
Definition String.h:34
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
String prefix(SizeType length) const
returns the prefix of length length
QuotingMethod
How to handle embedded quotes when quoting strings.
Definition String.h:55
@ ESCAPE
Definition String.h:55
@ DOUBLE
Definition String.h:55
@ NONE
Definition String.h:55
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String suffix(SizeType length) const
returns the suffix of length length
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
void concatenate(StringIterator first, StringIterator last, const String &glue="")
Concatenates all elements from first to last-1 and inserts glue between the elements.
Definition String.h:472
uint8_t Byte
Byte type.
Definition Types.h:81
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
static String chop(const String &this_s, Size n)
Definition StringUtilsSimple.h:194
static String & unquote(String &this_s, char q, String::QuotingMethod method)
Definition StringUtilsSimple.h:260
static String & toUpper(String &this_s)
Definition StringUtilsSimple.h:537
static bool isQuoted(const String &this_s, char q)
Definition StringUtilsSimple.h:242
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition StringUtilsSimple.h:340
static bool hasPrefix(const String &this_s, const String &string)
Definition StringUtilsSimple.h:86
static String & fillRight(String &this_s, char c, UInt size)
Definition StringUtilsSimple.h:77
static String & substitute(String &this_s, char from, char to)
Definition StringUtilsSimple.h:558
static String & trim(String &this_s)
Definition StringUtilsSimple.h:204
static String & toLower(String &this_s)
Definition StringUtilsSimple.h:552
static String & simplify(String &this_s)
Definition StringUtilsSimple.h:280
static bool hasSubstring(const String &this_s, const String &string)
Definition StringUtilsSimple.h:112
const char * skipNonWhitespace(const char *p, const char *p_end)
Get the first whitespace character (\n, \t, \r, ' ') in the string pointed to by p (where p_end is pa...
static String suffix(const String &this_s, size_t length)
Definition StringUtilsSimple.h:131
static String & ensureLastChar(String &this_s, char end)
Definition StringUtilsSimple.h:581
static String numberLength(double d, UInt n)
Functions.
Definition StringUtilsSimple.h:32
const char * skipWhitespace(const char *p, const char *p_end)
Get the first non-whitespace character (anything but \n, \t, \r, ' ') in the string pointed to by p (...
static String & reverse(String &this_s)
Definition StringUtilsSimple.h:330
static String & remove(String &this_s, char what)
Definition StringUtilsSimple.h:575
static bool hasSuffix(const String &this_s, const String &string)
Definition StringUtilsSimple.h:99
static String & firstToUpper(String &this_s)
Definition StringUtilsSimple.h:543
static bool has(const String &this_s, Byte byte)
Definition StringUtilsSimple.h:117
static String & removeWhitespaces(String &this_s)
Definition StringUtilsSimple.h:620
static String random(UInt length)
Definition StringUtilsSimple.h:306
static String & quote(String &this_s, char q, String::QuotingMethod method)
Definition StringUtilsSimple.h:247
static bool split_quoted(const String &this_s, const String &splitter, std::vector< String > &substrings, char q, String::QuotingMethod method)
Definition StringUtilsSimple.h:462
static String & fillLeft(String &this_s, char c, UInt size)
Definition StringUtilsSimple.h:68
static String substr(const String &this_s, size_t pos, size_t n)
Definition StringUtilsSimple.h:188
static String prefix(const String &this_s, size_t length)
Definition StringUtilsSimple.h:122
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
constexpr Int writtenDigits(const FloatingPointType &=FloatingPointType())
Number of digits commonly used for writing a floating point type (a.k.a. precision)....
Definition Types.h:264