OpenMS
Loading...
Searching...
No Matches
ProFormaTokenizer.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
13#include <optional>
14#include <string>
15#include <string_view>
16
17namespace OpenMS
18{
19
/// Tokenizer for ProForma v2 peptidoform notation.
///
/// The tokenizer does not own its input: it stores a std::string_view, and the
/// tokens it hands out are views into that same buffer. The underlying
/// character data must therefore remain valid for the tokenizer's lifetime
/// (and for as long as any Token::text obtained from it is used).
class OPENMS_DLLAPI ProFormaTokenizer
{
public:
  /// Token types produced by the tokenizer.
  enum class TokenType
  {
    LBRACKET,
    RBRACKET,
    LPAREN,
    RPAREN,
    LBRACE,
    RBRACE,
    LANGLE,
    RANGLE,
    PLUS,
    MINUS,
    SLASH,
    PIPE,
    HASH,
    COLON,
    COMMA,
    CARET,
    QUESTION,
    AT,
    NUMBER,
    IDENTIFIER,
    END
  };

  /// A single token from the input stream.
  struct Token
  {
    /// The type of this token.
    TokenType type;

    /// View into the original input (zero-copy).
    std::string_view text;

    /// Byte offset in the original input (0-indexed).
    size_t position;

    /// Check if this is an end-of-input token.
    bool isEnd() const { return type == TokenType::END; }

    /// Check whether this token has the given type @p t.
    bool is(TokenType t) const { return type == t; }
  };

  /// Construct a tokenizer for the given input string.
  ///
  /// @param input     The text to tokenize; must remain valid for the tokenizer's lifetime.
  /// @param start_pos Starting offset into @p input (presumably where scanning begins -- confirm against the implementation).
  explicit ProFormaTokenizer(std::string_view input, size_t start_pos = 0);

  /// Default destructor.
  ~ProFormaTokenizer() = default;

  /// Copy constructor.
  ProFormaTokenizer(const ProFormaTokenizer&) = default;

  /// Move constructor.
  ProFormaTokenizer(ProFormaTokenizer&&) = default;

  /// Copy assignment operator.
  ProFormaTokenizer& operator=(const ProFormaTokenizer&) = default;

  /// Move assignment operator.
  ProFormaTokenizer& operator=(ProFormaTokenizer&&) = default;

  /// Consume and return the next token.
  Token next();

  /// Look at the next token without consuming it.
  Token peek();

  /// Check if more tokens are available.
  bool hasMore() const;

  /// Get the current position in the input.
  size_t position() const;

  /// Get a context string around a position for error messages.
  ///
  /// @param pos    Position in the input around which context is extracted.
  /// @param before Amount of context requested before @p pos (default 20).
  /// @param after  Amount of context requested after @p pos (default 20).
  /// @return A view of the selected context (the return type is a non-owning std::string_view).
  std::string_view getContext(size_t pos, size_t before = 20, size_t after = 20) const;

  /// Get a human-readable name for a token type.
  static const char* tokenTypeName(TokenType type);

private:
  /// Scan and return the next token from the current position.
  Token scanToken_();

  /// Scan a number token (integer, decimal, optionally signed).
  Token scanNumber_();

  /// Scan an identifier token (letter sequence).
  Token scanIdentifier_();

  /// Check if we have reached the end of input.
  bool isAtEnd_() const;

  /// Get the current character (or '\0' if at end).
  char current_() const;

  /// Get the character at offset from current position (or '\0' if out of bounds).
  char peek_(size_t offset) const;

  /// Advance to the next character and return the previous one.
  char advance_();

  /// Check if a character is a letter (A-Za-z).
  static bool isLetter_(char c);

  /// Check if a character is a digit (0-9).
  static bool isDigit_(char c);

  /// The input string (must remain valid for tokenizer lifetime).
  std::string_view input_;

  /// Position state of the tokenizer; defaults to 0.
  size_t pos_ = 0;

  /// Cached peeked token (if any).
  std::optional<Token> peeked_;
};
216
217} // namespace OpenMS
Tokenizer for ProForma v2 peptidoform notation.
Definition ProFormaTokenizer.h:54
~ProFormaTokenizer()=default
Default destructor.
char current_() const
Get the current character (or '\0' if at end)
ProFormaTokenizer(ProFormaTokenizer &&)=default
Move constructor.
ProFormaTokenizer & operator=(ProFormaTokenizer &&)=default
Move assignment operator.
Token next()
Consume and return the next token.
char peek_(size_t offset) const
Get the character at offset from current position (or '\0' if out of bounds)
Token scanIdentifier_()
Scan an identifier token (letter sequence)
std::string_view getContext(size_t pos, size_t before=20, size_t after=20) const
Get a context string around a position for error messages.
ProFormaTokenizer(const ProFormaTokenizer &)=default
Copy constructor.
ProFormaTokenizer(std::string_view input, size_t start_pos=0)
Construct a tokenizer for the given input string.
bool isAtEnd_() const
Check if we have reached the end of input.
Token scanToken_()
Scan and return the next token from the current position.
size_t position() const
Get the current position in the input.
char advance_()
Advance to the next character and return the previous one.
ProFormaTokenizer & operator=(const ProFormaTokenizer &)=default
Copy assignment operator.
std::string_view input_
The input string (must remain valid for tokenizer lifetime)
Definition ProFormaTokenizer.h:208
Token scanNumber_()
Scan a number token (integer, decimal, optionally signed)
TokenType
Token types produced by the tokenizer.
Definition ProFormaTokenizer.h:58
static bool isLetter_(char c)
Check if a character is a letter (A-Za-z)
std::optional< Token > peeked_
Cached peeked token (if any)
Definition ProFormaTokenizer.h:214
static const char * tokenTypeName(TokenType type)
Get a human-readable name for a token type.
Token peek()
Look at the next token without consuming it.
bool hasMore() const
Check if more tokens are available.
static bool isDigit_(char c)
Check if a character is a digit (0-9)
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
A single token from the input stream.
Definition ProFormaTokenizer.h:89
TokenType type
The type of this token.
Definition ProFormaTokenizer.h:90
size_t position
Byte offset in the original input (0-indexed)
Definition ProFormaTokenizer.h:92
bool is(TokenType t) const
Check if this token is of the given type.
Definition ProFormaTokenizer.h:98
std::string_view text
View into the original input (zero-copy)
Definition ProFormaTokenizer.h:91
bool isEnd() const
Check if this is an end-of-input token.
Definition ProFormaTokenizer.h:95