OpenMS
Loading...
Searching...
No Matches
ProFormaData.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
12#include <OpenMS/OpenMSConfig.h>
13
14
15#include <optional>
16#include <utility>
17#include <variant>
18#include <vector>
19
20namespace OpenMS
21{
22
23 // Forward declaration for resolved modification pointer
24 class ResidueModification;
25
40
41
61
62
77
78
/// Enumerator list of ProFormaWriteMode (write mode for ProForma string serialization).
/// NOTE(review): the enum header and the CANONICAL enumerator (normalized output: uppercase
/// CV prefixes, sorted mods, 4 decimal places for masses) are referenced by the page index
/// but are not visible in this listing — confirm against the real header.
88 {
89 LOSSLESS, ///< Preserve original spelling/formatting where possible (e.g., mass delta text)
91 };
92
93
/// @brief Controlled vocabulary database prefix for modification accessions.
102 enum class CvDatabase
103 {
104 UNIMOD, ///< UniMod database (https://www.unimod.org/)
105 MOD, ///< PSI-MOD ontology (https://www.ebi.ac.uk/ols/ontologies/mod)
106 RESID, ///< RESID database
107 XLMOD, ///< Cross-linking modifications ontology
108 GNO ///< Glycan naming ontology
109 };
110
111
/// @brief Controlled vocabulary accession for a modification.
///
/// NOTE(review): no members are visible in this listing. The page index attributes
/// `CvDatabase database` (the source database: UNIMOD, MOD, RESID, XLMOD, or GNO) and
/// `String accession` (e.g., "35" for UNIMOD:35, full string for GNO) to lines 122-123 —
/// confirm against the real header.
120 struct OPENMS_DLLAPI CvAccession
121 {
124 };
125
126
/// @brief Named modification with optional CV prefix hint.
///
/// NOTE(review): the page index also lists `String name` (the modification name, e.g.,
/// "Oxidation", "Phospho") at line 139; it is not visible in this listing.
136 struct OPENMS_DLLAPI NamedMod
137 {
138 std::optional<CvDatabase> cv_hint; ///< Optional CV prefix hint (U, M, R, X, G)
140 };
141
142
/// @brief Mass delta modification with optional source hint.
///
/// NOTE(review): the page index also lists `String original_text` (original text kept for
/// lossless roundtrip, e.g., "+15.99" vs "+15.9900") at line 168; it is not visible in
/// this listing.
152 struct OPENMS_DLLAPI MassDelta
153 {
/// Source hint for mass delta values.
/// NOTE(review): U/M/R/X/G presumably mirror the CV prefix letters; OBS is not described
/// anywhere in this listing — confirm.
155 enum class Source
156 {
157 NONE,
158 OBS,
159 U,
160 M,
161 R,
162 X,
163 G
164 };
165
166 Source source = Source::NONE; ///< Source hint; defaults to NONE
167 double mass = 0.0; ///< The mass delta value in Daltons; zero-initialized so a default-constructed MassDelta never holds an indeterminate value
169 };
170
171
/// @brief Chemical formula with optional charge.
///
/// NOTE(review): the page index also lists `String formula_string` (the chemical formula
/// string, e.g., "C12H20O2") at line 182; it is not visible in this listing.
180 struct OPENMS_DLLAPI FormulaTag
181 {
183 std::optional<int> charge; ///< Optional charge from :z+N suffix
184 };
185
186
/// @brief Glycan composition specification.
198 struct OPENMS_DLLAPI GlycanComposition
199 {
/// A monosaccharide component: either a name (String) or a custom formula (FormulaTag)
201 using Monosaccharide = std::variant<String, FormulaTag>;
202
203 std::vector<std::pair<Monosaccharide, int>> components; ///< List of (monosaccharide, count) pairs
204 };
205
206
/// @brief Info tag for arbitrary text annotations.
///
/// NOTE(review): the page index lists `String text` (the info text content) at line 217;
/// it is not visible in this listing.
215 struct OPENMS_DLLAPI InfoTag
216 {
218 };
219
220
/// @brief Position constraint specifying allowed residues for a modification.
232 struct OPENMS_DLLAPI PositionConstraint
233 {
234 std::vector<char> residues; ///< List of allowed amino acid residues
235 bool n_term = false; ///< Defaults to false; presumably marks the N-terminus as an allowed position — TODO confirm
236 bool c_term = false; ///< Defaults to false; presumably marks the C-terminus as an allowed position — TODO confirm
237 };
238
239
/// @brief Variant type representing any modification tag content.
///
/// NOTE(review): the page index gives the full alias as
/// std::variant<CvAccession, NamedMod, MassDelta, FormulaTag, GlycanComposition, InfoTag,
/// PositionConstraint>; the CvAccession, FormulaTag, GlycanComposition and
/// PositionConstraint alternatives are not visible in this listing.
254 using ModificationTag = std::variant<
256 NamedMod,
257 MassDelta,
260 InfoTag,
262 >;
263
264
/// @brief Label for cross-links, branches, or ambiguous grouping.
///
/// NOTE(review): the page index also lists `Type type` (the label type, line 285) and
/// `String identifier` (the label identifier, e.g., XL1, BRANCH, g1, line 286); neither is
/// visible in this listing.
275 struct OPENMS_DLLAPI Label
276 {
/// The type of label.
278 enum class Type
279 {
280 CROSSLINK,
281 BRANCH,
282 AMBIGUOUS
283 };
284
287 std::optional<double> score; ///< Optional localization score for ambiguous labels (e.g., 0.90)
288 };
289
290
/// @brief A modification with one or more alternative tags.
305 struct OPENMS_DLLAPI Modification
306 {
308 std::vector<std::pair<ModificationTag, std::optional<Label>>> alternatives; ///< Each alternative is a (tag, optional_label) pair
309
// Pointer to the resolved ResidueModification (only forward-declared in this header);
// nullptr by default. NOTE(review): ownership/lifetime is not shown here — confirm.
312 const ResidueModification* resolved_mod = nullptr;
313 };
314
315
/// @brief A single amino acid with its modifications.
///
/// NOTE(review): the page index also lists `char amino_acid` (single-letter amino acid
/// code, A-Z) at line 326; it is not visible in this listing.
324 struct OPENMS_DLLAPI SequenceElement
325 {
327 std::vector<Modification> modifications; ///< Modifications at this position
328 };
329
330
/// @brief Ambiguous amino acid region.
339 struct OPENMS_DLLAPI AmbiguousRegion
340 {
341 std::vector<SequenceElement> elements; ///< The ambiguous amino acid possibilities
342 };
343
344
/// @brief Modified sequence range with shared modifications.
355 struct OPENMS_DLLAPI ModifiedRange
356 {
357 std::vector<SequenceElement> elements; ///< presumably the residues covered by the range — TODO confirm
358 std::vector<Modification> modifications; ///< presumably the modifications shared by the whole range — TODO confirm
359 };
360
361
/// @brief Variant type representing a section of the sequence.
///
/// NOTE(review): the page index gives the full alias as
/// std::variant<SequenceElement, AmbiguousRegion, ModifiedRange>; the alternatives are not
/// visible in this listing.
372 using SequenceSection = std::variant<
376 >;
377
378
/// @brief Unlocalised modification with optional occurrence count.
390 struct OPENMS_DLLAPI UnlocalisedMod
391 {
392 std::vector<Modification> modifications; ///< presumably the modification(s) whose position is unknown — TODO confirm
393 std::optional<int> occurrence; ///< Optional occurrence count from ^N suffix
394 };
395
396
/// @brief Labile modification that may be lost during fragmentation.
///
/// NOTE(review): the page index lists `Modification modification` (the labile
/// modification) at line 409; it is not visible in this listing.
407 struct OPENMS_DLLAPI LabileModification
408 {
410 };
411
412
/// @brief Global modification applied to specific locations.
///
/// NOTE(review): original line 425 is absent from this listing, so a member may be
/// missing here — confirm against the real header.
423 struct OPENMS_DLLAPI GlobalModification
424 {
426 std::vector<String> locations; ///< Target locations ("K", "N-term", "C-term:K", etc.)
427 };
428
429
/// @brief Isotope replacement for stable isotope labeling.
///
/// NOTE(review): the page index lists `String isotope` (the isotope specification, e.g.,
/// "13C", "15N", "D") at line 442; it is not visible in this listing.
440 struct OPENMS_DLLAPI IsotopeReplacement
441 {
443 };
444
445
/// @brief Variant type for global modification entries.
///
/// NOTE(review): the page index gives the full alias as
/// std::variant<IsotopeReplacement, GlobalModification>; the alternatives are not visible
/// in this listing.
455 using GlobalModEntry = std::variant<
458 >;
459
460
/// @brief Adduct ion specification for charge state.
///
/// NOTE(review): the page index also lists `String formula` (the adduct formula, e.g.,
/// "Na", "H", "K") at line 473; it is not visible in this listing.
471 struct OPENMS_DLLAPI AdductIon
472 {
474 int charge = 0; ///< The charge contribution of this adduct; zero-initialized so a default-constructed AdductIon never holds an indeterminate value
475 std::optional<int> occurrence; ///< presumably an optional repeat count for this adduct — TODO confirm
476 };
477
478
/// @brief Charge state specification.
///
/// Holds either a plain integer or a list of AdductIon entries.
488 using ChargeState = std::variant<
489 int,
490 std::vector<AdductIon>
491 >;
492
493
/// @brief A single peptidoform (one peptide chain).
508 struct OPENMS_DLLAPI Peptidoform
509 {
510 std::optional<String> name; ///< Optional name from (>name) v2.1 extension
511 std::vector<GlobalModEntry> global_mods; ///< Global modifications: <13C>, <[TMT6plex]@K>
512 std::vector<UnlocalisedMod> unlocalised_mods; ///< Unlocalised modifications: [Phospho]?
513 std::vector<LabileModification> labile_mods; ///< Labile modifications: {Glycan:Hex}
514 std::vector<Modification> n_term_mods; ///< N-terminal modifications: [Acetyl]-
515 std::vector<SequenceSection> sequence; ///< The sequence with modifications
516 std::vector<Modification> c_term_mods; ///< C-terminal modifications: -[Amidated]
517 std::optional<ChargeState> charge; ///< Optional per-chain charge (for chimeric spectra)
518 };
519
520
/// @brief A peptidoform ion (one or more chains with optional charge).
531 struct OPENMS_DLLAPI PeptidoformIon
532 {
533 std::optional<String> name; ///< Optional name; NOTE(review): syntax origin is not documented in this listing — confirm
534 std::vector<Peptidoform> chains; ///< One or more peptide chains (separated by // or + in ProForma)
535 std::optional<ChargeState> charge; ///< Optional charge of the ion
536 bool is_chimeric = false; ///< Defaults to false; presumably set when the chains come from a chimeric spectrum — TODO confirm
537 };
538
539
/// @brief Cross-link group connecting sites across chains.
///
/// NOTE(review): the page index also lists `String label` (the cross-link label, e.g.,
/// XL1) at line 552; it is not visible in this listing.
550 struct OPENMS_DLLAPI CrossLinkGroup
551 {
553 std::vector<std::pair<size_t, size_t>> sites; ///< (chain_index, site_index) pairs
554 };
555
556
557 //--------------------------------------------------------------------------
558 // JSON serialization convenience functions (implementations in ProFormaDataJson.cpp)
559 //--------------------------------------------------------------------------
560
563
/// @brief Convert a Peptidoform to JSON string representation.
/// @param pf The peptidoform to serialize
/// @return The JSON text as a String
573 OPENMS_DLLAPI String toJSON(const Peptidoform& pf);
574
/// @brief Construct a Peptidoform from JSON string.
/// @param json_str The JSON text to read
/// @return The constructed Peptidoform
/// NOTE(review): behavior on malformed input is not visible in this header — confirm in ProFormaDataJson.cpp.
584 OPENMS_DLLAPI Peptidoform peptidoformFromJSON(const String& json_str);
585
/// @brief Overload of toJSON taking a PeptidoformIon and returning a String.
/// @param pfi The peptidoform ion to serialize
/// NOTE(review): presumably the JSON counterpart of the Peptidoform overload — confirm.
595 OPENMS_DLLAPI String toJSON(const PeptidoformIon& pfi);
596
/// @brief Construct a PeptidoformIon from JSON string.
/// @param json_str The JSON text to read
/// @return The constructed PeptidoformIon
606 OPENMS_DLLAPI PeptidoformIon peptidoformIonFromJSON(const String& json_str);
607
609
610
611} // namespace OpenMS
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
A more convenient string class.
Definition String.h:34
CvDatabase database
The source database (UNIMOD, MOD, RESID, XLMOD, or GNO)
Definition ProFormaData.h:122
ConversionIssueType type
The type of issue.
Definition ProFormaData.h:73
std::optional< ChargeState > charge
Optional per-chain charge (for chimeric spectra)
Definition ProFormaData.h:517
std::vector< Modification > n_term_mods
N-terminal modifications: [Acetyl]-.
Definition ProFormaData.h:514
std::vector< LabileModification > labile_mods
Labile modifications: {Glycan:Hex}.
Definition ProFormaData.h:513
std::vector< std::pair< ModificationTag, std::optional< Label > > > alternatives
Each alternative is a (tag, optional_label) pair.
Definition ProFormaData.h:308
std::optional< int > occurrence
Optional occurrence count from ^N suffix.
Definition ProFormaData.h:393
String isotope
The isotope specification (e.g., "13C", "15N", "D")
Definition ProFormaData.h:442
std::vector< std::pair< Monosaccharide, int > > components
List of (monosaccharide, count) pairs.
Definition ProFormaData.h:203
String formula_string
The chemical formula string (e.g., "C12H20O2")
Definition ProFormaData.h:182
std::vector< SequenceSection > sequence
The sequence with modifications.
Definition ProFormaData.h:515
String text
The info text content.
Definition ProFormaData.h:217
std::optional< int > charge
Optional charge from :z+N suffix.
Definition ProFormaData.h:183
String description
Human-readable description.
Definition ProFormaData.h:74
size_t position
Position in sequence (SIZE_MAX if not position-specific)
Definition ProFormaData.h:75
std::optional< CvDatabase > cv_hint
Optional CV prefix hint (U, M, R, X, G)
Definition ProFormaData.h:138
Modification modification
The labile modification.
Definition ProFormaData.h:409
std::vector< String > locations
Target locations ("K", "N-term", "C-term:K", etc.)
Definition ProFormaData.h:426
std::vector< Modification > modifications
Modifications at this position.
Definition ProFormaData.h:327
String name
The modification name (e.g., "Oxidation", "Phospho")
Definition ProFormaData.h:139
String accession
The accession identifier (e.g., "35" for UNIMOD:35, full string for GNO)
Definition ProFormaData.h:123
std::vector< Peptidoform > chains
One or more peptide chains (separated by // or + in ProForma)
Definition ProFormaData.h:534
std::vector< std::pair< size_t, size_t > > sites
(chain_index, site_index) pairs
Definition ProFormaData.h:553
std::variant< String, FormulaTag > Monosaccharide
A monosaccharide component: either a name (String) or a custom formula (FormulaTag)
Definition ProFormaData.h:201
String formula
The adduct formula (e.g., "Na", "H", "K")
Definition ProFormaData.h:473
std::vector< SequenceElement > elements
The ambiguous amino acid possibilities.
Definition ProFormaData.h:341
int charge
The charge contribution of this adduct.
Definition ProFormaData.h:474
String label
The cross-link label (e.g., XL1)
Definition ProFormaData.h:552
std::optional< String > name
Optional name from (>name) v2.1 extension.
Definition ProFormaData.h:510
std::vector< GlobalModEntry > global_mods
Global modifications: <13C>, <[TMT6plex]@K>
Definition ProFormaData.h:511
std::vector< UnlocalisedMod > unlocalised_mods
Unlocalised modifications: [Phospho]?
Definition ProFormaData.h:512
std::vector< char > residues
List of allowed amino acid residues.
Definition ProFormaData.h:234
std::vector< Modification > c_term_mods
C-terminal modifications: -[Amidated].
Definition ProFormaData.h:516
char amino_acid
Single-letter amino acid code (A-Z)
Definition ProFormaData.h:326
std::variant< SequenceElement, AmbiguousRegion, ModifiedRange > SequenceSection
Variant type representing a section of the sequence.
Definition ProFormaData.h:376
std::variant< CvAccession, NamedMod, MassDelta, FormulaTag, GlycanComposition, InfoTag, PositionConstraint > ModificationTag
Variant type representing any modification tag content.
Definition ProFormaData.h:262
CvDatabase
Controlled vocabulary database prefix for modification accessions.
Definition ProFormaData.h:103
std::variant< IsotopeReplacement, GlobalModification > GlobalModEntry
Variant type for global modification entries.
Definition ProFormaData.h:458
ProFormaWriteMode
Write mode for ProForma string serialization.
Definition ProFormaData.h:88
ConversionIssueType
Issue type for AASequence conversion problems.
Definition ProFormaData.h:48
AASequenceConversionPolicy
Conversion policy for transforming Peptidoform to AASequence.
Definition ProFormaData.h:35
std::variant< int, std::vector< AdductIon > > ChargeState
Charge state specification.
Definition ProFormaData.h:491
@ GNO
Glycan naming ontology.
@ RESID
RESID database.
@ XLMOD
Cross-linking modifications ontology.
@ UNIMOD
UniMod database (https://www.unimod.org/)
@ MOD
PSI-MOD ontology (https://www.ebi.ac.uk/ols/ontologies/mod)
@ LOSSLESS
Preserve original spelling/formatting where possible (e.g., mass delta text)
@ CANONICAL
Normalized output: uppercase CV prefixes, sorted mods, 4 decimal places for masses.
@ UNRESOLVED_MOD
Modification could not be found in ModificationsDB.
@ CROSS_LINK
Cross-link between chains.
@ ALTERNATIVE_MODS
Multiple alternative modifications (|)
@ GLOBAL_MOD
Global modification (applies to multiple sites)
@ UNLOCALISED_MOD
Modification has no specific position.
@ LABILE_MOD
Labile modification (lost during fragmentation)
@ AMBIGUOUS_REGION
Ambiguous amino acid region.
@ MULTIPLE_CHAINS
Multiple peptide chains.
@ MODIFIED_RANGE
Modified range (position uncertain)
@ AMBIGUOUS_MOD
Ambiguously localized modification.
@ UNSUPPORTED_FEATURE
Other unsupported ProForma feature.
@ FAIL_ON_LOSS
Fail if any modification cannot be fully represented.
@ DROP_UNLOCALISED
Drop unlocalised, labile, and global modifications.
@ BEST_EFFORT
Try to convert as much as possible, skip unsupported.
Adduct ion specification for charge state.
Definition ProFormaData.h:472
Ambiguous amino acid region.
Definition ProFormaData.h:340
Description of a conversion issue from Peptidoform to AASequence.
Definition ProFormaData.h:72
Cross-link group connecting sites across chains.
Definition ProFormaData.h:551
Controlled vocabulary accession for a modification.
Definition ProFormaData.h:121
Chemical formula with optional charge.
Definition ProFormaData.h:181
Global modification applied to specific locations.
Definition ProFormaData.h:424
Glycan composition specification.
Definition ProFormaData.h:199
Info tag for arbitrary text annotations.
Definition ProFormaData.h:216
Isotope replacement for stable isotope labeling.
Definition ProFormaData.h:441
Labile modification that may be lost during fragmentation.
Definition ProFormaData.h:408
A modification with one or more alternative tags.
Definition ProFormaData.h:306
Modified sequence range with shared modifications.
Definition ProFormaData.h:356
Named modification with optional CV prefix hint.
Definition ProFormaData.h:137
A single peptidoform (one peptide chain)
Definition ProFormaData.h:509
A peptidoform ion (one or more chains with optional charge)
Definition ProFormaData.h:532
Position constraint specifying allowed residues for a modification.
Definition ProFormaData.h:233
A single amino acid with its modifications.
Definition ProFormaData.h:325
Unlocalised modification with optional occurrence count.
Definition ProFormaData.h:391
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Peptidoform peptidoformFromJSON(const String &json_str)
Construct a Peptidoform from JSON string.
PeptidoformIon peptidoformIonFromJSON(const String &json_str)
Construct a PeptidoformIon from JSON string.
String toJSON(const Peptidoform &pf)
Convert a Peptidoform to JSON string representation.
Label for cross-links, branches, or ambiguous grouping.
Definition ProFormaData.h:276
Type
The type of label.
Definition ProFormaData.h:279
std::optional< double > score
Optional localization score for ambiguous labels (e.g., 0.90)
Definition ProFormaData.h:287
Type type
The label type.
Definition ProFormaData.h:285
String identifier
The label identifier (e.g., XL1, BRANCH, g1)
Definition ProFormaData.h:286
Mass delta modification with optional source hint.
Definition ProFormaData.h:153
Source
Source hint for mass delta values.
Definition ProFormaData.h:156
double mass
The mass delta value in Daltons.
Definition ProFormaData.h:167
String original_text
Original text for lossless roundtrip (e.g., "+15.99" vs "+15.9900")
Definition ProFormaData.h:168