OpenMS
Loading...
Searching...
No Matches
ArrowSchemaRegistry.h
Go to the documentation of this file.
// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
// SPDX-License-Identifier: BSD-3-Clause
//
// --------------------------------------------------------------------------
// $Maintainer: Timo Sachsenberg $
// $Authors: Timo Sachsenberg $
// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/OpenMSConfig.h>
12#include <memory>
13#include <string>
14#include <vector>
15
16// Forward declarations
// The declarations in this header name Arrow types only as std::shared_ptr
// template arguments, so incomplete-type declarations are enough here.
namespace arrow
{
  class DataType;
  class Schema;
  class Table;
}
23
24namespace OpenMS
25{
26
28 namespace ArrowSchemaValidation
29 {
31 enum class Mode
32 {
33 Strict,
34 Subset
35 };
36
38 struct OPENMS_DLLAPI ValidationResult
39 {
40 bool valid = true;
41 std::vector<std::string> errors;
42 std::string toString() const;
43 };
44
47 const std::shared_ptr<arrow::Table>& table,
48 const std::shared_ptr<arrow::Schema>& expected_schema,
49 Mode mode = Mode::Strict);
50 }
51
53 struct OPENMS_DLLAPI ProteinSchema
54 {
55 static constexpr const char* ACCESSION = "accession";
56 static constexpr const char* SCORE = "score";
57 static constexpr const char* RANK = "rank";
58 static constexpr const char* COVERAGE = "coverage";
59 static constexpr const char* SEQUENCE = "sequence";
60 static constexpr const char* DESCRIPTION = "description";
61 static constexpr const char* IS_DECOY = "is_decoy";
62 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
63 static constexpr const char* MODIFICATIONS = "modifications";
64 static constexpr const char* METAVALUES = "metavalues";
65
66 static std::shared_ptr<arrow::DataType> modificationsType();
67 static std::shared_ptr<arrow::DataType> metavaluesType();
68 static std::shared_ptr<arrow::Schema> schema();
69 };
70
72 struct OPENMS_DLLAPI ProteinGroupSchema
73 {
74 static constexpr const char* GROUP_TYPE = "group_type";
75 static constexpr const char* PROBABILITY = "probability";
76 static constexpr const char* ACCESSIONS = "accessions";
77 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
78 static constexpr const char* GROUP_INDEX = "group_index";
79 static constexpr const char* FLOAT_DATA = "float_data";
80 static constexpr const char* STRING_DATA = "string_data";
81 static constexpr const char* INTEGER_DATA = "integer_data";
82
83 static std::shared_ptr<arrow::DataType> floatDataType();
84 static std::shared_ptr<arrow::DataType> stringDataType();
85 static std::shared_ptr<arrow::DataType> integerDataType();
86 static std::shared_ptr<arrow::Schema> schema();
87 };
88
90 struct OPENMS_DLLAPI SearchParamsSchema
91 {
92 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
93 static constexpr const char* SEARCH_ENGINE = "search_engine";
94 static constexpr const char* SEARCH_ENGINE_VERSION = "search_engine_version";
95 static constexpr const char* INFERENCE_ENGINE = "inference_engine";
96 static constexpr const char* INFERENCE_ENGINE_VERSION = "inference_engine_version";
97 static constexpr const char* DATE = "date";
98 static constexpr const char* SCORE_TYPE = "score_type";
99 static constexpr const char* HIGHER_SCORE_BETTER = "higher_score_better";
100 static constexpr const char* SIGNIFICANCE_THRESHOLD = "significance_threshold";
101 static constexpr const char* DB = "db";
102 static constexpr const char* DB_VERSION = "db_version";
103 static constexpr const char* TAXONOMY = "taxonomy";
104 static constexpr const char* CHARGES = "charges";
105 static constexpr const char* MASS_TYPE = "mass_type";
106 static constexpr const char* PRECURSOR_MASS_TOLERANCE = "precursor_mass_tolerance";
107 static constexpr const char* PRECURSOR_MASS_TOLERANCE_PPM = "precursor_mass_tolerance_ppm";
108 static constexpr const char* FRAGMENT_MASS_TOLERANCE = "fragment_mass_tolerance";
109 static constexpr const char* FRAGMENT_MASS_TOLERANCE_PPM = "fragment_mass_tolerance_ppm";
110 static constexpr const char* DIGESTION_ENZYME = "digestion_enzyme";
111 static constexpr const char* ENZYME_TERM_SPECIFICITY = "enzyme_term_specificity";
112 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
113 static constexpr const char* FIXED_MODIFICATIONS = "fixed_modifications";
114 static constexpr const char* VARIABLE_MODIFICATIONS = "variable_modifications";
115 static constexpr const char* PRIMARY_MS_RUN_PATHS = "primary_ms_run_paths";
116 static constexpr const char* METAVALUES = "metavalues";
117 static constexpr const char* SP_METAVALUES = "sp_metavalues";
118
119 static std::shared_ptr<arrow::DataType> metavaluesType();
120 static std::shared_ptr<arrow::Schema> schema();
121 };
122
124 struct OPENMS_DLLAPI FeatureSchema
125 {
126 static constexpr const char* UNIQUE_ID = "unique_id";
127 static constexpr const char* PARENT_FEATURE_ID = "parent_feature_id";
128 static constexpr const char* DEPTH = "depth";
129 static constexpr const char* RT = "rt";
130 static constexpr const char* MZ = "mz";
131 static constexpr const char* INTENSITY = "intensity";
132 static constexpr const char* CHARGE = "charge";
133 static constexpr const char* QUALITY = "quality";
134 static constexpr const char* QUALITY_RT = "quality_rt";
135 static constexpr const char* QUALITY_MZ = "quality_mz";
136 static constexpr const char* WIDTH = "width";
137 static constexpr const char* RT_BB_MIN = "rt_bb_min";
138 static constexpr const char* RT_BB_MAX = "rt_bb_max";
139 static constexpr const char* MZ_BB_MIN = "mz_bb_min";
140 static constexpr const char* MZ_BB_MAX = "mz_bb_max";
141 static constexpr const char* CONVEX_HULLS = "convex_hulls";
142 static constexpr const char* METAVALUES = "metavalues";
143
144 static std::shared_ptr<arrow::DataType> convexHullType();
145 static std::shared_ptr<arrow::DataType> metavaluesType();
146 static std::shared_ptr<arrow::Schema> schema();
147 };
148
150 struct OPENMS_DLLAPI ConsensusFeatureSchema
151 {
152 static constexpr const char* UNIQUE_ID = "unique_id";
153 static constexpr const char* RT = "rt";
154 static constexpr const char* MZ = "mz";
155 static constexpr const char* INTENSITY = "intensity";
156 static constexpr const char* CHARGE = "charge";
157 static constexpr const char* QUALITY = "quality";
158 static constexpr const char* WIDTH = "width";
159 static constexpr const char* HANDLES = "handles";
160 static constexpr const char* METAVALUES = "metavalues";
161
162 static std::shared_ptr<arrow::DataType> handlesType();
163 static std::shared_ptr<arrow::DataType> metavaluesType();
164 static std::shared_ptr<arrow::Schema> schema();
165 };
166
168 struct OPENMS_DLLAPI PSMSchema
169 {
170 static constexpr const char* SEQUENCE = "sequence";
171 static constexpr const char* PEPTIDOFORM = "peptidoform";
172 static constexpr const char* MODIFICATIONS = "modifications";
173 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
174 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
175 static constexpr const char* IS_DECOY = "is_decoy";
176 static constexpr const char* CALCULATED_MZ = "calculated_mz";
177 static constexpr const char* OBSERVED_MZ = "observed_mz";
178 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
179 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
180 static constexpr const char* PREDICTED_RT = "predicted_rt";
181 static constexpr const char* REFERENCE_FILE_NAME = "reference_file_name";
182 static constexpr const char* CV_PARAMS = "cv_params";
183 static constexpr const char* SCAN = "scan";
184 static constexpr const char* RT = "rt";
185 static constexpr const char* ION_MOBILITY = "ion_mobility";
186 static constexpr const char* SPECTRUM_REFERENCE = "spectrum_reference";
187 static constexpr const char* SCORE = "score";
188 static constexpr const char* SCORE_TYPE = "score_type";
189 static constexpr const char* HIGHER_SCORE_BETTER = "higher_score_better";
190 static constexpr const char* RANK = "rank";
191 static constexpr const char* PEPTIDE_IDENTIFICATION_INDEX = "peptide_identification_index";
192 static constexpr const char* PSM_METAVALUES = "psm_metavalues";
193 static constexpr const char* SPECTRUM_METAVALUES = "spectrum_metavalues";
194 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
195 static constexpr const char* MZ_ARRAY = "mz_array";
196 static constexpr const char* INTENSITY_ARRAY = "intensity_array";
197 static constexpr const char* CHARGE_ARRAY = "charge_array";
198 static constexpr const char* ION_TYPE_ARRAY = "ion_type_array";
199
200 static std::shared_ptr<arrow::DataType> modificationsType();
201 static std::shared_ptr<arrow::DataType> additionalScoresType();
202 static std::shared_ptr<arrow::DataType> metavaluesType();
203 static std::shared_ptr<arrow::Schema> schema();
204 };
205
210 struct OPENMS_DLLAPI QPXPSMSchema
211 {
212 static constexpr const char* SEQUENCE = "sequence";
213 static constexpr const char* PEPTIDOFORM = "peptidoform";
214 static constexpr const char* MODIFICATIONS = "modifications";
215 static constexpr const char* CHARGE = "charge";
216 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
217 static constexpr const char* IS_DECOY = "is_decoy";
218 static constexpr const char* CALCULATED_MZ = "calculated_mz";
219 static constexpr const char* OBSERVED_MZ = "observed_mz";
220 static constexpr const char* MASS_ERROR_PPM = "mass_error_ppm";
221 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
222 static constexpr const char* PREDICTED_RT = "predicted_rt";
223 static constexpr const char* RUN_FILE_NAME = "run_file_name";
224 static constexpr const char* CV_PARAMS = "cv_params";
225 static constexpr const char* SCAN = "scan";
226 static constexpr const char* RT = "rt";
227 static constexpr const char* ION_MOBILITY = "ion_mobility";
228 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
229 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
230 static constexpr const char* CROSS_LINKS = "cross_links";
231 static constexpr const char* MZ_ARRAY = "mz_array";
232 static constexpr const char* INTENSITY_ARRAY = "intensity_array";
233 static constexpr const char* CHARGE_ARRAY = "charge_array";
234 static constexpr const char* ION_TYPE_ARRAY = "ion_type_array";
235 static constexpr const char* ION_MOBILITY_ARRAY = "ion_mobility_array";
236
238 static std::shared_ptr<arrow::DataType> modificationsType();
240 static std::shared_ptr<arrow::DataType> additionalScoresType();
242 static std::shared_ptr<arrow::DataType> cvParamsType();
244 static std::shared_ptr<arrow::DataType> crossLinksType();
246 static std::shared_ptr<arrow::Schema> schema();
247 };
248
253 struct OPENMS_DLLAPI QPXFeatureSchema
254 {
255 static constexpr const char* SEQUENCE = "sequence";
256 static constexpr const char* PEPTIDOFORM = "peptidoform";
257 static constexpr const char* MODIFICATIONS = "modifications";
258 static constexpr const char* CHARGE = "charge";
259 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
260 static constexpr const char* IS_DECOY = "is_decoy";
261 static constexpr const char* CALCULATED_MZ = "calculated_mz";
262 static constexpr const char* OBSERVED_MZ = "observed_mz";
263 static constexpr const char* MASS_ERROR_PPM = "mass_error_ppm";
264 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
265 static constexpr const char* PREDICTED_RT = "predicted_rt";
266 static constexpr const char* RUN_FILE_NAME = "run_file_name";
267 static constexpr const char* CV_PARAMS = "cv_params";
268 static constexpr const char* SCAN = "scan";
269 static constexpr const char* RT = "rt";
270 static constexpr const char* ION_MOBILITY = "ion_mobility";
271 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
272 static constexpr const char* INTENSITIES = "intensities";
273 static constexpr const char* ADDITIONAL_INTENSITIES = "additional_intensities";
274 static constexpr const char* PG_ACCESSIONS = "pg_accessions";
275 static constexpr const char* ANCHOR_PROTEIN = "anchor_protein";
276 static constexpr const char* UNIQUE = "unique";
277 static constexpr const char* PG_GLOBAL_QVALUE = "pg_global_qvalue";
278 static constexpr const char* PG_POSITIONS = "pg_positions";
279 static constexpr const char* ION_MOBILITY_START = "ion_mobility_start";
280 static constexpr const char* ION_MOBILITY_STOP = "ion_mobility_stop";
281 static constexpr const char* GG_ACCESSIONS = "gg_accessions";
282 static constexpr const char* GG_NAMES = "gg_names";
283 static constexpr const char* ID_RUN_FILE_NAME = "id_run_file_name";
284 static constexpr const char* RT_START = "rt_start";
285 static constexpr const char* RT_STOP = "rt_stop";
286
288 static std::shared_ptr<arrow::DataType> modificationsType();
290 static std::shared_ptr<arrow::DataType> additionalScoresType();
292 static std::shared_ptr<arrow::DataType> cvParamsType();
294 static std::shared_ptr<arrow::DataType> intensitiesType();
296 static std::shared_ptr<arrow::DataType> additionalIntensitiesType();
298 static std::shared_ptr<arrow::DataType> pgAccessionsType();
300 static std::shared_ptr<arrow::DataType> pgPositionsType();
302 static std::shared_ptr<arrow::Schema> schema();
303 };
304
310 struct OPENMS_DLLAPI QPXPgSchema
311 {
312 static constexpr const char* PG_ACCESSIONS = "pg_accessions";
313 static constexpr const char* PG_NAMES = "pg_names";
314 static constexpr const char* GG_ACCESSIONS = "gg_accessions";
315 static constexpr const char* GG_NAMES = "gg_names";
316 static constexpr const char* GG_QVALUE = "gg_qvalue";
317 static constexpr const char* ANCHOR_PROTEIN = "anchor_protein";
318 static constexpr const char* RUN_FILE_NAME = "run_file_name";
319 static constexpr const char* GLOBAL_QVALUE = "global_qvalue";
320 static constexpr const char* PG_QVALUE = "pg_qvalue";
321 static constexpr const char* INTENSITIES = "intensities";
322 static constexpr const char* ADDITIONAL_INTENSITIES = "additional_intensities";
323 static constexpr const char* IS_DECOY = "is_decoy";
324 static constexpr const char* CONTAMINANT = "contaminant";
325 static constexpr const char* PEPTIDES = "peptides";
326 static constexpr const char* PEPTIDE_COUNTS = "peptide_counts";
327 static constexpr const char* FEATURE_COUNTS = "feature_counts";
328 static constexpr const char* SEQUENCE_COVERAGE = "sequence_coverage";
329 static constexpr const char* MOLECULAR_WEIGHT = "molecular_weight";
330 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
331 static constexpr const char* CV_PARAMS = "cv_params";
332
334 static std::shared_ptr<arrow::DataType> intensitiesType();
336 static std::shared_ptr<arrow::DataType> additionalIntensitiesType();
338 static std::shared_ptr<arrow::DataType> peptidesType();
340 static std::shared_ptr<arrow::DataType> peptideCountsType();
342 static std::shared_ptr<arrow::DataType> featureCountsType();
344 static std::shared_ptr<arrow::DataType> additionalScoresType();
346 static std::shared_ptr<arrow::DataType> cvParamsType();
348 static std::shared_ptr<arrow::Schema> schema();
349 };
350
352 struct OPENMS_DLLAPI SpectraLongSchema
353 {
354 static constexpr const char* MZ = "mz";
355 static constexpr const char* INTENSITY = "intensity";
356 static constexpr const char* RT = "rt";
357 static constexpr const char* ION_MOBILITY = "ion_mobility";
358 static constexpr const char* SPECTRUM_INDEX = "spectrum_index";
359 static constexpr const char* MS_LEVEL = "ms_level";
360 static constexpr const char* NATIVE_ID = "native_id";
361 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
362 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
363 static constexpr const char* PRECURSOR_INTENSITY = "precursor_intensity";
364 static constexpr const char* ISOLATION_LOWER = "isolation_lower";
365 static constexpr const char* ISOLATION_UPPER = "isolation_upper";
366
367 static std::shared_ptr<arrow::Schema> schema();
368 };
369
371 struct OPENMS_DLLAPI SpectraSemiWideSchema
372 {
373 static constexpr const char* SPECTRUM_INDEX = "spectrum_index";
374 static constexpr const char* RT = "rt";
375 static constexpr const char* MS_LEVEL = "ms_level";
376 static constexpr const char* NATIVE_ID = "native_id";
377 static constexpr const char* MZ = "mz";
378 static constexpr const char* INTENSITY = "intensity";
379 static constexpr const char* ION_MOBILITY = "ion_mobility";
380 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
381 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
382 static constexpr const char* PRECURSOR_INTENSITY = "precursor_intensity";
383 static constexpr const char* ISOLATION_LOWER = "isolation_lower";
384 static constexpr const char* ISOLATION_UPPER = "isolation_upper";
385
386 static std::shared_ptr<arrow::Schema> schema();
387 };
388
390 struct OPENMS_DLLAPI ChromatogramSchema
391 {
392 static constexpr const char* RT = "rt";
393 static constexpr const char* INTENSITY = "intensity";
394 static constexpr const char* CHROMATOGRAM_INDEX = "chromatogram_index";
395 static constexpr const char* NATIVE_ID = "native_id";
396 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
397 static constexpr const char* PRODUCT_MZ = "product_mz";
398
399 static std::shared_ptr<arrow::Schema> schema();
400 };
401
403 struct OPENMS_DLLAPI ChromatogramSemiWideSchema
404 {
405 static constexpr const char* CHROMATOGRAM_INDEX = "chromatogram_index";
406 static constexpr const char* NATIVE_ID = "native_id";
407 static constexpr const char* RT = "rt";
408 static constexpr const char* INTENSITY = "intensity";
409 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
410 static constexpr const char* PRODUCT_MZ = "product_mz";
411
412 static std::shared_ptr<arrow::Schema> schema();
413 };
414
416 struct OPENMS_DLLAPI OSWPrecursorSchema
417 {
418 static constexpr const char* PRECURSOR_ID = "precursor_id";
419 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
420 static constexpr const char* CHARGE = "charge";
421 static constexpr const char* LIBRARY_RT = "library_rt";
422 static constexpr const char* LIBRARY_DRIFT_TIME = "library_drift_time";
423 static constexpr const char* DECOY = "decoy";
424 static constexpr const char* TRAML_ID = "traml_id";
425 static constexpr const char* MODIFIED_SEQUENCE = "modified_sequence";
426 static constexpr const char* UNMODIFIED_SEQUENCE = "unmodified_sequence";
427 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
428
429 static std::shared_ptr<arrow::Schema> schema();
430 };
431
433 struct OPENMS_DLLAPI OSWTransitionSchema
434 {
435 static constexpr const char* TRANSITION_ID = "transition_id";
436 static constexpr const char* PRECURSOR_ID = "precursor_id";
437 static constexpr const char* TRAML_ID = "traml_id";
438 static constexpr const char* PRODUCT_MZ = "product_mz";
439 static constexpr const char* CHARGE = "charge";
440 static constexpr const char* TYPE = "type";
441 static constexpr const char* ANNOTATION = "annotation";
442 static constexpr const char* ORDINAL = "ordinal";
443 static constexpr const char* DETECTING = "detecting";
444 static constexpr const char* IDENTIFYING = "identifying";
445 static constexpr const char* QUANTIFYING = "quantifying";
446 static constexpr const char* LIBRARY_INTENSITY = "library_intensity";
447 static constexpr const char* DECOY = "decoy";
448
449 static std::shared_ptr<arrow::Schema> schema();
450 };
451
453 struct OPENMS_DLLAPI OSWFeaturePrecursorSchema
454 {
455 static constexpr const char* FEATURE_ID = "feature_id";
456 static constexpr const char* RUN_ID = "run_id";
457 static constexpr const char* PRECURSOR_ISOTOPE = "precursor_isotope";
458 static constexpr const char* PRECURSOR_AREA_INTENSITY = "precursor_area_intensity";
459 static constexpr const char* PRECURSOR_APEX_INTENSITY = "precursor_apex_intensity";
460
461 static std::shared_ptr<arrow::Schema> schema();
462 };
463
465 struct OPENMS_DLLAPI OSWRunSchema
466 {
467 static constexpr const char* RUN_ID = "run_id";
468 static constexpr const char* FILENAME = "filename";
469
470 static std::shared_ptr<arrow::Schema> schema();
471 };
472
474 struct OPENMS_DLLAPI OSWFeatureSchema
475 {
476 static constexpr const char* FEATURE_ID = "feature_id";
477 static constexpr const char* RUN_ID = "run_id";
478 static constexpr const char* PRECURSOR_ID = "precursor_id";
479 static constexpr const char* EXP_RT = "exp_rt";
480 static constexpr const char* EXP_IM = "exp_im";
481 static constexpr const char* NORM_RT = "norm_rt";
482 static constexpr const char* DELTA_RT = "delta_rt";
483 static constexpr const char* LEFT_WIDTH = "left_width";
484 static constexpr const char* RIGHT_WIDTH = "right_width";
485 static constexpr const char* EXP_IM_LEFTWIDTH = "exp_im_leftwidth";
486 static constexpr const char* EXP_IM_RIGHTWIDTH = "exp_im_rightwidth";
487 static constexpr const char* MS1_AREA_INTENSITY = "ms1_area_intensity";
488 static constexpr const char* MS1_APEX_INTENSITY = "ms1_apex_intensity";
489 static constexpr const char* MS1_EXP_IM = "ms1_exp_im";
490 static constexpr const char* MS1_DELTA_IM = "ms1_delta_im";
491 static constexpr const char* VAR_MS1_MASSDEV_SCORE = "var_ms1_massdev_score";
492 static constexpr const char* VAR_MS1_IM_MS1_DELTA_SCORE = "var_ms1_im_ms1_delta_score";
493 static constexpr const char* VAR_MS1_MI_SCORE = "var_ms1_mi_score";
494 static constexpr const char* VAR_MS1_MI_CONTRAST_SCORE = "var_ms1_mi_contrast_score";
495 static constexpr const char* VAR_MS1_MI_COMBINED_SCORE = "var_ms1_mi_combined_score";
496 static constexpr const char* VAR_MS1_ISOTOPE_CORRELATION_SCORE = "var_ms1_isotope_correlation_score";
497 static constexpr const char* VAR_MS1_ISOTOPE_OVERLAP_SCORE = "var_ms1_isotope_overlap_score";
498 static constexpr const char* VAR_MS1_XCORR_COELUTION = "var_ms1_xcorr_coelution";
499 static constexpr const char* VAR_MS1_XCORR_COELUTION_CONTRAST = "var_ms1_xcorr_coelution_contrast";
500 static constexpr const char* VAR_MS1_XCORR_COELUTION_COMBINED = "var_ms1_xcorr_coelution_combined";
501 static constexpr const char* VAR_MS1_XCORR_SHAPE = "var_ms1_xcorr_shape";
502 static constexpr const char* VAR_MS1_XCORR_SHAPE_CONTRAST = "var_ms1_xcorr_shape_contrast";
503 static constexpr const char* VAR_MS1_XCORR_SHAPE_COMBINED = "var_ms1_xcorr_shape_combined";
504 static constexpr const char* MS2_AREA_INTENSITY = "ms2_area_intensity";
505 static constexpr const char* MS2_TOTAL_AREA_INTENSITY = "ms2_total_area_intensity";
506 static constexpr const char* MS2_APEX_INTENSITY = "ms2_apex_intensity";
507 static constexpr const char* MS2_EXP_IM = "ms2_exp_im";
508 static constexpr const char* MS2_EXP_IM_LEFTWIDTH = "ms2_exp_im_leftwidth";
509 static constexpr const char* MS2_EXP_IM_RIGHTWIDTH = "ms2_exp_im_rightwidth";
510 static constexpr const char* MS2_DELTA_IM = "ms2_delta_im";
511 static constexpr const char* MS2_TOTAL_MI = "ms2_total_mi";
512 static constexpr const char* VAR_MS2_BSERIES_SCORE = "var_ms2_bseries_score";
513 static constexpr const char* VAR_MS2_DOTPROD_SCORE = "var_ms2_dotprod_score";
514 static constexpr const char* VAR_MS2_INTENSITY_SCORE = "var_ms2_intensity_score";
515 static constexpr const char* VAR_MS2_ISOTOPE_CORRELATION_SCORE = "var_ms2_isotope_correlation_score";
516 static constexpr const char* VAR_MS2_ISOTOPE_OVERLAP_SCORE = "var_ms2_isotope_overlap_score";
517 static constexpr const char* VAR_MS2_LIBRARY_CORR = "var_ms2_library_corr";
518 static constexpr const char* VAR_MS2_LIBRARY_DOTPROD = "var_ms2_library_dotprod";
519 static constexpr const char* VAR_MS2_LIBRARY_MANHATTAN = "var_ms2_library_manhattan";
520 static constexpr const char* VAR_MS2_LIBRARY_RMSD = "var_ms2_library_rmsd";
521 static constexpr const char* VAR_MS2_LIBRARY_ROOTMEANSQUARE = "var_ms2_library_rootmeansquare";
522 static constexpr const char* VAR_MS2_LIBRARY_SANGLE = "var_ms2_library_sangle";
523 static constexpr const char* VAR_MS2_LOG_SN_SCORE = "var_ms2_log_sn_score";
524 static constexpr const char* VAR_MS2_MANHATTAN_SCORE = "var_ms2_manhattan_score";
525 static constexpr const char* VAR_MS2_MASSDEV_SCORE = "var_ms2_massdev_score";
526 static constexpr const char* VAR_MS2_MASSDEV_SCORE_WEIGHTED = "var_ms2_massdev_score_weighted";
527 static constexpr const char* VAR_MS2_MI_SCORE = "var_ms2_mi_score";
528 static constexpr const char* VAR_MS2_MI_WEIGHTED_SCORE = "var_ms2_mi_weighted_score";
529 static constexpr const char* VAR_MS2_MI_RATIO_SCORE = "var_ms2_mi_ratio_score";
530 static constexpr const char* VAR_MS2_NORM_RT_SCORE = "var_ms2_norm_rt_score";
531 static constexpr const char* VAR_MS2_XCORR_COELUTION = "var_ms2_xcorr_coelution";
532 static constexpr const char* VAR_MS2_XCORR_COELUTION_WEIGHTED = "var_ms2_xcorr_coelution_weighted";
533 static constexpr const char* VAR_MS2_XCORR_SHAPE = "var_ms2_xcorr_shape";
534 static constexpr const char* VAR_MS2_XCORR_SHAPE_WEIGHTED = "var_ms2_xcorr_shape_weighted";
535 static constexpr const char* VAR_MS2_YSERIES_SCORE = "var_ms2_yseries_score";
536 static constexpr const char* VAR_MS2_ELUTION_MODEL_FIT_SCORE = "var_ms2_elution_model_fit_score";
537 static constexpr const char* VAR_MS2_IM_XCORR_SHAPE = "var_ms2_im_xcorr_shape";
538 static constexpr const char* VAR_MS2_IM_XCORR_COELUTION = "var_ms2_im_xcorr_coelution";
539 static constexpr const char* VAR_MS2_IM_DELTA_SCORE = "var_ms2_im_delta_score";
540 static constexpr const char* VAR_MS2_IM_LOG_INTENSITY = "var_ms2_im_log_intensity";
541
542 static std::shared_ptr<arrow::Schema> schema();
543 };
544
546 struct OPENMS_DLLAPI OSWFeatureTransitionSchema
547 {
548 static constexpr const char* FEATURE_ID = "feature_id";
549 static constexpr const char* RUN_ID = "run_id";
550 static constexpr const char* TRANSITION_ID = "transition_id";
551 static constexpr const char* AREA_INTENSITY = "area_intensity";
552 static constexpr const char* TOTAL_AREA_INTENSITY = "total_area_intensity";
553 static constexpr const char* APEX_INTENSITY = "apex_intensity";
554 static constexpr const char* APEX_RT = "apex_rt";
555 static constexpr const char* RT_FWHM = "rt_fwhm";
556 static constexpr const char* MASSERROR_PPM = "masserror_ppm";
557 static constexpr const char* TOTAL_MI = "total_mi";
558 static constexpr const char* VAR_INTENSITY_SCORE = "var_intensity_score";
559 static constexpr const char* VAR_INTENSITY_RATIO_SCORE = "var_intensity_ratio_score";
560 static constexpr const char* VAR_LOG_INTENSITY = "var_log_intensity";
561 static constexpr const char* VAR_XCORR_COELUTION = "var_xcorr_coelution";
562 static constexpr const char* VAR_XCORR_SHAPE = "var_xcorr_shape";
563 static constexpr const char* VAR_LOG_SN_SCORE = "var_log_sn_score";
564 static constexpr const char* VAR_MASSDEV_SCORE = "var_massdev_score";
565 static constexpr const char* VAR_MI_SCORE = "var_mi_score";
566 static constexpr const char* VAR_MI_RATIO_SCORE = "var_mi_ratio_score";
567 static constexpr const char* VAR_ISOTOPE_CORRELATION_SCORE = "var_isotope_correlation_score";
568 static constexpr const char* VAR_ISOTOPE_OVERLAP_SCORE = "var_isotope_overlap_score";
569 static constexpr const char* EXP_IM = "exp_im";
570 static constexpr const char* EXP_IM_LEFTWIDTH = "exp_im_leftwidth";
571 static constexpr const char* EXP_IM_RIGHTWIDTH = "exp_im_rightwidth";
572 static constexpr const char* DELTA_IM = "delta_im";
573 static constexpr const char* VAR_IM_DELTA_SCORE = "var_im_delta_score";
574 static constexpr const char* VAR_IM_LOG_INTENSITY = "var_im_log_intensity";
575 static constexpr const char* VAR_IM_XCORR_COELUTION_CONTRAST = "var_im_xcorr_coelution_contrast";
576 static constexpr const char* VAR_IM_XCORR_SHAPE_CONTRAST = "var_im_xcorr_shape_contrast";
577 static constexpr const char* VAR_IM_XCORR_COELUTION_COMBINED = "var_im_xcorr_coelution_combined";
578 static constexpr const char* VAR_IM_XCORR_SHAPE_COMBINED = "var_im_xcorr_shape_combined";
579 static constexpr const char* START_POSITION_AT_5 = "start_position_at_5";
580 static constexpr const char* END_POSITION_AT_5 = "end_position_at_5";
581 static constexpr const char* START_POSITION_AT_10 = "start_position_at_10";
582 static constexpr const char* END_POSITION_AT_10 = "end_position_at_10";
583 static constexpr const char* START_POSITION_AT_50 = "start_position_at_50";
584 static constexpr const char* END_POSITION_AT_50 = "end_position_at_50";
585 static constexpr const char* TOTAL_WIDTH = "total_width";
586 static constexpr const char* TAILING_FACTOR = "tailing_factor";
587 static constexpr const char* ASYMMETRY_FACTOR = "asymmetry_factor";
588 static constexpr const char* SLOPE_OF_BASELINE = "slope_of_baseline";
589 static constexpr const char* BASELINE_DELTA_2_HEIGHT = "baseline_delta_2_height";
590 static constexpr const char* POINTS_ACROSS_BASELINE = "points_across_baseline";
591 static constexpr const char* POINTS_ACROSS_HALF_HEIGHT = "points_across_half_height";
592
593 static std::shared_ptr<arrow::Schema> schema();
594 };
595
597 struct OPENMS_DLLAPI XICSchema
598 {
599 static constexpr const char* RUN_ID = "RUN_ID";
600 static constexpr const char* SOURCE_FILE = "SOURCE_FILE";
601 static constexpr const char* MS_LEVEL = "MS_LEVEL";
602 static constexpr const char* PRECURSOR_ID = "PRECURSOR_ID";
603 static constexpr const char* TRANSITION_ID = "TRANSITION_ID";
604 static constexpr const char* MODIFIED_SEQUENCE = "MODIFIED_SEQUENCE";
605 static constexpr const char* PRECURSOR_CHARGE = "PRECURSOR_CHARGE";
606 static constexpr const char* PRODUCT_CHARGE = "PRODUCT_CHARGE";
607 static constexpr const char* DETECTING_TRANSITION = "DETECTING_TRANSITION";
608 static constexpr const char* PRECURSOR_DECOY = "PRECURSOR_DECOY";
609 static constexpr const char* PRODUCT_DECOY = "PRODUCT_DECOY";
610 static constexpr const char* TRANSITION_ORDINAL = "TRANSITION_ORDINAL";
611 static constexpr const char* TRANSITION_TYPE = "TRANSITION_TYPE";
612 static constexpr const char* ANNOTATION = "ANNOTATION";
613 static constexpr const char* RT_DATA = "RT_DATA";
614 static constexpr const char* INTENSITY_DATA = "INTENSITY_DATA";
615 static constexpr const char* RT_COMPRESSION = "RT_COMPRESSION";
616 static constexpr const char* INTENSITY_COMPRESSION = "INTENSITY_COMPRESSION";
617
618 static std::shared_ptr<arrow::Schema> schema();
619 };
620
622 struct OPENMS_DLLAPI XIMSchema
623 {
624 static constexpr const char* RUN_ID = "RUN_ID";
625 static constexpr const char* SOURCE_FILE = "SOURCE_FILE";
626 static constexpr const char* MS_LEVEL = "MS_LEVEL";
627 static constexpr const char* MOBILOGRAM_TYPE = "MOBILOGRAM_TYPE";
628 static constexpr const char* PRECURSOR_ID = "PRECURSOR_ID";
629 static constexpr const char* TRANSITION_ID = "TRANSITION_ID";
630 static constexpr const char* FEATURE_ID = "FEATURE_ID";
631 static constexpr const char* FEATURE_RT = "FEATURE_RT";
632 static constexpr const char* MODIFIED_SEQUENCE = "MODIFIED_SEQUENCE";
633 static constexpr const char* PRECURSOR_CHARGE = "PRECURSOR_CHARGE";
634 static constexpr const char* PRODUCT_CHARGE = "PRODUCT_CHARGE";
635 static constexpr const char* DETECTING_TRANSITION = "DETECTING_TRANSITION";
636 static constexpr const char* PRECURSOR_DECOY = "PRECURSOR_DECOY";
637 static constexpr const char* PRODUCT_DECOY = "PRODUCT_DECOY";
638 static constexpr const char* TRANSITION_ORDINAL = "TRANSITION_ORDINAL";
639 static constexpr const char* TRANSITION_TYPE = "TRANSITION_TYPE";
640 static constexpr const char* ANNOTATION = "ANNOTATION";
641 static constexpr const char* MOBILITY_DATA = "MOBILITY_DATA";
642 static constexpr const char* INTENSITY_DATA = "INTENSITY_DATA";
643 static constexpr const char* MOBILITY_COMPRESSION = "MOBILITY_COMPRESSION";
644 static constexpr const char* INTENSITY_COMPRESSION = "INTENSITY_COMPRESSION";
645
646 static std::shared_ptr<arrow::Schema> schema();
647 };
648
649} // namespace OpenMS
ValidationResult validate(const std::shared_ptr< arrow::Table > &table, const std::shared_ptr< arrow::Schema > &expected_schema, Mode mode=Mode::Strict)
Validate an Arrow table's schema against an expected schema.
Mode
Validation strictness: Strict requires exact match, Subset allows missing and extra columns.
Definition ArrowSchemaRegistry.h:32
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
@ RT
RT in seconds.
Definition ArrowIOHelpers.h:20
Result of schema validation containing validity flag and error messages.
Definition ArrowSchemaRegistry.h:39
std::vector< std::string > errors
Definition ArrowSchemaRegistry.h:41
Schema for chromatograms in long (one row per data point) format.
Definition ArrowSchemaRegistry.h:391
static std::shared_ptr< arrow::Schema > schema()
Schema for chromatograms in semi-wide (one row per chromatogram, list columns) format.
Definition ArrowSchemaRegistry.h:404
static std::shared_ptr< arrow::Schema > schema()
Schema for consensus feature table (ConsensusMap features)
Definition ArrowSchemaRegistry.h:151
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > handlesType()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for LC-MS feature table (FeatureMap features)
Definition ArrowSchemaRegistry.h:125
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > metavaluesType()
static std::shared_ptr< arrow::DataType > convexHullType()
Schema for OpenSWATH feature-level precursor intensity table.
Definition ArrowSchemaRegistry.h:454
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH feature scoring results table.
Definition ArrowSchemaRegistry.h:475
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH per-transition feature scoring results table.
Definition ArrowSchemaRegistry.h:547
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH precursor (peptide query) table.
Definition ArrowSchemaRegistry.h:417
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH run metadata table.
Definition ArrowSchemaRegistry.h:466
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH transition (fragment ion) table.
Definition ArrowSchemaRegistry.h:434
static std::shared_ptr< arrow::Schema > schema()
Schema for peptide-spectrum match (PSM) results table.
Definition ArrowSchemaRegistry.h:169
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > additionalScoresType()
static std::shared_ptr< arrow::DataType > modificationsType()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for protein group (indistinguishable group) results table.
Definition ArrowSchemaRegistry.h:73
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > floatDataType()
static std::shared_ptr< arrow::DataType > stringDataType()
static std::shared_ptr< arrow::DataType > integerDataType()
Schema for protein identification results table.
Definition ArrowSchemaRegistry.h:54
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > modificationsType()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for QPX feature view (quantms Parquet eXchange format)
Definition ArrowSchemaRegistry.h:254
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX feature table (31 fields)
static std::shared_ptr< arrow::DataType > pgAccessionsType()
Arrow type for protein group accessions: list<struct{accession, start, end, pre, post}>
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores (delegates to QPXPSMSchema::additionalScoresType)
static std::shared_ptr< arrow::DataType > additionalIntensitiesType()
Arrow type for additional intensities: list<struct{label, intensities: list<struct{....
static std::shared_ptr< arrow::DataType > intensitiesType()
Arrow type for intensities: list<struct{label, intensity}>
static std::shared_ptr< arrow::DataType > modificationsType()
Arrow type for modifications (delegates to QPXPSMSchema::modificationsType)
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params (delegates to QPXPSMSchema::cvParamsType)
static std::shared_ptr< arrow::DataType > pgPositionsType()
Arrow type for protein group positions: list<struct{protein_accession, start, end}>
Schema for QPX PSM export (quantms Parquet eXchange format, PSM table)
Definition ArrowSchemaRegistry.h:211
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX PSM table (24 fields)
static std::shared_ptr< arrow::DataType > crossLinksType()
Arrow type for cross-links: list<struct{xl_type, partner_sequence, ...}>
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores: list<struct{score_name, score_value, higher_better}>
static std::shared_ptr< arrow::DataType > modificationsType()
Arrow type for modifications: list<struct{name, accession, positions: list<struct{position,...
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params: list<struct{cv_name, cv_value}>
Schema for QPX protein group export (quantms Parquet eXchange format, pg table)
Definition ArrowSchemaRegistry.h:311
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX pg table (20 fields)
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores (delegates to QPXPSMSchema::additionalScoresType)
static std::shared_ptr< arrow::DataType > additionalIntensitiesType()
Arrow type for additional intensities: list<struct{label, intensities: list<struct{....
static std::shared_ptr< arrow::DataType > intensitiesType()
Arrow type for intensities: list<struct{label, intensity}> (nullable for search-engine output)
static std::shared_ptr< arrow::DataType > peptidesType()
Arrow type for peptides: list<struct{protein_name, peptide_count}>
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params (delegates to QPXPSMSchema::cvParamsType)
static std::shared_ptr< arrow::DataType > featureCountsType()
Arrow type for feature_counts: struct{unique_features, total_features}.
static std::shared_ptr< arrow::DataType > peptideCountsType()
Arrow type for peptide_counts: struct{unique_sequences, total_sequences}.
Schema for search engine parameters and settings table.
Definition ArrowSchemaRegistry.h:91
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for spectra in long (one row per peak) format.
Definition ArrowSchemaRegistry.h:353
static std::shared_ptr< arrow::Schema > schema()
Schema for spectra in semi-wide (one row per spectrum, list columns for peaks) format.
Definition ArrowSchemaRegistry.h:372
static std::shared_ptr< arrow::Schema > schema()
Schema for extracted ion chromatogram (XIC) data table.
Definition ArrowSchemaRegistry.h:598
static std::shared_ptr< arrow::Schema > schema()
Schema for extracted ion mobilogram (XIM) data table.
Definition ArrowSchemaRegistry.h:623
static std::shared_ptr< arrow::Schema > schema()