OpenMS  2.7.0
MzTab.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
44 
45 #include <boost/optional.hpp>
46 
47 #include <map>
48 #include <vector>
49 #include <list>
50 #include <algorithm>
51 
52 #pragma clang diagnostic push
53 #pragma clang diagnostic ignored "-Wnon-virtual-dtor"
54 
55 namespace OpenMS
56 {
67  {
73  };
74 
// Value type for a single mzTab cell that holds a double.
// Declared here: construction from a double, set()/get() for the stored number,
// fromCellString() taking the cell text, and query/setter pairs for the
// null, NaN and Inf states.
// NOTE(review): listing lines 78, 86 and 105 are missing from this excerpt. The
// cross-reference index at the end of this page lists `String toCellString() const`
// and `MzTabCellStateType state_` for this class -- confirm against the real header.
75  class OPENMS_DLLAPI MzTabDouble
76  {
77 public:
79 
// explicit: a plain double is never converted to MzTabDouble implicitly.
80  explicit MzTabDouble(const double v);
81 
82  void set(const double& value);
83 
// Returns the stored number by value.
84  double get() const;
85 
87 
// Takes the textual cell content; presumably parses it into value and state
// (implementation not visible here).
88  void fromCellString(const String& s);
89 
90  bool isNull() const;
91 
92  void setNull(bool b);
93 
94  bool isNaN() const;
95 
96  void setNaN();
97 
98  bool isInf() const;
99 
100  void setInf();
101 
// Non-virtual defaulted destructor: the class is not set up for polymorphic deletion.
102  ~MzTabDouble() = default;
103 protected:
104  double value_;
106  };
107 
// List-valued mzTab cell whose elements are MzTabDouble objects, stored in entries_.
// NOTE(review): listing line 117 is missing from this excerpt; the cross-reference
// index of this page lists `String toCellString() const` for this class.
108  class OPENMS_DLLAPI MzTabDoubleList
109  {
110 public:
111  MzTabDoubleList() = default;
112 
113  bool isNull() const;
114 
115  void setNull(bool b);
116 
118 
// Takes the textual cell content; presumably splits it into the individual entries
// (implementation not visible here).
119  void fromCellString(const String& s);
120 
// Returns the entries by value, i.e. the caller receives a copy of the vector.
121  std::vector<MzTabDouble> get() const;
122 
123  void set(const std::vector<MzTabDouble>& entries);
124 
125  ~MzTabDoubleList() = default;
126 protected:
127  std::vector<MzTabDouble> entries_;
128  };
129 
// Value type for a single mzTab cell that holds an integer.
// The interface mirrors MzTabDouble: set()/get(), fromCellString() and
// query/setter pairs for the null, NaN and Inf states.
// NOTE(review): listing lines 133, 141, 159 and 160 are missing from this excerpt.
// The cross-reference index of this page lists `String toCellString() const`,
// `Int value_` and `MzTabCellStateType state_` for this class.
130  class OPENMS_DLLAPI MzTabInteger
131  {
132 public:
134 
// NOTE(review): the constructor takes `int` while set()/get() use the `Int` typedef;
// confirm both name the same type.
135  explicit MzTabInteger(const int v);
136 
137  void set(const Int& value);
138 
139  Int get() const;
140 
142 
143  void fromCellString(const String& s);
144 
145  bool isNull() const;
146 
147  void setNull(bool b);
148 
149  bool isNaN() const;
150 
151  void setNaN();
152 
153  bool isInf() const;
154 
155  void setInf();
156 
157  ~MzTabInteger() = default;
158 protected:
161  };
162 
// List-valued mzTab cell whose elements are MzTabInteger objects, stored in entries_.
// NOTE(review): listing line 172 is missing from this excerpt; the cross-reference
// index of this page lists `String toCellString() const` for this class.
163  class OPENMS_DLLAPI MzTabIntegerList
164  {
165 public:
166  MzTabIntegerList() = default;
167 
168  bool isNull() const;
169 
170  void setNull(bool b);
171 
173 
174  void fromCellString(const String& s);
175 
// Returns the entries by value, i.e. the caller receives a copy of the vector.
176  std::vector<MzTabInteger> get() const;
177 
178  void set(const std::vector<MzTabInteger>& entries);
179 
180  ~MzTabIntegerList() = default;
181 protected:
182  std::vector<MzTabInteger> entries_;
183  };
184 
// Value type for a single mzTab cell that holds a boolean.
// NOTE(review): listing lines 188 and 200 are missing from this excerpt; the
// cross-reference index of this page lists `String toCellString() const` for this class.
185  class OPENMS_DLLAPI MzTabBoolean
186  {
187 public:
189 
190  bool isNull() const;
191 
192  void setNull(bool b);
193 
194  explicit MzTabBoolean(bool v);
195 
196  void set(const bool& value);
197 
// NOTE(review): the getter returns Int although set() takes a bool; callers that
// want a bool have to convert. Presumably this mirrors the int storage below -- confirm.
198  Int get() const;
199 
201 
202  void fromCellString(const String& s);
203 
204  ~MzTabBoolean() = default;
205 protected:
// Stored as int rather than bool; the encoding of the values is not visible in this header.
206  int value_;
207  };
208 
// Value type for a single mzTab cell that holds a string.
// NOTE(review): listing lines 212, 224 and 230 are missing from this excerpt, so no
// data member and no to-string conversion are visible here -- see the real header.
209  class OPENMS_DLLAPI MzTabString
210  {
211 public:
213 
// explicit: a String is never converted to MzTabString implicitly.
214  explicit MzTabString(const String& s);
215 
216  bool isNull() const;
217 
218  void setNull(bool b);
219 
220  void set(const String& value);
221 
// Returns the stored text by value (a copy).
222  String get() const;
223 
225 
226  void fromCellString(const String& s);
227 
228  ~MzTabString() = default;
229 protected:
231  };
232 
// mzTab parameter cell made up of four textual parts that can be set individually:
// CV label, accession, name and value.
// NOTE(review): listing lines 236, 250, 252, 258 and 264-267 are missing from this
// excerpt. Only the getters for name and value are visible; getters matching
// setCVLabel()/setAccession() and the data members presumably sit on the missing lines.
233  class OPENMS_DLLAPI MzTabParameter
234  {
235 public:
237 
238  bool isNull() const;
239 
240  void setNull(bool b);
241 
242  void setCVLabel(const String& CV_label);
243 
244  void setAccession(const String& accession);
245 
246  void setName(const String& name);
247 
248  void setValue(const String& value);
249 
251 
253 
// Both getters return by value (a copy of the stored text).
254  String getName() const;
255 
256  String getValue() const;
257 
259 
260  void fromCellString(const String& s);
261 
262  ~MzTabParameter() = default;
263 protected:
268  };
269 
// List-valued mzTab cell whose elements are MzTabParameter objects, stored in parameters_.
// NOTE(review): listing line 279 is missing from this excerpt (presumably the
// to-string conversion that the sibling cell classes declare) -- confirm.
270  class OPENMS_DLLAPI MzTabParameterList
271  {
272 public:
273  MzTabParameterList() = default;
274 
275  bool isNull() const;
276 
277  void setNull(bool b);
278 
280 
281  void fromCellString(const String& s);
282 
// Returns the parameters by value, i.e. the caller receives a copy of the vector.
283  std::vector<MzTabParameter> get() const;
284 
285  void set(const std::vector<MzTabParameter>& parameters);
286 
287  ~MzTabParameterList() = default;
288 protected:
289  std::vector<MzTabParameter> parameters_;
290  };
291 
// List-valued mzTab cell whose elements are MzTabString objects, together with a
// configurable separator character (sep_).
// NOTE(review): listing lines 295, 301 and 304 are missing from this excerpt
// (presumably the constructor, a doc comment and the to-string conversion) -- confirm.
292  class OPENMS_DLLAPI MzTabStringList
293  {
294 public:
296 
297  bool isNull() const;
298 
299  void setNull(bool b);
300 
// Sets the separator character; presumably the one used to split/join the entries
// in the cell text (implementation and default value are not visible here).
302  void setSeparator(char sep);
303 
305 
306  void fromCellString(const String& s);
307 
// Returns the entries by value, i.e. the caller receives a copy of the vector.
308  std::vector<MzTabString> get() const;
309 
310  void set(const std::vector<MzTabString>& entries);
311 
312  ~MzTabStringList() = default;
313 protected:
314  std::vector<MzTabString> entries_;
315  char sep_;
316  };
317 
// mzTab modification cell. The visible state is a list of (Size, MzTabParameter)
// pairs kept in pos_param_pairs_; judging by the accessor names the Size is a
// position and the parameter is attached to that position.
// NOTE(review): listing lines 321, 327, 332, 334, 336 and 343 are missing from this
// excerpt, so further accessors and at least one more data member are not shown.
318  class OPENMS_DLLAPI MzTabModification
319  {
320 public:
322 
323  bool isNull() const;
324 
325  void setNull(bool b);
326 
328  void setPositionsAndParameters(const std::vector<std::pair<Size, MzTabParameter> >& ppp);
329 
// Returns the pairs by value, i.e. the caller receives a copy of the vector.
330  std::vector<std::pair<Size, MzTabParameter> > getPositionsAndParameters() const;
331 
333 
335 
337 
338  void fromCellString(const String& s);
339 
340  ~MzTabModification() = default;
341 protected:
342  std::vector<std::pair<Size, MzTabParameter> > pos_param_pairs_;
344  };
345 
// List-valued mzTab cell whose elements are MzTabModification objects, stored in entries_.
// NOTE(review): listing lines 353 and 361 are missing from this excerpt; unlike the
// sibling list classes no constructor or destructor declaration is visible here.
346  class OPENMS_DLLAPI MzTabModificationList
347  {
348 public:
349  bool isNull() const;
350 
351  void setNull(bool b);
352 
354 
355  void fromCellString(const String& s);
356 
// Returns the entries by value, i.e. the caller receives a copy of the vector.
357  std::vector<MzTabModification> get() const;
358 
359  void set(const std::vector<MzTabModification>& entries);
360 
362 protected:
363  std::vector<MzTabModification> entries_;
364  };
365 
// mzTab spectra reference cell: an ms_run number (ms_run_) plus a spectrum reference string.
// NOTE(review): listing lines 369, 379, 385 and 392 are missing from this excerpt.
// The row comparators further down call getSpecRef() on a `spectra_ref` member, so a
// getter of that name presumably sits on one of the missing lines, as does the
// member holding the reference string.
366  class OPENMS_DLLAPI MzTabSpectraRef
367  {
368 public:
370 
371  bool isNull() const;
372 
373  void setNull(bool b);
374 
375  void setMSFile(Size index);
376 
377  void setSpecRef(const String& spec_ref);
378 
380 
381  Size getMSFile() const;
382 
// NOTE(review): takes the same kind of argument as setSpecRef(); how the two
// setters differ cannot be told from this header.
383  void setSpecRefFile(const String& spec_ref);
384 
386 
387  void fromCellString(const String& s);
388 
389  ~MzTabSpectraRef() = default;
390 protected:
391  Size ms_run_; ///< number is specified in the meta data section.
393  };
394 
395 // MTD
396 
// Metadata of one sample. Every visible field is a map from a Size key to an
// MzTabParameter, so each category can hold several indexed entries.
// NOTE(review): listing line 399 is missing from this excerpt (one more member or a
// comment); the meaning of the Size keys is not visible here -- presumably the
// running index used in the mzTab metadata section.
397  struct OPENMS_DLLAPI MzTabSampleMetaData
398  {
400  std::map<Size, MzTabParameter> species;
401  std::map<Size, MzTabParameter> tissue;
402  std::map<Size, MzTabParameter> cell_type;
403  std::map<Size, MzTabParameter> disease;
404  std::map<Size, MzTabParameter> custom;
405  };
406 
// Metadata of one software entry. Only the indexed free-text settings are visible.
// NOTE(review): listing line 409 is missing from this excerpt (presumably the member
// that identifies the software itself) -- confirm against the real header.
407  struct OPENMS_DLLAPI MzTabSoftwareMetaData
408  {
410  //TODO shouldn't settings always consist of the name of the setting
411  // and the value?
// Each setting is a single MzTabString keyed by a Size index.
412  std::map<Size, MzTabString> setting;
413  };
414 
415  struct OPENMS_DLLAPI MzTabModificationMetaData
416  {
420  };
421 
// Metadata of one assay: indexed quantification modifications plus a list of
// referenced ms_run numbers.
// NOTE(review): listing lines 424 and 426 are missing from this excerpt, so further
// members are not shown.
422  struct OPENMS_DLLAPI MzTabAssayMetaData
423  {
425  std::map<Size, MzTabModificationMetaData> quantification_mod;
// A vector, i.e. one assay may reference several ms_runs (see the linked issue).
427  std::vector<int> ms_run_ref; // adapted to address https://github.com/HUPO-PSI/mzTab/issues/26
428  };
429 
430  struct OPENMS_DLLAPI MzTabCVMetaData
431  {
436  };
437 
// Metadata of one instrument. Only the indexed analyzer parameters are visible.
// NOTE(review): listing lines 440, 441 and 443 are missing from this excerpt, so
// further members are not shown.
438  struct OPENMS_DLLAPI MzTabInstrumentMetaData
439  {
// Map from a Size key to a parameter: several analyzers can be recorded per instrument.
442  std::map<Size, MzTabParameter> analyzer;
444  };
445 
446  struct OPENMS_DLLAPI MzTabContactMetaData
447  {
451  };
452 
453  struct OPENMS_DLLAPI MzTabMSRunMetaData
454  {
459  };
460 
// Metadata of one study variable: the assays and samples it refers to, stored as
// plain int lists.
// NOTE(review): listing line 465 is missing from this excerpt (one more member).
// The ints are presumably the indices of the `assay` / `sample` maps in
// MzTabMetaData -- confirm.
461  struct OPENMS_DLLAPI MzTabStudyVariableMetaData
462  {
463  std::vector<int> assay_refs;
464  std::vector<int> sample_refs;
466  };
467 
// All metadata of an mzTab file, exposed as public data members.
// Most members are maps from a Size key to one entry of the respective kind.
// NOTE(review): the following listing lines are missing from this excerpt:
// 472, 474-479, 495, 507 and 509-511.
// The cross-reference index of this page shows that they declare, among others,
// mz_tab_version, mz_tab_mode, mz_tab_type, mz_tab_id, title, description,
// false_discovery_rate, quantification_method, peptide_quantification_unit
// and small_molecule_quantification_unit.
469  class OPENMS_DLLAPI MzTabMetaData
470  {
471 public:
473 
480 
// One map of search engine score definitions per section type.
481  std::map<Size, MzTabParameter> protein_search_engine_score;
482  std::map<Size, MzTabParameter> peptide_search_engine_score;
483  std::map<Size, MzTabParameter> psm_search_engine_score;
484  std::map<Size, MzTabParameter> smallmolecule_search_engine_score;
485  std::map<Size, MzTabParameter> nucleic_acid_search_engine_score;
486  std::map<Size, MzTabParameter> oligonucleotide_search_engine_score;
487  std::map<Size, MzTabParameter> osm_search_engine_score;
488 
489  std::map<Size, MzTabParameterList> sample_processing;
490 
491  std::map<Size, MzTabInstrumentMetaData> instrument;
492 
493  std::map<Size, MzTabSoftwareMetaData> software;
494 
496 
497  std::map<Size, MzTabString> publication;
498 
499  std::map<Size, MzTabContactMetaData> contact;
500 
501  std::map<Size, MzTabString> uri;
502 
503  std::map<Size, MzTabModificationMetaData> fixed_mod;
504 
505  std::map<Size, MzTabModificationMetaData> variable_mod;
506 
508 
512 
513  std::map<Size, MzTabMSRunMetaData> ms_run;
514 
515  std::map<Size, MzTabParameter> custom;
516 
517  std::map<Size, MzTabSampleMetaData> sample;
518 
519  std::map<Size, MzTabAssayMetaData> assay;
520 
521  std::map<Size, MzTabStudyVariableMetaData> study_variable;
522 
523  std::map<Size, MzTabCVMetaData> cv;
524 
// Column unit declarations, one list of strings per section type.
525  std::vector<String> colunit_protein;
526  std::vector<String> colunit_peptide;
527  std::vector<String> colunit_psm;
528  std::vector<String> colunit_small_molecule;
529  };
530 
531  typedef std::pair<String, MzTabString> MzTabOptionalColumnEntry; //< column name (not null able), value (null able)
532 
// One row of the protein section.
// NOTE(review): listing lines 536-543, 546, 550-554, 561 and 564 are missing from
// this excerpt. They hold, among others, the `accession` member used by RowCompare
// and the first line of RowCompare::operator().
// The Size keys of the maps are presumably the indices of the matching metadata
// entries (search engine score, ms_run, assay, study variable) -- confirm.
534  struct OPENMS_DLLAPI MzTabProteinSectionRow
535  {
544  std::map<Size, MzTabDouble> best_search_engine_score;
// Two-level map: an outer Size key selects an inner map from Size to score.
545  std::map<Size, std::map<Size, MzTabDouble> > search_engine_score_ms_run;
547  std::map<Size, MzTabInteger> num_psms_ms_run;
548  std::map<Size, MzTabInteger> num_peptides_distinct_ms_run;
549  std::map<Size, MzTabInteger> num_peptides_unique_ms_run;
555  std::map<Size, MzTabDouble> protein_abundance_assay;
556  std::map<Size, MzTabDouble> protein_abundance_study_variable;
557  std::map<Size, MzTabDouble> protein_abundance_stdev_study_variable;
558  std::map<Size, MzTabDouble> protein_abundance_std_error_study_variable;
// Optional columns as (column name, value) pairs; a vector keeps their order.
559  std::vector<MzTabOptionalColumnEntry> opt_;
560 
// Comparator for sorting rows: orders two rows by the string returned from
// accession.get().
562  struct RowCompare
563  {
565  const MzTabProteinSectionRow& row2) const
566  {
567  return row1.accession.get() < row2.accession.get();
568  }
569  };
570  };
571 
// One row of the peptide section.
// NOTE(review): listing lines 575-580, 583-590, 598 and 601 are missing from this
// excerpt. They hold, among others, the `sequence` and `accession` members used by
// RowCompare and the first line of RowCompare::operator().
573  struct OPENMS_DLLAPI MzTabPeptideSectionRow
574  {
581  std::map<Size, MzTabDouble> best_search_engine_score;
// Two-level map: an outer Size key selects an inner map from Size to score.
582  std::map<Size, std::map<Size, MzTabDouble> > search_engine_score_ms_run;
591  std::map<Size, MzTabDouble> peptide_abundance_assay;
592  std::map<Size, MzTabDouble> peptide_abundance_study_variable;
593  std::map<Size, MzTabDouble> peptide_abundance_stdev_study_variable;
594  std::map<Size, MzTabDouble> peptide_abundance_std_error_study_variable;
// Optional columns as (column name, value) pairs; a vector keeps their order.
595  std::vector<MzTabOptionalColumnEntry> opt_;
596 
597 
// Comparator for sorting rows: lexicographic order on the pair
// (sequence.get(), accession.get()), i.e. sequence first, accession as tie-breaker.
599  struct RowCompare
600  {
602  const MzTabPeptideSectionRow& row2) const
603  {
604  return (std::make_pair(row1.sequence.get(), row1.accession.get()) <
605  std::make_pair(row2.sequence.get(), row2.accession.get()));
606  }
607  };
608  };
609 
611  struct OPENMS_DLLAPI MzTabPSMSectionRow
612  {
620  std::map<Size, MzTabDouble> search_engine_score;
633  std::vector<MzTabOptionalColumnEntry> opt_;
634 
641  void addPepEvidenceToRows(const std::vector<PeptideEvidence>& peptide_evidences);
643  struct RowCompare
644  {
645  bool operator()(const MzTabPSMSectionRow& row1,
646  const MzTabPSMSectionRow& row2) const
647  {
648  // @TODO: sort by "PSM_ID"? what's the point of that field?
649  return (std::make_tuple(row1.sequence.get(),
650  row1.spectra_ref.getMSFile(),
651  row1.spectra_ref.getSpecRef(),
652  row1.accession.get()) <
653  std::make_tuple(row2.sequence.get(),
654  row2.spectra_ref.getMSFile(),
655  row2.spectra_ref.getSpecRef(),
656  row2.accession.get()));
657  }
658  };
659  };
660 
// One row of the small molecule section.
// NOTE(review): listing lines 664-680, 683 and 687 are missing from this excerpt,
// so most of the fixed columns are not shown. No row comparator is visible for
// this section.
662  struct OPENMS_DLLAPI MzTabSmallMoleculeSectionRow
663  {
681  std::map<Size, MzTabDouble> best_search_engine_score;
// Two-level map: an outer Size key selects an inner map from Size to score.
682  std::map<Size, std::map<Size, MzTabDouble> > search_engine_score_ms_run;
684  std::map<Size, MzTabDouble> smallmolecule_abundance_assay;
685  std::map<Size, MzTabDouble> smallmolecule_abundance_study_variable;
686  std::map<Size, MzTabDouble> smallmolecule_abundance_stdev_study_variable;
// Optional columns as (column name, value) pairs; a vector keeps their order.
688  std::vector<MzTabOptionalColumnEntry> opt_;
689  };
690 
// One row of the nucleic acid section.
// NOTE(review): listing lines 694-700, 703, 707-709, 711, 712, 715 and 718 are
// missing from this excerpt. They hold, among others, the `accession` member used by
// RowCompare and the first line of RowCompare::operator().
692  struct OPENMS_DLLAPI MzTabNucleicAcidSectionRow
693  {
701  std::map<Size, MzTabDouble> best_search_engine_score;
// Two-level map: an outer Size key selects an inner map from Size to score.
702  std::map<Size, std::map<Size, MzTabDouble> > search_engine_score_ms_run;
704  std::map<Size, MzTabInteger> num_osms_ms_run;
705  std::map<Size, MzTabInteger> num_oligos_distinct_ms_run;
706  std::map<Size, MzTabInteger> num_oligos_unique_ms_run;
710  // do GO terms make sense for nucleic acid sequences?
// Optional columns as (column name, value) pairs; a vector keeps their order.
713  std::vector<MzTabOptionalColumnEntry> opt_;
714 
// Comparator for sorting rows: orders two rows by the string returned from
// accession.get().
716  struct RowCompare
717  {
719  const MzTabNucleicAcidSectionRow& row2) const
720  {
721  return row1.accession.get() < row2.accession.get();
722  }
723  };
724  };
725 
// One row of the oligonucleotide section.
// NOTE(review): listing lines 729-732, 735-743, 746 and 749 are missing from this
// excerpt. They hold, among others, the `sequence`, `accession`, `start` and `end`
// members used by RowCompare and the first line of RowCompare::operator().
727  struct OPENMS_DLLAPI MzTabOligonucleotideSectionRow
728  {
733  std::map<Size, MzTabDouble> best_search_engine_score;
// Two-level map: an outer Size key selects an inner map from Size to score.
734  std::map<Size, std::map<Size, MzTabDouble>> search_engine_score_ms_run;
// Optional columns as (column name, value) pairs; a vector keeps their order.
744  std::vector<MzTabOptionalColumnEntry> opt_;
745 
// Comparator for sorting rows: lexicographic order on the tuple
// (sequence, accession, start, end), each obtained through the member's get().
747  struct RowCompare
748  {
750  const MzTabOligonucleotideSectionRow& row2) const
751  {
752  return (std::make_tuple(row1.sequence.get(), row1.accession.get(),
753  row1.start.get(), row1.end.get()) <
754  std::make_tuple(row2.sequence.get(), row2.accession.get(),
755  row2.start.get(), row2.end.get()));
756  }
757  };
758 
759  };
760 
762  struct OPENMS_DLLAPI MzTabOSMSectionRow
763  {
766  std::map<Size, MzTabDouble> search_engine_score;
775  std::vector<MzTabOptionalColumnEntry> opt_;
776 
778  struct RowCompare
779  {
780  bool operator()(const MzTabOSMSectionRow& row1,
781  const MzTabOSMSectionRow& row2) const
782  {
783  return (std::make_tuple(row1.sequence.get(),
784  row1.spectra_ref.getMSFile(),
785  row1.spectra_ref.getSpecRef()) <
786  std::make_tuple(row2.sequence.get(),
787  row2.spectra_ref.getMSFile(),
788  row2.spectra_ref.getSpecRef()));
789  }
790  };
791  };
792 
793  typedef std::vector<MzTabProteinSectionRow> MzTabProteinSectionRows;
794  typedef std::vector<MzTabPeptideSectionRow> MzTabPeptideSectionRows;
795  typedef std::vector<MzTabPSMSectionRow> MzTabPSMSectionRows;
796  typedef std::vector<MzTabSmallMoleculeSectionRow> MzTabSmallMoleculeSectionRows;
797  typedef std::vector<MzTabNucleicAcidSectionRow> MzTabNucleicAcidSectionRows;
798  typedef std::vector<MzTabOligonucleotideSectionRow> MzTabOligonucleotideSectionRows;
799  typedef std::vector<MzTabOSMSectionRow> MzTabOSMSectionRows;
800 
801 
808  class OPENMS_DLLAPI MzTab
809  {
810  public:
812  MzTab() = default;
813  ~MzTab() = default;
814 
815  const MzTabMetaData& getMetaData() const;
816 
817  void setMetaData(const MzTabMetaData& md);
818 
820 
822 
824 
826 
828 
830 
832 
834 
836 
838 
840 
842 
844 
846 
848 
850 
852 
853  void setCommentRows(const std::map<Size, String>& com);
854 
855  void setEmptyRows(const std::vector<Size>& empty);
856 
857  const std::vector<Size>& getEmptyRows() const;
858 
859  const std::map<Size, String>& getCommentRows() const;
860 
862  std::vector<String> getProteinOptionalColumnNames() const;
863 
865  std::vector<String> getPeptideOptionalColumnNames() const;
866 
868  std::vector<String> getPSMOptionalColumnNames() const;
869 
871  std::vector<String> getSmallMoleculeOptionalColumnNames() const;
872 
874  std::vector<String> getNucleicAcidOptionalColumnNames() const;
875 
877  std::vector<String> getOligonucleotideOptionalColumnNames() const;
878 
879  static void addMetaInfoToOptionalColumns(const std::set<String>& keys, std::vector<MzTabOptionalColumnEntry>& opt, const String& id, const MetaInfoInterface& meta);
880 
882  std::vector<String> getOSMOptionalColumnNames() const;
883 
884  static std::map<Size, MzTabModificationMetaData> generateMzTabStringFromModifications(const std::vector<String>& mods);
885 
886  static std::map<Size, MzTabModificationMetaData> generateMzTabStringFromVariableModifications(const std::vector<String>& mods);
887 
888  static std::map<Size, MzTabModificationMetaData> generateMzTabStringFromFixedModifications(const std::vector<String>& mods);
889 
890  static MzTab exportFeatureMapToMzTab(const FeatureMap& feature_map, const String& filename);
891 
907  const std::vector<ProteinIdentification>& prot_ids,
908  const std::vector<PeptideIdentification>& peptide_ids,
909  const String& filename,
910  bool first_run_inference_only,
911  bool export_empty_pep_ids = false,
912  bool export_all_psms = false,
913  const String& title = "ID export from OpenMS");
914 
915 
920  static MzTabModificationList extractModificationList(const PeptideHit& pep_hit, const std::vector<String>& fixed_mods, const std::vector<String>& localization_mods);
921 
935  const ConsensusMap& consensus_map,
936  const String& filename,
937  const bool first_run_inference_only,
938  const bool export_unidentified_features,
939  const bool export_unassigned_ids,
940  const bool export_subfeatures,
941  const bool export_empty_pep_ids = false,
942  const bool export_all_psms = false,
943  const String& title = "ConsensusMap export from OpenMS");
944 
946  {
947  public:
949  const std::vector<const ProteinIdentification*>& prot_ids,
950  const std::vector<const PeptideIdentification*>& peptide_ids,
951  const String& filename,
952  bool first_run_inference_only,
953  bool export_empty_pep_ids = false,
954  bool export_all_psms = false,
955  const String& title = "ID export from OpenMS");
956 
957  const MzTabMetaData& getMetaData() const;
958 
959  const std::vector<String>& getProteinOptionalColumnNames() const;
960  const std::vector<String>& getPeptideOptionalColumnNames() const;
961  const std::vector<String>& getPSMOptionalColumnNames() const;
962 
966  private:
968  std::set<String> peptide_id_user_value_keys_;
970 
971  // beautiful mapping structs
972  std::map<Size, std::set<Size>> ind2prot_;
973  std::map<Size, std::set<Size>> pg2prot_;
974  std::map<String, size_t> idrunid_2_idrunindex_;
975  std::map<Size, std::vector<std::pair<String, String>>> run_to_search_engines_;
976  std::map<Size, std::vector<std::vector<std::pair<String, String>>>> run_to_search_engines_settings_;
977  std::map<std::pair<size_t,size_t>,size_t> map_id_run_fileidx_2_msfileidx_;
978  std::map<std::pair< String, unsigned >, unsigned> path_label_to_assay_;
979 
980  std::vector<const ProteinIdentification*> prot_ids_;
981  std::vector<const PeptideIdentification*> peptide_ids_;
982 
987  /* currently unused
988  bool export_unidentified_features_;
989  bool export_subfeatures_; */
992  size_t quant_study_variables_ = 0;
993  // size_t n_study_variables_ = 0; //currently unused
994  size_t PRT_STATE_ = 0;
995  size_t prt_run_id_ = 0; // current (protein) identification run
996  size_t prt_hit_id_ = 0; // current protein in (protein) identification run
997  size_t prt_group_id_ = 0;
998  size_t prt_indistgroup_id_ = 0;
999  size_t pep_id_ = 0;
1000  size_t psm_id_ = 0;
1001  size_t current_psm_idx_ = 0;
1002  MzTabString db_, db_version_;
1003 
1004  std::vector<String> prt_optional_column_names_;
1005  std::vector<String> pep_optional_column_names_;
1006  std::vector<String> psm_optional_column_names_;
1007 
1009  };
1010 
1012  {
1013  public:
1015  const ConsensusMap& consensus_map,
1016  const String& filename,
1017  const bool first_run_inference_only,
1018  const bool export_unidentified_features,
1019  const bool export_unassigned_ids,
1020  const bool export_subfeatures,
1021  const bool export_empty_pep_ids = false,
1022  const bool export_all_psms = false,
1023  const String& title = "ConsensusMap export from OpenMS");
1024 
1025  const MzTabMetaData& getMetaData() const;
1026 
1027  const std::vector<String>& getProteinOptionalColumnNames() const;
1028  const std::vector<String>& getPeptideOptionalColumnNames() const;
1029  const std::vector<String>& getPSMOptionalColumnNames() const;
1030 
1034 
1035  private:
1040 
1041  // beautiful mapping structs
1042  std::map<Size, std::set<Size>> ind2prot_;
1043  std::map<Size, std::set<Size>> pg2prot_;
1044  std::map<String, size_t> idrunid_2_idrunindex_;
1045  std::map<Size, std::vector<std::pair<String, String>>> run_to_search_engines_;
1046  std::map<Size, std::vector<std::vector<std::pair<String, String>>>> run_to_search_engines_settings_;
1047  std::map<std::pair<size_t,size_t>,size_t> map_id_run_fileidx_2_msfileidx_;
1048  std::map<std::pair< String, unsigned >, unsigned> path_label_to_assay_;
1049 
1050  std::vector<const ProteinIdentification*> prot_ids_;
1051  std::vector<const PeptideIdentification*> peptide_ids_;
1052 
1061  size_t quant_study_variables_ = 0;
1062  size_t n_study_variables_ = 0;
1063  size_t PRT_STATE_ = 0;
1064  size_t prt_run_id_ = 0; // current (protein) identification run
1065  size_t prt_hit_id_ = 0; // current protein in (protein) identification run
1066  size_t prt_group_id_ = 0;
1067  size_t prt_indistgroup_id_ = 0;
1068  size_t pep_id_ = 0;
1069  size_t pep_counter_ = 0;
1070  size_t psm_id_ = 0;
1071  size_t current_psm_idx_ = 0;
1072  MzTabString db_, db_version_;
1073 
1074  std::vector<String> prt_optional_column_names_;
1075  std::vector<String> pep_optional_column_names_;
1076  std::vector<String> psm_optional_column_names_;
1077 
1079  };
1080 
1081 
1082  protected:
1083  // extract basic mappings
1084 
1085  static std::map<String, Size> mapIDRunIdentifier2IDRunIndex_(const std::vector<const ProteinIdentification*>& prot_ids);
1086 
1087  static boost::optional<MzTabPSMSectionRow> PSMSectionRowFromPeptideID_(
1088  PeptideIdentification const& pid,
1089  std::vector<ProteinIdentification const*> const& prot_id,
1090  std::map<String, size_t>& idrun_2_run_index,
1091  std::map<std::pair<size_t, size_t>, size_t>& map_run_fileidx_2_msfileidx,
1092  std::map<Size, std::vector<std::pair<String, String>>>& run_to_search_engines,
1093  Size const current_psm_idx,
1094  Size const psm_id,
1095  MzTabString const& db,
1096  MzTabString const& db_version,
1097  bool const export_empty_pep_ids,
1098  bool const export_all_psms);
1099 
1101  const ConsensusFeature& c,
1102  const ConsensusMap& consensus_map,
1103  const StringList& ms_runs,
1104  const Size n_study_variables,
1105  const std::set<String>& consensus_feature_user_value_keys,
1106  const std::set<String>& peptide_hit_user_value_keys,
1107  const std::map<String, size_t>& idrun_2_run_index,
1108  const std::map<std::pair<size_t,size_t>,size_t>& map_run_fileidx_2_msfileidx,
1109  const std::map< std::pair< String, unsigned >, unsigned>& path_label_to_assay,
1110  const std::vector<String>& fixed_mods,
1111  bool export_subfeatures);
1112 
1114  const Feature& c,
1115  const std::set<String>& feature_user_value_keys,
1116  const std::set<String>& peptide_hit_user_value_keys,
1117  const std::vector<String>& fixed_mods);
1118 
1120  const ProteinHit& hit,
1121  const MzTabString& db,
1122  const MzTabString& db_version,
1123  const std::set<String>& protein_hit_user_value_keys);
1124 
1127  const MzTabString& db,
1128  const MzTabString& db_version);
1129 
1131  const std::vector<ProteinHit>& protein_hits,
1133  const size_t g,
1134  const std::map<Size, std::set<Size>>& ind2prot,
1135  const MzTabString& db,
1136  const MzTabString& db_version);
1137 
1138  static void addMSRunMetaData_(
1139  const std::map<size_t, String>& msrunindex_2_msfilename,
1140  MzTabMetaData& meta_data);
1141 
1143  const std::vector<const ProteinIdentification*>& prot_ids,
1144  bool skip_first,
1145  std::map<String, size_t>& msfilename_2_msrunindex,
1146  std::map<size_t, String>& msrunindex_2_msfilename);
1147 
1149 
1151 
1152  // TODO: move to core classes?
1153  static void getConsensusMapMetaValues_(const ConsensusMap& consensus_map, std::set<String>& consensus_feature_user_value_keys, std::set<String>& peptide_hit_user_value_keys);
1154 
1155  static void getFeatureMapMetaValues_(const FeatureMap& feature_map, std::set<String>& feature_user_value_keys, std::set<String>& peptide_hit_user_value_keys);
1156 
1158  const std::vector<const ProteinIdentification*>& prot_ids,
1159  std::vector<const PeptideIdentification*>& peptide_ids_,
1160  std::set<String>& protein_hit_user_value_keys,
1161  std::set<String>& peptide_id_user_value_keys,
1162  std::set<String>& peptide_hit_user_value_keys);
1163 
1164 
/// Replaces every space character (' ') with an underscore in each element of
/// the range [first, last), modifying the elements in place.
///
/// The element type must offer a member `substitute(char, char)`. Only the space
/// character is substituted; other whitespace such as tabs is left as is.
template <class ForwardIterator>
static void replaceWhiteSpaces_(ForwardIterator first, ForwardIterator last)
{
  for (; first != last; ++first)
  {
    first->substitute(' ', '_');
  }
}
1174 
1175  static void replaceWhiteSpaces_(std::set<String>& keys)
1176  {
1177  std::set<String> tmp_keys;
1178  auto first = keys.begin();
1179  while (first != keys.end())
1180  {
1181  String s = *first;
1182  s.substitute(' ', '_');
1183  tmp_keys.insert(std::move(s));
1184  ++first;
1185  }
1186  std::swap(keys, tmp_keys);
1187  }
1188 
1189  // determine spectrum reference identifier type (e.g., Thermo nativeID) from spectrum references
1190  static MzTabParameter getMSRunSpectrumIdentifierType_(const std::vector<const PeptideIdentification*>& peptide_ids_);
1191 
1193  const std::vector<const ProteinIdentification*>& prot_ids,
1194  const std::vector<const PeptideIdentification*>& pep_ids,
1195  bool skip_first_run,
1196  std::map<std::tuple<String, String, String>, std::set<Size>>& search_engine_to_runs,
1197  std::map<Size, std::vector<std::pair<String, String>>>& run_to_search_engines,
1198  std::map<Size, std::vector<std::vector<std::pair<String, String>>>>& run_to_search_engines_settings,
1199  std::map<String, std::vector<std::pair<String, String>>>& search_engine_to_settings);
1200 
1201  static std::map<Size, std::set<Size>> mapGroupsToProteins_(
1202  const std::vector<ProteinIdentification::ProteinGroup>& groups,
1203  const std::vector<ProteinHit>& proteins);
1204 
1205  static void addSearchMetaData_(
1206  const std::vector<const ProteinIdentification*>& prot_ids,
1207  const std::map<std::tuple<String, String, String>, std::set<Size>>& search_engine_to_runs,
1208  const std::map<String, std::vector<std::pair<String,String>>>& search_engine_to_settings,
1209  MzTabMetaData& meta_data,
1210  bool first_run_inference_only);
1211 
1213  const std::vector<const ProteinIdentification*>& prot_ids,
1214  const std::map<String, size_t>& msfilename_2_msrunindex,
1215  bool skip_first_run,
1216  std::map<std::pair<size_t, size_t>, size_t>& map_run_fileidx_2_msfileidx);
1217 
1219  template <typename SectionRows>
1220  std::vector<String> getOptionalColumnNames_(const SectionRows& rows) const
1221  {
1222  // vector is used to preserve the column order
1223  std::vector<String> names;
1224  if (!rows.empty())
1225  {
1226  for (typename SectionRows::const_iterator it = rows.begin(); it != rows.end(); ++it)
1227  {
1228  for (auto it_opt = it->opt_.cbegin(); it_opt != it->opt_.cend(); ++it_opt)
1229  {
1230  if (std::find(names.begin(), names.end(), it_opt->first) == names.end())
1231  {
1232  names.push_back(it_opt->first);
1233  }
1234  }
1235  }
1236  }
1237  return names;
1238  }
1239 
1241  const std::vector<const ProteinIdentification*>& prot_ids,
1242  StringList& var_mods,
1243  StringList& fixed_mods);
1244 
1245  // create MzTab compatible modification identifier from ResidueModification
1246  // If the Modification has a unimod identifier it will be prefixed as UNIMOD
1247  // otherwise as CHEMMOD (see MzTab specification for details)
1249 
1250  static void checkSequenceUniqueness_(const std::vector<PeptideIdentification>& curr_pep_ids);
1251 
1260  std::vector<Size> empty_rows_;
1261  std::map<Size, String> comment_rows_;
1262  };
1263 
1264 } // namespace OpenMS
1265 
1266 #pragma clang diagnostic pop
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:71
A container for consensus elements.
Definition: ConsensusMap.h:88
A container for features.
Definition: FeatureMap.h:105
An LC-MS feature.
Definition: Feature.h:72
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:61
Definition: MzTab.h:186
String toCellString() const
void fromCellString(const String &s)
void set(const bool &value)
bool isNull() const
int value_
Definition: MzTab.h:206
void setNull(bool b)
Definition: MzTab.h:109
std::vector< MzTabDouble > entries_
Definition: MzTab.h:127
String toCellString() const
void fromCellString(const String &s)
std::vector< MzTabDouble > get() const
void set(const std::vector< MzTabDouble > &entries)
Definition: MzTab.h:76
MzTabCellStateType state_
Definition: MzTab.h:105
bool isNaN() const
double value_
Definition: MzTab.h:104
String toCellString() const
void fromCellString(const String &s)
MzTabDouble(const double v)
~MzTabDouble()=default
void set(const double &value)
bool isNull() const
void setNull(bool b)
double get() const
bool isInf() const
Definition: MzTab.h:164
void set(const std::vector< MzTabInteger > &entries)
String toCellString() const
void fromCellString(const String &s)
std::vector< MzTabInteger > get() const
std::vector< MzTabInteger > entries_
Definition: MzTab.h:182
Definition: MzTab.h:131
MzTabCellStateType state_
Definition: MzTab.h:160
bool isNaN() const
Int value_
Definition: MzTab.h:159
String toCellString() const
void fromCellString(const String &s)
MzTabInteger(const int v)
bool isNull() const
void setNull(bool b)
void set(const Int &value)
bool isInf() const
All metadata of an mzTab file. Please refer to the mzTab specification for documentation.
Definition: MzTab.h:470
std::map< Size, MzTabCVMetaData > cv
Definition: MzTab.h:523
MzTabString mz_tab_version
Definition: MzTab.h:474
std::map< Size, MzTabParameter > peptide_search_engine_score
Definition: MzTab.h:482
std::map< Size, MzTabParameter > protein_search_engine_score
Definition: MzTab.h:481
std::vector< String > colunit_protein
Definition: MzTab.h:525
MzTabString mz_tab_mode
Definition: MzTab.h:475
MzTabParameter peptide_quantification_unit
Definition: MzTab.h:510
MzTabParameter quantification_method
Definition: MzTab.h:507
std::map< Size, MzTabString > publication
Definition: MzTab.h:497
MzTabString mz_tab_id
Definition: MzTab.h:477
std::map< Size, MzTabParameter > smallmolecule_search_engine_score
Definition: MzTab.h:484
MzTabParameterList false_discovery_rate
Definition: MzTab.h:495
MzTabString description
Definition: MzTab.h:479
std::map< Size, MzTabContactMetaData > contact
Definition: MzTab.h:499
std::map< Size, MzTabModificationMetaData > variable_mod
Definition: MzTab.h:505
std::map< Size, MzTabParameter > psm_search_engine_score
Definition: MzTab.h:483
std::map< Size, MzTabParameter > oligonucleotide_search_engine_score
Definition: MzTab.h:486
MzTabParameter small_molecule_quantification_unit
Definition: MzTab.h:511
std::map< Size, MzTabParameterList > sample_processing
Definition: MzTab.h:489
std::map< Size, MzTabInstrumentMetaData > instrument
Definition: MzTab.h:491
std::map< Size, MzTabModificationMetaData > fixed_mod
Definition: MzTab.h:503
std::map< Size, MzTabStudyVariableMetaData > study_variable
Definition: MzTab.h:521
std::vector< String > colunit_peptide
Definition: MzTab.h:526
std::map< Size, MzTabParameter > custom
Definition: MzTab.h:515
std::map< Size, MzTabSampleMetaData > sample
Definition: MzTab.h:517
MzTabString title
Definition: MzTab.h:478
MzTabString mz_tab_type
Definition: MzTab.h:476
std::map< Size, MzTabMSRunMetaData > ms_run
Definition: MzTab.h:513
std::map< Size, MzTabParameter > osm_search_engine_score
Definition: MzTab.h:487
std::map< Size, MzTabAssayMetaData > assay
Definition: MzTab.h:519
MzTabParameter protein_quantification_unit
Definition: MzTab.h:509
std::map< Size, MzTabString > uri
Definition: MzTab.h:501
std::map< Size, MzTabParameter > nucleic_acid_search_engine_score
Definition: MzTab.h:485
std::vector< String > colunit_small_molecule
Definition: MzTab.h:528
std::map< Size, MzTabSoftwareMetaData > software
Definition: MzTab.h:493
std::vector< String > colunit_psm
Definition: MzTab.h:527
Definition: MzTab.h:347
void set(const std::vector< MzTabModification > &entries)
std::vector< MzTabModification > get() const
void fromCellString(const String &s)
std::vector< MzTabModification > entries_
Definition: MzTab.h:363
Definition: MzTab.h:319
MzTabString getModOrSubstIdentifier() const
String toCellString() const
void fromCellString(const String &s)
MzTabString mod_identifier_
Definition: MzTab.h:343
void setPositionsAndParameters(const std::vector< std::pair< Size, MzTabParameter > > &ppp)
set (potentially ambiguous) position(s) with associated parameter (might be null if not set)
std::vector< std::pair< Size, MzTabParameter > > pos_param_pairs_
Definition: MzTab.h:342
std::vector< std::pair< Size, MzTabParameter > > getPositionsAndParameters() const
void setModificationIdentifier(const MzTabString &mod_id)
Definition: MzTab.h:271
void set(const std::vector< MzTabParameter > &parameters)
String toCellString() const
void fromCellString(const String &s)
std::vector< MzTabParameter > parameters_
Definition: MzTab.h:289
std::vector< MzTabParameter > get() const
Definition: MzTab.h:234
String CV_label_
Definition: MzTab.h:264
String getValue() const
String name_
Definition: MzTab.h:266
String toCellString() const
void fromCellString(const String &s)
String getCVLabel() const
void setName(const String &name)
void setCVLabel(const String &CV_label)
String getAccession() const
String getName() const
String accession_
Definition: MzTab.h:265
String value_
Definition: MzTab.h:267
void setAccession(const String &accession)
void setValue(const String &value)
Definition: MzTab.h:367
void setMSFile(Size index)
String getSpecRef() const
String spec_ref_
Definition: MzTab.h:392
String toCellString() const
void fromCellString(const String &s)
Size ms_run_
Definition: MzTab.h:391
void setSpecRefFile(const String &spec_ref)
void setSpecRef(const String &spec_ref)
Definition: MzTab.h:293
std::vector< MzTabString > get() const
String toCellString() const
void setSeparator(char sep)
needed for e.g. ambiguity_members and GO accessions as these use ',' as separator while the others us...
void fromCellString(const String &s)
char sep_
Definition: MzTab.h:315
std::vector< MzTabString > entries_
Definition: MzTab.h:314
void set(const std::vector< MzTabString > &entries)
Definition: MzTab.h:210
String toCellString() const
void fromCellString(const String &s)
~MzTabString()=default
void set(const String &value)
String value_
Definition: MzTab.h:230
bool isNull() const
String get() const
MzTabString(const String &s)
void setNull(bool b)
Definition: MzTab.h:1012
std::vector< String > pep_optional_column_names_
Definition: MzTab.h:1075
const ConsensusMap & consensus_map_
Definition: MzTab.h:1036
std::map< Size, std::vector< std::pair< String, String > > > run_to_search_engines_
Definition: MzTab.h:1045
std::map< std::pair< size_t, size_t >, size_t > map_id_run_fileidx_2_msfileidx_
Definition: MzTab.h:1047
std::vector< const ProteinIdentification * > prot_ids_
Definition: MzTab.h:1050
String filename_
Definition: MzTab.h:1055
std::map< Size, std::set< Size > > pg2prot_
Definition: MzTab.h:1043
bool export_empty_pep_ids_
Definition: MzTab.h:1059
bool nextPRTRow(MzTabProteinSectionRow &row)
bool export_unidentified_features_
Definition: MzTab.h:1057
bool nextPEPRow(MzTabPeptideSectionRow &row)
const std::vector< String > & getProteinOptionalColumnNames() const
std::set< String > protein_hit_user_value_keys_
Definition: MzTab.h:1037
std::vector< String > psm_optional_column_names_
Definition: MzTab.h:1076
StringList fixed_mods_
Definition: MzTab.h:1056
std::map< String, size_t > idrunid_2_idrunindex_
Definition: MzTab.h:1044
bool export_all_psms_
Definition: MzTab.h:1060
std::vector< const PeptideIdentification * > peptide_ids_
Definition: MzTab.h:1051
std::map< Size, std::set< Size > > ind2prot_
Definition: MzTab.h:1042
const MzTabMetaData & getMetaData() const
StringList ms_runs_
Definition: MzTab.h:1053
bool nextPSMRow(MzTabPSMSectionRow &row)
CMMzTabStream(const ConsensusMap &consensus_map, const String &filename, const bool first_run_inference_only, const bool export_unidentified_features, const bool export_unassigned_ids, const bool export_subfeatures, const bool export_empty_pep_ids=false, const bool export_all_psms=false, const String &title="ConsensusMap export from OpenMS")
std::map< std::pair< String, unsigned >, unsigned > path_label_to_assay_
Definition: MzTab.h:1048
bool first_run_inference_
Definition: MzTab.h:1054
std::map< Size, std::vector< std::vector< std::pair< String, String > > > > run_to_search_engines_settings_
Definition: MzTab.h:1046
const std::vector< String > & getPeptideOptionalColumnNames() const
bool export_subfeatures_
Definition: MzTab.h:1058
std::vector< String > prt_optional_column_names_
Definition: MzTab.h:1074
std::set< String > consensus_feature_user_value_keys_
Definition: MzTab.h:1038
MzTabString db_
Definition: MzTab.h:1072
const std::vector< String > & getPSMOptionalColumnNames() const
MzTabMetaData meta_data_
Definition: MzTab.h:1078
std::set< String > consensus_feature_peptide_hit_user_value_keys_
Definition: MzTab.h:1039
Definition: MzTab.h:946
std::vector< String > pep_optional_column_names_
Definition: MzTab.h:1005
std::map< Size, std::vector< std::pair< String, String > > > run_to_search_engines_
Definition: MzTab.h:975
std::map< std::pair< size_t, size_t >, size_t > map_id_run_fileidx_2_msfileidx_
Definition: MzTab.h:977
std::vector< const ProteinIdentification * > prot_ids_
Definition: MzTab.h:980
String filename_
Definition: MzTab.h:985
std::map< Size, std::set< Size > > pg2prot_
Definition: MzTab.h:973
bool export_empty_pep_ids_
Definition: MzTab.h:990
bool nextPRTRow(MzTabProteinSectionRow &row)
bool nextPEPRow(MzTabPeptideSectionRow &row)
const std::vector< String > & getProteinOptionalColumnNames() const
std::set< String > protein_hit_user_value_keys_
Definition: MzTab.h:967
std::vector< String > psm_optional_column_names_
Definition: MzTab.h:1006
StringList fixed_mods_
Definition: MzTab.h:986
std::map< String, size_t > idrunid_2_idrunindex_
Definition: MzTab.h:974
bool export_all_psms_
Definition: MzTab.h:991
std::vector< const PeptideIdentification * > peptide_ids_
Definition: MzTab.h:981
std::map< Size, std::set< Size > > ind2prot_
Definition: MzTab.h:972
const MzTabMetaData & getMetaData() const
StringList ms_runs_
Definition: MzTab.h:983
bool nextPSMRow(MzTabPSMSectionRow &row)
std::map< std::pair< String, unsigned >, unsigned > path_label_to_assay_
Definition: MzTab.h:978
std::set< String > peptide_hit_user_value_keys_
Definition: MzTab.h:969
bool first_run_inference_
Definition: MzTab.h:984
std::map< Size, std::vector< std::vector< std::pair< String, String > > > > run_to_search_engines_settings_
Definition: MzTab.h:976
const std::vector< String > & getPeptideOptionalColumnNames() const
IDMzTabStream(const std::vector< const ProteinIdentification * > &prot_ids, const std::vector< const PeptideIdentification * > &peptide_ids, const String &filename, bool first_run_inference_only, bool export_empty_pep_ids=false, bool export_all_psms=false, const String &title="ID export from OpenMS")
std::vector< String > prt_optional_column_names_
Definition: MzTab.h:1004
std::set< String > peptide_id_user_value_keys_
Definition: MzTab.h:968
MzTabString db_
Definition: MzTab.h:1002
const std::vector< String > & getPSMOptionalColumnNames() const
MzTabMetaData meta_data_
Definition: MzTab.h:1008
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:809
static void getSearchModifications_(const std::vector< const ProteinIdentification * > &prot_ids, StringList &var_mods, StringList &fixed_mods)
static void replaceWhiteSpaces_(ForwardIterator first, ForwardIterator last)
Definition: MzTab.h:1166
const std::map< Size, String > & getCommentRows() const
static MzTabString getModificationIdentifier_(const ResidueModification &r)
void setOligonucleotideSectionRows(const MzTabOligonucleotideSectionRows &onsd)
MzTabOSMSectionRows osm_data_
oligonucleotide-spectrum matches
Definition: MzTab.h:1259
std::vector< Size > empty_rows_
index of empty rows
Definition: MzTab.h:1260
MzTabSmallMoleculeSectionRows small_molecule_data_
Definition: MzTab.h:1256
const std::vector< Size > & getEmptyRows() const
static void addMetaInfoToOptionalColumns(const std::set< String > &keys, std::vector< MzTabOptionalColumnEntry > &opt, const String &id, const MetaInfoInterface &meta)
std::vector< String > getSmallMoleculeOptionalColumnNames() const
Extract opt_ (custom, optional column names)
static MzTab exportConsensusMapToMzTab(const ConsensusMap &consensus_map, const String &filename, const bool first_run_inference_only, const bool export_unidentified_features, const bool export_unassigned_ids, const bool export_subfeatures, const bool export_empty_pep_ids=false, const bool export_all_psms=false, const String &title="ConsensusMap export from OpenMS")
export linked peptide features aka consensus map
std::vector< String > getOligonucleotideOptionalColumnNames() const
Extract opt_ (custom, optional column names)
static MzTab exportIdentificationsToMzTab(const std::vector< ProteinIdentification > &prot_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &filename, bool first_run_inference_only, bool export_empty_pep_ids=false, bool export_all_psms=false, const String &title="ID export from OpenMS")
Export peptide and protein identifications to mzTab.
MzTabOligonucleotideSectionRows oligonucleotide_data_
Definition: MzTab.h:1258
static MzTabParameter getProteinScoreType_(const ProteinIdentification &prot_id)
MzTabPSMSectionRows & getPSMSectionRows()
static void addMSRunMetaData_(const std::map< size_t, String > &msrunindex_2_msfilename, MzTabMetaData &meta_data)
static MzTabPeptideSectionRow peptideSectionRowFromFeature_(const Feature &c, const std::set< String > &feature_user_value_keys, const std::set< String > &peptide_hit_user_value_keys, const std::vector< String > &fixed_mods)
static void getFeatureMapMetaValues_(const FeatureMap &feature_map, std::set< String > &feature_user_value_keys, std::set< String > &peptide_hit_user_value_keys)
std::map< Size, String > comment_rows_
comments
Definition: MzTab.h:1261
const MzTabMetaData & getMetaData() const
const MzTabNucleicAcidSectionRows & getNucleicAcidSectionRows() const
std::vector< String > getPeptideOptionalColumnNames() const
Extract opt_ (custom, optional column names)
std::vector< String > getOSMOptionalColumnNames() const
Extract opt_ (custom, optional column names)
void setNucleicAcidSectionRows(const MzTabNucleicAcidSectionRows &nasd)
static MzTabProteinSectionRow nextProteinSectionRowFromIndistinguishableGroup_(const std::vector< ProteinHit > &protein_hits, const ProteinIdentification::ProteinGroup &group, const size_t g, const std::map< Size, std::set< Size >> &ind2prot, const MzTabString &db, const MzTabString &db_version)
void setPeptideSectionRows(const MzTabPeptideSectionRows &psd)
static void replaceWhiteSpaces_(std::set< String > &keys)
Definition: MzTab.h:1175
static size_t getQuantStudyVariables_(const ProteinIdentification &pid)
static MzTabProteinSectionRow nextProteinSectionRowFromProteinGroup_(const ProteinIdentification::ProteinGroup &group, const MzTabString &db, const MzTabString &db_version)
static std::map< Size, std::set< Size > > mapGroupsToProteins_(const std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &proteins)
const MzTabPeptideSectionRows & getPeptideSectionRows() const
void setSmallMoleculeSectionRows(const MzTabSmallMoleculeSectionRows &smsd)
void setProteinSectionRows(const MzTabProteinSectionRows &psd)
std::vector< String > getNucleicAcidOptionalColumnNames() const
Extract opt_ (custom, optional column names)
static void getConsensusMapMetaValues_(const ConsensusMap &consensus_map, std::set< String > &consensus_feature_user_value_keys, std::set< String > &peptide_hit_user_value_keys)
void setMetaData(const MzTabMetaData &md)
MzTabPSMSectionRows psm_data_
Definition: MzTab.h:1255
const MzTabPSMSectionRows & getPSMSectionRows() const
static MzTabParameter getMSRunSpectrumIdentifierType_(const std::vector< const PeptideIdentification * > &peptide_ids_)
const MzTabOligonucleotideSectionRows & getOligonucleotideSectionRows() const
static MzTabPeptideSectionRow peptideSectionRowFromConsensusFeature_(const ConsensusFeature &c, const ConsensusMap &consensus_map, const StringList &ms_runs, const Size n_study_variables, const std::set< String > &consensus_feature_user_value_keys, const std::set< String > &peptide_hit_user_value_keys, const std::map< String, size_t > &idrun_2_run_index, const std::map< std::pair< size_t, size_t >, size_t > &map_run_fileidx_2_msfileidx, const std::map< std::pair< String, unsigned >, unsigned > &path_label_to_assay, const std::vector< String > &fixed_mods, bool export_subfeatures)
~MzTab()=default
static void mapBetweenRunAndSearchEngines_(const std::vector< const ProteinIdentification * > &prot_ids, const std::vector< const PeptideIdentification * > &pep_ids, bool skip_first_run, std::map< std::tuple< String, String, String >, std::set< Size >> &search_engine_to_runs, std::map< Size, std::vector< std::pair< String, String >>> &run_to_search_engines, std::map< Size, std::vector< std::vector< std::pair< String, String >>>> &run_to_search_engines_settings, std::map< String, std::vector< std::pair< String, String >>> &search_engine_to_settings)
MzTabProteinSectionRows & getProteinSectionRows()
MzTabNucleicAcidSectionRows nucleic_acid_data_
Definition: MzTab.h:1257
static std::map< Size, MzTabModificationMetaData > generateMzTabStringFromVariableModifications(const std::vector< String > &mods)
std::vector< String > getOptionalColumnNames_(const SectionRows &rows) const
Helper function for "get...OptionalColumnNames" functions.
Definition: MzTab.h:1220
MzTabProteinSectionRows protein_data_
Definition: MzTab.h:1253
static std::map< Size, MzTabModificationMetaData > generateMzTabStringFromFixedModifications(const std::vector< String > &mods)
static void checkSequenceUniqueness_(const std::vector< PeptideIdentification > &curr_pep_ids)
static std::map< String, Size > mapIDRunIdentifier2IDRunIndex_(const std::vector< const ProteinIdentification * > &prot_ids)
std::vector< String > getProteinOptionalColumnNames() const
Extract opt_ (custom, optional column names)
static boost::optional< MzTabPSMSectionRow > PSMSectionRowFromPeptideID_(PeptideIdentification const &pid, std::vector< ProteinIdentification const * > const &prot_id, std::map< String, size_t > &idrun_2_run_index, std::map< std::pair< size_t, size_t >, size_t > &map_run_fileidx_2_msfileidx, std::map< Size, std::vector< std::pair< String, String >>> &run_to_search_engines, Size const current_psm_idx, Size const psm_id, MzTabString const &db, MzTabString const &db_version, bool const export_empty_pep_ids, bool const export_all_psms)
void setEmptyRows(const std::vector< Size > &empty)
static MzTabModificationList extractModificationList(const PeptideHit &pep_hit, const std::vector< String > &fixed_mods, const std::vector< String > &localization_mods)
MzTabPeptideSectionRows peptide_data_
Definition: MzTab.h:1254
const MzTabSmallMoleculeSectionRows & getSmallMoleculeSectionRows() const
static void mapIDRunFileIndex2MSFileIndex_(const std::vector< const ProteinIdentification * > &prot_ids, const std::map< String, size_t > &msfilename_2_msrunindex, bool skip_first_run, std::map< std::pair< size_t, size_t >, size_t > &map_run_fileidx_2_msfileidx)
MzTab()=default
Default constructor.
static void getIdentificationMetaValues_(const std::vector< const ProteinIdentification * > &prot_ids, std::vector< const PeptideIdentification * > &peptide_ids_, std::set< String > &protein_hit_user_value_keys, std::set< String > &peptide_id_user_value_keys, std::set< String > &peptide_hit_user_value_keys)
std::vector< String > getPSMOptionalColumnNames() const
Extract opt_ (custom, optional column names)
MzTabMetaData meta_data_
Definition: MzTab.h:1252
const MzTabOSMSectionRows & getOSMSectionRows() const
static void mapBetweenMSFileNameAndMSRunIndex_(const std::vector< const ProteinIdentification * > &prot_ids, bool skip_first, std::map< String, size_t > &msfilename_2_msrunindex, std::map< size_t, String > &msrunindex_2_msfilename)
const MzTabProteinSectionRows & getProteinSectionRows() const
static std::map< Size, MzTabModificationMetaData > generateMzTabStringFromModifications(const std::vector< String > &mods)
MzTabPeptideSectionRows & getPeptideSectionRows()
void setCommentRows(const std::map< Size, String > &com)
void setPSMSectionRows(const MzTabPSMSectionRows &psd)
static void addSearchMetaData_(const std::vector< const ProteinIdentification * > &prot_ids, const std::map< std::tuple< String, String, String >, std::set< Size >> &search_engine_to_runs, const std::map< String, std::vector< std::pair< String, String >>> &search_engine_to_settings, MzTabMetaData &meta_data, bool first_run_inference_only)
void setOSMSectionRows(const MzTabOSMSectionRows &osd)
static MzTabProteinSectionRow proteinSectionRowFromProteinHit_(const ProteinHit &hit, const MzTabString &db, const MzTabString &db_version, const std::set< String > &protein_hit_user_value_keys)
static MzTab exportFeatureMapToMzTab(const FeatureMap &feature_map, const String &filename)
Representation of a peptide hit.
Definition: PeptideHit.h:57
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Representation of a protein hit.
Definition: ProteinHit.h:60
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:118
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
Representation of a modification.
Definition: ResidueModification.h:77
A more convenient string class.
Definition: String.h:61
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
MzTabCellStateType
Data model of MzTab files.
Definition: MzTab.h:67
@ SIZE_OF_MZTAB_CELLTYPE
Definition: MzTab.h:72
@ MZTAB_CELLSTATE_NULL
Definition: MzTab.h:69
@ MZTAB_CELLSTATE_INF
Definition: MzTab.h:71
@ MZTAB_CELLSTATE_NAN
Definition: MzTab.h:70
@ MZTAB_CELLSTATE_DEFAULT
Definition: MzTab.h:68
const double c
Definition: Constants.h:209
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
MzTabInteger taxid
NEWT taxonomy for the species.
Definition: MzTab.h:673
MzTabInteger end
Definition: MzTab.h:743
MzTabSpectraRef spectra_ref
Spectra identifying the peptide.
Definition: MzTab.h:590
MzTabDouble calc_mass_to_charge
Precursor ion’s m/z.
Definition: MzTab.h:670
std::vector< MzTabPSMSectionRow > MzTabPSMSectionRows
Definition: MzTab.h:795
std::map< Size, MzTabDouble > smallmolecule_abundance_assay
Definition: MzTab.h:684
MzTabParameter software
Definition: MzTab.h:409
MzTabString url
Definition: MzTab.h:435
MzTabString position
Definition: MzTab.h:419
MzTabString modifications
Modifications identified on the small molecule.
Definition: MzTab.h:683
MzTabString email
Definition: MzTab.h:450
MzTabString full_name
Definition: MzTab.h:433
MzTabString sequence
The peptide’s sequence.
Definition: MzTab.h:575
std::map< Size, MzTabInteger > num_oligos_distinct_ms_run
Definition: MzTab.h:705
MzTabDoubleList retention_time
Time points in seconds. Semantics may vary.
Definition: MzTab.h:585
MzTabString name
Definition: MzTab.h:448
MzTabDoubleList retention_time_window
Definition: MzTab.h:586
MzTabParameterList fragmentation_method
Definition: MzTab.h:458
std::vector< int > sample_refs
Definition: MzTab.h:464
MzTabString label
Definition: MzTab.h:432
MzTabParameter format
Definition: MzTab.h:455
std::map< Size, MzTabDouble > smallmolecule_abundance_std_error_study_variable
Definition: MzTab.h:687
std::vector< MzTabProteinSectionRow > MzTabProteinSectionRows
Definition: MzTab.h:793
std::map< Size, MzTabDouble > best_search_engine_score
Search engine(s) score(s) for the peptide.
Definition: MzTab.h:581
MzTabString smiles
Molecular structure in SMILES format.
Definition: MzTab.h:666
MzTabBoolean unique
0=false, 1=true, null else: Peptide is unique for the protein.
Definition: MzTab.h:577
MzTabParameter source
Definition: MzTab.h:441
MzTabString uri
Location of the PSMs source entry.
Definition: MzTab.h:589
std::map< Size, MzTabParameter > disease
Definition: MzTab.h:403
std::map< Size, MzTabString > setting
Definition: MzTab.h:412
std::vector< int > assay_refs
Definition: MzTab.h:463
std::map< Size, MzTabParameter > tissue
Definition: MzTab.h:401
MzTabModificationList modifications
Modifications identified in the peptide.
Definition: MzTab.h:584
MzTabParameter id_format
Definition: MzTab.h:457
MzTabInteger charge
Precursor ion’s charge.
Definition: MzTab.h:587
std::map< Size, MzTabDouble > smallmolecule_abundance_stdev_study_variable
Definition: MzTab.h:686
MzTabString database_version
Version (and optionally # of entries).
Definition: MzTab.h:579
std::map< Size, MzTabModificationMetaData > quantification_mod
Definition: MzTab.h:425
MzTabString description
Definition: MzTab.h:399
MzTabParameter detector
Definition: MzTab.h:443
MzTabStringList identifier
The small molecule’s identifier.
Definition: MzTab.h:664
std::vector< MzTabPeptideSectionRow > MzTabPeptideSectionRows
Definition: MzTab.h:794
std::map< Size, std::map< Size, MzTabDouble > > search_engine_score_ms_run
Definition: MzTab.h:582
MzTabString accession
The protein’s accession.
Definition: MzTab.h:576
MzTabStringList ambiguity_members
Alternative nucleic acid identifications.
Definition: MzTab.h:707
MzTabDouble mass_to_charge
Precursor ion’s m/z.
Definition: MzTab.h:588
MzTabDouble coverage
(0-1) Fraction of nucleic acid sequence identified.
Definition: MzTab.h:712
MzTabParameter modification
Definition: MzTab.h:417
std::map< Size, MzTabInteger > num_oligos_unique_ms_run
Definition: MzTab.h:706
MzTabInteger start
Definition: MzTab.h:742
std::map< Size, MzTabParameter > cell_type
Definition: MzTab.h:402
std::map< Size, MzTabDouble > peptide_abundance_std_error_study_variable
Definition: MzTab.h:594
MzTabString site
Definition: MzTab.h:418
MzTabString inchi_key
InChi Key of the identified compound.
Definition: MzTab.h:667
MzTabParameter quantification_reagent
Definition: MzTab.h:424
MzTabString affiliation
Definition: MzTab.h:449
MzTabString location
Definition: MzTab.h:456
std::map< Size, MzTabParameter > custom
Definition: MzTab.h:404
std::vector< MzTabOSMSectionRow > MzTabOSMSectionRows
Definition: MzTab.h:799
MzTabInteger reliability
(1-3) 0=null Identification reliability for the peptide.
Definition: MzTab.h:583
std::vector< int > ms_run_ref
Definition: MzTab.h:427
std::map< Size, MzTabInteger > num_osms_ms_run
Definition: MzTab.h:704
std::map< Size, MzTabDouble > search_engine_score
Search engine(s) score(s) for the match.
Definition: MzTab.h:766
MzTabStringList go_terms
List of GO terms for the nucleic acid.
Definition: MzTab.h:711
MzTabParameter name
Definition: MzTab.h:440
MzTabString pre
Definition: MzTab.h:740
MzTabDouble exp_mass_to_charge
Experimentally observed m/z of the precursor ion.
Definition: MzTab.h:669
std::map< Size, MzTabDouble > peptide_abundance_study_variable
Definition: MzTab.h:592
std::map< Size, MzTabDouble > smallmolecule_abundance_study_variable
Definition: MzTab.h:685
std::vector< MzTabOptionalColumnEntry > opt_
Optional columns must start with “opt_”.
Definition: MzTab.h:595
std::vector< MzTabOligonucleotideSectionRow > MzTabOligonucleotideSectionRows
Definition: MzTab.h:798
MzTabParameterList search_engine
Search engine(s) that identified the peptide.
Definition: MzTab.h:580
std::pair< String, MzTabString > MzTabOptionalColumnEntry
Definition: MzTab.h:531
MzTabString post
Definition: MzTab.h:741
std::map< Size, MzTabDouble > peptide_abundance_stdev_study_variable
Definition: MzTab.h:593
std::map< Size, MzTabDouble > peptide_abundance_assay
Definition: MzTab.h:591
MzTabString chemical_formula
Chemical formula of the identified compound.
Definition: MzTab.h:665
MzTabString sample_ref
Definition: MzTab.h:426
std::vector< MzTabSmallMoleculeSectionRow > MzTabSmallMoleculeSectionRows
Definition: MzTab.h:796
MzTabString species
Human readable name of the species.
Definition: MzTab.h:674
std::vector< MzTabNucleicAcidSectionRow > MzTabNucleicAcidSectionRows
Definition: MzTab.h:797
MzTabString version
Definition: MzTab.h:434
MzTabString database
Name of the sequence database.
Definition: MzTab.h:578
std::map< Size, MzTabParameter > species
Definition: MzTab.h:400
std::map< Size, MzTabParameter > analyzer
Definition: MzTab.h:442
Definition: MzTab.h:423
Definition: MzTab.h:431
Definition: MzTab.h:447
Definition: MzTab.h:439
Definition: MzTab.h:454
Definition: MzTab.h:416
NUC - Nucleic acid section (table-based)
Definition: MzTab.h:693
OSM - OSM (oligonucleotide-spectrum match) section (table-based)
Definition: MzTab.h:763
OLI - Oligonucleotide section (table-based)
Definition: MzTab.h:728
PEP - Peptide section (Table based)
Definition: MzTab.h:574
Definition: MzTab.h:398
SML Small molecule section (table based)
Definition: MzTab.h:663
Definition: MzTab.h:408
Definition: MzTab.h:462
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:886
Comparison operator for sorting rows.
Definition: MzTab.h:717
bool operator()(const MzTabNucleicAcidSectionRow &row1, const MzTabNucleicAcidSectionRow &row2) const
Definition: MzTab.h:718
Comparison operator for sorting rows.
Definition: MzTab.h:779
bool operator()(const MzTabOSMSectionRow &row1, const MzTabOSMSectionRow &row2) const
Definition: MzTab.h:780
Comparison operator for sorting rows.
Definition: MzTab.h:748
bool operator()(const MzTabOligonucleotideSectionRow &row1, const MzTabOligonucleotideSectionRow &row2) const
Definition: MzTab.h:749
Comparison operator for sorting rows.
Definition: MzTab.h:644
bool operator()(const MzTabPSMSectionRow &row1, const MzTabPSMSectionRow &row2) const
Definition: MzTab.h:645
PSM - PSM section (Table based)
Definition: MzTab.h:612
MzTabSpectraRef spectra_ref
Spectrum for this PSM.
Definition: MzTab.h:628
MzTabDouble calc_mass_to_charge
The calculated m/z ratio of the experimental precursor ion.
Definition: MzTab.h:626
MzTabString start
(List of) Start positions in parent protein(s)
Definition: MzTab.h:631
MzTabString sequence
The peptide’s sequence.
Definition: MzTab.h:613
MzTabDoubleList retention_time
Time points in seconds. Semantics may vary.
Definition: MzTab.h:623
MzTabBoolean unique
0=false, 1=true, null else: Peptide is unique for the protein.
Definition: MzTab.h:616
MzTabString uri
Location of the PSM’s source entry.
Definition: MzTab.h:627
void addPepEvidenceToRows(const std::vector< PeptideEvidence > &peptide_evidences)
Gets peptide_evidences with data from internal structures and adds their info to an MzTabPSMSectionRow (p...
MzTabModificationList modifications
Modifications identified in the peptide.
Definition: MzTab.h:622
MzTabInteger charge
The charge of the experimental precursor ion.
Definition: MzTab.h:624
MzTabString database_version
Version (and optionally # of entries).
Definition: MzTab.h:618
MzTabString accession
List of potential parent protein accessions as in the fasta DB.
Definition: MzTab.h:615
MzTabString end
(List of) End positions in parent protein(s)
Definition: MzTab.h:632
MzTabInteger reliability
(1-3) 0=null Identification reliability for the peptide.
Definition: MzTab.h:621
std::map< Size, MzTabDouble > search_engine_score
Search engine(s) score(s) for the peptide.
Definition: MzTab.h:620
MzTabString pre
(List of) Amino acid in parent protein(s) before the start of the current PSM
Definition: MzTab.h:629
MzTabDouble exp_mass_to_charge
The observed m/z ratio of the experimental precursor ion (either directly from the raw data or correc...
Definition: MzTab.h:625
std::vector< MzTabOptionalColumnEntry > opt_
Optional columns must start with “opt_”.
Definition: MzTab.h:633
MzTabParameterList search_engine
Search engine(s) that identified the peptide.
Definition: MzTab.h:619
MzTabString post
(List of) Amino acid in parent protein(s) after the end of the current PSM
Definition: MzTab.h:630
MzTabInteger PSM_ID
A unique ID of a PSM line.
Definition: MzTab.h:614
MzTabString database
Name of the sequence database.
Definition: MzTab.h:617
Comparison operator for sorting rows.
Definition: MzTab.h:600
bool operator()(const MzTabPeptideSectionRow &row1, const MzTabPeptideSectionRow &row2) const
Definition: MzTab.h:601
Comparison operator for sorting rows.
Definition: MzTab.h:563
bool operator()(const MzTabProteinSectionRow &row1, const MzTabProteinSectionRow &row2) const
Definition: MzTab.h:564
PRT - Protein section (Table based)
Definition: MzTab.h:535
MzTabInteger taxid
NEWT taxonomy for the species.
Definition: MzTab.h:539
std::map< Size, MzTabDouble > protein_abundance_assay
Definition: MzTab.h:555
std::map< Size, MzTabDouble > best_search_engine_score
best_search_engine_score[1-n]
Definition: MzTab.h:544
MzTabString uri
Location of the protein’s source entry.
Definition: MzTab.h:552
std::map< Size, MzTabDouble > protein_abundance_std_error_study_variable
Definition: MzTab.h:558
MzTabModificationList modifications
Modifications identified in the protein.
Definition: MzTab.h:551
MzTabString database_version
Version of the protein database.
Definition: MzTab.h:542
MzTabString description
Human readable description (i.e. the name)
Definition: MzTab.h:538
std::map< Size, MzTabInteger > num_peptides_distinct_ms_run
Definition: MzTab.h:548
std::map< Size, MzTabDouble > protein_abundance_study_variable
Definition: MzTab.h:556
std::map< Size, std::map< Size, MzTabDouble > > search_engine_score_ms_run
search_engine_score[index1]_ms_run[index2]
Definition: MzTab.h:545
MzTabString accession
The protein’s accession.
Definition: MzTab.h:537
MzTabStringList ambiguity_members
Alternative protein identifications.
Definition: MzTab.h:550
MzTabDouble coverage
(0-1) Amount of protein sequence identified.
Definition: MzTab.h:554
MzTabInteger reliability
Definition: MzTab.h:546
std::map< Size, MzTabInteger > num_psms_ms_run
Definition: MzTab.h:547
MzTabStringList go_terms
List of GO terms for the protein.
Definition: MzTab.h:553
std::map< Size, MzTabInteger > num_peptides_unique_ms_run
Definition: MzTab.h:549
std::map< Size, MzTabDouble > protein_abundance_stdev_study_variable
Definition: MzTab.h:557
std::vector< MzTabOptionalColumnEntry > opt_
Optional columns must start with “opt_”.
Definition: MzTab.h:559
MzTabParameterList search_engine
Search engine(s) identifying the protein.
Definition: MzTab.h:543
MzTabString species
Human readable name of the species.
Definition: MzTab.h:540
MzTabString database
Name of the protein database.
Definition: MzTab.h:541