OpenMS  2.5.0
IdentificationDataConverter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <OpenMS/FORMAT/MzTab.h>
42 
43 namespace OpenMS
44 {
/// Collection of static conversion routines between IdentificationData and
/// other identification representations visible in this header: vectors of
/// ProteinIdentification / PeptideIdentification, MzTab, and FASTA entries.
///
/// NOTE(review): this text is a rendered source listing. The leading number on
/// each line is the listing's own line number, and several listing lines
/// (doc comments and a few declaration lines) are absent from this extract.
/// The comments below flag each gap instead of guessing at the missing tokens.
45  class OPENMS_DLLAPI IdentificationDataConverter
46  {
47  public:
48 
/// Takes protein and peptide identification vectors (read-only) together with
/// a mutable IdentificationData; presumably fills @p id_data from them.
/// Only the declaration is visible here - confirm against the implementation.
50  static void importIDs(IdentificationData& id_data,
51  const std::vector<ProteinIdentification>& proteins,
52  const std::vector<PeptideIdentification>& peptides);
53 
/// Counterpart of importIDs(): takes a read-only IdentificationData and
/// mutable protein/peptide identification vectors (presumably the outputs).
/// @p export_oligonucleotides defaults to false; its exact effect is not
/// visible in this header - TODO confirm against the implementation.
55  static void exportIDs(const IdentificationData& id_data,
56  std::vector<ProteinIdentification>& proteins,
57  std::vector<PeptideIdentification>& peptides,
58  bool export_oligonucleotides = false);
59 
/// Returns an MzTab object built from @p id_data (declaration only).
61  static MzTab exportMzTab(const IdentificationData& id_data);
62 
/// Takes FASTA entries and a mutable IdentificationData (declaration only).
/// NOTE(review): listing lines 66-67 are missing between the @p fasta and
/// @p decoy_pattern parameters; presumably they declare a molecule-type
/// parameter (the cross-reference section lists MoleculeType / PROTEIN) -
/// confirm against the original header.
/// @p decoy_pattern defaults to the empty string.
64  static void importSequences(IdentificationData& id_data,
65  const std::vector<FASTAFile::FASTAEntry>& fasta,
68  const String& decoy_pattern = "");
69 
70  protected:
71 
/// Export a parent molecule (protein or nucleic acid) to mzTab.
///
/// NOTE(review): listing lines 74-75 (function name and first parameter) are
/// missing here. Per the cross-reference section of this page the header is:
///   static void exportParentMoleculeToMzTab_(
///     const IdentificationData::ParentMolecule& parent, ...
///
/// Appends exactly one row of type MzTabSectionRow to @p output, filled from
/// the accession, processing steps/scores, description and coverage of
/// @p parent. @p score_map is passed through to exportStepsAndScoresToMzTab_.
73  template <typename MzTabSectionRow>
76  std::vector<MzTabSectionRow>& output,
77  std::map<IdentificationData::ScoreTypeRef, Size>& score_map)
78  {
79  MzTabSectionRow row;
80  row.accession.set(parent.accession);
81  exportStepsAndScoresToMzTab_(parent.steps_and_scores, row.search_engine,
82  row.best_search_engine_score, score_map);
83  row.description.set(parent.description);
84  row.coverage.set(parent.coverage);
// the sequence is only exported (as optional column "opt_sequence") if present
85  if (!parent.sequence.empty())
86  {
// NOTE(review): listing line 87 is missing; presumably it declares "opt_seq"
// (an MzTabOptionalColumnEntry, as in exportQueryMatchToMzTab_) - confirm.
88  opt_seq.first = "opt_sequence";
89  opt_seq.second.set(parent.sequence);
90  row.opt_.push_back(opt_seq);
91  }
92  output.push_back(row);
93  }
94 
/// Export an identified sequence (peptide or oligonucleotide, but not small
/// molecule/compound) to mzTab.
///
/// NOTE(review): listing line 97 (function name) is missing here. Per the
/// cross-reference section of this page the header is:
///   static void exportPeptideOrOligoToMzTab_(
///
/// Appends one row to @p output if @p identified has no parent matches,
/// otherwise one row per parent match (same sequence/score data, differing
/// accession and parent context).
96  template <typename MzTabSectionRow, typename IdentSeq>
98  const IdentSeq& identified, std::vector<MzTabSectionRow>& output,
99  std::map<IdentificationData::ScoreTypeRef, Size>& score_map)
100  {
101  MzTabSectionRow row;
102  // @TODO: handle modifications properly
103  row.sequence.set(identified.sequence.toString());
104  exportStepsAndScoresToMzTab_(identified.steps_and_scores,
105  row.search_engine,
106  row.best_search_engine_score, score_map);
107  if (identified.parent_matches.empty()) // no parent information given
108  {
109  // row.unique.set(false); // leave this unset?
110  output.push_back(row);
111  }
112  else // generate entries (with duplicated data) for every accession
113  {
// "unique" is true only if there is exactly one parent match
114  bool unique = (identified.parent_matches.size() == 1);
115  for (const auto& match_pair : identified.parent_matches)
116  {
// key of each entry points to the parent (accession source),
// mapped value is handed to addMzTabMoleculeParentContext_
117  const String& accession = match_pair.first->accession;
118  MzTabSectionRow copy = row;
119  copy.accession.set(accession);
120  copy.unique.set(unique);
121  addMzTabMoleculeParentContext_(match_pair.second, copy);
122  output.push_back(copy);
123  }
124  }
125  }
126 
/// Export a molecule-query match (peptide- or oligonucleotide-spectrum match)
/// to mzTab.
///
/// NOTE(review): listing line 129 (function name) is missing here. Per the
/// cross-reference section of this page the header is:
///   static void exportQueryMatchToMzTab_(
///
/// Appends exactly one row (PSM or OSM) to @p output. @p calc_mass is divided
/// by the absolute charge of @p match to obtain the calculated m/z.
/// @p file_map is used to look up the MS file index of the query's input file.
128  template <typename MzTabSectionRow>
130  const String& sequence,
131  const IdentificationData::MoleculeQueryMatch& match, double calc_mass,
132  std::vector<MzTabSectionRow>& output,
133  std::map<IdentificationData::ScoreTypeRef, Size>& score_map,
134  std::map<IdentificationData::InputFileRef, Size>& file_map)
135  {
136  MzTabSectionRow xsm; // PSM or OSM
137  // @TODO: handle modifications properly
138  xsm.sequence.set(sequence);
139  exportStepsAndScoresToMzTab_(match.steps_and_scores, xsm.search_engine,
140  xsm.search_engine_score, score_map);
141  const IdentificationData::DataQuery& query = *match.data_query_ref;
// retention time is stored as a one-element list
142  std::vector<MzTabDouble> rts(1);
143  rts[0].set(query.rt);
144  xsm.retention_time.set(rts);
145  xsm.charge.set(match.charge);
146  xsm.exp_mass_to_charge.set(query.mz);
// NOTE(review): no guard for match.charge == 0 here, which would divide by
// zero - confirm that callers never pass an uncharged match.
147  xsm.calc_mass_to_charge.set(calc_mass / abs(match.charge));
// the MS file reference is only set if the query has an input file
148  if (query.input_file_opt)
149  {
// NOTE(review): std::map::operator[] inserts a value-initialized entry if
// the key is absent - confirm that file_map always contains this input file.
150  xsm.spectra_ref.setMSFile(file_map[*query.input_file_opt]);
151  }
152  xsm.spectra_ref.setSpecRef(query.data_id);
153  // @TODO: find a way of passing in the names of relevant meta values
154  // (e.g. from NucleicAcidSearchEngine), instead of hard-coding them here
155  static const std::vector<String> meta_out({"adduct", "isotope_offset"});
// each listed meta value that exists on the match becomes an "opt_<name>"
// optional column
156  for (const String& meta : meta_out)
157  {
158  if (match.metaValueExists(meta))
159  {
160  MzTabOptionalColumnEntry opt_meta;
161  opt_meta.first = "opt_" + meta;
162  opt_meta.second.set(match.getMetaValue(meta));
163  xsm.opt_.push_back(opt_meta);
164  }
165  }
166  // don't repeat data from the peptide section (e.g. accessions)
167  // why are "pre"/"post"/"start"/"end" not in the peptide section?!
168  output.push_back(xsm);
169  }
170 
/// Helper used by the export templates above: receives the processing steps
/// and scores of an item plus the mzTab search-engine list and score map to
/// write to (declaration only; behavior is defined in the implementation).
172  static void exportStepsAndScoresToMzTab_(
173  const IdentificationData::AppliedProcessingSteps& steps_and_scores,
174  MzTabParameterList& steps_out, std::map<Size, MzTabDouble>& scores_out,
175  std::map<IdentificationData::ScoreTypeRef, Size>& score_map);
176 
/// Takes a map from score type to index and a map from index to
/// MzTabParameter to write to (declaration only).
178  static void addMzTabSEScores_(
179  const std::map<IdentificationData::ScoreTypeRef, Size>& scores,
180  std::map<Size, MzTabParameter>& output);
181 
/// First of two overloads called from exportPeptideOrOligoToMzTab_.
/// NOTE(review): listing line 185 (the second parameter and the closing of
/// the declaration) is missing; presumably a reference to one of the mzTab
/// section row types listed in the cross-reference section - confirm.
183  static void addMzTabMoleculeParentContext_(
184  const std::set<IdentificationData::MoleculeParentMatch>& matches,
186 
/// Second overload of addMzTabMoleculeParentContext_.
/// NOTE(review): listing line 190 (the second parameter and the closing of
/// the declaration) is missing; presumably the other mzTab section row type -
/// confirm.
188  static void addMzTabMoleculeParentContext_(
189  const std::set<IdentificationData::MoleculeParentMatch>& matches,
191 
/// Returns a reference to search parameters within @p id_data (per the return
/// type). NOTE(review): listing line 194 (first parameter) is missing.
193  static IdentificationData::SearchParamRef importDBSearchParameters_(
195  IdentificationData& id_data);
196 
/// Returns ProteinIdentification::SearchParameters (per the return type).
/// NOTE(review): listing line 199 (parameter list and closing of the
/// declaration) is missing.
198  static ProteinIdentification::SearchParameters exportDBSearchParameters_(
200 
/// Takes a mutable ProteinIdentification (declaration only).
/// NOTE(review): listing line 203 (first parameter) is missing.
202  static void exportMSRunInformation_(
204  ProteinIdentification& protein);
205  };
206 }
FASTAFile.h
OpenMS::IdentificationDataConverter::exportQueryMatchToMzTab_
static void exportQueryMatchToMzTab_(const String &sequence, const IdentificationData::MoleculeQueryMatch &match, double calc_mass, std::vector< MzTabSectionRow > &output, std::map< IdentificationData::ScoreTypeRef, Size > &score_map, std::map< IdentificationData::InputFileRef, Size > &file_map)
Export a molecule-query match (peptide- or oligonucleotide-spectrum match) to mzTab.
Definition: IdentificationDataConverter.h:129
OpenMS::IdentificationDataInternal::ParentMolecule
Representation of a parent molecule that is identified only indirectly (e.g. a protein).
Definition: ParentMolecule.h:49
OpenMS::IdentificationDataInternal::DataQuery::data_id
String data_id
spectrum or feature ID (from the file referenced by "input_file_ref"):
Definition: DataQuery.h:50
OpenMS::IdentificationDataInternal::IteratorWrapper
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi_index_containers.
Definition: MetaData.h:43
OpenMS::MzTabParameterList
Definition: MzTab.h:315
OpenMS::MzTabPeptideSectionRow
PEP - Peptide section (Table based)
Definition: MzTab.h:628
OpenMS::MzTabOligonucleotideSectionRow
OLI - Oligonucleotide section (table-based)
Definition: MzTab.h:774
OpenMS::IdentificationDataInternal::MoleculeQueryMatch::data_query_ref
DataQueryRef data_query_ref
Definition: MoleculeQueryMatch.h:65
OpenMS::IdentificationDataInternal::MoleculeQueryMatch
Meta data for a search hit (e.g. peptide-spectrum match).
Definition: MoleculeQueryMatch.h:61
OpenMS::IdentificationDataInternal::DataQuery
Search query, e.g. spectrum or feature.
Definition: DataQuery.h:47
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
MzTab.h
OpenMS::MetaInfoInterface::metaValueExists
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
OpenMS::MetaInfoInterface::getMetaValue
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not found.
OpenMS::IdentificationDataInternal::DataQuery::input_file_opt
boost::optional< InputFileRef > input_file_opt
Definition: DataQuery.h:53
OpenMS::IdentificationDataInternal::ScoredProcessingResult::steps_and_scores
AppliedProcessingSteps steps_and_scores
Definition: ScoredProcessingResult.h:46
OpenMS::IdentificationDataConverter
Definition: IdentificationDataConverter.h:45
OpenMS::IdentificationDataInternal::ParentMolecule::sequence
String sequence
Definition: ParentMolecule.h:57
OpenMS::IdentificationDataInternal::ParentMolecule::description
String description
Definition: ParentMolecule.h:59
OpenMS::IdentificationDataConverter::exportParentMoleculeToMzTab_
static void exportParentMoleculeToMzTab_(const IdentificationData::ParentMolecule &parent, std::vector< MzTabSectionRow > &output, std::map< IdentificationData::ScoreTypeRef, Size > &score_map)
Export a parent molecule (protein or nucleic acid) to mzTab.
Definition: IdentificationDataConverter.h:74
IdentificationData.h
OpenMS::IdentificationData
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:89
OpenMS::IdentificationDataInternal::ParentMolecule::coverage
double coverage
sequence coverage as a fraction between 0 and 1
Definition: ParentMolecule.h:61
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:221
OpenMS::IdentificationDataInternal::MoleculeType
MoleculeType
Definition: MetaData.h:63
ProteinIdentification.h
OpenMS::IdentificationDataInternal::MoleculeQueryMatch::charge
Int charge
Definition: MoleculeQueryMatch.h:67
OpenMS::MzTab
Data model of MzTab files. Please see the official MzTab specification at https://code.google.com/p/mztab/
Definition: MzTab.h:855
OpenMS::MzTabOptionalColumnEntry
std::pair< String, MzTabString > MzTabOptionalColumnEntry
Definition: MzTab.h:586
OpenMS::IdentificationData::AppliedProcessingSteps
IdentificationDataInternal::AppliedProcessingSteps AppliedProcessingSteps
Definition: IdentificationData.h:123
OpenMS::IdentificationDataInternal::PROTEIN
Definition: MetaData.h:65
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::IdentificationDataInternal::DataQuery::mz
double mz
Definition: DataQuery.h:55
OpenMS::IdentificationDataConverter::exportPeptideOrOligoToMzTab_
static void exportPeptideOrOligoToMzTab_(const IdentSeq &identified, std::vector< MzTabSectionRow > &output, std::map< IdentificationData::ScoreTypeRef, Size > &score_map)
Export an identified sequence (peptide or oligonucleotide, but not small molecule/compound) to mzTab.
Definition: IdentificationDataConverter.h:97
PeptideIdentification.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::IdentificationDataInternal::DataQuery::rt
double rt
Definition: DataQuery.h:55
OpenMS::IdentificationDataInternal::ParentMolecule::accession
String accession
Definition: ParentMolecule.h:51