OpenMS
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 
16 #include <vector>
17 #include <set>
18 
19 namespace OpenMS
20 {
21 
22  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
23  public DefaultParamHandler
24  {
25  public:
27 
/// Generic score categories. Each category is associated with a set of
/// concrete score names in type_to_str_ and (except where noted there)
/// with an expected score direction in type_to_better_.
enum class ScoreType
{
  RAW,      ///< raw scores, registered under names such as "XTandem", "OMSSA" or "Mascot"
  RAW_EVAL, ///< E-value-like scores, registered under names such as "expect" or "E-Value"
  PP,       ///< registered as "Posterior Probability"
  PEP,      ///< registered as "Posterior Error Probability", "pep", ...
  FDR,      ///< registered as "FDR", "fdr", "false discovery rate"
  QVAL      ///< registered as "q-value", "qvalue", ...
};
41 
43  bool isScoreType(const String& score_name, const ScoreType& type)
44  {
45  const std::set<String>& possible_types = type_to_str_[type];
46  return possible_types.find(score_name) != possible_types.end();
47  }
48 
54  template <typename IDType>
55  void switchScores(IDType& id, Size& counter)
56  {
57  for (auto hit_it = id.getHits().begin();
58  hit_it != id.getHits().end(); ++hit_it, ++counter)
59  {
60  if (!hit_it->metaValueExists(new_score_))
61  {
62  std::stringstream msg;
63  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
64  throw Exception::MissingInformation(__FILE__, __LINE__,
65  OPENMS_PRETTY_FUNCTION, msg.str());
66  }
67 
68  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
69  old_score_);
70  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
71  if (!dv.isEmpty()) // meta value for old score already exists
72  {
73  // TODO: find a better way to check if old score type is something different (even if it has same name)
74  // This currently, is a workaround for e.g., having Percolator_qvalue as meta value and same q-value as main score (getScore()).
75  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
76  (double(dv) + hit_it->getScore())) > tolerance_)
77  {
78  hit_it->setMetaValue(old_score_meta + "~", hit_it->getScore());
79  }
80  }
81  else
82  {
83  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
84  }
85  hit_it->setScore(hit_it->getMetaValue(new_score_));
86  }
87  id.setScoreType(new_score_type_);
88  id.setHigherScoreBetter(higher_better_);
89  }
90 
94  void switchToGeneralScoreType(std::vector<PeptideIdentification>& id, ScoreType type, Size& counter)
95  {
96  if (id.empty()) return;
97  String t = findScoreType(id[0], type);
98  if (t.empty())
99  {
100  String msg = "First encountered ID does not have the requested score type.";
101  throw Exception::MissingInformation(__FILE__, __LINE__,
102  OPENMS_PRETTY_FUNCTION, msg);
103  }
104  else if (t == id[0].getScoreType())
105  {
106  // we assume that all the other peptide ids
107  // also already have the correct score set
108  return;
109  }
110 
111  if (t.hasSuffix("_score"))
112  {
113  new_score_type_ = t.chop(6);
114  }
115  else
116  {
117  new_score_type_ = t;
118  }
119  new_score_ = t;
120 
121  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
122  {
123  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
124  higher_better_ = type_to_better_[type];
125  }
126  for (auto& i : id)
127  {
128  switchScores(i, counter);
129  }
130  }
131 
135  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
136  {
137  String new_type = "";
138  for (const auto& f : cmap)
139  {
140  const auto& ids = f.getPeptideIdentifications();
141  if (!ids.empty())
142  {
143  new_type = findScoreType(ids[0], type);
144  if (new_type == ids[0].getScoreType())
145  {
146  return;
147  }
148  else
149  {
150  break;
151  }
152  }
153  }
154 
155  if (new_type.empty())
156  {
157  String msg = "First encountered ID does not have the requested score type.";
158  throw Exception::MissingInformation(__FILE__, __LINE__,
159  OPENMS_PRETTY_FUNCTION, msg);
160  }
161 
162  if (new_type.hasSuffix("_score"))
163  {
164  new_score_type_ = new_type.chop(6);
165  }
166  else
167  {
168  new_score_type_ = new_type;
169  }
170  new_score_ = new_type;
171 
172  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
173  {
174  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
175  higher_better_ = type_to_better_[type];
176  }
177 
178  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
179  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
180  }
181 
182 
184  template <typename IDType>
186  {
187  const String& curr_score_type = id.getScoreType();
188  const std::set<String>& possible_types = type_to_str_[type];
189  if (possible_types.find(curr_score_type) != possible_types.end())
190  {
191  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
192  return curr_score_type;
193  }
194  else
195  {
196  if (id.getHits().empty())
197  {
198  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
199  return "";
200  }
201  const auto& hit = id.getHits()[0];
202  for (const auto& poss_str : possible_types)
203  {
204  if (hit.metaValueExists(poss_str)) return poss_str;
205  else if (hit.metaValueExists(poss_str + "_score")) return poss_str + "_score";
206  }
207  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
208  return "";
209  }
210  }
211 
212  private:
213  void updateMembers_() override;
214 
216  const double tolerance_ = 1e-6;
217 
219  String new_score_, new_score_type_, old_score_;
221  bool higher_better_; // for the new scores, are higher ones better?
222 
224  std::map<ScoreType, std::set<String>> type_to_str_ =
225  {
226  {ScoreType::RAW, {"XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh", "Sage"}},
227  //TODO find out reasonable raw scores for SES that provide E-Values as main score or see below
228  //TODO there is no test for spectraST idXML, so I don't know its score
229  //TODO check if we should combine RAW and RAW_EVAL:
230  // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
231  // then you need additional if's/try's
232  {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
233  {ScoreType::PP, {"Posterior Probability"}},
234  {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
235  {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
236  {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
237  };
238 
240  std::map<ScoreType, bool> type_to_better_ =
241  {
242  {ScoreType::RAW, true}, //TODO this might actually not always be true
243  {ScoreType::RAW_EVAL, false},
244  {ScoreType::PP, true},
245  {ScoreType::PEP, false},
246  {ScoreType::FDR, false},
247  {ScoreType::QVAL, false}
248  };
249  };
250 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro to use when a warning, i.e. a piece of information that should be read by the user, is to be logged.
Definition: LogStream.h:444
#define OPENMS_LOG_INFO
Macro to use when information, e.g. a status, should be reported.
Definition: LogStream.h:449
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Not all required information provided.
Definition: Exception.h:162
Definition: IDScoreSwitcherAlgorithm.h:24
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
finds a certain score type in an ID and its metavalues if present, otherwise returns empty string
Definition: IDScoreSwitcherAlgorithm.h:185
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:135
void switchScores(IDType &id, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:55
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:33
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
String new_score_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:219
void switchToGeneralScoreType(std::vector< PeptideIdentification > &id, ScoreType type, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:94
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:221
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:43
void applyFunctionOnPeptideIDs(T &&f, bool include_unassigned=true)
applies a function on all PeptideIDs or only assigned ones
Definition: MapUtilities.h:42
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
A more convenient string class.
Definition: String.h:34
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22