OpenMS  2.7.0
MessagePasserFactory.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <cmath>
38 #include <Evergreen/evergreen.hpp>
39 
40 typedef unsigned long int uiint;
41 
42 namespace OpenMS
43 {
44  namespace Internal
45 {
50  template <typename Label>
52  private:
53  //const int minInputsPAF = 3; //@todo could be used to decide when brute force is better.
54 
57 
60  std::map<int, double> chgLLhoods = {{1, 0.7}, {2, 0.9}, {3, 0.7}, {4, 0.5}, {5, 0.5}};
61 
65  inline double notConditionalGivenSum(unsigned long summ) {
66  // use log for better precision
67  return std::pow(2., log2(1. - beta_) + summ * log2(1. - alpha_));
68  //return std::pow((1.0 - alpha_), summ) * (1.0 - beta_); // standard way
69  }
70 
71  public:
73  evergreen::TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
75  evergreen::TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
76 
79  evergreen::TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
80 
84  evergreen::TableDependency<Label> createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
85 
88  evergreen::TableDependency<Label> createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
89 
90  //For extended model. @todo currently unused
91  evergreen::TableDependency<Label> createSumFactor(size_t nrParents, Label nId);
92  evergreen::TableDependency<Label> createReplicateFactor(Label seqId, Label repId);
93  evergreen::TableDependency<Label> createChargeFactor(Label repId, Label chargeId, int chg);
94 
96  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
98  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
100  evergreen::PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency <Label> > & deps);
101 
110  MessagePasserFactory<Label>(double alpha, double beta, double gamma, double p, double pep_prior);
111 
112 
113 
115  // TODO we could recollect the protIDs from the union of parents.
116  void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
117  const std::vector<std::vector<Label>> & parentsOfPeps,
118  const std::vector<double> & pepEvidences,
119  evergreen::InferenceGraphBuilder<Label> & igb);
120 
121  //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
122  // const std::vector<std::vector<Label>> & parentsOfPeps,
123  // const std::vector<double> & pepEvidences,
124  // evergreen::InferenceGraphBuilder<Label> & igb);
125  };
126 
127  //IMPLEMENTATIONS:
128 
129  template <typename Label>
130  MessagePasserFactory<Label>::MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior) {
131  assert(0. <= alpha && alpha <= 1.);
132  assert(0. <= beta && beta <= 1.);
133  assert(0. <= gamma && gamma <= 1.);
134  //Note: smaller than 1 might be possible but is untested right now.
135  assert(p >= 1.);
136  assert(0. < pep_prior && pep_prior < 1.);
137  alpha_ = alpha;
138  beta_ = beta;
139  gamma_ = gamma;
140  p_ = p;
141  pepPrior_ = pep_prior;
142  }
143 
144  template <typename Label>
145  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, int nrMissingPeps) {
146  double prior = gamma_;
147  if (nrMissingPeps > 0)
148  {
149  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
150  prior = -prior/(prior * powFactor - prior - powFactor);
151  }
152  double table[] = {1.0 - prior, prior};
153  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
154  return evergreen::TableDependency<Label>(lpmf,p_);
155  }
156 
157  template <typename Label>
158  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, double prior, int nrMissingPeps) {
159  if (nrMissingPeps > 0)
160  {
161  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
162  prior = -prior/(prior * powFactor - prior - powFactor);
163  }
164  double table[] = {1.0 - prior, prior};
165  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
166  return evergreen::TableDependency<Label>(lpmf,p_);
167  }
168 
169  template <typename Label>
170  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createPeptideEvidenceFactor(Label id, double prob) {
171  double table[] = {(1 - prob) * (1 - pepPrior_), prob * pepPrior_};
172  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
173  return evergreen::TableDependency<Label>(lpmf,p_);
174  }
175 
176 
177  template <typename Label>
178  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
179  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
180  for (unsigned long i=0; i <= nrParents; ++i) {
181  double notConditional = notConditionalGivenSum(i);
182  unsigned long indexArr[2] = {i,0ul};
183  table[indexArr] = notConditional;
184  unsigned long indexArr2[2] = {i,1ul};
185  table[indexArr2] = 1.0 - notConditional;
186  }
187  //std::cout << table << std::endl;
188  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
189  //std::cout << lpmf << std::endl;
190  return evergreen::TableDependency<Label>(lpmf,p_);
191  }
192 
193  template <typename Label>
194  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
195  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
196  unsigned long z[2]{0ul,0ul};
197  unsigned long z1[2]{0ul,1ul};
198  table[z] = 1. - beta_;
199  table[z1] = beta_;
200  for (unsigned long i=1; i <= nrParents; ++i) {
201  double notConditional = notConditionalGivenSum(i);
202  unsigned long indexArr[2] = {i,0ul};
203  table[indexArr] = notConditional / i;
204  unsigned long indexArr2[2] = {i,1ul};
205  table[indexArr2] = (1.0 - notConditional) / i;
206  }
207  //std::cout << table << std::endl;
208  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
209  //std::cout << lpmf << std::endl;
210  return evergreen::TableDependency<Label>(lpmf,p_);
211  }
212 
213  template <typename Label>
214  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumFactor(size_t nrParents, Label nId) {
215  evergreen::Tensor<double> table({nrParents+1});
216  for (unsigned long i=0; i <= nrParents; ++i) {
217  table[i] = 1.0/(nrParents+1.);
218  }
219  //std::cout << table << std::endl;
220  evergreen::LabeledPMF<Label> lpmf({nId}, evergreen::PMF({0L}, table));
221  //std::cout << lpmf << std::endl;
222  return evergreen::TableDependency<Label>(lpmf,p_);
223  }
224 
225  template <typename Label>
226  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createReplicateFactor(Label seqId, Label repId) {
227  using arr = unsigned long[2];
228  evergreen::Tensor<double> table({2,2});
229  table[arr{0,0}] = 0.999;
230  table[arr{0,1}] = 0.001;
231  table[arr{1,0}] = 0.1;
232  table[arr{1,1}] = 0.9;
233  //std::cout << table << std::endl;
234  evergreen::LabeledPMF<Label> lpmf({seqId,repId}, evergreen::PMF({0L,0L}, table));
235  //std::cout << lpmf << std::endl;
236  return evergreen::TableDependency<Label>(lpmf,p_);
237  }
238 
239  template <typename Label>
240  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createChargeFactor(Label repId, Label chgId, int chg) {
241  double chgPrior = chgLLhoods[chg];
242  using arr = unsigned long[2];
243  evergreen::Tensor<double> table({2,2});
244  table[arr{0,0}] = 0.999;
245  table[arr{0,1}] = 0.001;
246  table[arr{1,0}] = 0.1;
247  table[arr{1,1}] = chgPrior;
248  //std::cout << table << std::endl;
249  evergreen::LabeledPMF<Label> lpmf({repId,chgId}, evergreen::PMF({0L,0L}, table));
250  //std::cout << lpmf << std::endl;
251  return evergreen::TableDependency<Label>(lpmf,p_);
252  }
253 
254  template <typename Label>
255  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId) {
256  std::vector<std::vector<Label>> parents;
257  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
258  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
259  }
260 
261  template <typename Label>
262  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId) {
263  std::vector<std::vector<Label>> parents;
264  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
265  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
266  }
267 
268  template <typename Label>
269  evergreen::PseudoAdditiveDependency<Label> MessagePasserFactory<Label>::createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency<Label>> & deps) {
270  std::vector<std::vector<Label>> parents;
271  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
272  return evergreen::PseudoAdditiveDependency<Label>(parents, {nId}, deps, p_);
273  }
274 
276  // TODO we could recollect the protIDs from the union of parents.
277  template <typename Label>
278  void MessagePasserFactory<Label>::fillVectorsOfMessagePassers(const std::vector<Label>& protIDs,
279  const std::vector<std::vector<Label>>& parentsOfPeps,
280  const std::vector<double>& pepEvidences,
281  evergreen::InferenceGraphBuilder<Label>& igb)
282  {
283  //TODO asserts could be loosened
284  assert(parentsOfPeps.size() == pepEvidences.size());
285  for (const std::vector<uiint>& parents : parentsOfPeps)
286  for (Label parent : parents)
287  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
288 
289  for (uiint pid : protIDs)
290  igb.insert_dependency(createProteinFactor(pid));
291 
292  for (uiint j = 0; j < parentsOfPeps.size(); j++)
293  {
294  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
295  igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
296  igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
297  }
298  }
299 
300  /* unused but working
301  template <typename Label>
302  void MessagePasserFactory<Label>::fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
303  const std::vector<std::vector<Label>> & parentsOfPeps,
304  const std::vector<double> & pepEvidences,
305  evergreen::InferenceGraphBuilder<Label> & igb)
306  {
307  assert(parentsOfPeps.size() == pepEvidences.size());
308  for (std::vector<uiint> parents : parentsOfPeps)
309  for (uiint parent : parents)
310  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
311 
312  for (uiint pid : protIDs)
313  igb.insert_dependency(createProteinFactor(pid));
314 
315  for (uiint j = 0; j < parentsOfPeps.size(); j++)
316  {
317  std::vector<evergreen::TableDependency<std::string> > deps;
318  auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
319  auto sumdep = createSumFactor(parentsOfPeps[j],j);
320  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
321  igb.insert_dependency(pepdep);
322  deps.push_back(sumdep);
323  for (const auto& parent : parentsOfPeps[j]) {
324  deps.push_back(createProteinFactor(parent));
325  }
326 
327  //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
328  igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
329  }
330  }
331  */
332 
333  } // namespace Internal
334 } // namespace OpenMS
unsigned long int uiint
Definition: MessagePasserFactory.h:40
Definition: MessagePasserFactory.h:51
evergreen::TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
Protein Factor initialized with model prior (missing peps are experimental)
Definition: MessagePasserFactory.h:145
double notConditionalGivenSum(unsigned long summ)
Definition: MessagePasserFactory.h:65
evergreen::AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
To sum up distributions for the number of parent proteins of a peptide with convolution trees.
Definition: MessagePasserFactory.h:255
double alpha_
the model parameters
Definition: MessagePasserFactory.h:56
MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior)
Constructor.
Definition: MessagePasserFactory.h:130
double beta_
Definition: MessagePasserFactory.h:56
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label >> &parentsOfPeps, const std::vector< double > &pepEvidences, evergreen::InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition: MessagePasserFactory.h:278
evergreen::TableDependency< Label > createChargeFactor(Label repId, Label chargeId, int chg)
Definition: MessagePasserFactory.h:240
double gamma_
Definition: MessagePasserFactory.h:56
double pepPrior_
Definition: MessagePasserFactory.h:56
evergreen::TableDependency< Label > createReplicateFactor(Label seqId, Label repId)
Definition: MessagePasserFactory.h:226
evergreen::TableDependency< Label > createSumFactor(size_t nrParents, Label nId)
Definition: MessagePasserFactory.h:214
evergreen::TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition: MessagePasserFactory.h:170
double p_
Definition: MessagePasserFactory.h:56
std::map< int, double > chgLLhoods
Definition: MessagePasserFactory.h:60
evergreen::TableDependency< Label > createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:194
evergreen::TableDependency< Label > createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:178
evergreen::PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< evergreen::TableDependency< Label > > &deps)
To sum up distributions for the number of parent proteins of a peptide brute-force.
Definition: MessagePasserFactory.h:269
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:886