OpenMS  2.4.0
MessagePasserFactory.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
36 #define OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
37 
38 #include <cmath>
39 #include <Evergreen/evergreen.hpp>
40 #include <Utility/inference_utilities.hpp>
41 
42 typedef unsigned long int uiint;
43 
44 template <typename Label>
46 private:
47  const int minInputsPAF = 3;
48  double alpha, beta, gamma, p, pepPrior;
49  Label offset;
50 
51  inline double notConditionalGivenSum(unsigned long summ) {
52  // use log for better precision
53  return pow(2., log2(1. - beta) + summ * log2(1. - alpha));
54  //return std::pow((1.0 - alpha), summ) * (1.0 - beta);
55  }
56 
57 public:
58  TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
59  TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
60 
61  TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
62 
63  TableDependency<Label> createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
64  TableDependency<Label> createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
65 
66  TableDependency<Label> createSumFactor(size_t nrParents, Label nId);
67 
68  AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
69  AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
70 
71  PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<TableDependency <Label> > & deps);
72 
73  MessagePasserFactory<Label>(double alpha, double beta, double gamma, double p, double pepPrior);
74 
75 
76 
78  // TODO we could recollect the protIDs from the union of parents.
79  void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
80  const std::vector<std::vector<Label>> & parentsOfPeps,
81  const std::vector<double> & pepEvidences,
82  InferenceGraphBuilder<Label> & igb);
83 
84  //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
85  // const std::vector<std::vector<Label>> & parentsOfPeps,
86  // const std::vector<double> & pepEvidences,
87  // InferenceGraphBuilder<Label> & igb);
88 
89  //const std::vector<std::set<Label>> getPosteriorVariables(const std::vector<uiint> & protIDs);
90  //const std::vector<std::vector<Label>> getPosteriorVariablesVectors(const std::vector<uiint> & protIDs);
91  //const std::vector<std::set<Label>> getPosteriorVariables(uiint rangeProtIDs);
92 };
93 
94 //IMPLEMENTATIONS:
95 
96 template <typename L>
97 MessagePasserFactory<L>::MessagePasserFactory(double alpha_, double beta_, double gamma_, double p_, double pep_prior_) {
98  assert(0. < alpha_ && alpha_ < 1.);
99  assert(0. < beta_ && beta_ < 1.);
100  assert(0. < gamma_ && gamma_ < 1.);
101  //Note: smaller than 1 might be possible but is untested right now.
102  assert(p_ >= 1.);
103  assert(0. < pep_prior_ && pep_prior_ < 1.);
104  alpha = alpha_;
105  beta = beta_;
106  gamma = gamma_;
107  p = p_;
108  pepPrior = pep_prior_;
109 }
110 
111 template <typename L>
112 TableDependency<L> MessagePasserFactory<L>::createProteinFactor(L id, int nrMissingPeps) {
113  double prior = gamma;
114  if (nrMissingPeps > 0)
115  {
116  double powFactor = std::pow(1.0 - alpha, -nrMissingPeps);
117  prior = -prior/(prior * powFactor - prior - powFactor);
118  }
119  double table[] = {1.0 - prior, prior};
120  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
121  return TableDependency<L>(lpmf,p);
122 }
123 
124 template <typename L>
125 TableDependency<L> MessagePasserFactory<L>::createProteinFactor(L id, double prior, int nrMissingPeps) {
126  if (nrMissingPeps > 0)
127  {
128  double powFactor = std::pow(1.0 - alpha, -nrMissingPeps);
129  prior = -prior/(prior * powFactor - prior - powFactor);
130  }
131  double table[] = {1.0 - prior, prior};
132  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
133  return TableDependency<L>(lpmf,p);
134 }
135 
136 template <typename L>
137 TableDependency<L> MessagePasserFactory<L>::createPeptideEvidenceFactor(L id, double prob) {
138  double table[] = {(1 - prob) * (1 - pepPrior), prob * pepPrior};
139  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
140  return TableDependency<L>(lpmf,p);
141 }
142 
143 
144 template <typename L>
145 TableDependency<L> MessagePasserFactory<L>::createSumEvidenceFactor(size_t nrParents, L nId, L pepId) {
146  Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
147  for (unsigned long i=0; i <= nrParents; ++i) {
148  double notConditional = notConditionalGivenSum(i);
149  unsigned long indexArr[2] = {i,0ul};
150  table[indexArr] = notConditional;
151  unsigned long indexArr2[2] = {i,1ul};
152  table[indexArr2] = 1.0 - notConditional;
153  }
154  //std::cout << table << std::endl;
155  LabeledPMF<L> lpmf({nId, pepId}, PMF({0L,0L}, table));
156  //std::cout << lpmf << std::endl;
157  return TableDependency<L>(lpmf,p);
158 }
159 
160 template <typename L>
161 TableDependency<L> MessagePasserFactory<L>::createRegularizingSumEvidenceFactor(size_t nrParents, L nId, L pepId) {
162  Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
163  unsigned long z[2]{0ul,0ul};
164  unsigned long z1[2]{0ul,1ul};
165  table[z] = 1. - beta;
166  table[z1] = beta;
167  for (unsigned long i=1; i <= nrParents; ++i) {
168  double notConditional = notConditionalGivenSum(i);
169  unsigned long indexArr[2] = {i,0ul};
170  table[indexArr] = notConditional / i;
171  unsigned long indexArr2[2] = {i,1ul};
172  table[indexArr2] = (1.0 - notConditional) / i;
173  }
174  //std::cout << table << std::endl;
175  LabeledPMF<L> lpmf({nId, pepId}, PMF({0L,0L}, table));
176  //std::cout << lpmf << std::endl;
177  return TableDependency<L>(lpmf,p);
178 }
179 
180 template <typename L>
181 TableDependency<L> MessagePasserFactory<L>::createSumFactor(size_t nrParents, L nId) {
182  Tensor<double> table({nrParents+1});
183  for (unsigned long i=0; i <= nrParents; ++i) {
184  table[i] = 1.0/(nrParents+1);
185  }
186  //std::cout << table << std::endl;
187  LabeledPMF<L> lpmf({nId}, PMF({0L}, table));
188  //std::cout << lpmf << std::endl;
189  return TableDependency<L>(lpmf,p);
190 }
191 
192 template <typename L>
193 AdditiveDependency<L> MessagePasserFactory<L>::createPeptideProbabilisticAdderFactor(const std::set<L> & parentProteinIDs, L nId) {
194  std::vector<std::vector<L>> parents;
195  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
196  return AdditiveDependency<L>(parents, {nId}, p);
197 }
198 
199 template <typename L>
200 AdditiveDependency<L> MessagePasserFactory<L>::createPeptideProbabilisticAdderFactor(const std::vector<L> & parentProteinIDs, L nId) {
201  std::vector<std::vector<L>> parents;
202  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
203  return AdditiveDependency<L>(parents, {nId}, p);
204 }
205 
206 template <typename L>
207 PseudoAdditiveDependency<L> MessagePasserFactory<L>::createBFPeptideProbabilisticAdderFactor(const std::set<L> & parentProteinIDs, L nId, const std::vector<TableDependency<L>> & deps) {
208  std::vector<std::vector<L>> parents;
209  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
210  return PseudoAdditiveDependency<L>(parents, {nId}, deps, p);
211 }
212 
214 // TODO we could recollect the protIDs from the union of parents.
215 template <typename L>
216 void MessagePasserFactory<L>::fillVectorsOfMessagePassers(const std::vector<L> & protIDs,
217  const std::vector<std::vector<L>> & parentsOfPeps,
218  const std::vector<double> & pepEvidences,
219  InferenceGraphBuilder<L> & igb)
220 {
221  //TODO asserts could be loosened
222  assert(parentsOfPeps.size() == pepEvidences.size());
223  for (std::vector<uiint> parents : parentsOfPeps)
224  for (L parent : parents)
225  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
226 
227  for (uiint pid : protIDs)
228  igb.insert_dependency(createProteinFactor(pid));
229 
230  for (uiint j = 0; j < parentsOfPeps.size(); j++)
231  {
232  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
233  igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
234  igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
235  }
236 }
237 
238 /* unused but working
239 template <typename L>
240 void MessagePasserFactory<L>::fillVectorsOfMessagePassersBruteForce(const std::vector<L> & protIDs,
241  const std::vector<std::vector<L>> & parentsOfPeps,
242  const std::vector<double> & pepEvidences,
243  InferenceGraphBuilder<L> & igb)
244 {
245  assert(parentsOfPeps.size() == pepEvidences.size());
246  for (std::vector<uiint> parents : parentsOfPeps)
247  for (uiint parent : parents)
248  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
249 
250  for (uiint pid : protIDs)
251  igb.insert_dependency(createProteinFactor(pid));
252 
253  for (uiint j = 0; j < parentsOfPeps.size(); j++)
254  {
255  std::vector<TableDependency<std::string> > deps;
256  auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
257  auto sumdep = createSumFactor(parentsOfPeps[j],j);
258  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
259  igb.insert_dependency(pepdep);
260  deps.push_back(sumdep);
261  for (auto parent : parentsOfPeps[j]) {
262  deps.push_back(createProteinFactor(parent));
263  }
264 
265  //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
266  igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
267  }
268 }
269  */
270 
271 /* Not needed anymore. We use indices directly now.
272 template <typename L>
273 const std::vector<std::set<L>> MessagePasserFactory<L>::getPosteriorVariables(const std::vector<L> & protIDs){
274  std::vector<std::set<L>> varSets{};
275  for (L protID : protIDs){
276  std::set<L> varSet{"Pr" + std::to_string(protID)};
277  varSets.push_back(varSet);
278  }
279  return varSets;
280 }
281 
282 template <typename L>
283 const std::vector<std::vector<std::string>> MessagePasserFactory<L>::getPosteriorVariablesVectors(const std::vector<uiint> & protIDs){
284  std::vector<std::vector<std::string>> varVecs{};
285  for (uiint protID : protIDs){
286  std::vector<std::string> varVec{"Pr" + std::to_string(protID)};
287  varVecs.push_back(varVec);
288  }
289  return varVecs;
290 }
291 
292 template <typename L>
293 const std::vector<std::set<std::string>> MessagePasserFactory<L>::getPosteriorVariables(uiint rangeProtIDs){
294  std::vector<std::set<std::string>> varSets{};
295  for (uiint i=0; i < rangeProtIDs; ++i){
296  std::set<std::string> varSet{"Pr" + std::to_string(i)};
297  varSets.push_back(varSet);
298  }
299  return varSets;
300 }*/
301 
302 #endif //OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
unsigned long int uiint
Definition: MessagePasserFactory.h:42
TableDependency< Label > createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:161
double notConditionalGivenSum(unsigned long summ)
Definition: MessagePasserFactory.h:51
TableDependency< Label > createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:145
double p
Definition: MessagePasserFactory.h:48
TableDependency< Label > createSumFactor(size_t nrParents, Label nId)
Definition: MessagePasserFactory.h:181
double pepPrior
Definition: MessagePasserFactory.h:48
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label >> &parentsOfPeps, const std::vector< double > &pepEvidences, InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition: MessagePasserFactory.h:216
double gamma
Definition: MessagePasserFactory.h:48
Definition: MessagePasserFactory.h:45
Label offset
Definition: MessagePasserFactory.h:49
PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< TableDependency< Label > > &deps)
Definition: MessagePasserFactory.h:207
double beta
Definition: MessagePasserFactory.h:48
const int minInputsPAF
Definition: MessagePasserFactory.h:47
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
double alpha
Definition: MessagePasserFactory.h:48
TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition: MessagePasserFactory.h:137
MessagePasserFactory(double alpha, double beta, double gamma, double p, double pepPrior)
Definition: MessagePasserFactory.h:97