OpenMS  2.4.0
MessagePasserFactory.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
36 #define OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
37 
38 #include <cmath>
39 #include <Evergreen/evergreen.hpp>
40 #include <Utility/inference_utilities.hpp>
41 
/// Unsigned integer type used for protein/peptide indices in this header.
using uiint = unsigned long int;
43 
44 template <typename Label>
46 private:
47  const int minInputsPAF = 3;
48  double alpha, beta, gamma, p;
49  Label offset;
50 
51  inline double notConditionalGivenSum(unsigned long summ) {
52  // use log for better precision
53  return pow(2., log2(1. - beta) + summ * log2(1. - alpha));
54  //return std::pow((1.0 - alpha), summ) * (1.0 - beta);
55  }
56 
57 public:
58  TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
59  TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
60 
61  TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
62 
63  TableDependency<Label> createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
64 
65  TableDependency<Label> createSumFactor(size_t nrParents, Label nId);
66 
67  AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
68  AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
69 
70  PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<TableDependency <Label> > & deps);
71 
72  MessagePasserFactory<Label>(double alpha, double beta, double gamma, double p);
73 
74 
75 
77  // TODO we could recollect the protIDs from the union of parents.
78  void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
79  const std::vector<std::vector<Label>> & parentsOfPeps,
80  const std::vector<double> & pepEvidences,
81  InferenceGraphBuilder<Label> & igb);
82 
83  //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
84  // const std::vector<std::vector<Label>> & parentsOfPeps,
85  // const std::vector<double> & pepEvidences,
86  // InferenceGraphBuilder<Label> & igb);
87 
88  //const std::vector<std::set<Label>> getPosteriorVariables(const std::vector<uiint> & protIDs);
89  //const std::vector<std::vector<Label>> getPosteriorVariablesVectors(const std::vector<uiint> & protIDs);
90  //const std::vector<std::set<Label>> getPosteriorVariables(uiint rangeProtIDs);
91 };
92 
93 //IMPLEMENTATIONS:
94 
95 template <typename L>
96 MessagePasserFactory<L>::MessagePasserFactory(double alpha_, double beta_, double gamma_, double p_) {
97  assert(0 < alpha_ && alpha_ < 1);
98  assert(0 < beta_ && beta_ < 1);
99  assert(0 < gamma_ && gamma_ < 1);
100  //Note: smaller than 1 might be possible but is untested right now.
101  assert(p_ >= 1);
102  alpha = alpha_;
103  beta = beta_;
104  gamma = gamma_;
105  p = p_;
106 }
107 
108 template <typename L>
109 TableDependency<L> MessagePasserFactory<L>::createProteinFactor(L id, int nrMissingPeps) {
110  double prior = gamma;
111  if (nrMissingPeps > 0)
112  {
113  double powFactor = std::pow(1.0 - alpha, -nrMissingPeps);
114  prior = -prior/(prior * powFactor - prior - powFactor);
115  }
116  double table[] = {1.0 - prior, prior};
117  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
118  return TableDependency<L>(lpmf,p);
119 }
120 
121 template <typename L>
122 TableDependency<L> MessagePasserFactory<L>::createProteinFactor(L id, double prior, int nrMissingPeps) {
123  if (nrMissingPeps > 0)
124  {
125  double powFactor = std::pow(1.0 - alpha, -nrMissingPeps);
126  prior = -prior/(prior * powFactor - prior - powFactor);
127  }
128  double table[] = {1.0 - prior, prior};
129  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
130  return TableDependency<L>(lpmf,p);
131 }
132 
133 template <typename L>
134 TableDependency<L> MessagePasserFactory<L>::createPeptideEvidenceFactor(L id, double prob) {
135  double table[] = {1 - prob, prob};
136  LabeledPMF<L> lpmf({id}, PMF({0L}, Tensor<double>::from_array(table)));
137  return TableDependency<L>(lpmf,p);
138 }
139 
140 
141 template <typename L>
142 TableDependency<L> MessagePasserFactory<L>::createSumEvidenceFactor(size_t nrParents, L nId, L pepId) {
143  Tensor<double> table({nrParents + 1 , 2});
144  for (unsigned long i=0; i <= nrParents; ++i) {
145  double notConditional = notConditionalGivenSum(i);
146  u_long indexArr[] = {i,0};
147  table[indexArr] = notConditional;
148  u_long indexArr2[] = {i,1};
149  table[indexArr2] = 1.0 - notConditional;
150  }
151  //std::cout << table << std::endl;
152  LabeledPMF<L> lpmf({nId, pepId}, PMF({0L,0L}, table));
153  //std::cout << lpmf << std::endl;
154  return TableDependency<L>(lpmf,p);
155 }
156 
157 template <typename L>
158 TableDependency<L> MessagePasserFactory<L>::createSumFactor(size_t nrParents, L nId) {
159  Tensor<double> table({nrParents+1});
160  for (unsigned long i=0; i <= nrParents; ++i) {
161  table[i] = 1.0/(nrParents+1);
162  }
163  //std::cout << table << std::endl;
164  LabeledPMF<L> lpmf({nId}, PMF({0L}, table));
165  //std::cout << lpmf << std::endl;
166  return TableDependency<L>(lpmf,p);
167 }
168 
169 template <typename L>
170 AdditiveDependency<L> MessagePasserFactory<L>::createPeptideProbabilisticAdderFactor(const std::set<L> & parentProteinIDs, L nId) {
171  std::vector<std::vector<L>> parents;
172  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
173  return AdditiveDependency<L>(parents, {nId}, p);
174 }
175 
176 template <typename L>
177 AdditiveDependency<L> MessagePasserFactory<L>::createPeptideProbabilisticAdderFactor(const std::vector<L> & parentProteinIDs, L nId) {
178  std::vector<std::vector<L>> parents;
179  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
180  return AdditiveDependency<L>(parents, {nId}, p);
181 }
182 
183 template <typename L>
184 PseudoAdditiveDependency<L> MessagePasserFactory<L>::createBFPeptideProbabilisticAdderFactor(const std::set<L> & parentProteinIDs, L nId, const std::vector<TableDependency<L>> & deps) {
185  std::vector<std::vector<L>> parents;
186  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const L& l){return std::vector<L>{l};});
187  return PseudoAdditiveDependency<L>(parents, {nId}, deps, p);
188 }
189 
191 // TODO we could recollect the protIDs from the union of parents.
192 template <typename L>
193 void MessagePasserFactory<L>::fillVectorsOfMessagePassers(const std::vector<L> & protIDs,
194  const std::vector<std::vector<L>> & parentsOfPeps,
195  const std::vector<double> & pepEvidences,
196  InferenceGraphBuilder<L> & igb)
197 {
198  //TODO asserts could be loosened
199  assert(parentsOfPeps.size() == pepEvidences.size());
200  for (std::vector<uiint> parents : parentsOfPeps)
201  for (L parent : parents)
202  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
203 
204  for (uiint pid : protIDs)
205  igb.insert_dependency(createProteinFactor(pid));
206 
207  for (uiint j = 0; j < parentsOfPeps.size(); j++)
208  {
209  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
210  igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
211  igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
212  }
213 }
214 
215 /* unused but working
216 template <typename L>
217 void MessagePasserFactory<L>::fillVectorsOfMessagePassersBruteForce(const std::vector<L> & protIDs,
218  const std::vector<std::vector<L>> & parentsOfPeps,
219  const std::vector<double> & pepEvidences,
220  InferenceGraphBuilder<L> & igb)
221 {
222  assert(parentsOfPeps.size() == pepEvidences.size());
223  for (std::vector<uiint> parents : parentsOfPeps)
224  for (uiint parent : parents)
225  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
226 
227  for (uiint pid : protIDs)
228  igb.insert_dependency(createProteinFactor(pid));
229 
230  for (uiint j = 0; j < parentsOfPeps.size(); j++)
231  {
232  std::vector<TableDependency<std::string> > deps;
233  auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
234  auto sumdep = createSumFactor(parentsOfPeps[j],j);
235  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
236  igb.insert_dependency(pepdep);
237  deps.push_back(sumdep);
238  for (auto parent : parentsOfPeps[j]) {
239  deps.push_back(createProteinFactor(parent));
240  }
241 
242  //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
243  igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
244  }
245 }
246  */
247 
248 /* Not needed anymore. We use indices directly now.
249 template <typename L>
250 const std::vector<std::set<L>> MessagePasserFactory<L>::getPosteriorVariables(const std::vector<L> & protIDs){
251  std::vector<std::set<L>> varSets{};
252  for (L protID : protIDs){
253  std::set<L> varSet{"Pr" + std::to_string(protID)};
254  varSets.push_back(varSet);
255  }
256  return varSets;
257 }
258 
259 template <typename L>
260 const std::vector<std::vector<std::string>> MessagePasserFactory<L>::getPosteriorVariablesVectors(const std::vector<uiint> & protIDs){
261  std::vector<std::vector<std::string>> varVecs{};
262  for (uiint protID : protIDs){
263  std::vector<std::string> varVec{"Pr" + std::to_string(protID)};
264  varVecs.push_back(varVec);
265  }
266  return varVecs;
267 }
268 
269 template <typename L>
270 const std::vector<std::set<std::string>> MessagePasserFactory<L>::getPosteriorVariables(uiint rangeProtIDs){
271  std::vector<std::set<std::string>> varSets{};
272  for (uiint i=0; i < rangeProtIDs; ++i){
273  std::set<std::string> varSet{"Pr" + std::to_string(i)};
274  varSets.push_back(varSet);
275  }
276  return varSets;
277 }*/
278 
279 #endif //OPENMS_ANALYSIS_ID_MESSAGEPASSERFACTORY_HPP
double gamma
Definition: MessagePasserFactory.h:48
Label offset
Definition: MessagePasserFactory.h:49
unsigned long int uiint
Definition: MessagePasserFactory.h:42
AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition: MessagePasserFactory.h:134
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
Definition: MessagePasserFactory.h:45
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label >> &parentsOfPeps, const std::vector< double > &pepEvidences, InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition: MessagePasserFactory.h:193
double p
Definition: MessagePasserFactory.h:48
TableDependency< Label > createSumFactor(size_t nrParents, Label nId)
Definition: MessagePasserFactory.h:158
double alpha
Definition: MessagePasserFactory.h:48
double beta
Definition: MessagePasserFactory.h:48
const int minInputsPAF
Definition: MessagePasserFactory.h:47
double notConditionalGivenSum(unsigned long summ)
Definition: MessagePasserFactory.h:51
TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
MessagePasserFactory(double alpha, double beta, double gamma, double p)
Definition: MessagePasserFactory.h:96
TableDependency< Label > createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:142
PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< TableDependency< Label > > &deps)
Definition: MessagePasserFactory.h:184