OpenMS  2.4.0
MSstatsFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg, Lukas Heumos $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <OpenMS/FORMAT/TextFile.h>
41 #include <OpenMS/SYSTEM/File.h>
44 #include <OpenMS/FORMAT/MzTab.h>
47 #include <OpenMS/SYSTEM/File.h>
48 
49 #include <boost/regex.hpp>
50 
51 using namespace OpenMS;
52 using namespace std;
53 
54 namespace OpenMS
55 {
60  class OPENMS_DLLAPI MSstatsFile
61  {
62  public:
/// Default constructor (declared here; the definition is not part of this header).
64  MSstatsFile();
/// Destructor (declared here; the definition is not part of this header).
66  ~MSstatsFile();
67 
68  // Store an MSstats file.
//
// Only the declaration lives in this header, so the notes below are derived
// from the signature alone and should be confirmed against the implementation.
//
// filename                             - presumably the path of the output file (TODO confirm)
// consensus_map                        - taken by non-const reference, so the implementation is allowed to modify it
// design                               - experimental design, read-only
// reannotate_filenames                 - list of file names, read-only; NOTE(review): looks like replacement spectra file names - confirm
// is_isotope_label_type                - boolean switch; exact meaning not visible here
// bioreplicate, condition              - strings; NOTE(review): presumably names of the design columns to use - confirm
// retention_time_summarization_method  - string selecting a summarization method; accepted values are not visible here
69  void store(const String& filename, ConsensusMap &consensus_map,
70  const ExperimentalDesign& design,
71  const StringList& reannotate_filenames,
72  const bool is_isotope_label_type,
73  const String& bioreplicate,
74  const String& condition,
75  const String& retention_time_summarization_method);
76 
77  private:
79  {
80  public :
82  bool _has_fraction,
83  const String& _accession,
84  const String& _sequence,
85  const String& _precursor_charge,
86  const String& _fragment_ion,
87  const String& _frag_charge,
88  const String& _isotope_label_type,
89  const String& _condition,
90  const String& _bioreplicate,
91  const String& _run,
92  const String& _fraction
93  ): has_fraction_(_has_fraction),
94  accession_(_accession),
95  sequence_(_sequence),
96  precursor_charge_(_precursor_charge),
97  fragment_ion_(_fragment_ion),
98  frag_charge_(_frag_charge),
99  isotope_label_type_(_isotope_label_type),
100  condition_(_condition),
101  bioreplicate_(_bioreplicate),
102  run_(_run),
103  fraction_(_fraction) {}
104 
105  const String& accession() const {return this->accession_;}
106  const String& sequence() const {return this->sequence_;}
107  const String& precursor_charge() const {return this->precursor_charge_;}
108  const String& run() const {return this->run_;}
109 
110  String toString() const
111  {
112  const String delim(",");
113  return accession_
114  + delim + sequence_
115  + delim + precursor_charge_
116  + delim + fragment_ion_
117  + delim + frag_charge_
118  + delim + isotope_label_type_
119  + delim + condition_
120  + delim + bioreplicate_
121  + delim + run_
122  + (this->has_fraction_ ? delim + String(fraction_) : "");
123  }
124 
125  friend bool operator<(const MSstatsLine &l,
126  const MSstatsLine &r) {
127 
128  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
130  }
131 
132 
133  private:
145  };
146 
147 
// The literal "NA"; NOTE(review): presumably the placeholder written for missing values - confirm at the use sites.
148  const String na_string = "NA";
149  // The meta value of the peptide identification which is going to be used for the experimental design link
150  const String meta_value_exp_design_key = "spectra_data";
151 
152  /*
153  * MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction)
154  * In OpenMS, a run is split into multiple fractions.
155  *
156  */
157  static void assembleRunMap(
158  std::map< std::pair< String, unsigned>, unsigned> &run_map,
159  const ExperimentalDesign &design)
160  {
161  run_map.clear();
162  const ExperimentalDesign::MSFileSection& msfile_section = design.getMSFileSection();
163  unsigned run_counter = 1;
164 
165  for (ExperimentalDesign::MSFileSectionEntry const& r : msfile_section)
166  {
167  std::pair< String, unsigned> tpl = std::make_pair(File::basename(r.path), r.fraction);
168  if (run_map.find(tpl) == run_map.end())
169  {
170  run_map[tpl] = run_counter++;
171  }
172  }
173  }
174 
175  bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second)
176  {
177  const std::set< String > lhs(first.begin(), first.end());
178  const std::set< String > rhs(second.begin(), second.end());
179  return lhs.size() == rhs.size()
180  && std::equal(lhs.begin(), lhs.end(), rhs.begin());
181  }
182 
183  OpenMS::Peak2D::IntensityType sumIntensity(const set< OpenMS::Peak2D::IntensityType > &intensities)
184  {
186  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
187  {
188  result += intensity;
189  }
190  return result;
191  }
192 
193  OpenMS::Peak2D::IntensityType meanIntensity(const set< OpenMS::Peak2D::IntensityType > &intensities)
194  {
195  return sumIntensity(intensities) / intensities.size();
196  }
197  };
198 } // namespace OpenMS
File adapter for MSstats files.
Definition: MSstatsFile.h:60
bool has_fraction_
Definition: MSstatsFile.h:134
OpenMS::Peak2D::IntensityType sumIntensity(const set< OpenMS::Peak2D::IntensityType > &intensities)
Definition: MSstatsFile.h:183
A more convenient string class.
Definition: String.h:58
const String & precursor_charge() const
Definition: MSstatsFile.h:107
bool checkUnorderedContent_(const std::vector< String > &first, const std::vector< String > &second)
Definition: MSstatsFile.h:175
String condition_
Definition: MSstatsFile.h:141
A container for consensus elements.
Definition: ConsensusMap.h:75
STL namespace.
const String & accession() const
Definition: MSstatsFile.h:105
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
MSstatsLine(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition: MSstatsFile.h:81
String run_
Definition: MSstatsFile.h:143
Representation of the Experimental Design in OpenMS. Instances can be loaded via the ExperimentalDesi...
Definition: ExperimentalDesign.h:85
Definition: ExperimentalDesign.h:95
String fragment_ion_
Definition: MSstatsFile.h:138
const String & sequence() const
Definition: MSstatsFile.h:106
friend bool operator<(const MSstatsLine &l, const MSstatsLine &r)
Definition: MSstatsFile.h:125
String accession_
Definition: MSstatsFile.h:135
String fraction_
Definition: MSstatsFile.h:144
String frag_charge_
Definition: MSstatsFile.h:139
static String basename(const String &file)
Returns the basename of the file (without the path).
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
String precursor_charge_
Definition: MSstatsFile.h:137
String isotope_label_type_
Definition: MSstatsFile.h:140
String sequence_
Definition: MSstatsFile.h:136
String toString() const
Definition: MSstatsFile.h:110
String bioreplicate_
Definition: MSstatsFile.h:142
OpenMS::Peak2D::IntensityType meanIntensity(const set< OpenMS::Peak2D::IntensityType > &intensities)
Definition: MSstatsFile.h:193
static void assembleRunMap(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
Definition: MSstatsFile.h:157
const MSFileSection & getMSFileSection() const
Definition: MSstatsFile.h:78
const String & run() const
Definition: MSstatsFile.h:108
std::vector< MSFileSectionEntry > MSFileSection
Definition: ExperimentalDesign.h:151