OpenMS
MSstatsFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg, Lukas Heumos $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <OpenMS/FORMAT/TextFile.h>
40 
41 #include <map>
42 #include <utility>
43 #include <unordered_map>
44 #include <set>
45 #include <vector>
46 
47 namespace OpenMS
48 {
50  using IndProtGrps = std::vector<IndProtGrp>;
51 
57  class OPENMS_DLLAPI MSstatsFile
58  {
59  public:
/// Default constructor.
61  MSstatsFile() = default;
/// Destructor.
63  ~MSstatsFile() = default;
64 
/// Store a label-free experiment (MSstats).
/// NOTE(review): parameter semantics are not visible in this header. Presumably
/// @p bioreplicate and @p condition name columns of the experimental design's
/// sample section, and @p retention_time_summarization_method selects how
/// intensities are summarized -- confirm against the implementation.
66  void storeLFQ(const String& filename,
67  const ConsensusMap &consensus_map, // we might add singleton protein groups
68  const ExperimentalDesign& design,
69  const StringList& reannotate_filenames,
70  const bool is_isotope_label_type,
71  const String& bioreplicate,
72  const String& condition,
73  const String& retention_time_summarization_method);
74 
/// Store an isobaric experiment (MSstatsTMT).
/// NOTE(review): parameter semantics are not visible in this header. Presumably
/// @p bioreplicate, @p condition and @p mixture name columns of the experimental
/// design's sample section -- confirm against the implementation.
76  void storeISO(const String& filename,
77  const ConsensusMap &consensus_map,
78  const ExperimentalDesign& design,
79  const StringList& reannotate_filenames,
80  const String& bioreplicate,
81  const String& condition,
82  const String& mixture,
83  const String& retention_time_summarization_method);
84 
85  private:
88 
89  static const String na_string_;
90  static const char delim_ = ',';
91  static const char accdelim_ = ';';
92  static const char quote_ = '"';
93 
94  /*
95  * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
96  * such as filenames, intensities, retention times, labels and features (for further processing)
97  */
99  {
100  std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
101  std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
102  std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
103  std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
104  std::vector<BaseFeature> features; //<s Features of ConsensusMap
105  };
106 
107  /*
108  * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
109  * such as filenames, intensities, retention times, labels and features.
110  * Stores them in AggregatedConsensusInfo for later processing
111  */
113  const std::vector<String>& spectra_paths);
114 
115  /*
116  * @brief: Internal function to check if MSstats_BioReplicate and MSstats_Condition exist in the Experimental Design
117  */
118  static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition);
119 
120  /*
121  * @brief: Internal function to check if MSstats_BioReplicate, MSstats_Condition and MSstats_Mixture exist in the Experimental Design
122  */
123  static void checkConditionISO_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition, const String& mixture);
124 
125  /*
126  * @brief MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction).
127  * In OpenMS, a run is split into multiple fractions.
128  */
129  static void assembleRunMap_(
130  std::map< std::pair< String, unsigned>, unsigned> &run_map,
131  const ExperimentalDesign &design);
132 
133  /*
134  * @brief checks two vectors for same content
135  */
136  static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
137 
138  OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
139  {
141  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
142  {
143  result += intensity;
144  }
145  return result;
146  }
147 
148  OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
149  {
150  return sumIntensity_(intensities) / intensities.size();
151  }
152 
154  {
155  public :
157  bool _has_fraction,
158  const String& _accession,
159  const String& _sequence,
160  const String& _precursor_charge,
161  const String& _fragment_ion,
162  const String& _frag_charge,
163  const String& _isotope_label_type,
164  const String& _condition,
165  const String& _bioreplicate,
166  const String& _run,
167  const String& _fraction
168  ): has_fraction_(_has_fraction),
169  accession_(_accession),
170  sequence_(_sequence),
171  precursor_charge_(_precursor_charge),
172  fragment_ion_(_fragment_ion),
173  frag_charge_(_frag_charge),
174  isotope_label_type_(_isotope_label_type),
175  condition_(_condition),
176  bioreplicate_(_bioreplicate),
177  run_(_run),
178  fraction_(_fraction) {}
179 
180  const String& accession() const {return this->accession_;}
181  const String& sequence() const {return this->sequence_;}
182  const String& precursor_charge() const {return this->precursor_charge_;}
183  const String& run() const {return this->run_;}
184 
185  String toString() const
186  {
187  const String delim(",");
188  return accession_
189  + delim + sequence_
190  + delim + precursor_charge_
191  + delim + fragment_ion_
192  + delim + frag_charge_
193  + delim + isotope_label_type_
194  + delim + condition_
195  + delim + bioreplicate_
196  + delim + run_
197  + (this->has_fraction_ ? delim + String(fraction_) : "");
198  }
199 
200  friend bool operator<(const MSstatsLine_ &l,
201  const MSstatsLine_ &r) {
202 
203  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
205  }
206 
207 
208  private:
220  };
221 
223  {
224  public :
226  const String& _accession,
227  const String& _sequence,
228  const String& _precursor_charge,
229  const String& _channel,
230  const String& _condition,
231  const String& _bioreplicate,
232  const String& _run,
233  const String& _mixture,
234  const String& _techrepmixture,
235  const String& _fraction
236  ): accession_(_accession),
237  sequence_(_sequence),
238  precursor_charge_(_precursor_charge),
239  channel_(_channel),
240  condition_(_condition),
241  bioreplicate_(_bioreplicate),
242  run_(_run),
243  mixture_(_mixture),
244  techrepmixture_(_techrepmixture),
245  fraction_(_fraction) {}
246 
247  const String& accession() const {return this->accession_;}
248  const String& sequence() const {return this->sequence_;}
249  const String& precursor_charge() const {return this->precursor_charge_;}
250  const String& run() const {return this->run_;}
251 
252  String toString() const
253  {
254  const String delim(",");
255  return accession_
256  + delim + sequence_
257  + delim + precursor_charge_
258  + delim + channel_
259  + delim + condition_
260  + delim + bioreplicate_
261  + delim + run_
262  + delim + mixture_
263  + delim + techrepmixture_
264  + delim + String(fraction_);
265  }
266 
267  friend bool operator<(const MSstatsTMTLine_ &l,
268  const MSstatsTMTLine_ &r) {
269 
270  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.mixture_, l.precursor_charge_, l.sequence_, l.channel_) <
272  }
273 
274 
275  private:
286  };
287 
288  /*
289  * @brief Constructs the lines and adds them to the TextFile
290  * @param peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
291  */
292  template <class LineType>
293  void constructFile_(const String& retention_time_summarization_method,
294  const bool rt_summarization_manual,
295  TextFile& csv_out,
296  const std::set<String>& peptideseq_quantifyable,
297  LineType & peptideseq_to_prefix_to_intensities) const;
298 
299  /*
300  * @brief Constructs the mapping from accession to indistinguishable protein group
301  */
302  static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
303 
304 
305  /*
306  * @brief Based on the evidence accession set in a PeptideHit, checks if it is unique and therefore quantifiable
307  * in a group context.
308  *
309  */
311  const std::set<String>& accs,
312  const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
313 
314  };
315 } // namespace OpenMS
A container for consensus elements.
Definition: ConsensusMap.h:92
Definition: ExperimentalDesign.h:267
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:245
Definition: MSstatsFile.h:154
String toString() const
Definition: MSstatsFile.h:185
const String & sequence() const
Definition: MSstatsFile.h:181
String condition_
Definition: MSstatsFile.h:216
String sequence_
Definition: MSstatsFile.h:211
MSstatsLine_(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition: MSstatsFile.h:156
bool has_fraction_
Definition: MSstatsFile.h:209
const String & precursor_charge() const
Definition: MSstatsFile.h:182
String isotope_label_type_
Definition: MSstatsFile.h:215
String frag_charge_
Definition: MSstatsFile.h:214
const String & accession() const
Definition: MSstatsFile.h:180
String bioreplicate_
Definition: MSstatsFile.h:217
friend bool operator<(const MSstatsLine_ &l, const MSstatsLine_ &r)
Definition: MSstatsFile.h:200
String accession_
Definition: MSstatsFile.h:210
String fraction_
Definition: MSstatsFile.h:219
const String & run() const
Definition: MSstatsFile.h:183
String fragment_ion_
Definition: MSstatsFile.h:213
String precursor_charge_
Definition: MSstatsFile.h:212
String run_
Definition: MSstatsFile.h:218
Definition: MSstatsFile.h:223
MSstatsTMTLine_(const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_channel, const String &_condition, const String &_bioreplicate, const String &_run, const String &_mixture, const String &_techrepmixture, const String &_fraction)
Definition: MSstatsFile.h:225
String channel_
Definition: MSstatsFile.h:279
String toString() const
Definition: MSstatsFile.h:252
const String & sequence() const
Definition: MSstatsFile.h:248
String condition_
Definition: MSstatsFile.h:280
String sequence_
Definition: MSstatsFile.h:277
const String & precursor_charge() const
Definition: MSstatsFile.h:249
String mixture_
Definition: MSstatsFile.h:283
const String & accession() const
Definition: MSstatsFile.h:247
String bioreplicate_
Definition: MSstatsFile.h:281
String techrepmixture_
Definition: MSstatsFile.h:284
String accession_
Definition: MSstatsFile.h:276
friend bool operator<(const MSstatsTMTLine_ &l, const MSstatsTMTLine_ &r)
Definition: MSstatsFile.h:267
String fraction_
Definition: MSstatsFile.h:285
const String & run() const
Definition: MSstatsFile.h:250
String precursor_charge_
Definition: MSstatsFile.h:278
String run_
Definition: MSstatsFile.h:282
File adapter for MSstats files.
Definition: MSstatsFile.h:58
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:138
void constructFile_(const String &retention_time_summarization_method, const bool rt_summarization_manual, TextFile &csv_out, const std::set< String > &peptideseq_quantifyable, LineType &peptideseq_to_prefix_to_intensities) const
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition: MSstatsFile.h:103
static bool checkUnorderedContent_(const std::vector< String > &first, const std::vector< String > &second)
OpenMS::Peak2D::CoordinateType Coordinate
Definition: MSstatsFile.h:87
static void checkConditionLFQ_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition)
MSstatsFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap &consensus_map, const std::vector< String > &spectra_paths)
static const String na_string_
Definition: MSstatsFile.h:89
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition: MSstatsFile.h:101
std::vector< BaseFeature > features
Definition: MSstatsFile.h:104
std::vector< std::vector< String > > consensus_feature_filenames
Definition: MSstatsFile.h:100
static std::unordered_map< OpenMS::String, const IndProtGrp * > getAccessionToGroupMap_(const IndProtGrps &ind_prots)
void storeISO(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &bioreplicate, const String &condition, const String &mixture, const String &retention_time_summarization_method)
store isobaric experiment (MSstatsTMT)
bool isQuantifyable_(const std::set< String > &accs, const std::unordered_map< String, const IndProtGrp * > &accession_to_group) const
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition: MSstatsFile.h:102
static void assembleRunMap_(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:148
MSstatsFile()=default
Default constructor.
~MSstatsFile()=default
Destructor.
static void checkConditionISO_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition, const String &mixture)
OpenMS::Peak2D::IntensityType Intensity
Definition: MSstatsFile.h:86
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const bool is_isotope_label_type, const String &bioreplicate, const String &condition, const String &retention_time_summarization_method)
store label free experiment (MSstats)
float IntensityType
Intensity type.
Definition: Peak2D.h:62
double CoordinateType
Coordinate type (of the position)
Definition: Peak2D.h:64
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:131
A more convenient string class.
Definition: String.h:60
This class provides some basic file handling methods for text files.
Definition: TextFile.h:47
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
std::vector< IndProtGrp > IndProtGrps
Definition: MSstatsFile.h:50