OpenMS
MSstatsFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg, Lukas Heumos $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 #include <OpenMS/FORMAT/TextFile.h>
14 
15 #include <map>
16 #include <utility>
17 #include <unordered_map>
18 #include <set>
19 #include <vector>
20 
21 namespace OpenMS
22 {
24  using IndProtGrps = std::vector<IndProtGrp>;
25 
31  class OPENMS_DLLAPI MSstatsFile
32  {
33  public:
35  MSstatsFile() = default;
37  ~MSstatsFile() = default;
38 
/**
 * @brief Store label free experiment (MSstats).
 *
 * NOTE(review): @p bioreplicate and @p condition are presumably the names of the
 * experimental-design columns that checkConditionLFQ_ verifies - confirm against the implementation.
 * NOTE(review): @p filename is presumably the output path - confirm.
 */
40  void storeLFQ(const String& filename,
41  const ConsensusMap &consensus_map, // we might add singleton protein groups
42  const ExperimentalDesign& design,
43  const StringList& reannotate_filenames,
44  const bool is_isotope_label_type,
45  const String& bioreplicate,
46  const String& condition,
47  const String& retention_time_summarization_method);
48 
/**
 * @brief Store isobaric experiment (MSstatsTMT).
 *
 * NOTE(review): @p bioreplicate, @p condition and @p mixture are presumably the names of the
 * experimental-design columns that checkConditionISO_ verifies - confirm against the implementation.
 * NOTE(review): @p filename is presumably the output path - confirm.
 */
50  void storeISO(const String& filename,
51  const ConsensusMap &consensus_map,
52  const ExperimentalDesign& design,
53  const StringList& reannotate_filenames,
54  const String& bioreplicate,
55  const String& condition,
56  const String& mixture,
57  const String& retention_time_summarization_method);
58 
59  private:
62 
63  static const String na_string_;
64  static const char delim_ = ',';
65  static const char accdelim_ = ';';
66  static const char quote_ = '"';
67 
68  /*
69  * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
70  * such as filenames, intensities, retention times, labels and features (for further processing)
71  */
73  {
74  std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
75  std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
76  std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
77  std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
78  std::vector<BaseFeature> features; //<s Features of ConsensusMap
79  };
80 
81  /*
82  * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
83  * such as filenames, intensities, retention times, labels and features.
84  * Stores them in AggregatedConsensusInfo for later processing
85  */
87  const std::vector<String>& spectra_paths);
88 
89  /*
90  * @brief: Internal function to check if MSstats_BioReplicate and MSstats_Condition exist in the Experimental Design
91  */
92  static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition);
93 
94  /*
95  * @brief: Internal function to check if MSstats_BioReplicate, MSstats_Condition and MSstats_Mixture exist in the Experimental Design
96  */
97  static void checkConditionISO_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition, const String& mixture);
98 
99  /*
100  * @brief MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction).
101  * In OpenMS, a run is split into multiple fractions.
102  */
103  static void assembleRunMap_(
104  std::map< std::pair< String, unsigned>, unsigned> &run_map,
105  const ExperimentalDesign &design);
106 
107  /*
108  * @brief checks two vectors for same content
109  */
110  static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
111 
112  OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
113  {
115  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
116  {
117  result += intensity;
118  }
119  return result;
120  }
121 
122  OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
123  {
124  return sumIntensity_(intensities) / intensities.size();
125  }
126 
128  {
129  public :
131  bool _has_fraction,
132  const String& _accession,
133  const String& _sequence,
134  const String& _precursor_charge,
135  const String& _fragment_ion,
136  const String& _frag_charge,
137  const String& _isotope_label_type,
138  const String& _condition,
139  const String& _bioreplicate,
140  const String& _run,
141  const String& _fraction
142  ): has_fraction_(_has_fraction),
143  accession_(_accession),
144  sequence_(_sequence),
145  precursor_charge_(_precursor_charge),
146  fragment_ion_(_fragment_ion),
147  frag_charge_(_frag_charge),
148  isotope_label_type_(_isotope_label_type),
149  condition_(_condition),
150  bioreplicate_(_bioreplicate),
151  run_(_run),
152  fraction_(_fraction) {}
153 
154  const String& accession() const {return this->accession_;}
155  const String& sequence() const {return this->sequence_;}
156  const String& precursor_charge() const {return this->precursor_charge_;}
157  const String& run() const {return this->run_;}
158 
159  String toString() const
160  {
161  const String delim(",");
162  return accession_
163  + delim + sequence_
164  + delim + precursor_charge_
165  + delim + fragment_ion_
166  + delim + frag_charge_
167  + delim + isotope_label_type_
168  + delim + condition_
169  + delim + bioreplicate_
170  + delim + run_
171  + (this->has_fraction_ ? delim + String(fraction_) : "");
172  }
173 
174  friend bool operator<(const MSstatsLine_ &l,
175  const MSstatsLine_ &r) {
176 
177  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
179  }
180 
181 
182  private:
194  };
195 
197  {
198  public :
200  const String& _accession,
201  const String& _sequence,
202  const String& _precursor_charge,
203  const String& _channel,
204  const String& _condition,
205  const String& _bioreplicate,
206  const String& _run,
207  const String& _mixture,
208  const String& _techrepmixture,
209  const String& _fraction
210  ): accession_(_accession),
211  sequence_(_sequence),
212  precursor_charge_(_precursor_charge),
213  channel_(_channel),
214  condition_(_condition),
215  bioreplicate_(_bioreplicate),
216  run_(_run),
217  mixture_(_mixture),
218  techrepmixture_(_techrepmixture),
219  fraction_(_fraction) {}
220 
221  const String& accession() const {return this->accession_;}
222  const String& sequence() const {return this->sequence_;}
223  const String& precursor_charge() const {return this->precursor_charge_;}
224  const String& run() const {return this->run_;}
225 
226  String toString() const
227  {
228  const String delim(",");
229  return accession_
230  + delim + sequence_
231  + delim + precursor_charge_
232  + delim + channel_
233  + delim + condition_
234  + delim + bioreplicate_
235  + delim + run_
236  + delim + mixture_
237  + delim + techrepmixture_
238  + delim + String(fraction_);
239  }
240 
241  friend bool operator<(const MSstatsTMTLine_ &l,
242  const MSstatsTMTLine_ &r) {
243 
244  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.mixture_, l.precursor_charge_, l.sequence_, l.channel_) <
246  }
247 
248 
249  private:
260  };
261 
262  /*
263  * @brief Constructs the lines and adds them to the TextFile
264  * @param peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
265  */
266  template <class LineType>
267  void constructFile_(const String& retention_time_summarization_method,
268  const bool rt_summarization_manual,
269  TextFile& csv_out,
270  const std::set<String>& peptideseq_quantifyable,
271  LineType & peptideseq_to_prefix_to_intensities) const;
272 
273  /*
274  * @brief Constructs the accession to indist. group mapping
275  */
276  static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
277 
278 
279  /*
280  * @brief Based on the evidence accession set in a PeptideHit, checks if it is unique and therefore quantifiable
281  * in a group context.
282  *
283  */
285  const std::set<String>& accs,
286  const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
287 
288  };
289 } // namespace OpenMS
A container for consensus elements.
Definition: ConsensusMap.h:66
Definition: ExperimentalDesign.h:241
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:219
Definition: MSstatsFile.h:128
String toString() const
Definition: MSstatsFile.h:159
const String & sequence() const
Definition: MSstatsFile.h:155
String condition_
Definition: MSstatsFile.h:190
String sequence_
Definition: MSstatsFile.h:185
MSstatsLine_(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition: MSstatsFile.h:130
bool has_fraction_
Definition: MSstatsFile.h:183
const String & precursor_charge() const
Definition: MSstatsFile.h:156
String isotope_label_type_
Definition: MSstatsFile.h:189
String frag_charge_
Definition: MSstatsFile.h:188
const String & accession() const
Definition: MSstatsFile.h:154
String bioreplicate_
Definition: MSstatsFile.h:191
friend bool operator<(const MSstatsLine_ &l, const MSstatsLine_ &r)
Definition: MSstatsFile.h:174
String accession_
Definition: MSstatsFile.h:184
String fraction_
Definition: MSstatsFile.h:193
const String & run() const
Definition: MSstatsFile.h:157
String fragment_ion_
Definition: MSstatsFile.h:187
String precursor_charge_
Definition: MSstatsFile.h:186
String run_
Definition: MSstatsFile.h:192
Definition: MSstatsFile.h:197
MSstatsTMTLine_(const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_channel, const String &_condition, const String &_bioreplicate, const String &_run, const String &_mixture, const String &_techrepmixture, const String &_fraction)
Definition: MSstatsFile.h:199
String channel_
Definition: MSstatsFile.h:253
String toString() const
Definition: MSstatsFile.h:226
const String & sequence() const
Definition: MSstatsFile.h:222
String condition_
Definition: MSstatsFile.h:254
String sequence_
Definition: MSstatsFile.h:251
const String & precursor_charge() const
Definition: MSstatsFile.h:223
String mixture_
Definition: MSstatsFile.h:257
const String & accession() const
Definition: MSstatsFile.h:221
String bioreplicate_
Definition: MSstatsFile.h:255
String techrepmixture_
Definition: MSstatsFile.h:258
String accession_
Definition: MSstatsFile.h:250
friend bool operator<(const MSstatsTMTLine_ &l, const MSstatsTMTLine_ &r)
Definition: MSstatsFile.h:241
String fraction_
Definition: MSstatsFile.h:259
const String & run() const
Definition: MSstatsFile.h:224
String precursor_charge_
Definition: MSstatsFile.h:252
String run_
Definition: MSstatsFile.h:256
File adapter for MSstats files.
Definition: MSstatsFile.h:32
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:112
void constructFile_(const String &retention_time_summarization_method, const bool rt_summarization_manual, TextFile &csv_out, const std::set< String > &peptideseq_quantifyable, LineType &peptideseq_to_prefix_to_intensities) const
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition: MSstatsFile.h:77
static bool checkUnorderedContent_(const std::vector< String > &first, const std::vector< String > &second)
OpenMS::Peak2D::CoordinateType Coordinate
Definition: MSstatsFile.h:61
static void checkConditionLFQ_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition)
MSstatsFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap &consensus_map, const std::vector< String > &spectra_paths)
static const String na_string_
Definition: MSstatsFile.h:63
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition: MSstatsFile.h:75
std::vector< BaseFeature > features
Definition: MSstatsFile.h:78
std::vector< std::vector< String > > consensus_feature_filenames
Definition: MSstatsFile.h:74
static std::unordered_map< OpenMS::String, const IndProtGrp * > getAccessionToGroupMap_(const IndProtGrps &ind_prots)
void storeISO(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &bioreplicate, const String &condition, const String &mixture, const String &retention_time_summarization_method)
store isobaric experiment (MSstatsTMT)
bool isQuantifyable_(const std::set< String > &accs, const std::unordered_map< String, const IndProtGrp * > &accession_to_group) const
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition: MSstatsFile.h:76
static void assembleRunMap_(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:122
MSstatsFile()=default
Default constructor.
~MSstatsFile()=default
Destructor.
static void checkConditionISO_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition, const String &mixture)
OpenMS::Peak2D::IntensityType Intensity
Definition: MSstatsFile.h:60
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const bool is_isotope_label_type, const String &bioreplicate, const String &condition, const String &retention_time_summarization_method)
store label free experiment (MSstats)
float IntensityType
Intensity type.
Definition: Peak2D.h:36
double CoordinateType
Coordinate type (of the position)
Definition: Peak2D.h:38
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:105
A more convenient string class.
Definition: String.h:34
This class provides some basic file handling methods for text files.
Definition: TextFile.h:21
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
std::vector< IndProtGrp > IndProtGrps
Definition: MSstatsFile.h:24