OpenMS  2.5.0
MSstatsFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg, Lukas Heumos $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <OpenMS/FORMAT/TextFile.h>
40 
41 #include <map>
42 #include <utility>
43 #include <unordered_map>
44 #include <set>
45 #include <vector>
46 
47 namespace OpenMS
48 {
50  using IndProtGrps = std::vector<IndProtGrp>;
51 
57  class OPENMS_DLLAPI MSstatsFile
58  {
59  public:
61  MSstatsFile() = default;
63  ~MSstatsFile() = default;
64 
66  void storeLFQ(const String& filename,
67  const ConsensusMap &consensus_map, // we might add singleton protein groups
68  const ExperimentalDesign& design,
69  const StringList& reannotate_filenames,
70  const bool is_isotope_label_type,
71  const String& bioreplicate,
72  const String& condition,
73  const String& retention_time_summarization_method);
74 
76  void storeISO(const String& filename,
77  const ConsensusMap &consensus_map,
78  const ExperimentalDesign& design,
79  const StringList& reannotate_filenames,
80  const String& bioreplicate,
81  const String& condition,
82  const String& mixture,
83  const String& retention_time_summarization_method);
84 
85  private:
86 
89 
90  static const String na_string_;
91  static const char delim_ = ',';
92  static const char accdelim_ = ';';
93  static const char quote_ = '"';
94 
95  /*
96  * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
97  * such as filenames, intensities, retention times, labels and features (for further processing)
98  */
100  {
101  std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
102  std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
103  std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
104  std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
105  std::vector<BaseFeature> features; //<s Features of ConsensusMap
106  };
107 
108  /*
109  * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
110  * such as filenames, intensities, retention times, labels and features.
111  * Stores them in AggregatedConsensusInfo for later processing
112  */
113  MSstatsFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap& consensus_map,
114  const std::vector<String>& spectra_paths);
115 
116  /*
117  * @brief: Internal function to check if MSstats_BioReplicate and MSstats_Condition exists in Experimental Design
118  */
119  static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition);
120 
121  /*
122  * @brief: Internal function to check if MSstats_BioReplicate, MSstats_Condition and MSstats_Mixture in Experimental Design
123  */
124  static void checkConditionISO_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition, const String& mixture);
125 
126  /*
127  * @brief MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction)
128  * In OpenMS, a run is split into multiple fractions.
129  */
130  static void assembleRunMap_(
131  std::map< std::pair< String, unsigned>, unsigned> &run_map,
132  const ExperimentalDesign &design);
133 
134  /*
135  * @brief checks two vectors for same content
136  */
137  static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
138 
139  OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
140  {
142  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
143  {
144  result += intensity;
145  }
146  return result;
147  }
148 
149  OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
150  {
151  return sumIntensity_(intensities) / intensities.size();
152  }
153 
155  {
156  public :
158  bool _has_fraction,
159  const String& _accession,
160  const String& _sequence,
161  const String& _precursor_charge,
162  const String& _fragment_ion,
163  const String& _frag_charge,
164  const String& _isotope_label_type,
165  const String& _condition,
166  const String& _bioreplicate,
167  const String& _run,
168  const String& _fraction
169  ): has_fraction_(_has_fraction),
170  accession_(_accession),
171  sequence_(_sequence),
172  precursor_charge_(_precursor_charge),
173  fragment_ion_(_fragment_ion),
174  frag_charge_(_frag_charge),
175  isotope_label_type_(_isotope_label_type),
176  condition_(_condition),
177  bioreplicate_(_bioreplicate),
178  run_(_run),
179  fraction_(_fraction) {}
180 
181  const String& accession() const {return this->accession_;}
182  const String& sequence() const {return this->sequence_;}
183  const String& precursor_charge() const {return this->precursor_charge_;}
184  const String& run() const {return this->run_;}
185 
186  String toString() const
187  {
188  const String delim(",");
189  return accession_
190  + delim + sequence_
191  + delim + precursor_charge_
192  + delim + fragment_ion_
193  + delim + frag_charge_
194  + delim + isotope_label_type_
195  + delim + condition_
196  + delim + bioreplicate_
197  + delim + run_
198  + (this->has_fraction_ ? delim + String(fraction_) : "");
199  }
200 
201  friend bool operator<(const MSstatsLine_ &l,
202  const MSstatsLine_ &r) {
203 
204  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
206  }
207 
208 
209  private:
221  };
222 
224  {
225  public :
227  const String& _accession,
228  const String& _sequence,
229  const String& _precursor_charge,
230  const String& _channel,
231  const String& _condition,
232  const String& _bioreplicate,
233  const String& _run,
234  const String& _mixture,
235  const String& _techrepmixture,
236  const String& _fraction
237  ): accession_(_accession),
238  sequence_(_sequence),
239  precursor_charge_(_precursor_charge),
240  channel_(_channel),
241  condition_(_condition),
242  bioreplicate_(_bioreplicate),
243  run_(_run),
244  mixture_(_mixture),
245  techrepmixture_(_techrepmixture),
246  fraction_(_fraction) {}
247 
248  const String& accession() const {return this->accession_;}
249  const String& sequence() const {return this->sequence_;}
250  const String& precursor_charge() const {return this->precursor_charge_;}
251  const String& run() const {return this->run_;}
252 
253  String toString() const
254  {
255  const String delim(",");
256  return accession_
257  + delim + sequence_
258  + delim + precursor_charge_
259  + delim + channel_
260  + delim + condition_
261  + delim + bioreplicate_
262  + delim + run_
263  + delim + mixture_
264  + delim + techrepmixture_
265  + delim + String(fraction_);
266  }
267 
268  friend bool operator<(const MSstatsTMTLine_ &l,
269  const MSstatsTMTLine_ &r) {
270 
271  return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.mixture_, l.precursor_charge_, l.sequence_) <
273  }
274 
275 
276  private:
287  };
288 
289  /*
290  * @brief Constructs the lines and adds them to the TextFile
291  * @param peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
292  */
293  template <class LineType>
294  void constructFile_(const String& retention_time_summarization_method,
295  const bool rt_summarization_manual,
296  TextFile& csv_out,
297  const std::set<String>& peptideseq_quantifyable,
298  LineType & peptideseq_to_prefix_to_intensities) const;
299 
300  /*
301  * @brief Constructs the accession to indist. group mapping
302  */
303  static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
304 
305 
306  /*
307  * @brief Based on the evidence accession set in a PeptideHit, checks if is unique and therefore quantifyable
308  * in a group context.
309  *
310  */
311  bool isQuantifyable_(
312  const std::set<String>& accs,
313  const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
314 
315  };
316 } // namespace OpenMS
OpenMS::MSstatsFile::AggregatedConsensusInfo::consensus_feature_labels
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition: MSstatsFile.h:104
OpenMS::MSstatsFile::MSstatsLine_::operator<
friend bool operator<(const MSstatsLine_ &l, const MSstatsLine_ &r)
Definition: MSstatsFile.h:201
OpenMS::MSstatsFile::MSstatsLine_::sequence_
String sequence_
Definition: MSstatsFile.h:212
OpenMS::MSstatsFile::MSstatsTMTLine_::toString
String toString() const
Definition: MSstatsFile.h:253
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:67
OpenMS::MSstatsFile::AggregatedConsensusInfo
Definition: MSstatsFile.h:99
OpenMS::MSstatsFile::MSstatsTMTLine_::accession_
String accession_
Definition: MSstatsFile.h:277
OpenMS::MSstatsFile::MSstatsTMTLine_::run_
String run_
Definition: MSstatsFile.h:283
File.h
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
OpenMS::MSstatsFile::meanIntensity_
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:149
OpenMS::MSstatsFile::storeISO
void storeISO(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &bioreplicate, const String &condition, const String &mixture, const String &retention_time_summarization_method)
store isobaric experiment (MSstatsTMT)
OpenMS::ExperimentalDesign::getSampleSection
const ExperimentalDesign::SampleSection & getSampleSection() const
OpenMS::MSstatsFile::MSstatsTMTLine_::bioreplicate_
String bioreplicate_
Definition: MSstatsFile.h:282
OpenMS::MSstatsFile::MSstatsTMTLine_::operator<
friend bool operator<(const MSstatsTMTLine_ &l, const MSstatsTMTLine_ &r)
Definition: MSstatsFile.h:268
OpenMS::ExperimentalDesign::SampleSection
Definition: ExperimentalDesign.h:106
OpenMS::MSstatsFile::MSstatsTMTLine_::precursor_charge
const String & precursor_charge() const
Definition: MSstatsFile.h:250
MzTab.h
OpenMS::MSstatsFile::MSstatsLine_::frag_charge_
String frag_charge_
Definition: MSstatsFile.h:215
MSstatsFile.h
OpenMS::MSstatsFile::AggregatedConsensusInfo::features
std::vector< BaseFeature > features
Definition: MSstatsFile.h:105
OpenMS::MSstatsFile::MSstatsTMTLine_::channel_
String channel_
Definition: MSstatsFile.h:280
OpenMS::MSstatsFile::MSstatsLine_::sequence
const String & sequence() const
Definition: MSstatsFile.h:182
OpenMS::MSstatsFile::MSstatsTMTLine_::fraction_
String fraction_
Definition: MSstatsFile.h:286
OpenMS::MSstatsFile::MSstatsLine_::fragment_ion_
String fragment_ion_
Definition: MSstatsFile.h:214
OpenMS::MSstatsFile::MSstatsLine_::isotope_label_type_
String isotope_label_type_
Definition: MSstatsFile.h:216
OpenMS::MSstatsFile::MSstatsTMTLine_::condition_
String condition_
Definition: MSstatsFile.h:281
double
float
OpenMS::MSstatsFile::na_string_
static const String na_string_
Definition: MSstatsFile.h:90
OpenMS::MSstatsFile::MSstatsTMTLine_::accession
const String & accession() const
Definition: MSstatsFile.h:248
ExperimentalDesignFile.h
ConsensusMap.h
OpenMS::MSstatsFile::MSstatsLine_::MSstatsLine_
MSstatsLine_(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition: MSstatsFile.h:157
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:61
OpenMS::MSstatsFile::MSstatsTMTLine_
Definition: MSstatsFile.h:223
OpenMS::MSstatsFile::MSstatsTMTLine_::precursor_charge_
String precursor_charge_
Definition: MSstatsFile.h:279
OpenMS::MSstatsFile
File adapter for MSstats files.
Definition: MSstatsFile.h:57
OPENMS_LOG_FATAL_ERROR
#define OPENMS_LOG_FATAL_ERROR
Macro to be used if fatal error are reported (processing stops)
Definition: LogStream.h:450
OpenMS::MSstatsFile::MSstatsLine_::toString
String toString() const
Definition: MSstatsFile.h:186
OpenMS::MSstatsFile::MSstatsTMTLine_::techrepmixture_
String techrepmixture_
Definition: MSstatsFile.h:285
FeatureXMLFile.h
ConsensusXMLFile.h
OpenMS::MSstatsFile::MSstatsLine_::has_fraction_
bool has_fraction_
Definition: MSstatsFile.h:210
OpenMS::MSstatsFile::MSstatsLine_
Definition: MSstatsFile.h:154
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:79
OpenMS::MSstatsFile::MSstatsLine_::precursor_charge
const String & precursor_charge() const
Definition: MSstatsFile.h:183
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::MSstatsFile::MSstatsLine_::run
const String & run() const
Definition: MSstatsFile.h:184
OpenMS::ProteinIdentification::ProteinGroup
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:81
TOPPBase.h
OpenMS::MSstatsFile::MSstatsLine_::fraction_
String fraction_
Definition: MSstatsFile.h:220
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
FileHandler.h
OpenMS::MSstatsFile::Intensity
OpenMS::Peak2D::IntensityType Intensity
Definition: MSstatsFile.h:87
TextFile.h
OpenMS::MSstatsFile::MSstatsTMTLine_::sequence_
String sequence_
Definition: MSstatsFile.h:278
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
MzTabFile.h
OpenMS::MSstatsFile::MSstatsLine_::accession
const String & accession() const
Definition: MSstatsFile.h:181
ExperimentalDesign.h
OpenMS::MSstatsFile::MSstatsLine_::accession_
String accession_
Definition: MSstatsFile.h:211
OpenMS::MSstatsFile::MSstatsTMTLine_::MSstatsTMTLine_
MSstatsTMTLine_(const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_channel, const String &_condition, const String &_bioreplicate, const String &_run, const String &_mixture, const String &_techrepmixture, const String &_fraction)
Definition: MSstatsFile.h:226
OpenMS::MSstatsFile::MSstatsLine_::run_
String run_
Definition: MSstatsFile.h:219
OpenMS::MSstatsFile::MSstatsLine_::precursor_charge_
String precursor_charge_
Definition: MSstatsFile.h:213
OpenMS::MSstatsFile::AggregatedConsensusInfo::consensus_feature_intensities
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition: MSstatsFile.h:102
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::MSstatsFile::sumIntensity_
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: MSstatsFile.h:139
OpenMS::MSstatsFile::AggregatedConsensusInfo::consensus_feature_filenames
std::vector< std::vector< String > > consensus_feature_filenames
Definition: MSstatsFile.h:101
OpenMS::ExperimentalDesign
Representation of the Experimental Design in OpenMS. Instances can be loaded via the ExperimentalDesi...
Definition: ExperimentalDesign.h:85
OpenMS::MSstatsFile::MSstatsLine_::bioreplicate_
String bioreplicate_
Definition: MSstatsFile.h:218
OpenMS::MSstatsFile::Coordinate
OpenMS::Peak2D::CoordinateType Coordinate
Definition: MSstatsFile.h:88
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::MSstatsFile::AggregatedConsensusInfo::consensus_feature_retention_times
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition: MSstatsFile.h:103
OpenMS::IndProtGrps
std::vector< IndProtGrp > IndProtGrps
Definition: MSstatsFile.h:50
OpenMS::MSstatsFile::storeLFQ
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const bool is_isotope_label_type, const String &bioreplicate, const String &condition, const String &retention_time_summarization_method)
store label free experiment (MSstats)
OpenMS::MSstatsFile::MSstatsTMTLine_::sequence
const String & sequence() const
Definition: MSstatsFile.h:249
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::MSstatsFile::MSstatsLine_::condition_
String condition_
Definition: MSstatsFile.h:217
OpenMS::MSstatsFile::MSstatsTMTLine_::mixture_
String mixture_
Definition: MSstatsFile.h:284
OpenMS::MSstatsFile::MSstatsTMTLine_::run
const String & run() const
Definition: MSstatsFile.h:251
OpenMS::ExperimentalDesignFile::load
static ExperimentalDesign load(const String &tsv_file, bool require_spectra_files)
Loads an experimental design from a tab-separated file.