OpenMS
Loading...
Searching...
No Matches
MSstatsFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg, Lukas Heumos $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <map>
16#include <utility>
17#include <unordered_map>
18#include <set>
19#include <vector>
20
21namespace OpenMS
22{
24 using IndProtGrps = std::vector<IndProtGrp>;
25
31 class OPENMS_DLLAPI MSstatsFile
32 {
33 public:
35 MSstatsFile() = default;
37 ~MSstatsFile() = default;
38
40 void storeLFQ(const String& filename,
41 const ConsensusMap &consensus_map, // we might add singleton protein groups
42 const ExperimentalDesign& design,
43 const StringList& reannotate_filenames,
44 const bool is_isotope_label_type,
45 const String& bioreplicate,
46 const String& condition,
47 const String& retention_time_summarization_method);
48
60 void storeISO(const String& filename,
61 const ConsensusMap &consensus_map,
62 const ExperimentalDesign& design,
63 const StringList& reannotate_filenames,
64 const String& bioreplicate,
65 const String& condition,
66 const String& mixture,
67 const String& retention_time_summarization_method);
68
69 private:
72
73 static const String na_string_;
74 static const char delim_ = ',';
75 static const char accdelim_ = ';';
76 static const char quote_ = '"';
77
78 /*
79 * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
80 * such as filenames, intensities, retention times, labels and features (for further processing)
81 */
83 {
84 std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
85 std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
86 std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
87 std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
88 std::vector<BaseFeature> features; //< Features of ConsensusMap
89 };
90
91 /*
92 * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
93 * such as filenames, intensities, retention times, labels and features.
94 * Stores them in AggregatedConsensusInfo for later processing
95 */
97 const std::vector<String>& spectra_paths);
98
99 /*
100 * @brief: Internal function to check if MSstats_BioReplicate and MSstats_Condition exist in the Experimental Design
101 */
102 static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition);
103
104 /*
105 * @brief: Internal function to check if MSstats_BioReplicate, MSstats_Condition and MSstats_Mixture exist in the Experimental Design
106 */
107 static void checkConditionISO_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition, const String& mixture);
108
109 /*
110 * @brief MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction).
111 * In OpenMS, a run is split into multiple fractions.
112 */
113 static void assembleRunMap_(
114 std::map< std::pair< String, unsigned>, unsigned> &run_map,
115 const ExperimentalDesign &design);
116
117 /*
118 * @brief checks if the first vector is a subset of the second
119 */
120 static bool isSubsetOf_(const std::vector< String> &first, const std::vector< String > &second);
121 static void warnOnSubsetFiles_(const std::vector<String>& spectra_paths, const std::vector<String>& design_filenames);
122
123 OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
124 {
126 for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
127 {
128 result += intensity;
129 }
130 return result;
131 }
132
// Returns sumIntensity_(intensities) divided by the number of elements in the set.
// NOTE(review): there is no guard for an empty set; size() is then 0 and the
// division is by zero — confirm that callers never pass an empty set.
133 OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
134 {
135 return sumIntensity_(intensities) / intensities.size();
136 }
137
139 {
140 public :
142 bool _has_fraction,
143 const String& _accession,
144 const String& _sequence,
145 const String& _precursor_charge,
146 const String& _fragment_ion,
147 const String& _frag_charge,
148 const String& _isotope_label_type,
149 const String& _condition,
150 const String& _bioreplicate,
151 const String& _run,
152 const String& _fraction
153 ): has_fraction_(_has_fraction),
154 accession_(_accession),
155 sequence_(_sequence),
156 precursor_charge_(_precursor_charge),
157 fragment_ion_(_fragment_ion),
158 frag_charge_(_frag_charge),
159 isotope_label_type_(_isotope_label_type),
160 condition_(_condition),
161 bioreplicate_(_bioreplicate),
162 run_(_run),
163 fraction_(_fraction) {}
164
165 const String& accession() const {return this->accession_;}
166 const String& sequence() const {return this->sequence_;}
167 const String& precursor_charge() const {return this->precursor_charge_;}
168 const String& run() const {return this->run_;}
169
171 {
172 const String delim(",");
173 return accession_
174 + delim + sequence_
175 + delim + precursor_charge_
176 + delim + fragment_ion_
177 + delim + frag_charge_
178 + delim + isotope_label_type_
179 + delim + condition_
180 + delim + bioreplicate_
181 + delim + run_
182 + (this->has_fraction_ ? delim + String(fraction_) : "");
183 }
184
185 friend bool operator<(const MSstatsLine_ &l,
186 const MSstatsLine_ &r) {
187
188 return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
190 }
191
192
193 private:
205 };
206
208 {
209 public :
211 const String& _accession,
212 const String& _sequence,
213 const String& _precursor_charge,
214 const String& _channel,
215 const String& _condition,
216 const String& _bioreplicate,
217 const String& _run,
218 const String& _mixture,
219 const String& _techrepmixture,
220 const String& _fraction
221 ): accession_(_accession),
222 sequence_(_sequence),
223 precursor_charge_(_precursor_charge),
224 channel_(_channel),
225 condition_(_condition),
226 bioreplicate_(_bioreplicate),
227 run_(_run),
228 mixture_(_mixture),
229 techrepmixture_(_techrepmixture),
230 fraction_(_fraction) {}
231
232 const String& accession() const {return this->accession_;}
233 const String& sequence() const {return this->sequence_;}
234 const String& precursor_charge() const {return this->precursor_charge_;}
235 const String& run() const {return this->run_;}
236
238 {
239 const String delim(",");
240 return accession_
241 + delim + sequence_
242 + delim + precursor_charge_
243 + delim + channel_
244 + delim + condition_
245 + delim + bioreplicate_
246 + delim + run_
247 + delim + mixture_
248 + delim + techrepmixture_
249 + delim + String(fraction_);
250 }
251
252 friend bool operator<(const MSstatsTMTLine_ &l,
253 const MSstatsTMTLine_ &r) {
254
255 return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.mixture_, l.precursor_charge_, l.sequence_, l.channel_) <
257 }
258
259
260 private:
271 };
272
273 /*
274 * @brief Constructs the lines and adds them to the TextFile
275 * @param[in] peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
276 */
277 template <class LineType>
278 void constructFile_(const String& retention_time_summarization_method,
279 const bool rt_summarization_manual,
280 TextFile& csv_out,
281 const std::set<String>& peptideseq_quantifyable,
282 LineType & peptideseq_to_prefix_to_intensities) const;
283
284 /*
285 * @brief Constructs the accession to indist. group mapping
286 */
287 static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
288
289
290 /*
291 * @brief Based on the evidence accession set in a PeptideHit, checks if it is unique and therefore quantifyable
292 * in a group context.
293 *
294 */
296 const std::set<String>& accs,
297 const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
298
299 };
300} // namespace OpenMS
A container for consensus elements.
Definition ConsensusMap.h:67
Definition ExperimentalDesign.h:131
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
Definition MSstatsFile.h:139
const String & sequence() const
Definition MSstatsFile.h:166
const String & run() const
Definition MSstatsFile.h:168
String toString() const
Definition MSstatsFile.h:170
String condition_
Definition MSstatsFile.h:201
String sequence_
Definition MSstatsFile.h:196
MSstatsLine_(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition MSstatsFile.h:141
bool has_fraction_
Definition MSstatsFile.h:194
String isotope_label_type_
Definition MSstatsFile.h:200
const String & accession() const
Definition MSstatsFile.h:165
String frag_charge_
Definition MSstatsFile.h:199
String bioreplicate_
Definition MSstatsFile.h:202
friend bool operator<(const MSstatsLine_ &l, const MSstatsLine_ &r)
Definition MSstatsFile.h:185
String accession_
Definition MSstatsFile.h:195
const String & precursor_charge() const
Definition MSstatsFile.h:167
String fraction_
Definition MSstatsFile.h:204
String fragment_ion_
Definition MSstatsFile.h:198
String precursor_charge_
Definition MSstatsFile.h:197
String run_
Definition MSstatsFile.h:203
Definition MSstatsFile.h:208
MSstatsTMTLine_(const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_channel, const String &_condition, const String &_bioreplicate, const String &_run, const String &_mixture, const String &_techrepmixture, const String &_fraction)
Definition MSstatsFile.h:210
const String & sequence() const
Definition MSstatsFile.h:233
const String & run() const
Definition MSstatsFile.h:235
String channel_
Definition MSstatsFile.h:264
String toString() const
Definition MSstatsFile.h:237
String condition_
Definition MSstatsFile.h:265
String sequence_
Definition MSstatsFile.h:262
const String & accession() const
Definition MSstatsFile.h:232
String mixture_
Definition MSstatsFile.h:268
String bioreplicate_
Definition MSstatsFile.h:266
String techrepmixture_
Definition MSstatsFile.h:269
String accession_
Definition MSstatsFile.h:261
friend bool operator<(const MSstatsTMTLine_ &l, const MSstatsTMTLine_ &r)
Definition MSstatsFile.h:252
const String & precursor_charge() const
Definition MSstatsFile.h:234
String fraction_
Definition MSstatsFile.h:270
String precursor_charge_
Definition MSstatsFile.h:263
String run_
Definition MSstatsFile.h:267
File adapter for MSstats files.
Definition MSstatsFile.h:32
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition MSstatsFile.h:123
void constructFile_(const String &retention_time_summarization_method, const bool rt_summarization_manual, TextFile &csv_out, const std::set< String > &peptideseq_quantifyable, LineType &peptideseq_to_prefix_to_intensities) const
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition MSstatsFile.h:87
OpenMS::Peak2D::CoordinateType Coordinate
Definition MSstatsFile.h:71
static void checkConditionLFQ_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition)
static std::unordered_map< OpenMS::String, const IndProtGrp * > getAccessionToGroupMap_(const IndProtGrps &ind_prots)
MSstatsFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap &consensus_map, const std::vector< String > &spectra_paths)
static const String na_string_
Definition MSstatsFile.h:73
static bool isSubsetOf_(const std::vector< String > &first, const std::vector< String > &second)
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition MSstatsFile.h:85
std::vector< BaseFeature > features
Definition MSstatsFile.h:88
std::vector< std::vector< String > > consensus_feature_filenames
Definition MSstatsFile.h:84
void storeISO(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &bioreplicate, const String &condition, const String &mixture, const String &retention_time_summarization_method)
Store isobaric experiment (MSstatsTMT)
bool isQuantifyable_(const std::set< String > &accs, const std::unordered_map< String, const IndProtGrp * > &accession_to_group) const
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition MSstatsFile.h:86
static void assembleRunMap_(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition MSstatsFile.h:133
static void warnOnSubsetFiles_(const std::vector< String > &spectra_paths, const std::vector< String > &design_filenames)
MSstatsFile()=default
Default constructor.
~MSstatsFile()=default
Destructor.
static void checkConditionISO_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition, const String &mixture)
OpenMS::Peak2D::IntensityType Intensity
Definition MSstatsFile.h:70
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const bool is_isotope_label_type, const String &bioreplicate, const String &condition, const String &retention_time_summarization_method)
store label free experiment (MSstats)
double CoordinateType
Coordinate type (of the position)
Definition Peak2D.h:39
float IntensityType
Intensity type.
Definition Peak2D.h:37
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition ProteinIdentification.h:73
A more convenient string class.
Definition String.h:32
Definition TextFile.h:21
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::vector< IndProtGrp > IndProtGrps
Definition MSstatsFile.h:24