OpenMS
Loading...
Searching...
No Matches
MSstatsFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg, Lukas Heumos $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <map>
16#include <utility>
17#include <unordered_map>
18#include <set>
19#include <vector>
20
21namespace OpenMS
22{
// Convenience alias: a vector of IndProtGrp entries.
// NOTE(review): the IndProtGrp alias itself is not declared anywhere in this listing
// (listing line 23 is missing) — presumably it names the protein group type from
// ProteinIdentification; confirm against the real header.
24 using IndProtGrps = std::vector<IndProtGrp>;
25
31 class OPENMS_DLLAPI MSstatsFile
32 {
33 public:
35 MSstatsFile() = default;
37 ~MSstatsFile() = default;
38
40 void storeLFQ(const String& filename,
41 const ConsensusMap &consensus_map, // we might add singleton protein groups
42 const ExperimentalDesign& design,
43 const StringList& reannotate_filenames,
44 const bool is_isotope_label_type,
45 const String& bioreplicate,
46 const String& condition,
47 const String& retention_time_summarization_method);
48
50 void storeISO(const String& filename,
51 const ConsensusMap &consensus_map,
52 const ExperimentalDesign& design,
53 const StringList& reannotate_filenames,
54 const String& bioreplicate,
55 const String& condition,
56 const String& mixture,
57 const String& retention_time_summarization_method);
58
59 private:
62
63 static const String na_string_;
64 static const char delim_ = ',';
65 static const char accdelim_ = ';';
66 static const char quote_ = '"';
67
68 /*
69 * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
70 * such as filenames, intensities, retention times, labels and features (for further processing)
71 */
73 {
74 std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
75 std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
76 std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
77 std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
78 std::vector<BaseFeature> features; //<s Features of ConsensusMap
79 };
80
81 /*
82 * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
83 * such as filenames, intensities, retention times, labels and features.
84 * Stores them in AggregatedConsensusInfo for later processing
85 */
87 const std::vector<String>& spectra_paths);
88
89 /*
90 * @brief: Internal function to check if MSstats_BioReplicate and MSstats_Condition exists in Experimental Design
91 */
92 static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition);
93
94 /*
95 * @brief: Internal function to check if MSstats_BioReplicate, MSstats_Condition and MSstats_Mixture in Experimental Design
96 */
97 static void checkConditionISO_(const ExperimentalDesign::SampleSection& sampleSection, const String& bioreplicate, const String& condition, const String& mixture);
98
99 /*
100 * @brief MSstats treats runs differently than OpenMS. In MSstats, runs are an enumeration of (SpectraFilePath, Fraction)
101 * In OpenMS, a run is split into multiple fractions.
102 */
103 static void assembleRunMap_(
104 std::map< std::pair< String, unsigned>, unsigned> &run_map,
105 const ExperimentalDesign &design);
106
107 /*
108 * @brief checks two vectors for same content
109 */
110 static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
111
112 OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
113 {
115 for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
116 {
117 result += intensity;
118 }
119 return result;
120 }
121
122 OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
123 {
124 return sumIntensity_(intensities) / intensities.size();
125 }
126
128 {
129 public :
131 bool _has_fraction,
132 const String& _accession,
133 const String& _sequence,
134 const String& _precursor_charge,
135 const String& _fragment_ion,
136 const String& _frag_charge,
137 const String& _isotope_label_type,
138 const String& _condition,
139 const String& _bioreplicate,
140 const String& _run,
141 const String& _fraction
142 ): has_fraction_(_has_fraction),
143 accession_(_accession),
144 sequence_(_sequence),
145 precursor_charge_(_precursor_charge),
146 fragment_ion_(_fragment_ion),
147 frag_charge_(_frag_charge),
148 isotope_label_type_(_isotope_label_type),
149 condition_(_condition),
150 bioreplicate_(_bioreplicate),
151 run_(_run),
152 fraction_(_fraction) {}
153
154 const String& accession() const {return this->accession_;}
155 const String& sequence() const {return this->sequence_;}
156 const String& precursor_charge() const {return this->precursor_charge_;}
157 const String& run() const {return this->run_;}
158
160 {
161 const String delim(",");
162 return accession_
163 + delim + sequence_
164 + delim + precursor_charge_
165 + delim + fragment_ion_
166 + delim + frag_charge_
167 + delim + isotope_label_type_
168 + delim + condition_
169 + delim + bioreplicate_
170 + delim + run_
171 + (this->has_fraction_ ? delim + String(fraction_) : "");
172 }
173
174 friend bool operator<(const MSstatsLine_ &l,
175 const MSstatsLine_ &r) {
176
177 return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.precursor_charge_, l.sequence_) <
179 }
180
181
182 private:
194 };
195
197 {
198 public :
200 const String& _accession,
201 const String& _sequence,
202 const String& _precursor_charge,
203 const String& _channel,
204 const String& _condition,
205 const String& _bioreplicate,
206 const String& _run,
207 const String& _mixture,
208 const String& _techrepmixture,
209 const String& _fraction
210 ): accession_(_accession),
211 sequence_(_sequence),
212 precursor_charge_(_precursor_charge),
213 channel_(_channel),
214 condition_(_condition),
215 bioreplicate_(_bioreplicate),
216 run_(_run),
217 mixture_(_mixture),
218 techrepmixture_(_techrepmixture),
219 fraction_(_fraction) {}
220
221 const String& accession() const {return this->accession_;}
222 const String& sequence() const {return this->sequence_;}
223 const String& precursor_charge() const {return this->precursor_charge_;}
224 const String& run() const {return this->run_;}
225
227 {
228 const String delim(",");
229 return accession_
230 + delim + sequence_
231 + delim + precursor_charge_
232 + delim + channel_
233 + delim + condition_
234 + delim + bioreplicate_
235 + delim + run_
236 + delim + mixture_
237 + delim + techrepmixture_
238 + delim + String(fraction_);
239 }
240
241 friend bool operator<(const MSstatsTMTLine_ &l,
242 const MSstatsTMTLine_ &r) {
243
244 return std::tie(l.accession_, l.run_, l.condition_, l.bioreplicate_, l.mixture_, l.precursor_charge_, l.sequence_, l.channel_) <
246 }
247
248
249 private:
260 };
261
262 /*
263 * @brief Constructs the lines and adds them to the TextFile
264 * @param[out] peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
265 */
266 template <class LineType>
267 void constructFile_(const String& retention_time_summarization_method,
268 const bool rt_summarization_manual,
269 TextFile& csv_out,
270 const std::set<String>& peptideseq_quantifyable,
271 LineType & peptideseq_to_prefix_to_intensities) const;
272
273 /*
274 * @brief Constructs the accession to indist. group mapping
275 */
276 static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
277
278
279 /*
280 * @brief Based on the evidence accession set in a PeptideHit, checks if is unique and therefore quantifyable
281 * in a group context.
282 *
283 */
285 const std::set<String>& accs,
286 const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
287
288 };
289} // namespace OpenMS
A container for consensus elements.
Definition ConsensusMap.h:68
Definition ExperimentalDesign.h:131
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
Definition MSstatsFile.h:128
const String & sequence() const
Definition MSstatsFile.h:155
const String & run() const
Definition MSstatsFile.h:157
String toString() const
Definition MSstatsFile.h:159
String condition_
Definition MSstatsFile.h:190
String sequence_
Definition MSstatsFile.h:185
MSstatsLine_(bool _has_fraction, const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_fragment_ion, const String &_frag_charge, const String &_isotope_label_type, const String &_condition, const String &_bioreplicate, const String &_run, const String &_fraction)
Definition MSstatsFile.h:130
bool has_fraction_
Definition MSstatsFile.h:183
String isotope_label_type_
Definition MSstatsFile.h:189
const String & accession() const
Definition MSstatsFile.h:154
String frag_charge_
Definition MSstatsFile.h:188
String bioreplicate_
Definition MSstatsFile.h:191
friend bool operator<(const MSstatsLine_ &l, const MSstatsLine_ &r)
Definition MSstatsFile.h:174
String accession_
Definition MSstatsFile.h:184
const String & precursor_charge() const
Definition MSstatsFile.h:156
String fraction_
Definition MSstatsFile.h:193
String fragment_ion_
Definition MSstatsFile.h:187
String precursor_charge_
Definition MSstatsFile.h:186
String run_
Definition MSstatsFile.h:192
Definition MSstatsFile.h:197
MSstatsTMTLine_(const String &_accession, const String &_sequence, const String &_precursor_charge, const String &_channel, const String &_condition, const String &_bioreplicate, const String &_run, const String &_mixture, const String &_techrepmixture, const String &_fraction)
Definition MSstatsFile.h:199
const String & sequence() const
Definition MSstatsFile.h:222
const String & run() const
Definition MSstatsFile.h:224
String channel_
Definition MSstatsFile.h:253
String toString() const
Definition MSstatsFile.h:226
String condition_
Definition MSstatsFile.h:254
String sequence_
Definition MSstatsFile.h:251
const String & accession() const
Definition MSstatsFile.h:221
String mixture_
Definition MSstatsFile.h:257
String bioreplicate_
Definition MSstatsFile.h:255
String techrepmixture_
Definition MSstatsFile.h:258
String accession_
Definition MSstatsFile.h:250
friend bool operator<(const MSstatsTMTLine_ &l, const MSstatsTMTLine_ &r)
Definition MSstatsFile.h:241
const String & precursor_charge() const
Definition MSstatsFile.h:223
String fraction_
Definition MSstatsFile.h:259
String precursor_charge_
Definition MSstatsFile.h:252
String run_
Definition MSstatsFile.h:256
File adapter for MSstats files.
Definition MSstatsFile.h:32
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition MSstatsFile.h:112
void constructFile_(const String &retention_time_summarization_method, const bool rt_summarization_manual, TextFile &csv_out, const std::set< String > &peptideseq_quantifyable, LineType &peptideseq_to_prefix_to_intensities) const
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition MSstatsFile.h:77
static bool checkUnorderedContent_(const std::vector< String > &first, const std::vector< String > &second)
OpenMS::Peak2D::CoordinateType Coordinate
Definition MSstatsFile.h:61
static void checkConditionLFQ_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition)
static std::unordered_map< OpenMS::String, const IndProtGrp * > getAccessionToGroupMap_(const IndProtGrps &ind_prots)
MSstatsFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap &consensus_map, const std::vector< String > &spectra_paths)
static const String na_string_
Definition MSstatsFile.h:63
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition MSstatsFile.h:75
std::vector< BaseFeature > features
Definition MSstatsFile.h:78
std::vector< std::vector< String > > consensus_feature_filenames
Definition MSstatsFile.h:74
void storeISO(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &bioreplicate, const String &condition, const String &mixture, const String &retention_time_summarization_method)
store isobaric experiment (MSstatsTMT)
bool isQuantifyable_(const std::set< String > &accs, const std::unordered_map< String, const IndProtGrp * > &accession_to_group) const
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition MSstatsFile.h:76
static void assembleRunMap_(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition MSstatsFile.h:122
MSstatsFile()=default
Default constructor.
~MSstatsFile()=default
Destructor.
static void checkConditionISO_(const ExperimentalDesign::SampleSection &sampleSection, const String &bioreplicate, const String &condition, const String &mixture)
OpenMS::Peak2D::IntensityType Intensity
Definition MSstatsFile.h:60
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const bool is_isotope_label_type, const String &bioreplicate, const String &condition, const String &retention_time_summarization_method)
store label free experiment (MSstats)
float IntensityType
Intensity type.
Definition Peak2D.h:37
double CoordinateType
Coordinate type (of the position)
Definition Peak2D.h:39
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition ProteinIdentification.h:109
A more convenient string class.
Definition String.h:34
Definition TextFile.h:21
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::vector< IndProtGrp > IndProtGrps
Definition MSstatsFile.h:24