OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
7// --------------------------------------------------------------------------
8
18
20
22
23#include <iomanip> // setw
24
25using namespace OpenMS;
26using namespace std;
27
28//-------------------------------------------------------------
29//Doxygen docu
30//-------------------------------------------------------------
31
39// We do not want this class to show up in the docu:
41
42// Output is always a ConsensusMap. The serialization track follows the input:
43// parquet inputs → consensusparquet; XML (or other) inputs → consensusXML.
// Maps the (common) input file type to the file type that is passed to
// FileHandler::storeConsensusFeatures() when the result is written.
// File-local helper (static linkage).
// NOTE(review): the case labels and the return statements of this switch are
// not visible in this extracted listing; only the `default:` label survived.
// Presumably parquet inputs map to CONSENSUSPARQUET and everything else to
// CONSENSUSXML, as the comment above this function states -- confirm against
// the real source file.
44static FileTypes::Type consensusOutTypeFor(FileTypes::Type in_type)
45{
46 switch (in_type)
47 {
51 default:
53 }
54}
55
// Shared base class for the feature linker tools: it registers the common
// command-line options and implements the load -> group -> store workflow in
// common_main_(). Option handling and logging come from TOPPBase, progress
// reporting from ProgressLogger.
56class TOPPFeatureLinkerBase :
57 public TOPPBase,
58 public ProgressLogger
59{
60
61public:
 // Forwards tool name, description and the "official" flag unchanged to TOPPBase.
62 TOPPFeatureLinkerBase(std::string name, std::string description, bool official = true) :
63 TOPPBase(name, description, official)
64 {
65 }
66
67protected:
 // Registers the options used by common_main_() when `labeled` is false:
 //   in               - required list of input files (feature or consensus maps, XML or parquet)
 //   out              - required output file (consensusXML or consensusparquet)
 //   design           - optional experimental design (tsv)
 //   keep_subelements - flag, only meaningful for consensus map input
68 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
69 {
70 registerInputFileList_("in", "<files>", ListUtils::create<std::string>(""), "input files separated by blanks", true);
71 setValidFormats_("in", ListUtils::create<std::string>("featureXML,consensusXML,featureparquet,consensusparquet"));
72 registerOutputFile_("out", "<file>", "", "Output file", true);
73 setValidFormats_("out", ListUtils::create<std::string>("consensusXML,consensusparquet"));
74 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
75 setValidFormats_("design", ListUtils::create<std::string>("tsv"));
77 registerFlag_("keep_subelements", "For consensusXML/consensusparquet input only: If set, the sub-features of the inputs are transferred to the output.");
78 }
79
 // Runs the complete linking workflow: reads the inputs, groups them with
 // `algorithm`, annotates the resulting ConsensusMap, stores it to "out" and
 // logs a size histogram of the consensus features.
 //
 // algorithm: grouping algorithm to use. It receives the "algorithm:" parameter
 //            subsection (prefix removed) and is dereferenced without a null check.
 // labeled:   if true, "in" is read as a single string option instead of a list,
 //            the "design" option is not read, and the column header of the single
 //            input is duplicated and labeled "light"/"heavy".
 //
 // Returns ILLEGAL_PARAMETERS if the inputs differ in file type, if a design file
 // is combined with consensus map input, or from the per-fraction run check
 // (its condition is not visible in this listing); EXECUTION_OK otherwise.
80 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
81 bool labeled = false)
82 {
83 //-------------------------------------------------------------
84 // parameter handling
85 //-------------------------------------------------------------
86 StringList ins;
87 if (labeled)
88 {
 // labeled tools have exactly one input, registered as a plain string option
89 ins.push_back(getStringOption_("in"));
90 }
91 else
92 {
93 ins = getStringList_("in");
94 }
95 std::string out = getStringOption_("out");
96
97 //-------------------------------------------------------------
98 // check for valid input
99 //-------------------------------------------------------------
100 // check if all input files have the correct type
 // The type of the first input is the reference for all others.
 // NOTE(review): ins[0] assumes at least one input file was supplied.
101 FileTypes::Type file_type = FileHandler::getType(ins[0]);
102 for (Size i = 0; i < ins.size(); ++i)
103 {
104 if (FileHandler::getType(ins[i]) != file_type)
105 {
106 writeLogError_("Error: All input files must be of the same type!");
107 return ILLEGAL_PARAMETERS;
108 }
109 }
110
111 //-------------------------------------------------------------
112 // set up algorithm
113 //-------------------------------------------------------------
 // hand the "algorithm:" subsection (with the prefix stripped) to the algorithm
114 Param algorithm_param = getParam_().copy("algorithm:", true);
115 writeDebug_("Used algorithm parameters", algorithm_param, 3);
116 algorithm->setParameters(algorithm_param);
117
118 //-------------------------------------------------------------
119 // perform grouping
120 //-------------------------------------------------------------
121 // load input
122 ConsensusMap out_map;
 // primary MS run paths collected from all inputs, in input order
123 StringList ms_run_locations;
124
 // stays empty for labeled tools, which disables all design handling below
125 std::string design_file;
126
127 // TODO: support design in labeled feature linker
128 if (!labeled)
129 {
130 design_file = getStringOption_("design");
131 }
132
 // a design file is only supported together with feature map input
133 if ((file_type == FileTypes::CONSENSUSXML || file_type == FileTypes::CONSENSUSPARQUET) && !design_file.empty())
134 {
135 writeLogError_("Error: Using fractionated design with consensusXML/consensusparquet as input is not supported!");
136 return ILLEGAL_PARAMETERS;
137 }
138
 // ---- feature map input (featureXML / featureparquet) ----
139 if (file_type == FileTypes::FEATUREXML || file_type == FileTypes::FEATUREPARQUET)
140 {
141 OPENMS_LOG_INFO << "Linking " << ins.size() << " feature maps." << endl;
142
143 //-------------------------------------------------------------
144 // Extract (optional) fraction identifiers and associate with featureXMLs
145 //-------------------------------------------------------------
146
147 // determine map of fractions to MS files
148 map<unsigned, vector<std::string>> frac2files;
149
150 if (!design_file.empty())
151 {
152 // parse design file and determine fractions
153 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
154
155 // determine if design defines more than one fraction
156 frac2files = ed.getFractionToMSFilesMapping();
157
158 writeDebug_("Grouping " + StringUtils::toStr(ed.getNumberOfFractions()) + " fractions.", 3);
159
160 // check if all fractions have the same number of MS runs associated
 // NOTE(review): the `if (...)` line guarding the following block is missing
 // from this extracted listing; presumably it tests
 // ed.sameNrOfMSFilesPerFraction() -- confirm against the real source.
162 {
163 writeLogError_("Error: Number of runs must match for every fraction!");
164 return ILLEGAL_PARAMETERS;
165 }
166 }
167 else // no design file given
168 {
169 for (Size i = 0; i != ins.size(); ++i)
170 {
 // "file<i>" is only a placeholder name: the code below uses just the
 // number of fractions and the number of entries per fraction
171 frac2files[1].emplace_back("file" + StringUtils::toStr(i)); // associate each run with fraction 1
172 }
173 }
174
175 vector<FeatureMap > maps(ins.size());
176 FileHandler f;
 // NOTE(review): the declaration of `param` is missing from this listing;
 // presumably a FeatureFileOptions object obtained from `f` -- confirm.
178
179 // to save memory don't load convex hulls and subordinates
180 param.setLoadSubordinates(false);
181 param.setLoadConvexHull(false);
182 f.setFeatOptions(param);
183
184 Size progress = 0;
186 startProgress(0, ins.size(), "reading input");
187 for (Size i = 0; i < ins.size(); ++i)
188 {
189 FeatureMap tmp;
 // NOTE(review): the statement that fills `tmp` is missing from this
 // listing; presumably ins[i] is loaded through `f` here -- confirm.
191
192 StringList ms_runs;
193 tmp.getPrimaryMSRunPath(ms_runs);
194
195 // associate mzML file with map i in consensusXML
 // the filename of column i is only set when exactly one MS run is annotated
196 if (ms_runs.size() > 1 || ms_runs.empty())
197 {
198 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
199 << ms_runs.size()
200 << " provided." << endl;
201 }
202 else
203 {
204 out_map.getColumnHeaders()[i].filename = ms_runs.front();
205 }
206 out_map.getColumnHeaders()[i].size = tmp.size();
207 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
208
209 // copy over information on the primary MS run
210 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
211
212 // to save memory, remove convex hulls, subordinates:
 // All meta values are dropped as well; only the adduct and the adduct
 // group are saved beforehand and written back afterwards.
213 for (Feature& ft : tmp)
214 {
215 std::string adduct;
216 std::string group;
217 //exception: adduct information
218 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
219 {
220 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS).toString();
221 }
222 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
223 {
224 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP).toString();
225 }
226 ft.getSubordinates().clear();
227 ft.getConvexHulls().clear();
228 ft.clearMetaInfo();
229 if (!adduct.empty())
230 {
231 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
232 }
233 if (!group.empty())
234 {
 // NOTE(review): the value was read via Constants::UserParam::ADDUCT_GROUP
 // but is written back under the literal key "Group"; this round-trips
 // only if the constant equals "Group" -- confirm.
235 ft.setMetaValue("Group", group);
236 }
237
238 }
239
 // NOTE(review): this is a copy assignment; `tmp` is not used afterwards.
240 maps[i] = tmp;
241 maps[i].updateRanges();
242
 // reports the value of `progress` before the increment
243 setProgress(progress++);
244 }
245 endProgress();
246
247 // exception for "labeled" algorithms: copy file descriptions
248 if (labeled)
249 {
 // the single input map is described twice: column 0 = "light", column 1 = "heavy"
250 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
251 out_map.getColumnHeaders()[0].label = "light";
252 out_map.getColumnHeaders()[1].label = "heavy";
 // NOTE(review): indexes element 0 without checking that any MS run path was
 // collected; if the input map had none, only a warning was issued above and
 // ms_run_locations is empty here.
253 ms_run_locations.push_back(ms_run_locations[0]);
254 }
255
257 // invoke feature grouping algorithm
258
259 if (frac2files.size() == 1) // group one fraction
260 {
261 algorithm->group(maps, out_map);
262 }
263 else // group multiple fractions
264 {
265 writeDebug_("Stored in " + StringUtils::toStr(maps.size()) + " maps.", 3);
 // NOTE(review): frac2files[i] uses std::map::operator[], which inserts an
 // empty entry if key i is absent and would thereby change
 // frac2files.size() while looping; this assumes the keys are exactly 1..n.
266 for (Size i = 1; i <= frac2files.size(); ++i)
267 {
268 vector<FeatureMap> fraction_maps;
269 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
270 // we should check if these are shuffled and error / warn
 // NOTE(review): the index always starts at 0 regardless of the fraction i,
 // so each iteration collects the same leading maps; no per-fraction offset
 // is applied -- confirm this is intended.
271 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
272 {
273 fraction_maps.push_back(maps[feature_map_index]);
274 }
275 algorithm->group(fraction_maps, out_map);
276 }
277 }
278 }
 // ---- all other input types (consensus maps) ----
279 else
280 {
281 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
282 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
283 bool keep_subelements = getFlag_("keep_subelements");
284 vector<ConsensusMap> maps(ins.size());
285 FileHandler f;
286 for (Size i = 0; i < ins.size(); ++i)
287 {
 // NOTE(review): the statement that loads ins[i] into maps[i] is missing
 // from this listing -- confirm against the real source.
289 maps[i].updateRanges();
290 // copy over information on the primary MS run
291 StringList ms_runs;
292 maps[i].getPrimaryMSRunPath(ms_runs);
293 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
294 if (keep_subelements)
295 {
 // Preserve the "map_index" meta value of every peptide identification as
 // "old_map_index".
 // NOTE(review): the lambda header (capture list and the parameter `p`) is
 // missing from this listing; `p` is presumably a PeptideIdentification&.
296 auto saveOldMapIndex =
298 {
299 if (p.metaValueExists("map_index"))
300 {
301 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
302 }
303 else
304 {
 // NOTE(review): the two adjacent string literals below are concatenated
 // without a separating space, so the message reads "assign aconsistent one".
305 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
306 "consistent one. Check the settings of previous tools." << std::endl;
307 }
308 };
309 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
310 }
311 }
312 // group
313 algorithm->group(maps, out_map);
314
315 // set file descriptions:
316
317 if (!keep_subelements)
318 {
 // each input consensus map becomes one column of the output
319 for (Size i = 0; i < ins.size(); ++i)
320 {
321 out_map.getColumnHeaders()[i].filename = ins[i];
322 out_map.getColumnHeaders()[i].size = maps[i].size();
323 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
324 }
325 }
326 else
327 {
328 // components of the output map are not the input maps themselves, but
329 // the components of the input maps:
330 algorithm->transferSubelements(maps, out_map);
331 }
332 }
333
334 // assign unique ids
 // NOTE(review): the statement belonging to the comment above is missing from
 // this listing.
336
337 // annotate output with data processing info
 // NOTE(review): the remaining arguments and the end of this call are on a
 // line that is missing from this listing.
338 addDataProcessing_(out_map,
340
341
342 // sort list of peptide identifications in each consensus feature by map index
 // NOTE(review): the statement belonging to the comment above is missing from
 // this listing.
344
345 // write output
 // the output file type is derived from the common input file type
346 FileHandler().storeConsensusFeatures(out, out_map, {consensusOutTypeFor(file_type)});
347
348 // some statistics
 // histogram: consensus feature size -> number of consensus features of that size
349 map<Size, UInt> num_consfeat_of_size;
350 for (const ConsensusFeature& cf : out_map)
351 {
352 ++num_consfeat_of_size[cf.size()];
353 }
354
355 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
 // reverse iteration over the ordered map: largest feature size is printed first
356 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
357 i != num_consfeat_of_size.rend(); ++i)
358 {
359 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
360 << i->second << endl;
361 }
362 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
363
364 return EXECUTION_OK;
365 }
366
367};
368
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:581
#define OPENMS_LOG_INFO
Macro for information/status messages.
Definition LogStream.h:585
A consensus feature spanning multiple LC-MS/MS experiments.
Definition ConsensusFeature.h:45
A container for consensus elements.
Definition ConsensusMap.h:67
Size applyMemberFunction(Size(Type::*member_function)())
Applies a member function of Type to the container itself and all consensus features....
Definition ConsensusMap.h:288
void sortPeptideIdentificationsByMapIndex()
Sorts PeptideIdentifications of consensus features with respect to their map index.
const ColumnHeaders & getColumnHeaders() const
Non-mutable access to the file descriptions.
@ FEATURE_GROUPING
Feature grouping
Definition DataProcessing.h:48
void setParameters(const Param &param)
Sets the parameters.
static ExperimentalDesign load(const std::string &tsv_file, bool require_spectra_files)
Loads an experimental design from a tab-separated file.
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
unsigned getNumberOfFractions() const
bool sameNrOfMSFilesPerFraction() const
std::map< unsigned int, std::vector< std::string > > getFractionToMSFilesMapping() const
Returns the mapping from fraction index to file paths (ordered by fraction_group).
size_t size() const noexcept
Definition ExposedVector.h:128
Options for loading files containing features.
Definition FeatureFileOptions.h:35
void setLoadConvexHull(bool convex)
void setLoadSubordinates(bool sub)
Base class for all feature grouping algorithms.
Definition FeatureGroupingAlgorithm.h:25
void transferSubelements(const std::vector< ConsensusMap > &maps, ConsensusMap &out) const
Transfers subelements (grouped features) from input consensus maps to the result consensus map.
virtual void group(const std::vector< FeatureMap > &maps, ConsensusMap &out)=0
Applies the algorithm. The features in the input maps are grouped and the output is written to the co...
A container for features.
Definition FeatureMap.h:78
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
void updateRanges() override
An LC-MS feature.
Definition Feature.h:46
Facilitates file handling by file type recognition.
Definition FileHandler.h:45
void loadConsensusFeatures(const std::string &filename, ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a ConsensusMap.
void storeConsensusFeatures(const std::string &filename, const ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Store a ConsensusFeatureMap.
static FileTypes::Type getType(const std::string &filename)
Tries to determine the file type (by name or content)
FeatureFileOptions & getFeatOptions()
Mutable access to the feature file options for loading/storing.
void setFeatOptions(const FeatureFileOptions &)
set feature file options for loading/storing
void loadFeatures(const std::string &filename, FeatureMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a FeatureMap.
Management and storage of parameters / INI files.
Definition Param.h:46
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void endProgress(UInt64 bytes_processed=0) const
void startProgress(SignedSize begin, SignedSize end, const std::string &label) const
Initializes the progress display.
@ CMD
Command line progress.
Definition ProgressLogger.h:44
Base class for TOPP applications.
Definition TOPPBase.h:120
std::string getStringOption_(const std::string &name) const
Returns the value of a previously registered string option (use getOutputDirOption() for output direc...
void setValidFormats_(const std::string &name, const std::vector< std::string > &formats, const bool force_OpenMS_format=true)
Sets the formats for a input/output file option or for all members of an input/output file lists.
void registerInputFileList_(const std::string &name, const std::string &argument, const StringList &default_value, const std::string &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers a list of input files option.
Param const & getParam_() const
Return all parameters relevant to this TOPP tool.
bool getFlag_(const std::string &name) const
Returns the value of a previously registered flag.
void addEmptyLine_()
Adds an empty line between registered variables in the documentation.
StringList getStringList_(const std::string &name) const
Returns the value of a previously registered StringList.
virtual void registerOptionsAndFlags_()=0
Sets the valid command line options (with argument) and flags (without argument).
DataProcessing getProcessingInfo_(DataProcessing::ProcessingAction action) const
Returns the data processing information.
void writeDebug_(const std::string &text, UInt min_level) const
Writes a string to the log file and to OPENMS_LOG_DEBUG if the debug level is at least min_level.
void registerOutputFile_(const std::string &name, const std::string &argument, const std::string &default_value, const std::string &description, bool required=true, bool advanced=false)
Registers an output file option.
void registerInputFile_(const std::string &name, const std::string &argument, const std::string &default_value, const std::string &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers an input file option.
void registerFlag_(const std::string &name, const std::string &description, bool advanced=false)
Registers a flag.
@ ILLEGAL_PARAMETERS
Definition TOPPBase.h:138
@ EXECUTION_OK
Definition TOPPBase.h:132
void writeLogError_(const std::string &text) const
Writes a string to the log file and to OPENMS_LOG_ERROR.
void addDataProcessing_(ConsensusMap &map, const DataProcessing &dp) const
Data processing setter for consensus maps.
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition UniqueIdInterface.h:78
Size setUniqueId()
Assigns a new, valid unique id. Always returns 1.
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
const std::string ADDUCT_GROUP
Definition Constants.h:592
const std::string DC_CHARGE_ADDUCTS
Definition Constants.h:602
std::string toStr(int i)
Definition StringUtils.h:257
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
STL namespace.
Type
Actual file types enum.
Definition FileTypes.h:31
@ FEATUREPARQUET
OpenMS internal feature map parquet bundle (directory: features.parquet + psms.parquet + proteins....
Definition FileTypes.h:99
@ CONSENSUSPARQUET
OpenMS internal consensus map parquet bundle (directory: consensus_features.parquet + psms....
Definition FileTypes.h:100
@ CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition FileTypes.h:39
@ FEATUREXML
OpenMS feature file (.featureXML)
Definition FileTypes.h:37