OpenMS
ExperimentalDesign.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
15 #include <vector>
16 #include <map>
17 #include <set>
18 
19 namespace OpenMS
20 {
21  class ConsensusMap;
22  class FeatureMap;
23 
218  class OPENMS_DLLAPI ExperimentalDesign
219  {
220 
221  public:
// One element of the MSFileSection vector (MSFileSection is declared further down as
// std::vector<MSFileSectionEntry>). Every field has a default member initializer, so a
// default-constructed entry is fully initialised.
// NOTE(review): the field meanings given below are inferred from the field names only;
// the original per-field documentation is not part of this listing - confirm.
228  class OPENMS_DLLAPI MSFileSectionEntry
229  {
230  public:
231  MSFileSectionEntry() = default;
232  unsigned fraction_group = 1; // presumably the fraction group id; defaults to 1
233  unsigned fraction = 1; // presumably the fraction id; defaults to 1
234  std::string path = "UNKNOWN_FILE"; // file path; placeholder value until set. Note: std::string here, whereas sample_name is String
235  unsigned label = 1; // label; defaults to 1
236  unsigned sample = 0; // sample; defaults to 0
237  String sample_name = "0"; // sample name; defaults to "0"
238  };
239 
// Tabular sample section: rows of String cells (content_) plus two lookup maps,
// sample name -> row index and column (factor) name -> column index.
240  class OPENMS_DLLAPI SampleSection
241  {
242  public:
243 
244  SampleSection() = default;
245 
// NOTE(review): listing line 246, which opens this declaration, is absent from this page.
// The member index at the end of the page lists it as the constructor
// SampleSection(content, sample_to_rowindex, columnname_to_columnindex); the three
// parameters correspond by name to the three private members declared below.
247  const std::vector< std::vector < String > >& content,
248  const std::map< String, Size >& sample_to_rowindex,
249  const std::map< String, Size >& columnname_to_columnindex
250  );
251 
252  // Get the set of all samples that are present in the sample section
253  std::set< String > getSamples() const;
254 
255  // Add a sample as the last row; content defaults to an empty vector
256  void addSample(const String& sample, const std::vector<String>& content = {});
257 
258  // TODO: decide whether this should include the Sample ID column or not
259  // Get the set of all factors (column names) that were defined for the sample section
260  std::set< String > getFactors() const;
261 
262  // Checks whether the sample section has a row for a sample name/ID
263  bool hasSample(const String& sample) const;
264 
265  // Checks whether the sample section has a specific factor (i.e. column name)
266  bool hasFactor(const String &factor) const;
267 
268  // Returns the value of a factor for a given sample name and factor name
269  String getFactorValue(const String& sample_name, const String &factor) const;
270 
271  // Returns the value of a factor for a given sample index and factor name
272  String getFactorValue(unsigned sample_idx, const String &factor) const;
273 
274  // Returns the column index of a factor
275  Size getFactorColIdx(const String &factor) const;
276 
277  // Returns the name/ID of the sample. Not necessarily the row index
278  String getSampleName(unsigned sample_row) const;
279 
280  // Returns the row index in the sample section for a sample name/ID
281  unsigned getSampleRow(const String& sample) const;
282 
// NOTE(review): listing lines 283-284 are absent from this page. The member index at the
// end of the page lists `Size getContentSize() const` ("returns the number of entries in
// content_ member"), which presumably was declared here - confirm.
285 
286  private:
287 
288  // The entries of the sample section, filled while parsing
289  // the experimental design file
290  std::vector< std::vector < String > > content_;
291 
292  // Maps the sample entry name to the row where the sample
293  // appears in the sample section, i.e. its sample index
294  std::map< String, Size > sample_to_rowindex_;
295 
296  // Maps the column name of the sample section to the
297  // index of the column
298  std::map< String, Size > columnname_to_columnindex_;
299  };
300 
301  using MSFileSection = std::vector<MSFileSectionEntry>; // the MS file section is a plain vector of entries (rows)
302 
303  // Experimental design constructors
304  ExperimentalDesign() = default;
305 
306  ExperimentalDesign(const MSFileSection& msfile_section, const SampleSection& sample_section);
307 
// NOTE(review): listing line 308 is absent from this page. The member index at the end of
// the page lists `const MSFileSection & getMSFileSection() const`, presumably declared here.
309 
310  void setMSFileSection(const MSFileSection& msfile_section);
311 
312  // Returns the sample section of the experimental design file
// NOTE(review): the declaration the comment above belongs to (listing line 313) is absent
// from this page. The member index lists
// `const ExperimentalDesign::SampleSection & getSampleSection() const`; the comment does
// NOT describe setSampleSection below.
314 
315  void setSampleSection(const SampleSection& sample_section);
316 
319  std::map<std::vector<String>, std::set<String>> getUniqueSampleRowToSampleMapping() const;
320 
323  std::map<String, unsigned> getSampleToPrefractionationMapping() const;
324 
326  // TODO: this probably needs a basename parameter to be fully compatible with the other mappings. Currently the full path is used implicitly.
// Returns fraction index to file paths (ordered by fraction_group), per the member index of this page.
327  std::map<unsigned int, std::vector<String> > getFractionToMSFilesMapping() const;
328 
331  // TODO: this probably needs a basename parameter to be fully compatible with the other mappings. Currently the full path is used implicitly.
332  std::vector<std::vector<std::pair<String, unsigned>>> getConditionToPathLabelVector() const;
333 
// Maps a condition (unique combination of sample section values except replicate) to
// sample indices, per the member index of this page.
335  std::map<std::vector<String>, std::set<unsigned>> getConditionToSampleMapping() const;
336 
337  /*
338  * The (Path, Label) tuples in the experimental design have to be unique, so we can map them
339  * uniquely to the sample number, fraction number, and fraction_group number
340  */
341 
344  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToPrefractionationMapping(bool use_basename_only) const;
345 
348  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToConditionMapping(bool use_basename_only) const;
349 
352  std::map<String, unsigned> getSampleToConditionMapping() const;
353 
// Returns the <file_path, label> to sample index mapping, per the member index of this page.
355  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToSampleMapping(bool use_basename_only) const;
356 
// Returns the <file_path, label> to fraction mapping, per the member index of this page.
358  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionMapping(bool use_basename_only) const;
359 
// Returns the <file_path, label> to fraction_group mapping, per the member index of this page.
361  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionGroupMapping(bool use_basename_only) const;
362 
363  // @return the number of samples measured (= highest sample index)
364  unsigned getNumberOfSamples() const;
365 
366  // @return the number of fractions (= highest fraction index)
367  unsigned getNumberOfFractions() const;
368 
369  // @return the number of labels per file
370  unsigned getNumberOfLabels() const;
371 
372  // @return the number of MS files (= fractions * fraction groups)
373  unsigned getNumberOfMSFiles() const;
374 
375  // @return the number of fraction_groups
376  // Fraction groups allow grouping of fraction ids and source files
377  unsigned getNumberOfFractionGroups() const;
378 
379  // @return sample index (depends on fraction_group and label)
// NOTE(review): this getter is not declared const, unlike the other getters in this
// class - confirm whether that is intentional.
380  unsigned getSample(unsigned fraction_group, unsigned label = 1);
381 
383  // This is the case if we have at least one fraction group with >= 2 fractions
384  bool isFractionated() const;
385 
389  Size filterByBasenames(const std::set<String>& bns);
390 
// NOTE(review): several listing lines between 390 and 401 are absent from this page. The
// member index at the end of the page lists three further public members that are not
// declared anywhere in this listing and presumably belong here:
//   bool sameNrOfMSFilesPerFraction() const
//   static ExperimentalDesign fromConsensusMap(const ConsensusMap &c)   ("Extract experimental design from consensus map.")
//   static ExperimentalDesign fromFeatureMap(const FeatureMap &f)       ("Extract experimental design from feature map.")
393 
396 
399 
// Extract experimental design from identifications (per the member index of this page).
401  static ExperimentalDesign fromIdentifications(const std::vector<ProteinIdentification>& proteins);
402  // TODO: create another overload here that takes two enums, outerVec and innerVec, with entries Replicate, Fraction, Sample
403 
404  private:
405  // MS filename column, optionally trimmed to the basename
406  std::vector< String > getFileNames_(bool basename) const;
407 
408  // returns the label column
409  std::vector<unsigned> getLabels_() const;
410 
411  // returns the fraction column
412  std::vector<unsigned> getFractions_() const;
413 
// Generic mapper (Path, Label) -> f(row), per the member index of this page; f is a
// function pointer applied to an MSFileSectionEntry. The bool parameter is unnamed here;
// presumably it is the use_basename_only flag of the public getPathLabelTo* methods - confirm.
415  std::map< std::pair< String, unsigned >, unsigned> pathLabelMapper_(
416  bool,
417  unsigned (*f)(const ExperimentalDesign::MSFileSectionEntry&)) const;
418 
419  // sort to obtain the default order
420  void sort_();
421 
// NOTE(review): item is taken by non-const reference - confirm whether the
// implementation needs to modify it.
422  template<typename T>
423  static void errorIfAlreadyExists(std::set<T> &container, T &item, const String &message);
424 
425  // basic consistency checks
426  void isValid_();
427 
430  };
431 }
432 
A container for consensus elements.
Definition: ConsensusMap.h:66
Definition: ExperimentalDesign.h:229
Definition: ExperimentalDesign.h:241
SampleSection(const std::vector< std::vector< String > > &content, const std::map< String, Size > &sample_to_rowindex, const std::map< String, Size > &columnname_to_columnindex)
bool hasSample(const String &sample) const
std::vector< std::vector< String > > content_
Definition: ExperimentalDesign.h:290
std::map< String, Size > columnname_to_columnindex_
Definition: ExperimentalDesign.h:298
void addSample(const String &sample, const std::vector< String > &content={})
String getSampleName(unsigned sample_row) const
std::map< String, Size > sample_to_rowindex_
Definition: ExperimentalDesign.h:294
Size getContentSize() const
returns the number of entries in content_ member
std::set< String > getFactors() const
Size getFactorColIdx(const String &factor) const
std::set< String > getSamples() const
bool hasFactor(const String &factor) const
String getFactorValue(const String &sample_name, const String &factor) const
unsigned getSampleRow(const String &sample) const
String getFactorValue(unsigned sample_idx, const String &factor) const
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesignFile class.
Definition: ExperimentalDesign.h:219
unsigned getNumberOfLabels() const
static void errorIfAlreadyExists(std::set< T > &container, T &item, const String &message)
unsigned getNumberOfFractions() const
static ExperimentalDesign fromConsensusMap(const ConsensusMap &c)
Extract experimental design from consensus map.
unsigned getSample(unsigned fraction_group, unsigned label=1)
std::map< std::vector< String >, std::set< String > > getUniqueSampleRowToSampleMapping() const
unsigned getNumberOfSamples() const
void setSampleSection(const SampleSection &sample_section)
std::vector< unsigned > getLabels_() const
bool sameNrOfMSFilesPerFraction() const
Size filterByBasenames(const std::set< String > &bns)
unsigned getNumberOfFractionGroups() const
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionMapping(bool use_basename_only) const
return <file_path, label> to fraction mapping
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToPrefractionationMapping(bool use_basename_only) const
std::map< std::vector< String >, std::set< unsigned > > getConditionToSampleMapping() const
return a condition (unique combination of sample section values except replicate) to Sample index mapping
void setMSFileSection(const MSFileSection &msfile_section)
std::vector< MSFileSectionEntry > MSFileSection
Definition: ExperimentalDesign.h:301
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToConditionMapping(bool use_basename_only) const
MSFileSection msfile_section_
Definition: ExperimentalDesign.h:428
static ExperimentalDesign fromIdentifications(const std::vector< ProteinIdentification > &proteins)
Extract experimental design from identifications.
std::map< std::pair< String, unsigned >, unsigned > pathLabelMapper_(bool, unsigned(*f)(const ExperimentalDesign::MSFileSectionEntry &)) const
Generic Mapper (Path, Label) -> f(row)
ExperimentalDesign(const MSFileSection &msfile_section, const SampleSection &sample_section)
std::map< String, unsigned > getSampleToPrefractionationMapping() const
std::vector< unsigned > getFractions_() const
const MSFileSection & getMSFileSection() const
std::vector< String > getFileNames_(bool basename) const
const ExperimentalDesign::SampleSection & getSampleSection() const
static ExperimentalDesign fromFeatureMap(const FeatureMap &f)
Extract experimental design from feature map.
std::vector< std::vector< std::pair< String, unsigned > > > getConditionToPathLabelVector() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToSampleMapping(bool use_basename_only) const
return <file_path, label> to sample index mapping
SampleSection sample_section_
Definition: ExperimentalDesign.h:429
std::map< String, unsigned > getSampleToConditionMapping() const
unsigned getNumberOfMSFiles() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionGroupMapping(bool use_basename_only) const
return <file_path, label> to fraction_group mapping
A container for features.
Definition: FeatureMap.h:80
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
const double c
Definition: Constants.h:188
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22