OpenMS
Loading...
Searching...
No Matches
IDRipper.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg$
6// $Authors: Immanuel Luhn, Leon Kuchenbecker$
7// --------------------------------------------------------------------------
8#pragma once
9
14#include <unordered_map>
15
16
17namespace OpenMS
18{
// Ripping protein/peptide identification according to their file origin.
//
// NOTE(review): this listing is an extract of the header in which the original
// file's line numbers are fused to the start of every line and several lines
// are not visible (gaps in the numbering). In particular the base class that
// should follow the ':' below is missing here — presumably DefaultParamHandler,
// TODO confirm against the real header.
28 class OPENMS_DLLAPI IDRipper :
30 {
31public:
// Possible input file encodings for the origin as used by different versions of IDMerger.
// SIZE_OF_ORIGIN_ANNOTATION_FORMAT is the number of preceding enumerators and is used
// as the size of names_of_OriginAnnotationFormat below.
33 enum OriginAnnotationFormat { FILE_ORIGIN = 0, MAP_INDEX = 1, ID_MERGE_INDEX = 2, UNKNOWN_OAF = 3, SIZE_OF_ORIGIN_ANNOTATION_FORMAT = 4 };
34
// String representations for the OriginAnnotationFormat enum.
36 static const std::array<std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT> names_of_OriginAnnotationFormat;
37
// Represents a set of IdentificationRuns.
39 struct OPENMS_DLLAPI IdentificationRuns
40 {
// Maps a unique index to every IdentificationRun string representation (getIdentifier()).
42 std::map<String, UInt> index_map;
// Maps the list of spectra data elements to every IdentificationRun index.
44 std::vector<StringList> spectra_data;
45
// Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
47 IdentificationRuns(const std::vector<ProteinIdentification>& prot_ids);
48 };
49
// Identifies an IDRipper output file.
51 struct OPENMS_DLLAPI RipFileIdentifier
52 {
// presumably the identification run index reported by getIdentRunIdx()
// (accessor not visible in this extract) — TODO confirm
54 UInt ident_run_idx{};
// presumably the file origin index reported by getFileOriginIdx()
// (accessor not visible in this extract) — TODO confirm
56 UInt file_origin_idx{};
61
// Constructs a new RipFileIdentifier object.
// NOTE(review): the first line of this constructor declaration (its name and
// leading parameter) is not visible in this extract; only the trailing
// parameters follow.
64 const PeptideIdentification& pep_id,
65 const std::map<String, UInt>& file_origin_map,
66 const IDRipper::OriginAnnotationFormat origin_annotation_fmt,
67 bool split_ident_runs);
68
71
74
// Get origin full name.
76 const String & getOriginFullname() const;
77
// Get output base name.
79 const String & getOutputBasename() const;
80 };
81
// Comparator body providing a 'less' operation for RipFileIdentifiers; it is used
// as the ordering of RipFileMap below (RipFileIdentifierIdxComparator).
// NOTE(review): the struct header line preceding this brace is not visible in this extract.
84 {
85 bool operator()(const RipFileIdentifier& left, const RipFileIdentifier& right) const;
86 };
87
// Represents the content of an IDRipper output file.
89 struct OPENMS_DLLAPI RipFileContent
90 {
// Protein identifications.
92 std::vector<ProteinIdentification> prot_idents;
// Constructs a new RipFileContent object by copying both arguments into the members.
// NOTE(review): the declaration of the pep_idents member initialized here is not
// visible in this extract.
96 RipFileContent(const std::vector<ProteinIdentification>& prot_idents, const PeptideIdentificationList& pep_idents)
97 : prot_idents(prot_idents), pep_idents(pep_idents) {}
// Get protein identifications.
99 const std::vector<ProteinIdentification> & getProteinIdentifications();
102 };
103
// Represents the result of an IDRipper process: a map assigning file content
// (RipFileContent) to output file identifiers (RipFileIdentifier).
105 typedef std::map<RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator> RipFileMap;
106
109
// Destructor.
111 ~IDRipper() override;
112
// Ripping protein/peptide identification according to their file origin.
// The result is delivered through the RipFileMap parameter 'ripped'.
// NOTE(review): one parameter line (between 'proteins' and 'numeric_filenames')
// is not visible in this extract.
125 void rip(
126 RipFileMap& ripped,
127 std::vector<ProteinIdentification>& proteins,
129 bool numeric_filenames,
130 bool split_ident_runs);
131
145 // Autowrap compatible wrapper for rip(RipFileMap,...)
// Takes two vectors (identifiers and contents) in place of the RipFileMap.
// NOTE(review): one parameter line (between 'proteins' and 'numeric_filenames')
// is not visible in this extract.
146 void rip(
147 std::vector<RipFileIdentifier>& rfis,
148 std::vector<RipFileContent>& rfcs,
149 std::vector<ProteinIdentification>& proteins,
151 bool numeric_filenames,
152 bool split_ident_runs);
153
154private:
155 // Not implemented
// Copy constructor (declared private).
157 IDRipper(const IDRipper & rhs);
158
159 // Not implemented
// NOTE(review): the declaration this comment refers to is not visible in this extract.
162
// helper function, detects file origin annotation standard from collections of protein and peptide hits
164 OriginAnnotationFormat detectOriginAnnotationFormat_(std::map<String, UInt> & file_origin_map, const PeptideIdentificationList & peptide_idents);
// helper function, extracts all protein hits that match the protein accession
166 void getProteinHits_(std::vector<ProteinHit> & result, const std::unordered_map<String, const ProteinHit*> & acc2protein_hits, const std::set<String> & protein_accessions);
// helper function, returns the protein accessions of the given peptide hits as a set of strings
168 std::set<String> getProteinAccessions_(const std::vector<PeptideHit> & peptide_hits);
// helper function, register a potential output file basename to detect duplicate output basenames
172 bool registerBasename_(std::map<String, std::pair<UInt, UInt> >& basename_to_numeric, const IDRipper::RipFileIdentifier& rfi);
// helper function, sets the value of mode to new_value; the meaning of the boolean
// result is not fully visible here — TODO confirm against the implementation
174 bool setOriginAnnotationMode_(short& mode, short const new_value);
175 };
176
177} // namespace OpenMS
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Ripping protein/peptide identification according to their file origin.
Definition IDRipper.h:30
bool registerBasename_(std::map< String, std::pair< UInt, UInt > > &basename_to_numeric, const IDRipper::RipFileIdentifier &rfi)
helper function, register a potential output file basename to detect duplicate output basenames
IDRipper()
Default constructor.
OriginAnnotationFormat detectOriginAnnotationFormat_(std::map< String, UInt > &file_origin_map, const PeptideIdentificationList &peptide_idents)
helper function, detects file origin annotation standard from collections of protein and peptide hits
~IDRipper() override
Destructor.
static const std::array< std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT > names_of_OriginAnnotationFormat
String representations for the OriginAnnotationFormat enum.
Definition IDRipper.h:36
void getProteinHits_(std::vector< ProteinHit > &result, const std::unordered_map< String, const ProteinHit * > &acc2protein_hits, const std::set< String > &protein_accessions)
helper function, extracts all protein hits that match the protein accession
std::map< RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator > RipFileMap
Represents the result of an IDRipper process, a map assigning file content to output file identifiers...
Definition IDRipper.h:105
int getProteinIdentification_(const PeptideIdentification &pep_ident, const IdentificationRuns &id_runs)
helper function, returns the index of the protein identification for the given peptide identification...
OriginAnnotationFormat
Possible input file encodings for the origin as used by different versions of IDMerger.
Definition IDRipper.h:33
std::set< String > getProteinAccessions_(const std::vector< PeptideHit > &peptide_hits)
helper function, returns the string representation of the peptide hit accession
IDRipper(const IDRipper &rhs)
Copy constructor.
bool setOriginAnnotationMode_(short &mode, short const new_value)
helper function, sets the value of mode to new_value and returns true if the old value was identical ...
void rip(std::vector< RipFileIdentifier > &rfis, std::vector< RipFileContent > &rfcs, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
void rip(RipFileMap &ripped, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
IDRipper & operator=(const IDRipper &rhs)
Assignment.
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:64
A more convenient string class.
Definition String.h:34
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Represents a set of IdentificationRuns.
Definition IDRipper.h:40
IdentificationRuns(const std::vector< ProteinIdentification > &prot_ids)
Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
std::vector< StringList > spectra_data
Maps the list of spectra data elements to every IdentificationRun index.
Definition IDRipper.h:44
std::map< String, UInt > index_map
Maps a unique index to every IdentificationRun string representation (getIdentifier()).
Definition IDRipper.h:42
Represents the content of an IDRipper output file.
Definition IDRipper.h:90
const PeptideIdentificationList & getPeptideIdentifications()
Get peptide identifications.
PeptideIdentificationList pep_idents
Peptide identifications.
Definition IDRipper.h:94
RipFileContent(const std::vector< ProteinIdentification > &prot_idents, const PeptideIdentificationList &pep_idents)
Constructs a new RipFileContent object.
Definition IDRipper.h:96
const std::vector< ProteinIdentification > & getProteinIdentifications()
Get protein identifications.
std::vector< ProteinIdentification > prot_idents
Protein identifications.
Definition IDRipper.h:92
Provides a 'less' operation for RipFileIdentifiers that ignores the out_basename and origin_fullname ...
Definition IDRipper.h:84
bool operator()(const RipFileIdentifier &left, const RipFileIdentifier &right) const
Identifies an IDRipper output file.
Definition IDRipper.h:52
RipFileIdentifier(const IDRipper::IdentificationRuns &id_runs, const PeptideIdentification &pep_id, const std::map< String, UInt > &file_origin_map, const IDRipper::OriginAnnotationFormat origin_annotation_fmt, bool split_ident_runs)
Constructs a new RipFileIdentifier object.
String out_basename
The output basename derived from the file_origin / spectra_data element.
Definition IDRipper.h:58
const String & getOriginFullname() const
Get origin full name.
UInt getFileOriginIdx() const
Get file origin index.
const String & getOutputBasename() const
Get output base name.
String origin_fullname
The full length origin read from the file_origin / spectra_data element.
Definition IDRipper.h:60
UInt getIdentRunIdx() const
Get identification run index.