49 class PeptideIdentification;
50 class PeptideEvidence;
87 explicit Mapping(
const std::vector<ProteinIdentification>& prot_ids)
91 void create(
const std::vector<ProteinIdentification>& prot_ids)
93 identifier_to_msrunpath.clear();
94 runpath_to_identifier.clear();
98 prot_id.getPrimaryMSRunPath(filenames);
99 if (filenames.empty())
103 identifier_to_msrunpath[prot_id.getIdentifier()] = filenames;
104 const auto& it = runpath_to_identifier.find(filenames);
105 if (it != runpath_to_identifier.end())
108 "Multiple protein identifications with the same ms-run-path in Consensus/FeatureXML. Check input!\n",
111 runpath_to_identifier[filenames] = prot_id.getIdentifier();
171 return float_data_arrays_;
/// Sets the float data arrays from @p fda (setter counterpart of getFloatDataArrays()).
/// @param fda Float data arrays, passed by const reference.
/// NOTE(review): definition is not visible here; presumably overwrites float_data_arrays_ - confirm.
175 void setFloatDataArrays(
const FloatDataArrays& fda);
/// Returns a const reference to the string data arrays (read-only access).
178 const StringDataArrays& getStringDataArrays()
const;
/// Returns a mutable reference to the string data arrays.
181 StringDataArrays& getStringDataArrays();
/// Sets the string data arrays from @p sda.
/// @param sda String data arrays, passed by const reference.
/// NOTE(review): definition is not visible here; presumably overwrites string_data_arrays_ - confirm.
184 void setStringDataArrays(
const StringDataArrays& sda);
/// Returns a const reference to the integer data arrays (read-only access).
187 const IntegerDataArrays& getIntegerDataArrays()
const;
/// Returns a mutable reference to the integer data arrays.
190 IntegerDataArrays& getIntegerDataArrays();
/// Sets the integer data arrays from @p ida.
/// @param ida Integer data arrays, passed by const reference.
/// NOTE(review): definition is not visible here; presumably overwrites integer_data_arrays_ - confirm.
193 void setIntegerDataArrays(
const IntegerDataArrays& ida);
198 return *std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
205 return *std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
212 return *std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
213 [&name](
const FloatDataArray& da) {
return da.getName() == name; } );
219 return *std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
226 return *std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
233 return *std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
234 [&name](
const FloatDataArray& da) {
return da.getName() == name; } );
/// String names for the peak mass types; the array holds SIZE_OF_PEAKMASSTYPE entries.
/// NOTE(review): presumably indexed by the PeakMassType enum value - confirm against the definition.
257 static const std::string NamesOfPeakMassType[SIZE_OF_PEAKMASSTYPE];
/// Returns a pair of ints describing a charge range; does not modify the object (const).
/// NOTE(review): presumably (min, max) parsed from the charges string of the search
/// parameters - the definition is not visible here, confirm order and parsing rules.
296 std::pair<int,int> getChargeRange()
const;
/// Internal helper (trailing underscore) that yields an int charge value for @p charge_str.
/// @param charge_str Charge given as a string; taken by NON-const reference.
/// NOTE(review): the non-const reference suggests the argument may be modified while
/// it is parsed - confirm against the definition before passing a string you still need.
305 int getChargeValue_(
String& charge_str)
const;
/// Returns a const reference to the vector of protein hits (read-only access).
332 const std::vector<ProteinHit>& getHits()
const;
/// Returns a mutable reference to the vector of protein hits.
335 std::vector<ProteinHit>& getHits();
/// Sets the protein hits from @p hits.
/// NOTE(review): definition is not visible here; presumably replaces all stored hits - confirm.
346 void setHits(
const std::vector<ProteinHit>& hits);
/// Looks up a protein hit by @p accession and returns an iterator into the hit vector.
/// NOTE(review): presumably returns the end iterator of the hit vector when no hit
/// matches - confirm before dereferencing the result.
349 std::vector<ProteinHit>::iterator findHit(
const String& accession);
/// Returns a const reference to the protein groups (read-only access).
352 const std::vector<ProteinGroup>& getProteinGroups()
const;
/// Returns a mutable reference to the protein groups.
354 std::vector<ProteinGroup>& getProteinGroups();
/// Returns a const reference to the groups of indistinguishable proteins (read-only access).
359 const std::vector<ProteinGroup>& getIndistinguishableProteins()
const;
/// Returns a mutable reference to the groups of indistinguishable proteins.
361 std::vector<ProteinGroup>& getIndistinguishableProteins();
/// Adds @p group to the indistinguishable protein groups.
/// NOTE(review): insertion position and duplicate handling are not visible here - confirm.
363 void insertIndistinguishableProteins(
const ProteinGroup& group);
/// NOTE(review): judging by the name only, adds single-protein groups for proteins that
/// are not yet part of an indistinguishable group - confirm against the definition.
365 void fillIndistinguishableGroupsWithSingletons();
/// Returns the protein significance threshold as a double.
368 double getSignificanceThreshold()
const;
/// Sets the protein significance threshold to @p value.
370 void setSignificanceThreshold(
double value);
/// Returns a const reference to the protein score type string.
372 const String& getScoreType()
const;
/// Sets the protein score type to @p type.
374 void setScoreType(
const String& type);
/// Returns the score orientation flag.
/// NOTE(review): presumably true means a higher protein score is better - confirm.
376 bool isHigherScoreBetter()
const;
/// Sets the score orientation flag from @p higher_is_better.
378 void setHigherScoreBetter(
bool higher_is_better);
/// Computes coverage using the given peptide identifications.
/// @param pep_ids Peptide identifications used as input.
/// NOTE(review): non-const, so it modifies this object; presumably stores a sequence
/// coverage on each protein hit - definition not visible here, confirm.
390 void computeCoverage(
const std::vector<PeptideIdentification>& pep_ids);
/// Overload that takes its input from a ConsensusMap instead of a peptide ID vector.
/// @param cmap Consensus map used as input.
/// @param use_unassigned_ids NOTE(review): presumably selects whether peptide IDs not
/// assigned to any consensus feature are included - confirm.
391 void computeCoverage(
const ConsensusMap& cmap,
bool use_unassigned_ids);
400 void computeModifications(
401 const std::vector<PeptideIdentification>& pep_ids,
403 void computeModifications(
406 bool use_unassigned_ids);
/// Returns a const reference to the DateTime associated with this identification run.
411 const DateTime& getDateTime()
const;
/// Sets the DateTime of this identification run to @p date.
414 void setDateTime(
const DateTime& date);
/// Sets the search engine name to @p search_engine.
416 void setSearchEngine(
const String& search_engine);
/// Returns a const reference to the search engine name.
418 const String& getSearchEngine()
const;
/// Returns the original search engine name BY VALUE (a copy, declared const).
/// NOTE(review): how this differs from getSearchEngine() is not visible here - confirm.
420 const String getOriginalSearchEngineName()
const;
/// Sets the search engine version to @p search_engine_version.
422 void setSearchEngineVersion(
const String& search_engine_version);
/// Returns a const reference to the search engine version.
424 const String& getSearchEngineVersion()
const;
/// Sets the inference engine name.
/// NOTE(review): the parameter is named search_engine although it carries the
/// inference engine - looks like a copy-and-paste name, confirm and rename upstream.
426 void setInferenceEngine(
const String& search_engine);
/// Returns the inference engine name BY VALUE (a copy, declared const), unlike
/// getSearchEngine(), which returns a reference.
428 const String getInferenceEngine()
const;
/// Sets the inference engine version to @p inference_engine_version.
430 void setInferenceEngineVersion(
const String& inference_engine_version);
/// Returns the inference engine version BY VALUE (a copy, declared const).
/// NOTE(review): where the inference engine data is stored is not visible here - confirm.
432 const String getInferenceEngineVersion()
const;
/// Returns a const reference to the identifier of this identification run.
442 const String& getIdentifier()
const;
/// Sets the identifier of this identification run to @p id.
444 void setIdentifier(
const String&
id);
/// Sets the primary MS run paths from the list @p s.
/// @param s   List of paths.
/// @param raw Defaults to false.
/// NOTE(review): the meaning of raw is not visible here; presumably selects a separate
/// set of raw-file paths - confirm against the definition.
450 void setPrimaryMSRunPath(
const StringList& s,
bool raw =
false);
/// Adds a single primary MS run path @p s; @p raw defaults to false (see note on the setter
/// semantics of raw in the definition - not visible here).
454 void addPrimaryMSRunPath(
const String& s,
bool raw =
false);
/// Overload that adds a list of primary MS run paths @p s; @p raw defaults to false.
455 void addPrimaryMSRunPath(
const StringList& s,
bool raw =
false);
/// Writes the primary MS run paths into the out-parameter @p output.
/// @param output Receives the paths.
/// @param raw    Defaults to false.
/// NOTE(review): whether output is cleared first or appended to is not visible here - confirm.
462 void getPrimaryMSRunPath(
StringList& output,
bool raw =
false)
const;
/// Returns the number of primary MS run paths; @p raw defaults to false.
465 Size nrPrimaryMSRunPaths(
bool raw =
false)
const;
/// Returns whether this run carries inference data.
/// NOTE(review): the exact criterion is not visible here - confirm against the definition.
469 bool hasInferenceData()
const;
/// Returns whether the inference engine is recorded as the search engine.
/// NOTE(review): derived from the name only; the exact check is not visible here - confirm.
472 bool hasInferenceEngineAsSearchEngine()
const;
/// Returns search engine settings as a vector of (String, String) pairs.
/// @param se Search engine selector; defaults to the empty string.
/// NOTE(review): presumably the pairs are (setting name, value) and an empty se selects
/// the current search engine - confirm against the definition.
481 std::vector<std::pair<String,String>> getSearchEngineSettingsAsPairs(
const String& se =
"")
const;
/// Internal helper: computes coverage from a map of String keys to sets of PeptideEvidence.
/// @param map Read-only mapping used as input.
/// NOTE(review): keys are presumably protein accessions (the sibling helper names its
/// out-parameter map_acc_2_evidence) - confirm.
507 void computeCoverageFromEvidenceMapping_(
const std::unordered_map<
String, std::set<PeptideEvidence>>& map);
/// Internal helper: fills the out-parameter @p map_acc_2_evidence from @p pep_ids.
/// Const member, so the result is delivered only through the out-parameter.
/// @param map_acc_2_evidence Receives the String to PeptideEvidence-set mapping.
/// @param pep_ids            Peptide identifications used as input.
508 void fillEvidenceMapping_(std::unordered_map<
String, std::set<PeptideEvidence> >& map_acc_2_evidence,
509 const std::vector<PeptideIdentification>& pep_ids)
const;
/// Internal helper: fills the out-parameter @p prot2mod from @p pep_ids.
/// @param pep_ids            Peptide identifications used as input.
/// @param skip_modifications List of modifications; presumably these are ignored - confirm.
/// @param prot2mod           Receives, per String key, a set of (Size, ResidueModification)
///                           pairs. NOTE(review): presumably (position in protein,
///                           modification) keyed by accession - confirm.
511 void fillModMapping_(
const std::vector<PeptideIdentification>& pep_ids,
const StringList& skip_modifications,
512 std::unordered_map<
String, std::set<std::pair<Size, ResidueModification>>>& prot2mod)
const;
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
std::vector< FloatDataArray > FloatDataArrays
Definition: ProteinIdentification.h:124
String db
The used database.
Definition: ProteinIdentification.h:263
std::vector< StringDataArray > StringDataArrays
Definition: ProteinIdentification.h:127
A more convenient string class.
Definition: String.h:58
IntegerDataArrays integer_data_arrays_
Integer data arrays.
Definition: ProteinIdentification.h:245
StringDataArray & getStringDataArrayByName(String name)
Returns a mutable reference to the first string meta data array with the given name.
Definition: ProteinIdentification.h:203
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:268
String db_version
The database version.
Definition: ProteinIdentification.h:264
std::map< String, StringList > identifier_to_msrunpath
Definition: ProteinIdentification.h:82
IntegerDataArray & getIntegerDataArrayByName(String name)
Returns a mutable reference to the first integer meta data array with the given name.
Definition: ProteinIdentification.h:196
void create(const std::vector< ProteinIdentification > &prot_ids)
Definition: ProteinIdentification.h:91
Definition: ProteinIdentification.h:252
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:271
const StringDataArray & getStringDataArrayByName(String name) const
Returns a const reference to the first string meta data array with the given name.
Definition: ProteinIdentification.h:224
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
FloatDataArrays float_data_arrays_
Float data arrays.
Definition: ProteinIdentification.h:239
String search_engine_version_
Definition: ProteinIdentification.h:490
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:273
FloatDataArrays & getFloatDataArrays()
Returns a mutable reference to the float meta data arrays.
Definition: ProteinIdentification.h:169
A container for consensus elements.
Definition: ConsensusMap.h:82
std::map< StringList, String > runpath_to_identifier
Definition: ProteinIdentification.h:83
OpenMS::DataArrays::FloatDataArray FloatDataArray
Float data array vector type.
Definition: ProteinIdentification.h:123
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
ProteinHit HitType
Hit type definition.
Definition: ProteinIdentification.h:77
Mapping(const std::vector< ProteinIdentification > &prot_ids)
Definition: ProteinIdentification.h:87
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Search parameters of the DB search.
Definition: ProteinIdentification.h:260
bool operator<(const MultiplexDeltaMasses &dm1, const MultiplexDeltaMasses &dm2)
Float data array class.
Definition: DataArrays.h:45
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:274
String taxonomy
The taxonomy restriction.
Definition: ProteinIdentification.h:265
Integer data array class.
Definition: DataArrays.h:52
PeakMassType
Peak mass type.
Definition: ProteinIdentification.h:249
FloatDataArray & getFloatDataArrayByName(String name)
Returns a mutable reference to the first float meta data array with the given name.
Definition: ProteinIdentification.h:210
const IntegerDataArray & getIntegerDataArrayByName(String name) const
Returns a const reference to the first integer meta data array with the given name.
Definition: ProteinIdentification.h:217
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition: ProteinIdentification.h:136
Specificity
When querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition: EnzymaticDigestion.h:67
std::vector< ProteinHit > protein_hits_
Definition: ProteinIdentification.h:499
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:209
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:275
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1, or full=2)
Definition: ProteinIdentification.h:276
String protein_score_type_
Definition: ProteinIdentification.h:497
std::vector< IntegerDataArray > IntegerDataArrays
Definition: ProteinIdentification.h:130
double protein_significance_threshold_
Definition: ProteinIdentification.h:503
Two-way mapping between ms-run-path and protID|pepID-identifier
Definition: ProteinIdentification.h:80
String id_
Definition: ProteinIdentification.h:488
UInt missed_cleavages
The number of allowed missed cleavages.
Definition: ProteinIdentification.h:270
std::vector< ProteinGroup > protein_groups_
Definition: ProteinIdentification.h:500
String search_engine_
Definition: ProteinIdentification.h:489
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Representation of a protein hit.
Definition: ProteinHit.h:58
Invalid value exception.
Definition: Exception.h:327
bool higher_score_better_
Definition: ProteinIdentification.h:498
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
DateTime date_
Definition: ProteinIdentification.h:492
Definition: ProteinIdentification.h:251
bool operator!=(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:824
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
DateTime Class.
Definition: DateTime.h:58
const FloatDataArray & getFloatDataArrayByName(String name) const
Returns a const reference to the first float meta data array with the given name. ...
Definition: ProteinIdentification.h:231
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:272
std::vector< ProteinGroup > indistinguishable_proteins_
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
Definition: ProteinIdentification.h:502
String data array class.
Definition: DataArrays.h:59
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:267
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:119
SearchParameters search_parameters_
Definition: ProteinIdentification.h:491
OpenMS::DataArrays::StringDataArray StringDataArray
String data array vector type.
Definition: ProteinIdentification.h:126
OpenMS::DataArrays::IntegerDataArray IntegerDataArray
Integer data array vector type.
Definition: ProteinIdentification.h:129
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:266
double probability
Probability of this group.
Definition: ProteinIdentification.h:133
StringDataArrays string_data_arrays_
String data arrays.
Definition: ProteinIdentification.h:242
Representation of a digestion enzyme for proteins (protease)
Definition: DigestionEnzymeProtein.h:48
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:269