51 class PeptideIdentification;
52 class PeptideEvidence;
89 explicit Mapping(
const std::vector<ProteinIdentification>& prot_ids)
94 void create(
const std::vector<ProteinIdentification>& prot_ids)
96 identifier_to_msrunpath.clear();
97 runpath_to_identifier.clear();
101 prot_id.getPrimaryMSRunPath(filenames);
102 if (filenames.empty())
106 identifier_to_msrunpath[prot_id.getIdentifier()] = filenames;
107 const auto& it = runpath_to_identifier.find(filenames);
108 if (it != runpath_to_identifier.end())
111 "Multiple protein identifications with the same ms-run-path in Consensus/FeatureXML. Check input!\n",
114 runpath_to_identifier[filenames] = prot_id.getIdentifier();
122 const auto& filenames = identifier_to_msrunpath.at(pepid.
getIdentifier());
123 return (merge_index < filenames.size()) ? filenames[merge_index] :
"";
182 return float_data_arrays_;
186 void setFloatDataArrays(
const FloatDataArrays& fda);
189 const StringDataArrays& getStringDataArrays()
const;
192 StringDataArrays& getStringDataArrays();
195 void setStringDataArrays(
const StringDataArrays& sda);
198 const IntegerDataArrays& getIntegerDataArrays()
const;
201 IntegerDataArrays& getIntegerDataArrays();
204 void setIntegerDataArrays(
const IntegerDataArrays& ida);
209 return *std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
216 return *std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
223 return *std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
224 [&name](
const FloatDataArray& da) {
return da.getName() == name; } );
230 return *std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
237 return *std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
244 return *std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
245 [&name](
const FloatDataArray& da) {
return da.getName() == name; } );
268 static const std::string NamesOfPeakMassType[SIZE_OF_PEAKMASSTYPE];
307 std::pair<int,int> getChargeRange()
const;
316 int getChargeValue_(
String& charge_str)
const;
343 const std::vector<ProteinHit>& getHits()
const;
346 std::vector<ProteinHit>& getHits();
357 void setHits(
const std::vector<ProteinHit>& hits);
360 std::vector<ProteinHit>::iterator findHit(
const String& accession);
363 const std::vector<ProteinGroup>& getProteinGroups()
const;
365 std::vector<ProteinGroup>& getProteinGroups();
370 const std::vector<ProteinGroup>& getIndistinguishableProteins()
const;
372 std::vector<ProteinGroup>& getIndistinguishableProteins();
374 void insertIndistinguishableProteins(
const ProteinGroup& group);
376 void fillIndistinguishableGroupsWithSingletons();
379 double getSignificanceThreshold()
const;
381 void setSignificanceThreshold(
double value);
383 const String& getScoreType()
const;
385 void setScoreType(
const String& type);
387 bool isHigherScoreBetter()
const;
389 void setHigherScoreBetter(
bool higher_is_better);
401 void computeCoverage(
const std::vector<PeptideIdentification>& pep_ids);
402 void computeCoverage(
const ConsensusMap& cmap,
bool use_unassigned_ids);
411 void computeModifications(
412 const std::vector<PeptideIdentification>& pep_ids,
414 void computeModifications(
417 bool use_unassigned_ids);
422 const DateTime& getDateTime()
const;
425 void setDateTime(
const DateTime& date);
427 void setSearchEngine(
const String& search_engine);
429 const String& getSearchEngine()
const;
431 const String getOriginalSearchEngineName()
const;
433 void setSearchEngineVersion(
const String& search_engine_version);
435 const String& getSearchEngineVersion()
const;
437 void setInferenceEngine(
const String& search_engine);
439 const String getInferenceEngine()
const;
441 void setInferenceEngineVersion(
const String& inference_engine_version);
443 const String getInferenceEngineVersion()
const;
453 const String& getIdentifier()
const;
455 void setIdentifier(
const String&
id);
461 void setPrimaryMSRunPath(
const StringList& s,
bool raw =
false);
465 void addPrimaryMSRunPath(
const String& s,
bool raw =
false);
466 void addPrimaryMSRunPath(
const StringList& s,
bool raw =
false);
473 void getPrimaryMSRunPath(
StringList& output,
bool raw =
false)
const;
476 Size nrPrimaryMSRunPaths(
bool raw =
false)
const;
480 bool hasInferenceData()
const;
483 bool hasInferenceEngineAsSearchEngine()
const;
492 std::vector<std::pair<String,String>> getSearchEngineSettingsAsPairs(
const String& se =
"")
const;
520 void computeCoverageFromEvidenceMapping_(
const std::unordered_map<
String, std::set<PeptideEvidence>>& map);
521 void fillEvidenceMapping_(std::unordered_map<
String, std::set<PeptideEvidence> >& map_acc_2_evidence,
522 const std::vector<PeptideIdentification>& pep_ids)
const;
524 void fillModMapping_(
const std::vector<PeptideIdentification>& pep_ids,
const StringList& skip_modifications,
525 std::unordered_map<
String, std::set<std::pair<Size, ResidueModification>>>& prot2mod)
const;
Representation of a protein identification run.
Definition: ProteinIdentification.h:74
std::vector< FloatDataArray > FloatDataArrays
Definition: ProteinIdentification.h:135
String db
The used database.
Definition: ProteinIdentification.h:274
std::vector< StringDataArray > StringDataArrays
Definition: ProteinIdentification.h:138
A more convenient string class.
Definition: String.h:58
IntegerDataArrays integer_data_arrays_
Integer data arrays.
Definition: ProteinIdentification.h:256
StringDataArray & getStringDataArrayByName(String name)
Returns a mutable reference to the first string meta data array with the given name.
Definition: ProteinIdentification.h:214
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:279
String db_version
The database version.
Definition: ProteinIdentification.h:275
std::map< String, StringList > identifier_to_msrunpath
Definition: ProteinIdentification.h:84
IntegerDataArray & getIntegerDataArrayByName(String name)
Returns a mutable reference to the first integer meta data array with the given name.
Definition: ProteinIdentification.h:207
void create(const std::vector< ProteinIdentification > &prot_ids)
Definition: ProteinIdentification.h:94
Definition: ProteinIdentification.h:263
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:282
const StringDataArray & getStringDataArrayByName(String name) const
Returns a const reference to the first string meta data array with the given name.
Definition: ProteinIdentification.h:235
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
FloatDataArrays float_data_arrays_
Float data arrays.
Definition: ProteinIdentification.h:250
String search_engine_version_
Definition: ProteinIdentification.h:503
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:284
FloatDataArrays & getFloatDataArrays()
Returns a mutable reference to the float meta data arrays.
Definition: ProteinIdentification.h:180
A container for consensus elements.
Definition: ConsensusMap.h:83
std::map< StringList, String > runpath_to_identifier
Definition: ProteinIdentification.h:85
OpenMS::DataArrays::FloatDataArray FloatDataArray
Float data array vector type.
Definition: ProteinIdentification.h:134
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
ProteinHit HitType
Hit type definition.
Definition: ProteinIdentification.h:79
Mapping(const std::vector< ProteinIdentification > &prot_ids)
Definition: ProteinIdentification.h:89
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Search parameters of the DB search.
Definition: ProteinIdentification.h:271
bool operator<(const MultiplexDeltaMasses &dm1, const MultiplexDeltaMasses &dm2)
Float data array class.
Definition: DataArrays.h:45
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:285
String taxonomy
The taxonomy restriction.
Definition: ProteinIdentification.h:276
Integer data array class.
Definition: DataArrays.h:52
String getPrimaryMSRunPath(const PeptideIdentification &pepid) const
Definition: ProteinIdentification.h:118
PeakMassType
Peak mass type.
Definition: ProteinIdentification.h:260
FloatDataArray & getFloatDataArrayByName(String name)
Returns a mutable reference to the first float meta data array with the given name.
Definition: ProteinIdentification.h:221
const std::string ID_MERGE_INDEX
Definition: Constants.h:323
const IntegerDataArray & getIntegerDataArrayByName(String name) const
Returns a const reference to the first integer meta data array with the given name.
Definition: ProteinIdentification.h:228
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition: ProteinIdentification.h:147
Specificity
When querying for valid digestion products, this determines if the specificity of the two peptide ends ... (brief description truncated in this listing; see the full comment in EnzymaticDigestion.h)
Definition: EnzymaticDigestion.h:67
std::vector< ProteinHit > protein_hits_
Definition: ProteinIdentification.h:512
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:209
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:286
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1, or full=2)
Definition: ProteinIdentification.h:287
String protein_score_type_
Definition: ProteinIdentification.h:510
std::vector< IntegerDataArray > IntegerDataArrays
Definition: ProteinIdentification.h:141
double protein_significance_threshold_
Definition: ProteinIdentification.h:516
Two-way mapping between ms-run-path and protID|pepID-identifier.
Definition: ProteinIdentification.h:82
String id_
Definition: ProteinIdentification.h:501
UInt missed_cleavages
The number of allowed missed cleavages.
Definition: ProteinIdentification.h:281
std::vector< ProteinGroup > protein_groups_
Definition: ProteinIdentification.h:513
String search_engine_
Definition: ProteinIdentification.h:502
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Representation of a protein hit.
Definition: ProteinHit.h:58
Invalid value exception.
Definition: Exception.h:327
bool higher_score_better_
Definition: ProteinIdentification.h:511
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
DateTime date_
Definition: ProteinIdentification.h:505
Definition: ProteinIdentification.h:262
bool operator!=(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:824
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
DateTime Class.
Definition: DateTime.h:58
const FloatDataArray & getFloatDataArrayByName(String name) const
Returns a const reference to the first float meta data array with the given name.
Definition: ProteinIdentification.h:242
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:283
std::vector< ProteinGroup > indistinguishable_proteins_
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
Definition: ProteinIdentification.h:515
String data array class.
Definition: DataArrays.h:59
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:278
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:130
SearchParameters search_parameters_
Definition: ProteinIdentification.h:504
OpenMS::DataArrays::StringDataArray StringDataArray
String data array vector type.
Definition: ProteinIdentification.h:137
OpenMS::DataArrays::IntegerDataArray IntegerDataArray
Integer data array vector type.
Definition: ProteinIdentification.h:140
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:277
double probability
Probability of this group.
Definition: ProteinIdentification.h:144
StringDataArrays string_data_arrays_
String data arrays.
Definition: ProteinIdentification.h:253
const String & getIdentifier() const
Returns the identifier which links this PeptideIdentification (PI) to its corresponding ProteinIdentification.
Representation of a digestion enzyme for proteins (protease)
Definition: DigestionEnzymeProtein.h:48
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:280