OpenMS
InspectOutfile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Martin Langwisch $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
44 
45 
46 namespace OpenMS
47 {
57  class OPENMS_DLLAPI InspectOutfile
58  {
59 public:
62 
64  InspectOutfile(const InspectOutfile & inspect_outfile);
65 
67  virtual ~InspectOutfile();
68 
70  InspectOutfile & operator=(const InspectOutfile & inspect_outfile);
71 
73  bool operator==(const InspectOutfile & inspect_outfile) const;
74 
86  std::vector<Size> load(const String & result_filename, std::vector<PeptideIdentification> & peptide_identifications, ProteinIdentification & protein_identification, const double p_value_threshold, const String & database_filename = "");
87 
95  std::vector<Size> getWantedRecords(const String & result_filename, double p_value_threshold);
96 
104  void compressTrieDB(const String & database_filename, const String & index_filename, std::vector<Size> & wanted_records, const String & snd_database_filename, const String & snd_index_filename, bool append = false);
105 
110  void generateTrieDB(const String & source_database_filename, const String & database_filename, const String & index_filename, bool append = false, const String& species = "");
111 
112 
115  void getACAndACType(String line, String & accession, String & accession_type);
116 
121  void getPrecursorRTandMZ(const std::vector<std::pair<String, std::vector<std::pair<Size, Size> > > > & files_and_peptide_identification_with_scan_number, std::vector<PeptideIdentification> & ids);
122 
128  void getLabels(const String & source_database_filename, String & ac_label, String & sequence_start_label, String & sequence_end_label, String & comment_label, String & species_label);
129 
134  std::vector<Size> getSequences(const String & database_filename, const std::map<Size, Size> & wanted_records, std::vector<String> & sequences);
135 
141  void getExperiment(PeakMap & exp, String & type, const String & in_filename)
142  {
143  type.clear();
144  exp.reset();
145  //input file type
146  FileHandler fh;
147  FileTypes::Type in_type = fh.getTypeByContent(in_filename);
148  if (in_type == FileTypes::UNKNOWN)
149  {
150  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not determine type of the file. Aborting!", in_filename);
151  }
152  type = FileTypes::typeToName(in_type);
153  fh.loadExperiment(in_filename, exp, in_type, ProgressLogger::NONE, false, false);
154  }
155 
161  bool getSearchEngineAndVersion(const String & cmd_output, ProteinIdentification & protein_identification);
162 
166  void readOutHeader(const String & filename, const String & header_line, Int & spectrum_file_column, Int & scan_column, Int & peptide_column, Int & protein_column, Int & charge_column, Int & MQ_score_column, Int & p_value_column, Int & record_number_column, Int & DB_file_pos_column, Int & spec_file_pos_column, Size & number_of_columns);
167 
168 protected:
173  static const Size db_pos_length_;
174  static const Size trie_db_pos_length_;
175  static const Size protein_name_length_;
176  static const Size record_length_;
177  static const char trie_delimiter_;
178  static const String score_type_;
179  };
180 
181 } //namespace OpenMS
182 
Parse Error exception.
Definition: Exception.h:624
Facilitates file handling by file type recognition.
Definition: FileHandler.h:67
static FileTypes::Type getTypeByContent(const String &filename)
Determines the file type of a file by parsing the first few lines.
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
Representation of an Inspect outfile.
Definition: InspectOutfile.h:58
InspectOutfile & operator=(const InspectOutfile &inspect_outfile)
assignment operator
static const Size record_length_
length of the whole record
Definition: InspectOutfile.h:176
static const Size trie_db_pos_length_
length of part 2) of an index record (the trie database position, judging by the member name)
Definition: InspectOutfile.h:174
static const Size db_pos_length_
length of part 1) of an index record (the database position, judging by the member name)
Definition: InspectOutfile.h:173
void getExperiment(PeakMap &exp, String &type, const String &in_filename)
Definition: InspectOutfile.h:141
void getLabels(const String &source_database_filename, String &ac_label, String &sequence_start_label, String &sequence_end_label, String &comment_label, String &species_label)
InspectOutfile(const InspectOutfile &inspect_outfile)
copy constructor
bool operator==(const InspectOutfile &inspect_outfile) const
equality operator
virtual ~InspectOutfile()
destructor
static const String score_type_
type of score
Definition: InspectOutfile.h:178
void compressTrieDB(const String &database_filename, const String &index_filename, std::vector< Size > &wanted_records, const String &snd_database_filename, const String &snd_index_filename, bool append=false)
void getPrecursorRTandMZ(const std::vector< std::pair< String, std::vector< std::pair< Size, Size > > > > &files_and_peptide_identification_with_scan_number, std::vector< PeptideIdentification > &ids)
std::vector< Size > getSequences(const String &database_filename, const std::map< Size, Size > &wanted_records, std::vector< String > &sequences)
std::vector< Size > getWantedRecords(const String &result_filename, double p_value_threshold)
void generateTrieDB(const String &source_database_filename, const String &database_filename, const String &index_filename, bool append=false, const String &species="")
void readOutHeader(const String &filename, const String &header_line, Int &spectrum_file_column, Int &scan_column, Int &peptide_column, Int &protein_column, Int &charge_column, Int &MQ_score_column, Int &p_value_column, Int &record_number_column, Int &DB_file_pos_column, Int &spec_file_pos_column, Size &number_of_columns)
read the header of an inspect output file and retrieve various information
bool getSearchEngineAndVersion(const String &cmd_output, ProteinIdentification &protein_identification)
get the search engine and its version from the output of the InsPecT executable without parameters
void getACAndACType(String line, String &accession, String &accession_type)
std::vector< Size > load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, const String &database_filename="")
static const char trie_delimiter_
the sequences in the trie database are delimited by this character
Definition: InspectOutfile.h:177
static const Size protein_name_length_
length of part 3) of an index record (the protein name, judging by the member name)
Definition: InspectOutfile.h:175
InspectOutfile()
default constructor
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
void reset()
Clear all internal data (spectra, ranges, metadata)
@ NONE
No progress logging.
Definition: ProgressLogger.h:72
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void append(const T &i, String &target)
Definition: StringConversions.h:118
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Type
Actual file types enum.
Definition: FileTypes.h:57
@ UNKNOWN
Unknown file extension.
Definition: FileTypes.h:58
static String typeToName(Type type)
Returns the name/extension of the type.