OpenMS
FASTAFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow, Nora Wild $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
15 #include <fstream>
16 #include <utility>
17 #include <vector>
18 
19 namespace OpenMS
20 {
34  class OPENMS_DLLAPI FASTAFile : public ProgressLogger
35  {
36  public:
45  struct FASTAEntry
46  {
50 
51  FASTAEntry() = default;
52 
53  FASTAEntry(const String& id, const String& desc, const String& seq) :
54  identifier(id),
55  description(desc),
56  sequence(seq)
57  {
58  }
59 
60  FASTAEntry(const FASTAEntry& rhs) = default;
61 
62  FASTAEntry(FASTAEntry&& rhs) noexcept
63  :
64  identifier(::std::move(rhs.identifier)),
65  description(::std::move(rhs.description)),
66  sequence(::std::move(rhs.sequence))
67  {
68  }
69 
70 
71  FASTAEntry& operator=(const FASTAEntry& rhs) = default;
72 
73  bool operator==(const FASTAEntry& rhs) const
74  {
75  return identifier == rhs.identifier
76  && description == rhs.description
77  && sequence == rhs.sequence;
78  }
79 
80  bool headerMatches(const FASTAEntry& rhs) const
81  {
82  return identifier == rhs.identifier &&
83  description == rhs.description;
84  }
85 
86  bool sequenceMatches(const FASTAEntry& rhs) const
87  {
88  return sequence == rhs.sequence;
89  }
90  };
91 
93  FASTAFile() = default;
94 
96  ~FASTAFile() override = default;
97 
104  void readStart(const String& filename);
105 
107  void readStartWithProgress(const String& filename, const String& progress_label);
108 
117  bool readNext(FASTAEntry& protein);
118 
122 
124  std::streampos position();
125 
127  bool atEnd();
128 
130  bool setPosition(const std::streampos& pos);
131 
136  void writeStart(const String& filename);
137 
143  void writeNext(const FASTAEntry& protein);
144 
148  void writeEnd();
149 
150 
157  void load(const String& filename, std::vector<FASTAEntry>& data) const;
158 
165  void store(const String& filename, const std::vector<FASTAEntry>& data) const;
166 
167  protected:
172  bool readEntry_(std::string& id, std::string& description, std::string& seq);
173 
174  std::fstream infile_;
175  std::ofstream outfile_;
176  Size entries_read_{0};
177  std::streampos fileSize_{};
178  std::string seq_;
179  std::string id_;
180  std::string description_;
181  };
182 
183 } // namespace OpenMS
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition: FASTAFile.h:35
void writeEnd()
Closes the file (flush). Called implicitly when FASTAFile object goes out of scope.
void load(const String &filename, std::vector< FASTAEntry > &data) const
Loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
bool readNext(FASTAEntry &protein)
Reads the next FASTA entry from file.
bool setPosition(const std::streampos &pos)
seek stream to pos
std::string seq_
sequence of currently read protein
Definition: FASTAFile.h:178
void readStart(const String &filename)
Prepares a FASTA file given by filename for streamed reading using readNext().
void readStartWithProgress(const String &filename, const String &progress_label)
same as readStart(), but does internal progress logging whenever readNextWithProgress() is called
~FASTAFile() override=default
Destructor.
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
std::ofstream outfile_
filestream for writing; init using FASTAFile::writeStart()
Definition: FASTAFile.h:175
bool readNextWithProgress(FASTAEntry &protein)
FASTAFile()=default
Default constructor.
bool atEnd()
is stream at EOF?
bool readEntry_(std::string &id, std::string &description, std::string &seq)
Reads a protein entry from the current file position and returns the ID and sequence.
std::string id_
identifier of currently read protein
Definition: FASTAFile.h:179
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext()....
std::fstream infile_
filestream for reading; init using FASTAFile::readStart()
Definition: FASTAFile.h:174
std::string description_
description of currently read protein
Definition: FASTAFile.h:180
void store(const String &filename, const std::vector< FASTAEntry > &data) const
stores the data given by 'data' at the file 'filename'
std::streampos position()
current stream position when reading a file
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifie...
Definition: FASTAFile.h:46
bool headerMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:80
String sequence
Definition: FASTAFile.h:49
String description
Definition: FASTAFile.h:48
FASTAEntry(const String &id, const String &desc, const String &seq)
Definition: FASTAFile.h:53
FASTAEntry(const FASTAEntry &rhs)=default
bool operator==(const FASTAEntry &rhs) const
Definition: FASTAFile.h:73
String identifier
Definition: FASTAFile.h:47
FASTAEntry & operator=(const FASTAEntry &rhs)=default
bool sequenceMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:86
FASTAEntry(FASTAEntry &&rhs) noexcept
Definition: FASTAFile.h:62