OpenMS
Loading...
Searching...
No Matches
FASTAFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: Chris Bielow, Nora Wild $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <fstream>
16#include <utility>
17#include <vector>
18
19namespace OpenMS
20{
// Reader/writer for FASTA files. Derives from ProgressLogger, and offers both
// a streamed interface (readStart()/readNext(), writeStart()/writeNext()/writeEnd())
// and whole-file convenience calls (load()/store()).
// NOTE(review): the leading numbers are line numbers of the original header; the
// numbering has gaps, so some lines of the header are not visible in this listing.
34 class OPENMS_DLLAPI FASTAFile : public ProgressLogger
35 {
36 public:
// Body of the nested FASTAEntry type. Its header line and the declarations of the
// members used below (identifier, description, sequence) are not visible here --
// presumably they sit on the skipped lines 45 and 47-49; confirm against the header.
46 {
50
// Default constructor: compiler-generated.
51 FASTAEntry() = default;
52
// Builds an entry by copying the given identifier, description and sequence.
53 FASTAEntry(const String& id, const String& desc, const String& seq) :
54 identifier(id),
55 description(desc),
56 sequence(seq)
57 {
58 }
59
// Copy constructor: compiler-generated member-wise copy.
60 FASTAEntry(const FASTAEntry& rhs) = default;
61
// Move constructor: moves each of the three members out of rhs.
62 FASTAEntry(FASTAEntry&& rhs) noexcept
63 :
64 identifier(::std::move(rhs.identifier)),
65 description(::std::move(rhs.description)),
66 sequence(::std::move(rhs.sequence))
67 {
68 }
69
70
// Copy assignment: compiler-generated.
// NOTE(review): no move assignment operator is declared in lines 50-90. Because a
// copy assignment operator is user-declared, the compiler does not generate a move
// assignment, so assigning from an rvalue falls back to a copy -- confirm intended.
71 FASTAEntry& operator=(const FASTAEntry& rhs) = default;
72
// Full equality: identifier, description and sequence must all be equal.
73 bool operator==(const FASTAEntry& rhs) const
74 {
75 return identifier == rhs.identifier
76 && description == rhs.description
77 && sequence == rhs.sequence;
78 }
79
// True if identifier and description are equal; the sequence is not compared.
80 bool headerMatches(const FASTAEntry& rhs) const
81 {
82 return identifier == rhs.identifier &&
83 description == rhs.description;
84 }
85
// True if the sequences are equal; identifier and description are not compared.
86 bool sequenceMatches(const FASTAEntry& rhs) const
87 {
88 return sequence == rhs.sequence;
89 }
90 };
91
// Default constructor.
93 FASTAFile() = default;
94
// Destructor (compiler-generated; marked override).
96 ~FASTAFile() override = default;
97
// Prepares the FASTA file given by filename for streamed reading using readNext().
104 void readStart(const String& filename);
105
// Same as readStart(), but with internal progress logging whenever
// readNextWithProgress() is called.
107 void readStartWithProgress(const String& filename, const String& progress_label);
108
// Reads the next FASTA entry from the file into protein.
// The meaning of the bool result is not shown in this listing.
117 bool readNext(FASTAEntry& protein);
118
// NOTE(review): readNextWithProgress(FASTAEntry&) is referenced by the documentation
// above but its declaration is not visible here (numbering skips 119-121).
122
// Current stream position when reading a file.
124 std::streampos position();
125
// Is the read stream at EOF?
127 bool atEnd();
128
// Seeks the read stream to pos. The meaning of the bool result is not shown here.
130 bool setPosition(const std::streampos& pos);
131
// Prepares the FASTA file given by filename for streamed writing using writeNext().
136 void writeStart(const String& filename);
137
// Stores the data given by protein. Call writeStart() once before calling writeNext().
143 void writeNext(const FASTAEntry& protein);
144
// Closes the output file (flush).
148 void writeEnd();
149
150
// Loads the FASTA file given by filename and stores its entries in data.
157 void load(const String& filename, std::vector<FASTAEntry>& data) const;
158
// Stores the entries in data to the file given by filename.
165 void store(const String& filename, const std::vector<FASTAEntry>& data) const;
166
167 protected:
// Reads one protein entry from the current file position. The three non-const
// reference parameters are presumably outputs (id, description, sequence) -- the
// implementation is not visible here; confirm. Meaning of the bool result not shown.
172 bool readEntry_(std::string& id, std::string& description, std::string& seq);
173
// File stream for reading; initialized via readStart().
174 std::fstream infile_;
// File stream for writing; initialized via writeStart().
175 std::ofstream outfile_;
// Initialized to 0; presumably counts entries read so far -- TODO confirm usage.
176 Size entries_read_{0};
// Value-initialized; presumably the input file size used for progress -- TODO confirm.
177 std::streampos fileSize_{};
// Sequence of the currently read protein.
178 std::string seq_;
// Identifier of the currently read protein.
179 std::string id_;
// Description of the currently read protein.
180 std::string description_;
181 };
182
183} // namespace OpenMS
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition FASTAFile.h:35
void writeEnd()
Closes the file (flush). Called implicitly when FASTAFile object goes out of scope.
void load(const String &filename, std::vector< FASTAEntry > &data) const
Loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
bool readNext(FASTAEntry &protein)
Reads the next FASTA entry from file.
bool setPosition(const std::streampos &pos)
seek stream to pos
std::string seq_
sequence of currently read protein
Definition FASTAFile.h:178
void readStart(const String &filename)
Prepares a FASTA file given by filename for streamed reading using readNext().
void readStartWithProgress(const String &filename, const String &progress_label)
Same as readStart(), but does internal progress logging whenever readNextWithProgress() is called.
~FASTAFile() override=default
Destructor.
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
std::ofstream outfile_
File stream for writing; initialize using FASTAFile::writeStart().
Definition FASTAFile.h:175
bool readNextWithProgress(FASTAEntry &protein)
FASTAFile()=default
Default constructor.
bool atEnd()
is stream at EOF?
bool readEntry_(std::string &id, std::string &description, std::string &seq)
Reads a protein entry from the current file position and returns the ID and sequence.
std::string id_
identifier of currently read protein
Definition FASTAFile.h:179
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext()....
std::fstream infile_
File stream for reading; initialize using FASTAFile::readStart().
Definition FASTAFile.h:174
std::string description_
description of currently read protein
Definition FASTAFile.h:180
void store(const String &filename, const std::vector< FASTAEntry > &data) const
stores the data given by 'data' at the file 'filename'
std::streampos position()
current stream position when reading a file
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
A more convenient string class.
Definition String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifie...
Definition FASTAFile.h:46
FASTAEntry & operator=(const FASTAEntry &rhs)=default
bool headerMatches(const FASTAEntry &rhs) const
Definition FASTAFile.h:80
String sequence
Definition FASTAFile.h:49
String description
Definition FASTAFile.h:48
FASTAEntry(const String &id, const String &desc, const String &seq)
Definition FASTAFile.h:53
FASTAEntry(const FASTAEntry &rhs)=default
bool operator==(const FASTAEntry &rhs) const
Definition FASTAFile.h:73
String identifier
Definition FASTAFile.h:47
bool sequenceMatches(const FASTAEntry &rhs) const
Definition FASTAFile.h:86
FASTAEntry(FASTAEntry &&rhs) noexcept
Definition FASTAFile.h:62