OpenMS
FuzzyStringComparator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Clemens Groepl, Stephan Aiche $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 
41 
42 #include <map>
43 #include <sstream>
44 
45 namespace OpenMS
46 {
47  namespace Internal
48  {
49  namespace ClassTest
50  {
51  void OPENMS_DLLAPI testStringSimilar(const char * file,
52  int line,
53  const std::string & string_1,
54  const char * string_1_stringified,
55  const std::string & string_2,
56  const char * string_2_stringified);
57 
58  bool OPENMS_DLLAPI isFileSimilar(const std::string &,
59  const std::string &);
60  }
61  }
62 
66  class OPENMS_DLLAPI FuzzyStringComparator
67  {
68 
69  friend void OPENMS_DLLAPI
71  const char * file,
72  int line,
73  const std::string & string_1,
74  const char * string_1_stringified,
75  const std::string & string_2,
76  const char * string_2_stringified);
77 
78  friend bool OPENMS_DLLAPI
80  const std::string &);
81 
84  {
85  };
86 
87 public:
88 
90 
91 
94 
96  virtual
98 
101 
104 
106 
108  const double & getAcceptableRelative() const;
109 
111  void setAcceptableRelative(const double rhs);
112 
114  const double & getAcceptableAbsolute() const;
115 
117  void setAcceptableAbsolute(const double rhs);
118 
120  const StringList & getWhitelist() const;
121 
124 
126  void setWhitelist(const StringList & rhs);
127 
129  void setMatchedWhitelist(const std::vector< std::pair<std::string, std::string> >& rhs);
130 
132  const std::vector< std::pair<std::string, std::string> >& getMatchedWhitelist() const;
133 
142  const int & getVerboseLevel() const;
143 
152  void setVerboseLevel(const int rhs);
153 
157  const int & getTabWidth() const;
158 
162  void setTabWidth(const int rhs);
163 
167  const int & getFirstColumn() const;
168 
172  void setFirstColumn(const int rhs);
173 
180  std::ostream & getLogDestination() const;
181 
192  void setLogDestination(std::ostream & rhs);
193 
201  bool compareStrings(std::string const & lhs, std::string const & rhs);
202 
211  bool compareStreams(std::istream & input_1, std::istream & input_2);
212 
227  bool compareFiles(const std::string & filename_1,
228  const std::string & filename_2);
229 
230 protected:
231 
240  bool compareLines_(std::string const & line_str_1, std::string const & line_str_2);
241 
243  void reportSuccess_() const;
244 
247  void reportFailure_(char const * const message) const;
248 
250  void writeWhitelistCases_(const std::string & prefix) const;
251 
254  void readNextLine_(std::istream & input_stream, std::string & line_string, int & line_number) const;
255 
257  bool openInputFileStream_(const std::string & filename, std::ifstream & input_stream) const;
258 
260  std::ostream * log_dest_;
261 
263  std::string input_1_name_;
265  std::string input_2_name_;
266 
268  struct InputLine
269  {
270  std::stringstream line_;
271  std::ios::pos_type line_position_;
272 
274 
276  void setToString(const std::string & s);
277 
280 
283 
291  bool ok() const;
292  };
293 
296 
299 
302 
303  std::string line_str_1_max_;
304  std::string line_str_2_max_;
305 
308 
310  double ratio_max_;
311 
314 
316  double absdiff_max_;
317 
320  {
321  double number;
322  unsigned char letter;
323  bool is_number;
324  bool is_space;
325 
327 
329  void reset();
330 
334  void fillFromInputLine(InputLine& input_line, const std::string& str_line);
335  };
336 
341 
343  struct PrefixInfo_
344  {
348 
349  PrefixInfo_(const InputLine & input_line, const int tab_width_, const int first_column_);
350  };
351 
353 
357 
363 
366 
370  std::map<String, UInt> whitelist_cases_;
371 
373  std::vector< std::pair<std::string, std::string> > matched_whitelist_;
374  }; // class FuzzyStringComparator
375 
376 } //namespace OpenMS
377 
Fuzzy comparison of strings, tolerates numeric differences.
Definition: FuzzyStringComparator.h:67
double ratio_max_allowed_
Maximum ratio of numbers allowed, see ratio_max_.
Definition: FuzzyStringComparator.h:307
InputLine input_line_2_
Definition: FuzzyStringComparator.h:295
FuzzyStringComparator(const FuzzyStringComparator &rhs)
Copy constructor intentionally not implemented.
int line_num_1_max_
Definition: FuzzyStringComparator.h:300
bool openInputFileStream_(const std::string &filename, std::ifstream &input_stream) const
opens and checks an input file stream std::ifstream
void reportSuccess_() const
Report good news.
void setFirstColumn(const int rhs)
set first column (for column numbers)
void setWhitelist(const StringList &rhs)
White list. If both lines contain the same element from this list, they are skipped over.
void setAcceptableRelative(const double rhs)
Acceptable relative error (a number >= 1.0)
std::string input_1_name_
Name of first input e.g., filename.
Definition: FuzzyStringComparator.h:263
int tab_width_
Definition: FuzzyStringComparator.h:355
void setTabWidth(const int rhs)
set tab width (for column numbers)
void writeWhitelistCases_(const std::string &prefix) const
Write info about hits in the whitelist.
void setMatchedWhitelist(const std::vector< std::pair< std::string, std::string > > &rhs)
Matched white list. If file 1 contains element 1 and file 2 contains element 2, they are skipped over...
const StringList & getWhitelist() const
White list. If both lines contain the same element from this list, they are skipped over.
std::string line_str_1_max_
Definition: FuzzyStringComparator.h:303
int verbose_level_
Definition: FuzzyStringComparator.h:354
const int & getFirstColumn() const
get first column (for column numbers)
StreamElement_ element_1_
Stores information about characters, numbers, and white spaces loaded from the first input stream.
Definition: FuzzyStringComparator.h:338
bool compareFiles(const std::string &filename_1, const std::string &filename_2)
Simple diff-like application to compare two input files. Numeric differences are tolerated up to a certain ratio or absolute difference.
int line_num_2_
Definition: FuzzyStringComparator.h:298
bool compareStrings(std::string const &lhs, std::string const &rhs)
Compare two strings.
const int & getTabWidth() const
get tab width (for column numbers)
std::vector< std::pair< std::string, std::string > > matched_whitelist_
Alternative Whitelist.
Definition: FuzzyStringComparator.h:373
FuzzyStringComparator()
Constructor.
void setLogDestination(std::ostream &rhs)
Log output is written to this destination.
StringList & getWhitelist()
White list. If both lines contain the same element from this list, they are skipped over.
void reportFailure_(char const *const message) const
void setVerboseLevel(const int rhs)
verbose level
const std::vector< std::pair< std::string, std::string > > & getMatchedWhitelist() const
Matched white list. If file 1 contains element 1 and file 2 contains element 2, they are skipped over...
bool is_status_success_
Has comparison been successful so far? Note: this flag is changed in reportFailure_().
Definition: FuzzyStringComparator.h:362
double absdiff_max_allowed_
Maximum absolute difference of numbers allowed, see absdiff_max_.
Definition: FuzzyStringComparator.h:313
const double & getAcceptableAbsolute() const
Acceptable absolute difference (a number >= 0.0)
StreamElement_ element_2_
Stores information about characters, numbers, and white spaces loaded from the second input stream.
Definition: FuzzyStringComparator.h:340
FuzzyStringComparator & operator=(const FuzzyStringComparator &rhs)
Assignment operator intentionally not implemented.
double ratio_max_
Maximum ratio of numbers observed so far, see ratio_max_allowed_.
Definition: FuzzyStringComparator.h:310
std::string line_str_2_max_
Definition: FuzzyStringComparator.h:304
double absdiff_max_
Maximum difference of numbers observed so far, see absdiff_max_allowed_.
Definition: FuzzyStringComparator.h:316
const int & getVerboseLevel() const
verbose level
virtual ~FuzzyStringComparator()
Destructor.
StringList whitelist_
Whitelist.
Definition: FuzzyStringComparator.h:368
int first_column_
Definition: FuzzyStringComparator.h:356
int line_num_1_
Definition: FuzzyStringComparator.h:297
bool compareLines_(std::string const &line_str_1, std::string const &line_str_2)
Compare two lines of input.
std::ostream & getLogDestination() const
Log output is written to this destination.
std::ostream * log_dest_
Log and results output goes here.
Definition: FuzzyStringComparator.h:260
std::map< String, UInt > whitelist_cases_
Occurrences of whitelist entries.
Definition: FuzzyStringComparator.h:370
void setAcceptableAbsolute(const double rhs)
Acceptable absolute difference (a number >= 0.0)
const double & getAcceptableRelative() const
Acceptable relative error (a number >= 1.0)
bool compareStreams(std::istream &input_1, std::istream &input_2)
Compare two streams of input.
bool is_absdiff_small_
Definition: FuzzyStringComparator.h:352
std::string input_2_name_
Name of second input e.g., filename.
Definition: FuzzyStringComparator.h:265
InputLine input_line_1_
Definition: FuzzyStringComparator.h:294
bool use_prefix_
use a prefix when reporting
Definition: FuzzyStringComparator.h:365
int line_num_2_max_
Definition: FuzzyStringComparator.h:301
void readNextLine_(std::istream &input_stream, std::string &line_string, int &line_number) const
Internal exception class.
Definition: FuzzyStringComparator.h:84
A more convenient string class.
Definition: String.h:60
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
void testStringSimilar(const char *file, int line, const std::string &string_1, const char *string_1_stringified, const std::string &string_2, const char *string_2_stringified)
Compare strings using absdiff_max_allowed and ratio_max_allowed.
bool isFileSimilar(const std::string &filename_1, const std::string &filename_2)
Compare files using absdiff_max_allowed and ratio_max_allowed.
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Stores information about the current input line (i.e., stream for the line and the current position in the stream).
Definition: FuzzyStringComparator.h:269
bool ok() const
Convert to bool.
void updatePosition()
Save current position of the stream.
void seekGToSavedPosition()
Resets the stream to the last saved position.
void setToString(const std::string &s)
Initialize the input line to the passed string.
std::stringstream line_
Definition: FuzzyStringComparator.h:270
std::ios::pos_type line_position_
Definition: FuzzyStringComparator.h:271
Wrapper for the prefix information computed for the failure report.
Definition: FuzzyStringComparator.h:344
OpenMS::String prefix_whitespaces
Definition: FuzzyStringComparator.h:346
int line_column
Definition: FuzzyStringComparator.h:347
OpenMS::String prefix
Definition: FuzzyStringComparator.h:345
PrefixInfo_(const InputLine &input_line, const int tab_width_, const int first_column_)
Stores information about characters, numbers, and white spaces loaded from the InputStream.
Definition: FuzzyStringComparator.h:320
double number
Definition: FuzzyStringComparator.h:321
bool is_number
Definition: FuzzyStringComparator.h:323
void fillFromInputLine(InputLine &input_line, const std::string &str_line)
bool is_space
Definition: FuzzyStringComparator.h:324
void reset()
reset all elements of the element to default value
unsigned char letter
Definition: FuzzyStringComparator.h:322