OpenMS
Base64.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Marc Sturm, Chris Bielow, Moritz Aubermann $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
// Compile-time endianness flag used by the Base64 byte-order checks:
// `true` when the build defines OPENMS_BIG_ENDIAN, `false` otherwise.
// A pre-existing definition of OPENMS_IS_BIG_ENDIAN is left untouched.
#if !defined(OPENMS_IS_BIG_ENDIAN)
#  ifdef OPENMS_BIG_ENDIAN
#    define OPENMS_IS_BIG_ENDIAN true
#  else
#    define OPENMS_IS_BIG_ENDIAN false
#  endif
#endif
18 
19 #include <OpenMS/CONCEPT/Types.h>
23 
24 #include <algorithm>
25 #include <array>
26 #include <cmath>
27 #include <iostream>
28 #include <iterator>
29 #include <string>
30 #include <vector>
31 
32 #ifdef OPENMS_COMPILER_MSVC
33 #pragma comment(linker, "/export:compress")
34 #endif
35 
36 namespace OpenMS
37 {
43  class OPENMS_DLLAPI Base64
44  {
45 
46 public:
47 
49  Base64() = default;
50 
52  enum ByteOrder
53  {
55  BYTEORDER_LITTLEENDIAN
56  };
57 
65  template <typename FromType>
66  static void encode(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression = false);
67 
73  template <typename ToType>
74  static void decode(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression = false);
75 
83  template <typename FromType>
84  static void encodeIntegers(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression = false);
85 
91  template <typename ToType>
92  static void decodeIntegers(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression = false);
93 
106  static void encodeStrings(const std::vector<String> & in, String & out, bool zlib_compression = false, bool append_null_byte = true);
107 
117  static void decodeStrings(const String & in, std::vector<String> & out, bool zlib_compression = false);
118 
126  static void decodeSingleString(const String& in, String& out, bool zlib_compression);
127 
128 private:
129 
132  {
133  double f;
135  };
136 
139  {
140  float f;
142  };
143 
144  static const char encoder_[];
145  static const char decoder_[];
147  template <typename ToType>
148  static void decodeUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
149 
151  template <typename ToType>
152  static void decodeCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
153 
155  template <typename ToType>
156  static void decodeIntegersUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
157 
159  template <typename ToType>
160  static void decodeIntegersCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
161 
162  static void stringSimdEncoder_(std::string& in, std::string& out);
163 
164  static void stringSimdDecoder_(const std::string& in, std::string& out);
165  };
166 
167  // Possible optimization: add simd registerwise endianizer (this will only be beneficial for ARM, since mzML + x64 CPU does not need to convert since both use LITTLE_ENDIAN).
168  // mzXML(!), which is outdated uses BIG_ENDIAN, i.e. "network", in its base64 encoding, so there x64 will benefit, but not ARM.
169  // However: the code below gets optimized to the bswap instruction by most compilers, which is very fast (1 cycle latency + 1 ops)
170  // and it is doubtful that SSE4's _mm_shuffle_epi8 will do better, see https://dev.to/wunk/fast-array-reversal-with-simd-j3p
172  inline UInt32 endianize32(const UInt32& n)
173  {
174  return ((n & 0x000000ff) << 24) |
175  ((n & 0x0000ff00) << 8) |
176  ((n & 0x00ff0000) >> 8) |
177  ((n & 0xff000000) >> 24);
178  }
179 
181  inline UInt64 endianize64(const UInt64& n)
182  {
183  return ((n >> 56) & 0x00000000000000FF) |
184  ((n >> 40) & 0x000000000000FF00) |
185  ((n >> 24) & 0x0000000000FF0000) |
186  ((n >> 8) & 0x00000000FF000000) |
187  ((n << 8) & 0x000000FF00000000) |
188  ((n << 24) & 0x0000FF0000000000) |
189  ((n << 40) & 0x00FF000000000000) |
190  ((n << 56) & 0xFF00000000000000);
191  }
192 
193  template <typename FromType>
194  void Base64::encode(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression)
195  {
196  out.clear();
197  if (in.empty())
198  {
199  return;
200  }
201 
202  // initialize
203  const Size element_size = sizeof(FromType);
204  const Size input_bytes = element_size * in.size();
205  // change endianness if necessary
207  {
208  if (element_size == 4)
209  {
210  for (Size i = 0; i < in.size(); ++i)
211  {
212  Reinterpreter32_ tmp;
213  tmp.f = in[i];
214  tmp.i = endianize32(tmp.i);
215  in[i] = tmp.f;
216  }
217  }
218  else
219  {
220  for (Size i = 0; i < in.size(); ++i)
221  {
222  Reinterpreter64_ tmp;
223  tmp.f = static_cast<double>(in[i]);
224  tmp.i = endianize64(tmp.i);
225  in[i] = tmp.f;
226  }
227  }
229  }
230 
231  // encode with compression
232  if (zlib_compression)
233  {
234  String compressed;
235  ZlibCompression::compressData((void*)in.data(), input_bytes, compressed);
236  stringSimdEncoder_(compressed, out);
237  }
238  else // encode without compression
239  {
240  String str((char*)in.data(), input_bytes);
241  stringSimdEncoder_(str, out);
242  }
243 
244  }
245 
246  template <typename ToType>
247  void Base64::decode(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression)
248  {
249  if (zlib_compression)
250  {
251  decodeCompressed_(in, from_byte_order, out);
252  }
253  else
254  {
255  decodeUncompressed_(in, from_byte_order, out);
256  }
257  }
258 
259  template <int type_size>
260  inline void invertEndianess(void* byte_buffer, const size_t element_count);
261  template<>
262  inline void invertEndianess<4>(void* byte_buffer, const size_t element_count)
263  {
264  UInt32* p = reinterpret_cast<UInt32*>(byte_buffer);
265  std::transform(p, p + element_count, p, endianize32);
266  }
267  template<>
268  inline void invertEndianess<8>(void* byte_buffer, const size_t element_count)
269  {
270  UInt64* p = reinterpret_cast<UInt64*>(byte_buffer);
271  std::transform(p, p + element_count, p, endianize64);
272  }
273 
274 
275  template <typename ToType>
276  void Base64::decodeCompressed_(const String& in, ByteOrder from_byte_order, std::vector<ToType>& out)
277  {
278  out.clear();
279  if (in.empty())
280  {
281  return;
282  }
283 
284  String decompressed;
285  Base64::decodeSingleString(in, decompressed, true);
286 
287  void* byte_buffer = reinterpret_cast<void*>(&decompressed[0]);
288  Size buffer_size = decompressed.size();
289 
290  const ToType * float_buffer = reinterpret_cast<const ToType *>(byte_buffer);
291  constexpr Size element_size = sizeof(ToType);
292  if (buffer_size % element_size != 0)
293  {
294  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
295  }
296 
297  Size float_count = buffer_size / element_size;
298 
299  // change endianness if necessary
300  if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
301  {
302  invertEndianess<element_size>(byte_buffer, float_count);
303  }
304 
305  // copy values
306  out.assign(float_buffer, float_buffer + float_count);
307  }
308 
309  template <typename ToType>
310  void Base64::decodeUncompressed_(const String& in, ByteOrder from_byte_order , std::vector<ToType>& out)
311  {
312  out.clear();
313 
314  // The length of a base64 string is always a multiple of 4 (always 3
315  // bytes are encoded as 4 characters)
316  if (in.size() < 4)
317  {
318  return;
319  }
320  if (in.size() % 4 != 0)
321  {
322  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Malformed base64 input, length is not a multiple of 4.");
323  }
324 
325  Size src_size = in.size();
326  // last one or two '=' are skipped if contained
327  int padding = 0;
328  if (in[src_size - 1] == '=') padding++;
329  if (in[src_size - 2] == '=') padding++;
330 
331  src_size -= padding;
332 
333  constexpr Size element_size = sizeof(ToType);
334  String s;
335  stringSimdDecoder_(in,s);
336 
337  // change endianness if necessary (mzML is always LITTLE_ENDIAN; x64 is LITTLE_ENDIAN)
338  if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
339  {
340  invertEndianess<element_size>((void*)s.data(), s.size() / element_size);
341  }
342 
343  const char* cptr = s.data();
344  const ToType * fptr = reinterpret_cast<const ToType*>(cptr);
345  out.assign(fptr,fptr + s.size()/element_size);
346  }
347 
348  template <typename FromType>
349  void Base64::encodeIntegers(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression)
350  {
351  out.clear();
352  if (in.empty())
353  return;
354 
355  // initialize
356  const Size element_size = sizeof(FromType);
357  const Size input_bytes = element_size * in.size();
358 
359  // change endianness if necessary
361  {
362  if (element_size == 4)
363  {
364  for (Size i = 0; i < in.size(); ++i)
365  {
366  UInt32 tmp = in[i];
367  tmp = endianize32(tmp);
368  in[i] = tmp;
369  }
370  }
371  else
372  {
373  for (Size i = 0; i < in.size(); ++i)
374  {
375  UInt64 tmp = in[i];
376  tmp = endianize64(tmp);
377  in[i] = tmp;
378  }
379  }
380  }
381 
382  // encode with compression (use Qt because of zlib support)
383  if (zlib_compression)
384  {
385  String compressed;
386  ZlibCompression::compressData((void*)in.data(), input_bytes, compressed);
387  stringSimdEncoder_(compressed, out);
388  }
389  else // encode without compression
390  {
391  String str((char*)in.data(), input_bytes);
392  stringSimdEncoder_(str, out);
393  }
394  }
395 
396  template <typename ToType>
397  void Base64::decodeIntegers(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression)
398  {
399  if (zlib_compression)
400  {
401  decodeIntegersCompressed_(in, from_byte_order, out);
402  }
403  else
404  {
405  decodeIntegersUncompressed_(in, from_byte_order, out);
406  }
407  }
408 
409  template <typename ToType>
410  void Base64::decodeIntegersCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out)
411  {
412  out.clear();
413  if (in.empty())
414  return;
415 
416  constexpr Size element_size = sizeof(ToType);
417 
418  String decompressed;
419  Base64::decodeSingleString(in, decompressed, true);
420  if (decompressed.empty())
421  {
422  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Decompression error?");
423  }
424 
425  void* byte_buffer = reinterpret_cast<void*>(&decompressed[0]);
426  Size buffer_size = decompressed.size();
427 
428  // change endianness if necessary
429  if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
430  {
431  if constexpr(element_size == 4)
432  {
433  const Int32 * float_buffer = reinterpret_cast<const Int32 *>(byte_buffer);
434  if (buffer_size % element_size != 0)
435  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
436  Size float_count = buffer_size / element_size;
437  UInt32 * p = reinterpret_cast<UInt32 *>(byte_buffer);
438  std::transform(p, p + float_count, p, endianize32);
439 
440  out.resize(float_count);
441  // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
442  for (Size i = 0; i < float_count; ++i)
443  {
444  out[i] = (ToType) * float_buffer;
445  ++float_buffer;
446  }
447  }
448  else
449  {
450  const Int64 * float_buffer = reinterpret_cast<const Int64 *>(byte_buffer);
451 
452  if (buffer_size % element_size != 0)
453  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
454 
455  Size float_count = buffer_size / element_size;
456 
457  UInt64 * p = reinterpret_cast<UInt64 *>(byte_buffer);
458  std::transform(p, p + float_count, p, endianize64);
459 
460  out.resize(float_count);
461  // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
462  for (Size i = 0; i < float_count; ++i)
463  {
464  out[i] = (ToType) * float_buffer;
465  ++float_buffer;
466  }
467  }
468  }
469  else
470  {
471  if constexpr(element_size == 4)
472  {
473  const Int * float_buffer = reinterpret_cast<const Int *>(byte_buffer);
474  if (buffer_size % element_size != 0)
475  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount while decoding?");
476 
477  Size float_count = buffer_size / element_size;
478  out.resize(float_count);
479  // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
480  for (Size i = 0; i < float_count; ++i)
481  {
482  out[i] = (ToType) * float_buffer;
483  ++float_buffer;
484  }
485  }
486  else
487  {
488  const Int64 * float_buffer = reinterpret_cast<const Int64 *>(byte_buffer);
489 
490  if (buffer_size % element_size != 0)
491  throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount while decoding?");
492 
493  Size float_count = buffer_size / element_size;
494  out.resize(float_count);
495  // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
496  for (Size i = 0; i < float_count; ++i)
497  {
498  out[i] = (ToType) * float_buffer;
499  ++float_buffer;
500  }
501  }
502  }
503 
504  }
505 
506  template <typename ToType>
507  void Base64::decodeIntegersUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out)
508  {
509  out.clear();
510 
511  // The length of a base64 string is a always a multiple of 4 (always 3
512  // bytes are encoded as 4 characters)
513  if (in.size() < 4)
514  {
515  return;
516  }
517 
518  Size src_size = in.size();
519  // last one or two '=' are skipped if contained
520  int padding = 0;
521  if (in[src_size - 1] == '=') padding++;
522  if (in[src_size - 2] == '=') padding++;
523 
524  src_size -= padding;
525 
526  UInt a;
527  UInt b;
528 
529  UInt offset = 0;
530  int inc = 1;
531  UInt written = 0;
532 
533  const Size element_size = sizeof(ToType);
534 
535  // enough for either float or double
536  char element[8] = "\x00\x00\x00\x00\x00\x00\x00";
537 
538  if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
539  {
540  offset = (element_size - 1); // other endian
541  inc = -1;
542  }
543  else
544  {
545  offset = 0;
546  inc = 1;
547  }
548 
549  // reserve enough space in the output vector
550  out.reserve((UInt)(std::ceil((4.0 * src_size) / 3.0) + 6.0));
551 
552  // sort all read bytes correctly into a char[4] (double) or
553  // char[8] (float) and push_back when necessary.
554  for (Size i = 0; i < src_size; i += 4)
555  {
556 
557  // decode 4 Base64-Chars to 3 Byte
558  // -------------------------------
559 
560  // decode the first two chars
561  a = decoder_[(int)in[i] - 43] - 62;
562  b = decoder_[(int)in[i + 1] - 43] - 62;
563  if (i + 1 >= src_size)
564  {
565  b = 0;
566  }
567  // write first byte (6 bits from a and 2 highest bits from b)
568  element[offset] = (unsigned char) ((a << 2) | (b >> 4));
569  written++;
570  offset = (offset + inc) % element_size;
571 
572  if (written % element_size == 0)
573  {
574  ToType float_value;
575  if (element_size == 4)
576  {
577  Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
578  float_value = (ToType) * value;
579  }
580  else
581  {
582  Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
583  float_value = (ToType) * value;
584  }
585  out.push_back(float_value);
586  strcpy(element, "");
587  }
588 
589  // decode the third char
590  a = decoder_[(int)in[i + 2] - 43] - 62;
591  if (i + 2 >= src_size)
592  {
593  a = 0;
594  }
595  // write second byte (4 lowest bits from b and 4 highest bits from a)
596  element[offset] = (unsigned char) (((b & 15) << 4) | (a >> 2));
597  written++;
598  offset = (offset + inc) % element_size;
599 
600  if (written % element_size == 0)
601  {
602  ToType float_value;
603  if (element_size == 4)
604  {
605  Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
606  float_value = (ToType) * value;
607  }
608  else
609  {
610  Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
611  float_value = (ToType) * value;
612  }
613  out.push_back(float_value);
614  strcpy(element, "");
615  }
616 
617  // decode the fourth char
618  b = decoder_[(int)in[i + 3] - 43] - 62;
619  if (i + 3 >= src_size)
620  {
621  b = 0;
622  }
623  // write third byte (2 lowest bits from a and 6 bits from b)
624  element[offset] = (unsigned char) (((a & 3) << 6) | b);
625  written++;
626  offset = (offset + inc) % element_size;
627 
628  if (written % element_size == 0)
629  {
630  ToType float_value;
631  if (element_size == 4)
632  {
633  Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
634  float_value = (ToType) * value;
635  }
636  else
637  {
638  Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
639  float_value = (ToType) * value;
640  }
641  out.push_back(float_value);
642  strcpy(element, "");
643  }
644  }
645  }
646 
647 } //namespace OpenMS
648 
#define OPENMS_IS_BIG_ENDIAN
Definition: Base64.h:15
Class to encode and decode Base64.
Definition: Base64.h:44
static void stringSimdDecoder_(const std::string &in, std::string &out)
static void decode(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of floating point numbers.
Definition: Base64.h:247
static void decodeIntegersCompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a compressed Base64 string to a vector of integer numbers.
Definition: Base64.h:410
double f
Definition: Base64.h:133
static const char decoder_[]
Definition: Base64.h:145
static void decodeCompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a compressed Base64 string to a vector of floating point numbers.
Definition: Base64.h:276
static void decodeIntegersUncompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a Base64 string to a vector of integer numbers.
Definition: Base64.h:507
static void decodeStrings(const String &in, std::vector< String > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of (null-terminated) strings.
static void stringSimdEncoder_(std::string &in, std::string &out)
static void decodeUncompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a Base64 string to a vector of floating point numbers.
Definition: Base64.h:310
Base64()=default
default constructor
static void encodeStrings(const std::vector< String > &in, String &out, bool zlib_compression=false, bool append_null_byte=true)
Encodes a vector of strings to a Base64 string.
UInt64 i
Definition: Base64.h:134
ByteOrder
Byte order type.
Definition: Base64.h:53
@ BYTEORDER_BIGENDIAN
Big endian type.
Definition: Base64.h:54
@ BYTEORDER_LITTLEENDIAN
Little endian type.
Definition: Base64.h:55
static void encode(std::vector< FromType > &in, ByteOrder to_byte_order, String &out, bool zlib_compression=false)
Encodes a vector of floating point numbers to a Base64 string.
Definition: Base64.h:194
static void decodeIntegers(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of integer numbers.
Definition: Base64.h:397
UInt32 i
Definition: Base64.h:141
static void decodeSingleString(const String &in, String &out, bool zlib_compression)
Decodes a Base64 string.
static void encodeIntegers(std::vector< FromType > &in, ByteOrder to_byte_order, String &out, bool zlib_compression=false)
Encodes a vector of integer numbers to a Base64 string.
Definition: Base64.h:349
float f
Definition: Base64.h:140
Internal class needed for type-punning.
Definition: Base64.h:139
Internal class needed for type-punning.
Definition: Base64.h:132
Invalid conversion exception.
Definition: Exception.h:330
A more convenient string class.
Definition: String.h:34
static void compressData(const void *raw_data, const size_t in_length, std::string &compressed_data)
Compresses data using zlib directly.
int32_t Int32
Signed integer type (32bit)
Definition: Types.h:26
int64_t Int64
Signed integer type (64bit)
Definition: Types.h:40
int Int
Signed integer type.
Definition: Types.h:72
uint32_t UInt32
Unsigned integer type (32bit)
Definition: Types.h:33
uint64_t UInt64
Unsigned integer type (64bit)
Definition: Types.h:47
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
UInt32 endianize32(const UInt32 &n)
Endianizes a 32 bit type from big endian to little endian and vice versa.
Definition: Base64.h:172
void invertEndianess< 4 >(void *byte_buffer, const size_t element_count)
Definition: Base64.h:262
void invertEndianess(void *byte_buffer, const size_t element_count)
void invertEndianess< 8 >(void *byte_buffer, const size_t element_count)
Definition: Base64.h:268
UInt64 endianize64(const UInt64 &n)
Endianizes a 64 bit type from big endian to little endian and vice versa.
Definition: Base64.h:181