Bitcoin Core  28.1.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1 // Copyright (c) 2009-2010 Satoshi Nakamoto
2 // Copyright (c) 2009-2022 The Bitcoin Core developers
3 // Distributed under the MIT software license, see the accompanying
4 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 
6 #include <util/strencodings.h>
7 
8 #include <crypto/hex_base.h>
9 #include <span.h>
10 
11 #include <array>
12 #include <cassert>
13 #include <cstring>
14 #include <limits>
15 #include <optional>
16 #include <ostream>
17 #include <string>
18 #include <vector>
19 
/** ASCII letters (lower case, then upper case) followed by the decimal digits; the common prefix of every safe-character set below. */
static const std::string CHARS_ALPHA_NUM{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"};

/**
 * Character allow-lists consulted by SanitizeString(), which indexes this
 * array with its `rule` argument. The trailing comment on each entry names
 * the rule that slot corresponds to.
 */
static const std::string SAFE_CHARS[]{
    CHARS_ALPHA_NUM + " .,;-_/:?@()",            // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@",                // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_",                     // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};
29 
30 std::string SanitizeString(std::string_view str, int rule)
31 {
32  std::string result;
33  for (char c : str) {
34  if (SAFE_CHARS[rule].find(c) != std::string::npos) {
35  result.push_back(c);
36  }
37  }
38  return result;
39 }
40 
41 bool IsHex(std::string_view str)
42 {
43  for (char c : str) {
44  if (HexDigit(c) < 0) return false;
45  }
46  return (str.size() > 0) && (str.size()%2 == 0);
47 }
48 
49 bool IsHexNumber(std::string_view str)
50 {
51  if (str.substr(0, 2) == "0x") str.remove_prefix(2);
52  for (char c : str) {
53  if (HexDigit(c) < 0) return false;
54  }
55  // Return false for empty string or "0x".
56  return str.size() > 0;
57 }
58 
59 template <typename Byte>
60 std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
61 {
62  std::vector<Byte> vch;
63  vch.reserve(str.size() / 2); // two hex characters form a single byte
64 
65  auto it = str.begin();
66  while (it != str.end()) {
67  if (IsSpace(*it)) {
68  ++it;
69  continue;
70  }
71  auto c1 = HexDigit(*(it++));
72  if (it == str.end()) return std::nullopt;
73  auto c2 = HexDigit(*(it++));
74  if (c1 < 0 || c2 < 0) return std::nullopt;
75  vch.push_back(Byte(c1 << 4) | Byte(c2));
76  }
77  return vch;
78 }
79 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
80 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
81 
82 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
83 {
84  bool valid = false;
85  size_t colon = in.find_last_of(':');
86  // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
87  bool fHaveColon = colon != in.npos;
88  bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
89  bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
90  if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
91  uint16_t n;
92  if (ParseUInt16(in.substr(colon + 1), &n)) {
93  in = in.substr(0, colon);
94  portOut = n;
95  valid = (portOut != 0);
96  }
97  } else {
98  valid = true;
99  }
100  if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
101  hostOut = in.substr(1, in.size() - 2);
102  } else {
103  hostOut = in;
104  }
105 
106  return valid;
107 }
108 
110 {
111  static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
112 
113  std::string str;
114  str.reserve(((input.size() + 2) / 3) * 4);
115  ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
116  while (str.size() % 4) str += '=';
117  return str;
118 }
119 
120 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
121 {
122  static const int8_t decode64_table[256]{
123  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125  -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
126  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
127  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
128  29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
129  49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
130  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
131  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
132  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
133  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
134  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
135  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
136  };
137 
138  if (str.size() % 4 != 0) return {};
139  /* One or two = characters at the end are permitted. */
140  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
141  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
142 
143  std::vector<unsigned char> ret;
144  ret.reserve((str.size() * 3) / 4);
145  bool valid = ConvertBits<6, 8, false>(
146  [&](unsigned char c) { ret.push_back(c); },
147  str.begin(), str.end(),
148  [](char c) { return decode64_table[uint8_t(c)]; }
149  );
150  if (!valid) return {};
151 
152  return ret;
153 }
154 
155 std::string EncodeBase32(Span<const unsigned char> input, bool pad)
156 {
157  static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
158 
159  std::string str;
160  str.reserve(((input.size() + 4) / 5) * 8);
161  ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
162  if (pad) {
163  while (str.size() % 8) {
164  str += '=';
165  }
166  }
167  return str;
168 }
169 
170 std::string EncodeBase32(std::string_view str, bool pad)
171 {
172  return EncodeBase32(MakeUCharSpan(str), pad);
173 }
174 
175 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
176 {
177  static const int8_t decode32_table[256]{
178  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
181  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
182  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
183  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
184  23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
185  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
187  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
188  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
189  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
190  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
191  };
192 
193  if (str.size() % 8 != 0) return {};
194  /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
195  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
196  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
197  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
198  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
199 
200  std::vector<unsigned char> ret;
201  ret.reserve((str.size() * 5) / 8);
202  bool valid = ConvertBits<5, 8, false>(
203  [&](unsigned char c) { ret.push_back(c); },
204  str.begin(), str.end(),
205  [](char c) { return decode32_table[uint8_t(c)]; }
206  );
207 
208  if (!valid) return {};
209 
210  return ret;
211 }
212 
213 namespace {
214 template <typename T>
215 bool ParseIntegral(std::string_view str, T* out)
216 {
217  static_assert(std::is_integral<T>::value);
218  // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
219  // handling leading +/- for backwards compatibility.
220  if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
221  return false;
222  }
223  const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
224  if (!opt_int) {
225  return false;
226  }
227  if (out != nullptr) {
228  *out = *opt_int;
229  }
230  return true;
231 }
232 }; // namespace
233 
234 bool ParseInt32(std::string_view str, int32_t* out)
235 {
236  return ParseIntegral<int32_t>(str, out);
237 }
238 
239 bool ParseInt64(std::string_view str, int64_t* out)
240 {
241  return ParseIntegral<int64_t>(str, out);
242 }
243 
244 bool ParseUInt8(std::string_view str, uint8_t* out)
245 {
246  return ParseIntegral<uint8_t>(str, out);
247 }
248 
249 bool ParseUInt16(std::string_view str, uint16_t* out)
250 {
251  return ParseIntegral<uint16_t>(str, out);
252 }
253 
254 bool ParseUInt32(std::string_view str, uint32_t* out)
255 {
256  return ParseIntegral<uint32_t>(str, out);
257 }
258 
259 bool ParseUInt64(std::string_view str, uint64_t* out)
260 {
261  return ParseIntegral<uint64_t>(str, out);
262 }
263 
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing '\n' characters in `in` are kept. A line that does not fit in the
 * remaining width is broken at the last space or newline that still fits; if
 * there is none, the over-long word is emitted whole and the break happens at
 * the next space or newline. Continuation lines created by breaking at a
 * space are prefixed with `indent` spaces, and that indentation counts
 * against `width`. Lines that follow an original '\n' are not indented.
 *
 * The result is accumulated directly in a std::string, so no stream object is
 * constructed and the function does not depend on <sstream> being available.
 *
 * @param[in] in      Text to wrap.
 * @param[in] width   Maximum line width; must be >= indent (asserted).
 * @param[in] indent  Number of spaces placed before each added line.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size()); // lower bound; added breaks and indentation may grow it
    size_t pos = 0;
    size_t indented = 0; // indentation already emitted on the current output line
    while (pos < in.size()) {
        size_t line_end = in.find('\n', pos);
        if (line_end == std::string_view::npos) {
            line_end = in.size();
        }
        const size_t line_len = line_end - pos;
        const size_t room = width - indented;

        if (line_len <= room) {
            // Fits as is: copy it together with its newline, if it has one.
            out += in.substr(pos, line_len + 1);
            pos = line_end + 1;
            indented = 0;
            continue;
        }

        size_t break_at = in.find_last_of(" \n", pos + room);
        if (break_at == std::string_view::npos || break_at < pos) {
            // No place to break; just include the entire word and move on
            break_at = in.find_first_of("\n ", pos);
            if (break_at == std::string_view::npos) {
                // End of the string, just add it and stop
                out += in.substr(pos);
                break;
            }
        }
        out += in.substr(pos, break_at - pos);
        out += '\n';
        if (in[break_at] == '\n') {
            // The break coincides with a real newline: the next line starts fresh.
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        pos = break_at + 1;
    }
    return out;
}
305 
/** Upper bound for mantissa: 10^18 - 1. */
static const int64_t UPPER_BOUND = 1'000'000'000'000'000'000LL - 1LL;

/**
 * Helper function for ParseFixedPoint.
 *
 * Folds one decimal digit into `mantissa`. A '0' is not applied immediately;
 * it is only counted in `mantissa_tzeros`. When a non-zero digit arrives, the
 * mantissa is multiplied by ten once for every pending zero plus once for the
 * digit itself, the digit is added, and the pending-zero count is reset.
 *
 * @param[in]     ch               Digit character to process.
 * @param[in,out] mantissa         Accumulated mantissa.
 * @param[in,out] mantissa_tzeros  Count of zeros seen since the last non-zero digit.
 * @return false if multiplying would take the mantissa past UPPER_BOUND
 *         (the mantissa may already have been partially scaled), true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }
    // One decimal shift per pending zero, plus one for this digit.
    for (int pending = mantissa_tzeros; pending >= 0; --pending) {
        if (mantissa > (UPPER_BOUND / 10LL)) return false; /* overflow */
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
332 
333 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
334 {
335  int64_t mantissa = 0;
336  int64_t exponent = 0;
337  int mantissa_tzeros = 0;
338  bool mantissa_sign = false;
339  bool exponent_sign = false;
340  int ptr = 0;
341  int end = val.size();
342  int point_ofs = 0;
343 
344  if (ptr < end && val[ptr] == '-') {
345  mantissa_sign = true;
346  ++ptr;
347  }
348  if (ptr < end)
349  {
350  if (val[ptr] == '0') {
351  /* pass single 0 */
352  ++ptr;
353  } else if (val[ptr] >= '1' && val[ptr] <= '9') {
354  while (ptr < end && IsDigit(val[ptr])) {
355  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
356  return false; /* overflow */
357  ++ptr;
358  }
359  } else return false; /* missing expected digit */
360  } else return false; /* empty string or loose '-' */
361  if (ptr < end && val[ptr] == '.')
362  {
363  ++ptr;
364  if (ptr < end && IsDigit(val[ptr]))
365  {
366  while (ptr < end && IsDigit(val[ptr])) {
367  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
368  return false; /* overflow */
369  ++ptr;
370  ++point_ofs;
371  }
372  } else return false; /* missing expected digit */
373  }
374  if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
375  {
376  ++ptr;
377  if (ptr < end && val[ptr] == '+')
378  ++ptr;
379  else if (ptr < end && val[ptr] == '-') {
380  exponent_sign = true;
381  ++ptr;
382  }
383  if (ptr < end && IsDigit(val[ptr])) {
384  while (ptr < end && IsDigit(val[ptr])) {
385  if (exponent > (UPPER_BOUND / 10LL))
386  return false; /* overflow */
387  exponent = exponent * 10 + val[ptr] - '0';
388  ++ptr;
389  }
390  } else return false; /* missing expected digit */
391  }
392  if (ptr != end)
393  return false; /* trailing garbage */
394 
395  /* finalize exponent */
396  if (exponent_sign)
397  exponent = -exponent;
398  exponent = exponent - point_ofs + mantissa_tzeros;
399 
400  /* finalize mantissa */
401  if (mantissa_sign)
402  mantissa = -mantissa;
403 
404  /* convert to one 64-bit fixed-point value */
405  exponent += decimals;
406  if (exponent < 0)
407  return false; /* cannot represent values smaller than 10^-decimals */
408  if (exponent >= 18)
409  return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
410 
411  for (int i=0; i < exponent; ++i) {
412  if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
413  return false; /* overflow */
414  mantissa *= 10;
415  }
416  if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
417  return false; /* overflow */
418 
419  if (amount_out)
420  *amount_out = mantissa;
421 
422  return true;
423 }
424 
425 std::string ToLower(std::string_view str)
426 {
427  std::string r;
428  r.reserve(str.size());
429  for (auto ch : str) r += ToLower(ch);
430  return r;
431 }
432 
433 std::string ToUpper(std::string_view str)
434 {
435  std::string r;
436  r.reserve(str.size());
437  for (auto ch : str) r += ToUpper(ch);
438  return r;
439 }
440 
441 std::string Capitalize(std::string str)
442 {
443  if (str.empty()) return str;
444  str[0] = ToUpper(str.front());
445  return str;
446 }
447 
448 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
449 {
450  if (str.empty()) {
451  return std::nullopt;
452  }
453  auto multiplier = default_multiplier;
454  char unit = str.back();
455  switch (unit) {
456  case 'k':
457  multiplier = ByteUnit::k;
458  break;
459  case 'K':
460  multiplier = ByteUnit::K;
461  break;
462  case 'm':
463  multiplier = ByteUnit::m;
464  break;
465  case 'M':
466  multiplier = ByteUnit::M;
467  break;
468  case 'g':
469  multiplier = ByteUnit::g;
470  break;
471  case 'G':
472  multiplier = ByteUnit::G;
473  break;
474  case 't':
475  multiplier = ByteUnit::t;
476  break;
477  case 'T':
478  multiplier = ByteUnit::T;
479  break;
480  default:
481  unit = 0;
482  break;
483  }
484 
485  uint64_t unit_amount = static_cast<uint64_t>(multiplier);
486  auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
487  if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
488  return std::nullopt;
489  }
490  return *parsed_num * unit_amount;
491 }
int ret
assert(!tx.IsCoinBase())
static const std::string SAFE_CHARS[]
constexpr C * end() const noexcept
Definition: span.h:176
bool IsHexNumber(std::string_view str)
Return true if the string is a hex number, optionally prefixed with "0x".
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool IsHex(std::string_view str)
constexpr std::size_t size() const noexcept
Definition: span.h:187
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:152
bool ParseUInt64(std::string_view str, uint64_t *out)
Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
std::string EncodeBase64(Span< const unsigned char > input)
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
signed char HexDigit(char c)
Definition: hex_base.cpp:63
bool ParseUInt32(std::string_view str, uint32_t *out)
Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool ParseUInt16(std::string_view str, uint16_t *out)
Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
bool ParseUInt8(std::string_view str, uint8_t *out)
Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line...
static const std::string CHARS_ALPHA_NUM
ByteUnit
Used by ParseByteUnits() Lowercase base 1000 Uppercase base 1024.
Definition: strencodings.h:41
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
bool ParseInt32(std::string_view str, int32_t *out)
Convert string to signed 32-bit integer with strict parse error feedback.
constexpr C * begin() const noexcept
Definition: span.h:175
bool ParseInt64(std::string_view str, int64_t *out)
Convert string to signed 64-bit integer with strict parse error feedback.
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:168
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
constexpr auto MakeUCharSpan(V &&v) -> decltype(UCharSpanCast(Span
Like the Span constructor, but for (const) unsigned char member types only.
Definition: span.h:304
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
std::string EncodeBase32(Span< const unsigned char > input, bool pad)
Base32 encode.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.