Bitcoin Core  29.1.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1 // Copyright (c) 2009-2010 Satoshi Nakamoto
2 // Copyright (c) 2009-2022 The Bitcoin Core developers
3 // Distributed under the MIT software license, see the accompanying
4 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 
6 #include <util/strencodings.h>
7 
8 #include <crypto/hex_base.h>
9 #include <span.h>
10 
11 #include <array>
12 #include <cassert>
13 #include <cstring>
14 #include <limits>
15 #include <optional>
16 #include <ostream>
17 #include <string>
18 #include <vector>
19 
/** The 26 lower-case ASCII letters, then the 26 upper-case ones, then the digits 0-9. */
static const std::string CHARS_ALPHA_NUM{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"};
21 
22 static const std::string SAFE_CHARS[] =
23 {
24  CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
25  CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
26  CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
27  CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
28 };
29 
30 std::string SanitizeString(std::string_view str, int rule)
31 {
32  std::string result;
33  for (char c : str) {
34  if (SAFE_CHARS[rule].find(c) != std::string::npos) {
35  result.push_back(c);
36  }
37  }
38  return result;
39 }
40 
41 bool IsHex(std::string_view str)
42 {
43  for (char c : str) {
44  if (HexDigit(c) < 0) return false;
45  }
46  return (str.size() > 0) && (str.size()%2 == 0);
47 }
48 
49 template <typename Byte>
50 std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
51 {
52  std::vector<Byte> vch;
53  vch.reserve(str.size() / 2); // two hex characters form a single byte
54 
55  auto it = str.begin();
56  while (it != str.end()) {
57  if (IsSpace(*it)) {
58  ++it;
59  continue;
60  }
61  auto c1 = HexDigit(*(it++));
62  if (it == str.end()) return std::nullopt;
63  auto c2 = HexDigit(*(it++));
64  if (c1 < 0 || c2 < 0) return std::nullopt;
65  vch.push_back(Byte(c1 << 4) | Byte(c2));
66  }
67  return vch;
68 }
69 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
70 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
71 
72 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
73 {
74  bool valid = false;
75  size_t colon = in.find_last_of(':');
76  // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
77  bool fHaveColon = colon != in.npos;
78  bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
79  bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
80  if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
81  uint16_t n;
82  if (ParseUInt16(in.substr(colon + 1), &n)) {
83  in = in.substr(0, colon);
84  portOut = n;
85  valid = (portOut != 0);
86  }
87  } else {
88  valid = true;
89  }
90  if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
91  hostOut = in.substr(1, in.size() - 2);
92  } else {
93  hostOut = in;
94  }
95 
96  return valid;
97 }
98 
100 {
101  static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
102 
103  std::string str;
104  str.reserve(((input.size() + 2) / 3) * 4);
105  ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
106  while (str.size() % 4) str += '=';
107  return str;
108 }
109 
110 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
111 {
112  static const int8_t decode64_table[256]{
113  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
115  -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
116  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
117  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
118  29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
119  49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
126  };
127 
128  if (str.size() % 4 != 0) return {};
129  /* One or two = characters at the end are permitted. */
130  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
132 
133  std::vector<unsigned char> ret;
134  ret.reserve((str.size() * 3) / 4);
135  bool valid = ConvertBits<6, 8, false>(
136  [&](unsigned char c) { ret.push_back(c); },
137  str.begin(), str.end(),
138  [](char c) { return decode64_table[uint8_t(c)]; }
139  );
140  if (!valid) return {};
141 
142  return ret;
143 }
144 
145 std::string EncodeBase32(Span<const unsigned char> input, bool pad)
146 {
147  static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
148 
149  std::string str;
150  str.reserve(((input.size() + 4) / 5) * 8);
151  ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
152  if (pad) {
153  while (str.size() % 8) {
154  str += '=';
155  }
156  }
157  return str;
158 }
159 
160 std::string EncodeBase32(std::string_view str, bool pad)
161 {
162  return EncodeBase32(MakeUCharSpan(str), pad);
163 }
164 
165 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
166 {
167  static const int8_t decode32_table[256]{
168  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
170  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
171  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
172  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
173  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
174  23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
181  };
182 
183  if (str.size() % 8 != 0) return {};
184  /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
185  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
186  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
187  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
188  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
189 
190  std::vector<unsigned char> ret;
191  ret.reserve((str.size() * 5) / 8);
192  bool valid = ConvertBits<5, 8, false>(
193  [&](unsigned char c) { ret.push_back(c); },
194  str.begin(), str.end(),
195  [](char c) { return decode32_table[uint8_t(c)]; }
196  );
197 
198  if (!valid) return {};
199 
200  return ret;
201 }
202 
203 namespace {
204 template <typename T>
205 bool ParseIntegral(std::string_view str, T* out)
206 {
207  static_assert(std::is_integral<T>::value);
208  // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
209  // handling leading +/- for backwards compatibility.
210  if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
211  return false;
212  }
213  const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
214  if (!opt_int) {
215  return false;
216  }
217  if (out != nullptr) {
218  *out = *opt_int;
219  }
220  return true;
221 }
222 }; // namespace
223 
224 bool ParseInt32(std::string_view str, int32_t* out)
225 {
226  return ParseIntegral<int32_t>(str, out);
227 }
228 
229 bool ParseInt64(std::string_view str, int64_t* out)
230 {
231  return ParseIntegral<int64_t>(str, out);
232 }
233 
234 bool ParseUInt8(std::string_view str, uint8_t* out)
235 {
236  return ParseIntegral<uint8_t>(str, out);
237 }
238 
239 bool ParseUInt16(std::string_view str, uint16_t* out)
240 {
241  return ParseIntegral<uint16_t>(str, out);
242 }
243 
244 bool ParseUInt32(std::string_view str, uint32_t* out)
245 {
246  return ParseIntegral<uint32_t>(str, out);
247 }
248 
249 bool ParseUInt64(std::string_view str, uint64_t* out)
250 {
251  return ParseIntegral<uint64_t>(str, out);
252 }
253 
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing newlines are kept. A line that does not fit is broken at the last
 * space or newline within the remaining width; a word that is itself too long
 * is emitted whole. Only continuation lines created by this function are
 * indented.
 *
 * @param[in] in      Text to wrap.
 * @param[in] width   Maximum line width; must be >= indent.
 * @param[in] indent  Number of spaces put in front of each added line.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    // Build the result directly in a string; no stream machinery is needed
    // for plain concatenation.
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;
    size_t indented = 0; // indentation already emitted on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it including its '\n', if any.
            out += in.substr(ptr, linelen + 1);
            ptr = lineend + 1;
            indented = 0;
        } else {
            size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
            if (finalspace == std::string_view::npos || finalspace < ptr) {
                // No place to break; just include the entire word and move on
                finalspace = in.find_first_of("\n ", ptr);
                if (finalspace == std::string_view::npos) {
                    // End of the string, just add it and break
                    out += in.substr(ptr);
                    break;
                }
            }
            out += in.substr(ptr, finalspace - ptr);
            out += '\n';
            if (in[finalspace] == '\n') {
                // The break coincides with a newline of the input: no indentation.
                indented = 0;
            } else if (indent) {
                out.append(indent, ' ');
                indented = indent;
            }
            ptr = finalspace + 1;
        }
    }
    return out;
}
295 
/** Upper bound for mantissa: 10^18 - 1, i.e. eighteen decimal nines. */
static constexpr int64_t UPPER_BOUND{1000000000000000000LL - 1LL};
305 
307 static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
308 {
309  if(ch == '0')
310  ++mantissa_tzeros;
311  else {
312  for (int i=0; i<=mantissa_tzeros; ++i) {
313  if (mantissa > (UPPER_BOUND / 10LL))
314  return false; /* overflow */
315  mantissa *= 10;
316  }
317  mantissa += ch - '0';
318  mantissa_tzeros = 0;
319  }
320  return true;
321 }
322 
323 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
324 {
325  int64_t mantissa = 0;
326  int64_t exponent = 0;
327  int mantissa_tzeros = 0;
328  bool mantissa_sign = false;
329  bool exponent_sign = false;
330  int ptr = 0;
331  int end = val.size();
332  int point_ofs = 0;
333 
334  if (ptr < end && val[ptr] == '-') {
335  mantissa_sign = true;
336  ++ptr;
337  }
338  if (ptr < end)
339  {
340  if (val[ptr] == '0') {
341  /* pass single 0 */
342  ++ptr;
343  } else if (val[ptr] >= '1' && val[ptr] <= '9') {
344  while (ptr < end && IsDigit(val[ptr])) {
345  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
346  return false; /* overflow */
347  ++ptr;
348  }
349  } else return false; /* missing expected digit */
350  } else return false; /* empty string or loose '-' */
351  if (ptr < end && val[ptr] == '.')
352  {
353  ++ptr;
354  if (ptr < end && IsDigit(val[ptr]))
355  {
356  while (ptr < end && IsDigit(val[ptr])) {
357  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
358  return false; /* overflow */
359  ++ptr;
360  ++point_ofs;
361  }
362  } else return false; /* missing expected digit */
363  }
364  if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
365  {
366  ++ptr;
367  if (ptr < end && val[ptr] == '+')
368  ++ptr;
369  else if (ptr < end && val[ptr] == '-') {
370  exponent_sign = true;
371  ++ptr;
372  }
373  if (ptr < end && IsDigit(val[ptr])) {
374  while (ptr < end && IsDigit(val[ptr])) {
375  if (exponent > (UPPER_BOUND / 10LL))
376  return false; /* overflow */
377  exponent = exponent * 10 + val[ptr] - '0';
378  ++ptr;
379  }
380  } else return false; /* missing expected digit */
381  }
382  if (ptr != end)
383  return false; /* trailing garbage */
384 
385  /* finalize exponent */
386  if (exponent_sign)
387  exponent = -exponent;
388  exponent = exponent - point_ofs + mantissa_tzeros;
389 
390  /* finalize mantissa */
391  if (mantissa_sign)
392  mantissa = -mantissa;
393 
394  /* convert to one 64-bit fixed-point value */
395  exponent += decimals;
396  if (exponent < 0)
397  return false; /* cannot represent values smaller than 10^-decimals */
398  if (exponent >= 18)
399  return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
400 
401  for (int i=0; i < exponent; ++i) {
402  if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
403  return false; /* overflow */
404  mantissa *= 10;
405  }
406  if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
407  return false; /* overflow */
408 
409  if (amount_out)
410  *amount_out = mantissa;
411 
412  return true;
413 }
414 
415 std::string ToLower(std::string_view str)
416 {
417  std::string r;
418  r.reserve(str.size());
419  for (auto ch : str) r += ToLower(ch);
420  return r;
421 }
422 
423 std::string ToUpper(std::string_view str)
424 {
425  std::string r;
426  r.reserve(str.size());
427  for (auto ch : str) r += ToUpper(ch);
428  return r;
429 }
430 
431 std::string Capitalize(std::string str)
432 {
433  if (str.empty()) return str;
434  str[0] = ToUpper(str.front());
435  return str;
436 }
437 
438 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
439 {
440  if (str.empty()) {
441  return std::nullopt;
442  }
443  auto multiplier = default_multiplier;
444  char unit = str.back();
445  switch (unit) {
446  case 'k':
447  multiplier = ByteUnit::k;
448  break;
449  case 'K':
450  multiplier = ByteUnit::K;
451  break;
452  case 'm':
453  multiplier = ByteUnit::m;
454  break;
455  case 'M':
456  multiplier = ByteUnit::M;
457  break;
458  case 'g':
459  multiplier = ByteUnit::g;
460  break;
461  case 'G':
462  multiplier = ByteUnit::G;
463  break;
464  case 't':
465  multiplier = ByteUnit::t;
466  break;
467  case 'T':
468  multiplier = ByteUnit::T;
469  break;
470  default:
471  unit = 0;
472  break;
473  }
474 
475  uint64_t unit_amount = static_cast<uint64_t>(multiplier);
476  auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
477  if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
478  return std::nullopt;
479  }
480  return *parsed_num * unit_amount;
481 }
int ret
assert(!tx.IsCoinBase())
static const std::string SAFE_CHARS[]
constexpr C * end() const noexcept
Definition: span.h:176
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool IsHex(std::string_view str)
constexpr std::size_t size() const noexcept
Definition: span.h:187
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:150
bool ParseUInt64(std::string_view str, uint64_t *out)
Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
std::string EncodeBase64(Span< const unsigned char > input)
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
signed char HexDigit(char c)
Definition: hex_base.cpp:63
bool ParseUInt32(std::string_view str, uint32_t *out)
Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool ParseUInt16(std::string_view str, uint16_t *out)
Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
bool ParseUInt8(std::string_view str, uint8_t *out)
Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const std::string CHARS_ALPHA_NUM
ByteUnit
Used by ParseByteUnits(). Lowercase suffixes are base 1000; uppercase suffixes are base 1024.
Definition: strencodings.h:43
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
bool ParseInt32(std::string_view str, int32_t *out)
Convert string to signed 32-bit integer with strict parse error feedback.
constexpr C * begin() const noexcept
Definition: span.h:175
auto result
Definition: common-types.h:74
bool ParseInt64(std::string_view str, int64_t *out)
Convert string to signed 64-bit integer with strict parse error feedback.
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:166
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
constexpr auto MakeUCharSpan(V &&v) -> decltype(UCharSpanCast(Span
Like the Span constructor, but for (const) unsigned char member types only.
Definition: span.h:296
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
std::string EncodeBase32(Span< const unsigned char > input, bool pad)
Base32 encode.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.