Bitcoin Core  31.0.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1 // Copyright (c) 2009-2010 Satoshi Nakamoto
2 // Copyright (c) 2009-present The Bitcoin Core developers
3 // Distributed under the MIT software license, see the accompanying
4 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 
6 #include <util/strencodings.h>
7 
8 #include <crypto/hex_base.h>
9 #include <span.h>
10 
11 #include <array>
12 #include <cassert>
13 #include <cstring>
14 #include <limits>
15 #include <optional>
16 #include <ostream>
17 #include <string>
18 #include <vector>
19 
/** ASCII letters and digits; every whitelist below starts with these. */
static const std::string CHARS_ALPHA_NUM{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"};

/** Character whitelists, indexed by the `rule` argument of SanitizeString(). */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()",            // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@",                // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_",                     // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};

/**
 * Return a copy of `str` containing only the characters whitelisted by
 * SAFE_CHARS[rule]; all other characters are dropped, order is preserved.
 *
 * @param str   Text to filter.
 * @param rule  Index into SAFE_CHARS. It is used unchecked, so the caller
 *              must pass a valid index.
 */
std::string SanitizeString(std::string_view str, int rule)
{
    const std::string& allowed = SAFE_CHARS[rule];
    std::string kept;
    for (auto it = str.begin(); it != str.end(); ++it) {
        if (allowed.find(*it) == std::string::npos) continue; // not whitelisted
        kept += *it;
    }
    return kept;
}
40 
41 bool IsHex(std::string_view str)
42 {
43  for (char c : str) {
44  if (HexDigit(c) < 0) return false;
45  }
46  return (str.size() > 0) && (str.size()%2 == 0);
47 }
48 
49 template <typename Byte>
50 std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
51 {
52  std::vector<Byte> vch;
53  vch.reserve(str.size() / 2); // two hex characters form a single byte
54 
55  auto it = str.begin();
56  while (it != str.end()) {
57  if (IsSpace(*it)) {
58  ++it;
59  continue;
60  }
61  auto c1 = HexDigit(*(it++));
62  if (it == str.end()) return std::nullopt;
63  auto c2 = HexDigit(*(it++));
64  if (c1 < 0 || c2 < 0) return std::nullopt;
65  vch.push_back(Byte(c1 << 4) | Byte(c2));
66  }
67  return vch;
68 }
69 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
70 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
71 
72 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
73 {
74  bool valid = false;
75  size_t colon = in.find_last_of(':');
76  // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
77  bool fHaveColon = colon != in.npos;
78  bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
79  bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
80  if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
81  if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
82  in = in.substr(0, colon);
83  portOut = *n;
84  valid = (portOut != 0);
85  }
86  } else {
87  valid = true;
88  }
89  if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
90  hostOut = in.substr(1, in.size() - 2);
91  } else {
92  hostOut = in;
93  }
94 
95  return valid;
96 }
97 
98 std::string EncodeBase64(std::span<const unsigned char> input)
99 {
100  static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
101 
102  std::string str;
103  str.reserve(((input.size() + 2) / 3) * 4);
104  ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
105  while (str.size() % 4) str += '=';
106  return str;
107 }
108 
109 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
110 {
111  static const int8_t decode64_table[256]{
112  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
113  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114  -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
115  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
116  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
117  29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
118  49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
119  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
125  };
126 
127  if (str.size() % 4 != 0) return {};
128  /* One or two = characters at the end are permitted. */
129  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
130  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131 
132  std::vector<unsigned char> ret;
133  ret.reserve((str.size() * 3) / 4);
134  bool valid = ConvertBits<6, 8, false>(
135  [&](unsigned char c) { ret.push_back(c); },
136  str.begin(), str.end(),
137  [](char c) { return decode64_table[uint8_t(c)]; }
138  );
139  if (!valid) return {};
140 
141  return ret;
142 }
143 
144 std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
145 {
146  static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
147 
148  std::string str;
149  str.reserve(((input.size() + 4) / 5) * 8);
150  ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
151  if (pad) {
152  while (str.size() % 8) {
153  str += '=';
154  }
155  }
156  return str;
157 }
158 
159 std::string EncodeBase32(std::string_view str, bool pad)
160 {
161  return EncodeBase32(MakeUCharSpan(str), pad);
162 }
163 
164 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
165 {
166  static const int8_t decode32_table[256]{
167  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
168  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
170  -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
171  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
172  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
173  23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
174  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
180  };
181 
182  if (str.size() % 8 != 0) return {};
183  /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
184  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
185  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
186  if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
187  if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
188 
189  std::vector<unsigned char> ret;
190  ret.reserve((str.size() * 5) / 8);
191  bool valid = ConvertBits<5, 8, false>(
192  [&](unsigned char c) { ret.push_back(c); },
193  str.begin(), str.end(),
194  [](char c) { return decode32_table[uint8_t(c)]; }
195  );
196 
197  if (!valid) return {};
198 
199  return ret;
200 }
201 
/**
 * Re-wrap `in` so that lines are broken at spaces once they exceed `width`.
 *
 * - A source line that fits in the remaining width is copied unchanged,
 *   together with its terminating newline (if any).
 * - A longer line is broken at the last space or newline within the remaining
 *   width; if there is none, at the first space or newline that follows, so an
 *   over-long word is emitted whole. The break character is replaced by '\n'.
 * - After a break at a space, the continuation line starts with `indent`
 *   spaces, and those count against `width`. After a break at a newline no
 *   indentation is added.
 *
 * The result is accumulated directly in a std::string, so this function does
 * not depend on <sstream>.
 *
 * @param in      Text to wrap.
 * @param width   Maximum line width; must be >= indent (asserted).
 * @param indent  Number of spaces prepended to continuation lines.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // start of the text not yet emitted
    size_t indented = 0; // columns already used by indentation on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // Fits: copy the line including its newline (substr clamps at the end).
            out += in.substr(ptr, linelen + 1);
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string_view::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string_view::npos) {
                // End of the string, just add it and break
                out += in.substr(ptr);
                break;
            }
        }
        out += in.substr(ptr, finalspace - ptr);
        out += '\n';
        if (in[finalspace] == '\n') {
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
243 
/** Upper bound for the mantissa: 10^18 - 1, i.e. eighteen decimal nines. */
static const int64_t UPPER_BOUND{1000000000000000000LL - 1LL};

/**
 * Helper for ParseFixedPoint: feed one decimal digit into the mantissa.
 *
 * Zeros are not applied immediately; they are only counted in
 * `mantissa_tzeros`. When a non-zero digit arrives, the mantissa is multiplied
 * by ten once for each pending zero and once for the digit itself, then the
 * digit is added and the counter reset.
 *
 * @return false if a multiplication would push the mantissa past UPPER_BOUND
 *         (`mantissa` may already have been partially scaled); true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    const int64_t shift_limit = UPPER_BOUND / 10LL;
    // One decimal shift per pending zero, plus one for this digit.
    for (int pending = mantissa_tzeros; pending >= 0; --pending) {
        if (mantissa > shift_limit) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
270 
271 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
272 {
273  int64_t mantissa = 0;
274  int64_t exponent = 0;
275  int mantissa_tzeros = 0;
276  bool mantissa_sign = false;
277  bool exponent_sign = false;
278  int ptr = 0;
279  int end = val.size();
280  int point_ofs = 0;
281 
282  if (ptr < end && val[ptr] == '-') {
283  mantissa_sign = true;
284  ++ptr;
285  }
286  if (ptr < end)
287  {
288  if (val[ptr] == '0') {
289  /* pass single 0 */
290  ++ptr;
291  } else if (val[ptr] >= '1' && val[ptr] <= '9') {
292  while (ptr < end && IsDigit(val[ptr])) {
293  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
294  return false; /* overflow */
295  ++ptr;
296  }
297  } else return false; /* missing expected digit */
298  } else return false; /* empty string or loose '-' */
299  if (ptr < end && val[ptr] == '.')
300  {
301  ++ptr;
302  if (ptr < end && IsDigit(val[ptr]))
303  {
304  while (ptr < end && IsDigit(val[ptr])) {
305  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
306  return false; /* overflow */
307  ++ptr;
308  ++point_ofs;
309  }
310  } else return false; /* missing expected digit */
311  }
312  if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
313  {
314  ++ptr;
315  if (ptr < end && val[ptr] == '+')
316  ++ptr;
317  else if (ptr < end && val[ptr] == '-') {
318  exponent_sign = true;
319  ++ptr;
320  }
321  if (ptr < end && IsDigit(val[ptr])) {
322  while (ptr < end && IsDigit(val[ptr])) {
323  if (exponent > (UPPER_BOUND / 10LL))
324  return false; /* overflow */
325  exponent = exponent * 10 + val[ptr] - '0';
326  ++ptr;
327  }
328  } else return false; /* missing expected digit */
329  }
330  if (ptr != end)
331  return false; /* trailing garbage */
332 
333  /* finalize exponent */
334  if (exponent_sign)
335  exponent = -exponent;
336  exponent = exponent - point_ofs + mantissa_tzeros;
337 
338  /* finalize mantissa */
339  if (mantissa_sign)
340  mantissa = -mantissa;
341 
342  /* convert to one 64-bit fixed-point value */
343  exponent += decimals;
344  if (exponent < 0)
345  return false; /* cannot represent values smaller than 10^-decimals */
346  if (exponent >= 18)
347  return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
348 
349  for (int i=0; i < exponent; ++i) {
350  if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
351  return false; /* overflow */
352  mantissa *= 10;
353  }
354  if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
355  return false; /* overflow */
356 
357  if (amount_out)
358  *amount_out = mantissa;
359 
360  return true;
361 }
362 
363 std::string ToLower(std::string_view str)
364 {
365  std::string r;
366  r.reserve(str.size());
367  for (auto ch : str) r += ToLower(ch);
368  return r;
369 }
370 
371 std::string ToUpper(std::string_view str)
372 {
373  std::string r;
374  r.reserve(str.size());
375  for (auto ch : str) r += ToUpper(ch);
376  return r;
377 }
378 
379 std::string Capitalize(std::string str)
380 {
381  if (str.empty()) return str;
382  str[0] = ToUpper(str.front());
383  return str;
384 }
385 
386 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
387 {
388  if (str.empty()) {
389  return std::nullopt;
390  }
391  auto multiplier = default_multiplier;
392  char unit = str.back();
393  switch (unit) {
394  case 'k':
395  multiplier = ByteUnit::k;
396  break;
397  case 'K':
398  multiplier = ByteUnit::K;
399  break;
400  case 'm':
401  multiplier = ByteUnit::m;
402  break;
403  case 'M':
404  multiplier = ByteUnit::M;
405  break;
406  case 'g':
407  multiplier = ByteUnit::g;
408  break;
409  case 'G':
410  multiplier = ByteUnit::G;
411  break;
412  case 't':
413  multiplier = ByteUnit::t;
414  break;
415  case 'T':
416  multiplier = ByteUnit::T;
417  break;
418  default:
419  unit = 0;
420  break;
421  }
422 
423  uint64_t unit_amount = static_cast<uint64_t>(multiplier);
424  auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
425  if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
426  return std::nullopt;
427  }
428  return *parsed_num * unit_amount;
429 }
int ret
constexpr auto MakeUCharSpan(const V &v) -> decltype(UCharSpanCast(std::span
Like the std::span constructor, but for (const) unsigned char member types only.
Definition: span.h:111
assert(!tx.IsCoinBase())
static const std::string SAFE_CHARS[]
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool IsHex(std::string_view str)
std::string EncodeBase32(std::span< const unsigned char > input, bool pad)
Base32 encode.
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:150
std::string EncodeBase64(std::span< const unsigned char > input)
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
signed char HexDigit(char c)
Definition: hex_base.cpp:64
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line...
static const std::string CHARS_ALPHA_NUM
ByteUnit
Used by ParseByteUnits(). Lowercase suffixes are base 1000; uppercase suffixes are base 1024.
Definition: strencodings.h:44
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
auto result
Definition: common-types.h:74
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:166
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.