Bitcoin Core  31.0.0
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or https://opensource.org/licenses/mit-license.php.
4 
5 #include <univalue.h>
6 #include <univalue_utffilter.h>
7 
8 #include <cstdint>
9 #include <cstring>
10 #include <string>
11 #include <string_view>
12 #include <vector>
13 
/*
 * Upper bound on how many arrays/objects may be open at once while parsing.
 *
 * According to stackexchange, the original JSON test suite aimed for a
 * depth limit of 22, whereas widely-deployed PHP gives up at depth 512.
 * PHP's limit is adopted here; it should be more than sufficient, since
 * further stackexchange comments indicate that depth > 32 rarely occurs.
 */
static constexpr size_t MAX_JSON_DEPTH{512};
21 
// Returns true only for the ASCII decimal digits '0' through '9'.
// The comparison is done on raw character codes, so it does not depend on
// the current locale.
static bool json_isdigit(int ch)
{
    return !(ch < '0' || ch > '9');
}
26 
27 // convert hexadecimal string to unsigned integer
28 static const char *hatoui(const char *first, const char *last,
29  unsigned int& out)
30 {
31  unsigned int result = 0;
32  for (; first != last; ++first)
33  {
34  int digit;
35  if (json_isdigit(*first))
36  digit = *first - '0';
37 
38  else if (*first >= 'a' && *first <= 'f')
39  digit = *first - 'a' + 10;
40 
41  else if (*first >= 'A' && *first <= 'F')
42  digit = *first - 'A' + 10;
43 
44  else
45  break;
46 
47  result = 16 * result + digit;
48  }
49  out = result;
50 
51  return first;
52 }
53 
54 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
55  const char *raw, const char *end)
56 {
57  tokenVal.clear();
58  consumed = 0;
59 
60  const char *rawStart = raw;
61 
62  while (raw < end && (json_isspace(*raw))) // skip whitespace
63  raw++;
64 
65  if (raw >= end)
66  return JTOK_NONE;
67 
68  switch (*raw) {
69 
70  case '{':
71  raw++;
72  consumed = (raw - rawStart);
73  return JTOK_OBJ_OPEN;
74  case '}':
75  raw++;
76  consumed = (raw - rawStart);
77  return JTOK_OBJ_CLOSE;
78  case '[':
79  raw++;
80  consumed = (raw - rawStart);
81  return JTOK_ARR_OPEN;
82  case ']':
83  raw++;
84  consumed = (raw - rawStart);
85  return JTOK_ARR_CLOSE;
86 
87  case ':':
88  raw++;
89  consumed = (raw - rawStart);
90  return JTOK_COLON;
91  case ',':
92  raw++;
93  consumed = (raw - rawStart);
94  return JTOK_COMMA;
95 
96  case 'n':
97  case 't':
98  case 'f':
99  if (!strncmp(raw, "null", 4)) {
100  raw += 4;
101  consumed = (raw - rawStart);
102  return JTOK_KW_NULL;
103  } else if (!strncmp(raw, "true", 4)) {
104  raw += 4;
105  consumed = (raw - rawStart);
106  return JTOK_KW_TRUE;
107  } else if (!strncmp(raw, "false", 5)) {
108  raw += 5;
109  consumed = (raw - rawStart);
110  return JTOK_KW_FALSE;
111  } else
112  return JTOK_ERR;
113 
114  case '-':
115  case '0':
116  case '1':
117  case '2':
118  case '3':
119  case '4':
120  case '5':
121  case '6':
122  case '7':
123  case '8':
124  case '9': {
125  // part 1: int
126  std::string numStr;
127 
128  const char *first = raw;
129 
130  const char *firstDigit = first;
131  if (!json_isdigit(*firstDigit))
132  firstDigit++;
133  if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
134  return JTOK_ERR;
135 
136  numStr += *raw; // copy first char
137  raw++;
138 
139  if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
140  return JTOK_ERR;
141 
142  while (raw < end && json_isdigit(*raw)) { // copy digits
143  numStr += *raw;
144  raw++;
145  }
146 
147  // part 2: frac
148  if (raw < end && *raw == '.') {
149  numStr += *raw; // copy .
150  raw++;
151 
152  if (raw >= end || !json_isdigit(*raw))
153  return JTOK_ERR;
154  while (raw < end && json_isdigit(*raw)) { // copy digits
155  numStr += *raw;
156  raw++;
157  }
158  }
159 
160  // part 3: exp
161  if (raw < end && (*raw == 'e' || *raw == 'E')) {
162  numStr += *raw; // copy E
163  raw++;
164 
165  if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
166  numStr += *raw;
167  raw++;
168  }
169 
170  if (raw >= end || !json_isdigit(*raw))
171  return JTOK_ERR;
172  while (raw < end && json_isdigit(*raw)) { // copy digits
173  numStr += *raw;
174  raw++;
175  }
176  }
177 
178  tokenVal = numStr;
179  consumed = (raw - rawStart);
180  return JTOK_NUMBER;
181  }
182 
183  case '"': {
184  raw++; // skip "
185 
186  std::string valStr;
187  JSONUTF8StringFilter writer(valStr);
188 
189  while (true) {
190  if (raw >= end || (unsigned char)*raw < 0x20)
191  return JTOK_ERR;
192 
193  else if (*raw == '\\') {
194  raw++; // skip backslash
195 
196  if (raw >= end)
197  return JTOK_ERR;
198 
199  switch (*raw) {
200  case '"': writer.push_back('\"'); break;
201  case '\\': writer.push_back('\\'); break;
202  case '/': writer.push_back('/'); break;
203  case 'b': writer.push_back('\b'); break;
204  case 'f': writer.push_back('\f'); break;
205  case 'n': writer.push_back('\n'); break;
206  case 'r': writer.push_back('\r'); break;
207  case 't': writer.push_back('\t'); break;
208 
209  case 'u': {
210  unsigned int codepoint;
211  if (raw + 1 + 4 >= end ||
212  hatoui(raw + 1, raw + 1 + 4, codepoint) !=
213  raw + 1 + 4)
214  return JTOK_ERR;
215  writer.push_back_u(codepoint);
216  raw += 4;
217  break;
218  }
219  default:
220  return JTOK_ERR;
221 
222  }
223 
224  raw++; // skip esc'd char
225  }
226 
227  else if (*raw == '"') {
228  raw++; // skip "
229  break; // stop scanning
230  }
231 
232  else {
233  writer.push_back(static_cast<unsigned char>(*raw));
234  raw++;
235  }
236  }
237 
238  if (!writer.finalize())
239  return JTOK_ERR;
240  tokenVal = valStr;
241  consumed = (raw - rawStart);
242  return JTOK_STRING;
243  }
244 
245  default:
246  return JTOK_ERR;
247  }
248 }
249 
// Bit flags describing what UniValue::read() requires (or forbids) as the
// next token.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // inside an object: a key string or '}' must follow
    EXP_COLON     = 0x02U, // an object key was read: ':' must follow
    EXP_ARR_VALUE = 0x04U, // inside an array: a value or ']' must follow
    EXP_VALUE     = 0x08U, // a ':' was read: a value must follow
    EXP_NOT_VALUE = 0x10U, // the next token must not start a value
};
257 
// Helpers for testing, setting and clearing EXP_* flags. They operate on a
// variable named `expectMask` that must be in scope at the point of use;
// `bit` is the flag name without its EXP_ prefix.
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
261 
262 bool UniValue::read(std::string_view str_in)
263 {
264  clear();
265 
266  uint32_t expectMask = 0;
267  std::vector<UniValue*> stack;
268 
269  std::string tokenVal;
270  unsigned int consumed;
271  enum jtokentype tok = JTOK_NONE;
272  enum jtokentype last_tok = JTOK_NONE;
273  const char* raw{str_in.data()};
274  const char* end{raw + str_in.size()};
275  do {
276  last_tok = tok;
277 
278  tok = getJsonToken(tokenVal, consumed, raw, end);
279  if (tok == JTOK_NONE || tok == JTOK_ERR)
280  return false;
281  raw += consumed;
282 
283  bool isValueOpen = jsonTokenIsValue(tok) ||
284  tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
285 
286  if (expect(VALUE)) {
287  if (!isValueOpen)
288  return false;
289  clearExpect(VALUE);
290 
291  } else if (expect(ARR_VALUE)) {
292  bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
293  if (!isArrValue)
294  return false;
295 
296  clearExpect(ARR_VALUE);
297 
298  } else if (expect(OBJ_NAME)) {
299  bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
300  if (!isObjName)
301  return false;
302 
303  } else if (expect(COLON)) {
304  if (tok != JTOK_COLON)
305  return false;
306  clearExpect(COLON);
307 
308  } else if (!expect(COLON) && (tok == JTOK_COLON)) {
309  return false;
310  }
311 
312  if (expect(NOT_VALUE)) {
313  if (isValueOpen)
314  return false;
315  clearExpect(NOT_VALUE);
316  }
317 
318  switch (tok) {
319 
320  case JTOK_OBJ_OPEN:
321  case JTOK_ARR_OPEN: {
322  VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
323  if (!stack.size()) {
324  if (utyp == VOBJ)
325  setObject();
326  else
327  setArray();
328  stack.push_back(this);
329  } else {
330  UniValue tmpVal(utyp);
331  UniValue *top = stack.back();
332  top->values.push_back(tmpVal);
333 
334  UniValue *newTop = &(top->values.back());
335  stack.push_back(newTop);
336  }
337 
338  if (stack.size() > MAX_JSON_DEPTH)
339  return false;
340 
341  if (utyp == VOBJ)
342  setExpect(OBJ_NAME);
343  else
344  setExpect(ARR_VALUE);
345  break;
346  }
347 
348  case JTOK_OBJ_CLOSE:
349  case JTOK_ARR_CLOSE: {
350  if (!stack.size() || (last_tok == JTOK_COMMA))
351  return false;
352 
353  VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
354  UniValue *top = stack.back();
355  if (utyp != top->getType())
356  return false;
357 
358  stack.pop_back();
359  clearExpect(OBJ_NAME);
360  setExpect(NOT_VALUE);
361  break;
362  }
363 
364  case JTOK_COLON: {
365  if (!stack.size())
366  return false;
367 
368  UniValue *top = stack.back();
369  if (top->getType() != VOBJ)
370  return false;
371 
372  setExpect(VALUE);
373  break;
374  }
375 
376  case JTOK_COMMA: {
377  if (!stack.size() ||
378  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
379  return false;
380 
381  UniValue *top = stack.back();
382  if (top->getType() == VOBJ)
383  setExpect(OBJ_NAME);
384  else
385  setExpect(ARR_VALUE);
386  break;
387  }
388 
389  case JTOK_KW_NULL:
390  case JTOK_KW_TRUE:
391  case JTOK_KW_FALSE: {
392  UniValue tmpVal;
393  switch (tok) {
394  case JTOK_KW_NULL:
395  // do nothing more
396  break;
397  case JTOK_KW_TRUE:
398  tmpVal.setBool(true);
399  break;
400  case JTOK_KW_FALSE:
401  tmpVal.setBool(false);
402  break;
403  default: /* impossible */ break;
404  }
405 
406  if (!stack.size()) {
407  *this = tmpVal;
408  break;
409  }
410 
411  UniValue *top = stack.back();
412  top->values.push_back(tmpVal);
413 
414  setExpect(NOT_VALUE);
415  break;
416  }
417 
418  case JTOK_NUMBER: {
419  UniValue tmpVal(VNUM, tokenVal);
420  if (!stack.size()) {
421  *this = tmpVal;
422  break;
423  }
424 
425  UniValue *top = stack.back();
426  top->values.push_back(tmpVal);
427 
428  setExpect(NOT_VALUE);
429  break;
430  }
431 
432  case JTOK_STRING: {
433  if (expect(OBJ_NAME)) {
434  UniValue *top = stack.back();
435  top->keys.push_back(tokenVal);
436  clearExpect(OBJ_NAME);
437  setExpect(COLON);
438  } else {
439  UniValue tmpVal(VSTR, tokenVal);
440  if (!stack.size()) {
441  *this = tmpVal;
442  break;
443  }
444  UniValue *top = stack.back();
445  top->values.push_back(tmpVal);
446  }
447 
448  setExpect(NOT_VALUE);
449  break;
450  }
451 
452  default:
453  return false;
454  }
455  } while (!stack.empty ());
456 
457  /* Check that nothing follows the initial construct (parsed above). */
458  tok = getJsonToken(tokenVal, consumed, raw, end);
459  if (tok != JTOK_NONE)
460  return false;
461 
462  return true;
463 }
464 
void push_back(UniValue val)
Definition: univalue.cpp:103
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:172
void setBool(bool val)
Definition: univalue.cpp:30
bool read(std::string_view raw)
#define setExpect(bit)
#define expect(bit)
enum VType getType() const
Definition: univalue.h:67
std::vector< UniValue > values
Definition: univalue.h:108
void push_back(unsigned char ch)
static constexpr size_t MAX_JSON_DEPTH
static bool json_isspace(int ch)
Definition: univalue.h:189
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
jtokentype
Definition: univalue.h:152
void setObject()
Definition: univalue.cpp:97
auto result
Definition: common-types.h:74
std::vector< std::string > keys
Definition: univalue.h:107
void clear()
Definition: univalue.cpp:17
void setArray()
Definition: univalue.cpp:91
static bool json_isdigit(int ch)
void push_back_u(unsigned int codepoint_)
#define clearExpect(bit)
expect_bits
static const char * hatoui(const char *first, const char *last, unsigned int &out)