10template <
typename out_iter>
11void encode_json(std::string_view view, out_iter out) {
13 const char* hexvalues =
14 "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
15 for (uint8_t c : view) {
19 }
else if (c ==
'"') {
22 }
else if (c <= 0x1f) {
27 *out++ = hexvalues[2 * c];
28 *out++ = hexvalues[2 * c + 1];
40 return "Scheme Start";
50 return "Relative Scheme";
52 return "Relative Slash";
60 return "Path or Authority";
62 return "Special Authority Ignore Slashes";
64 return "Special Authority Slashes";
66 return "Special Relative or Authority";
78 return "unknown state";
83 std::string_view& input)
noexcept {
86 size_t location_of_first = input.find(
'#');
87 if (location_of_first == std::string_view::npos) {
90 std::string_view hash = input;
91 hash.remove_prefix(location_of_first + 1);
92 input.remove_suffix(input.size() - location_of_first);
102 path.find(
'/', 1) == std::string_view::npos && !path.empty()) {
104 helpers::substring(path, 1))) {
110 size_t last_delimiter = path.rfind(
'/');
111 if (last_delimiter != std::string::npos) {
112 path.erase(last_delimiter);
125 path.find(
'/', 1) == std::string_view::npos && !path.empty()) {
127 helpers::substring(path, 1))) {
134 size_t slash_loc = path.rfind(
'/');
135 if (slash_loc != std::string_view::npos) {
136 path.remove_suffix(path.size() - slash_loc);
145 std::string& input)
noexcept {
148 std::erase_if(input, ada::unicode::is_ascii_tab_or_newline);
152 size_t pos)
noexcept {
156 return input.substr(pos);
161 input.remove_suffix(input.size() - pos);
167#ifdef ADA_REGULAR_VISUAL_STUDIO
171 _BitScanForward(&ret, input_num);
174 return __builtin_ctzl(input_num);
185#ifndef ada_make_uint8x16_t
186#define ada_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \
187 x13, x14, x15, x16) \
189 static uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \
190 x9, x10, x11, x12, x13, x14, x15, x16}; \
191 return vld1q_u8(array); \
196 std::string_view view,
size_t location)
noexcept {
198 if (view.size() - location < 16) {
199 for (
size_t i = location; i < view.size(); i++) {
200 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'\\' ||
201 view[i] ==
'?' || view[i] ==
'[') {
205 return size_t(view.size());
207 auto to_bitmask = [](uint8x16_t input) -> uint16_t {
208 uint8x16_t bit_mask =
209 ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
210 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
211 uint8x16_t minput = vandq_u8(input, bit_mask);
212 uint8x16_t tmp = vpaddq_u8(minput, minput);
213 tmp = vpaddq_u8(tmp, tmp);
214 tmp = vpaddq_u8(tmp, tmp);
215 return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
220 uint8x16_t low_mask =
221 ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
222 0x00, 0x01, 0x04, 0x04, 0x00, 0x00, 0x03);
223 uint8x16_t high_mask =
224 ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00,
225 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
226 uint8x16_t fmask = vmovq_n_u8(0xf);
228 for (; i + 15 < view.size(); i += 16) {
229 uint8x16_t word = vld1q_u8((
const uint8_t*)view.data() + i);
230 uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
231 uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
232 uint8x16_t classify = vandq_u8(lowpart, highpart);
233 if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
234 uint8x16_t is_zero = vceqq_u8(classify, zero);
235 uint16_t is_non_zero = ~to_bitmask(is_zero);
240 if (i < view.size()) {
242 vld1q_u8((
const uint8_t*)view.data() + view.length() - 16);
243 uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
244 uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
245 uint8x16_t classify = vandq_u8(lowpart, highpart);
246 if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
247 uint8x16_t is_zero = vceqq_u8(classify, zero);
248 uint16_t is_non_zero = ~to_bitmask(is_zero);
252 return size_t(view.size());
256 std::string_view view,
size_t location)
noexcept {
258 if (view.size() - location < 16) {
259 for (
size_t i = location; i < view.size(); i++) {
260 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'\\' ||
261 view[i] ==
'?' || view[i] ==
'[') {
265 return size_t(view.size());
269 const __m128i mask1 = _mm_set1_epi8(
':');
270 const __m128i mask2 = _mm_set1_epi8(
'/');
271 const __m128i mask3 = _mm_set1_epi8(
'\\');
272 const __m128i mask4 = _mm_set1_epi8(
'?');
273 const __m128i mask5 = _mm_set1_epi8(
'[');
275 for (; i + 15 < view.size(); i += 16) {
276 __m128i word = _mm_loadu_si128((
const __m128i*)(view.data() + i));
277 __m128i m1 = _mm_cmpeq_epi8(word, mask1);
278 __m128i m2 = _mm_cmpeq_epi8(word, mask2);
279 __m128i m3 = _mm_cmpeq_epi8(word, mask3);
280 __m128i m4 = _mm_cmpeq_epi8(word, mask4);
281 __m128i m5 = _mm_cmpeq_epi8(word, mask5);
282 __m128i m = _mm_or_si128(
283 _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5);
284 int mask = _mm_movemask_epi8(m);
289 if (i < view.size()) {
291 _mm_loadu_si128((
const __m128i*)(view.data() + view.length() - 16));
292 __m128i m1 = _mm_cmpeq_epi8(word, mask1);
293 __m128i m2 = _mm_cmpeq_epi8(word, mask2);
294 __m128i m3 = _mm_cmpeq_epi8(word, mask3);
295 __m128i m4 = _mm_cmpeq_epi8(word, mask4);
296 __m128i m5 = _mm_cmpeq_epi8(word, mask5);
297 __m128i m = _mm_or_si128(
298 _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5);
299 int mask = _mm_movemask_epi8(m);
304 return size_t(view.length());
308 std::string_view view,
size_t location)
noexcept {
310 if (view.size() - location < 16) {
311 for (
size_t i = location; i < view.size(); i++) {
312 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'\\' ||
313 view[i] ==
'?' || view[i] ==
'[') {
317 return size_t(view.size());
321 const __m128i mask1 = __lsx_vrepli_b(
':');
322 const __m128i mask2 = __lsx_vrepli_b(
'/');
323 const __m128i mask3 = __lsx_vrepli_b(
'\\');
324 const __m128i mask4 = __lsx_vrepli_b(
'?');
325 const __m128i mask5 = __lsx_vrepli_b(
'[');
327 for (; i + 15 < view.size(); i += 16) {
328 __m128i word = __lsx_vld((
const __m128i*)(view.data() + i), 0);
329 __m128i m1 = __lsx_vseq_b(word, mask1);
330 __m128i m2 = __lsx_vseq_b(word, mask2);
331 __m128i m3 = __lsx_vseq_b(word, mask3);
332 __m128i m4 = __lsx_vseq_b(word, mask4);
333 __m128i m5 = __lsx_vseq_b(word, mask5);
335 __lsx_vor_v(__lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m3, m4)), m5);
336 int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
341 if (i < view.size()) {
343 __lsx_vld((
const __m128i*)(view.data() + view.length() - 16), 0);
344 __m128i m1 = __lsx_vseq_b(word, mask1);
345 __m128i m2 = __lsx_vseq_b(word, mask2);
346 __m128i m3 = __lsx_vseq_b(word, mask3);
347 __m128i m4 = __lsx_vseq_b(word, mask4);
348 __m128i m5 = __lsx_vseq_b(word, mask5);
350 __lsx_vor_v(__lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m3, m4)), m5);
351 int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
356 return size_t(view.length());
362 std::array<uint8_t, 256>
result{};
363 for (
int i : {
':',
'/',
'[',
'\\',
'?'}) {
370 std::string_view view,
size_t location)
noexcept {
371 auto const str = view.substr(location);
372 for (
auto pos = str.begin(); pos != str.end(); ++pos) {
374 return pos - str.begin() + location;
377 return size_t(view.size());
386 size_t location)
noexcept {
388 if (view.size() - location < 16) {
389 for (
size_t i = location; i < view.size(); i++) {
390 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'?' ||
395 return size_t(view.size());
397 auto to_bitmask = [](uint8x16_t input) -> uint16_t {
398 uint8x16_t bit_mask =
399 ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
400 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
401 uint8x16_t minput = vandq_u8(input, bit_mask);
402 uint8x16_t tmp = vpaddq_u8(minput, minput);
403 tmp = vpaddq_u8(tmp, tmp);
404 tmp = vpaddq_u8(tmp, tmp);
405 return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
410 uint8x16_t low_mask =
411 ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
412 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x03);
413 uint8x16_t high_mask =
414 ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00,
415 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
416 uint8x16_t fmask = vmovq_n_u8(0xf);
418 for (; i + 15 < view.size(); i += 16) {
419 uint8x16_t word = vld1q_u8((
const uint8_t*)view.data() + i);
420 uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
421 uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
422 uint8x16_t classify = vandq_u8(lowpart, highpart);
423 if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
424 uint8x16_t is_zero = vceqq_u8(classify, zero);
425 uint16_t is_non_zero = ~to_bitmask(is_zero);
430 if (i < view.size()) {
432 vld1q_u8((
const uint8_t*)view.data() + view.length() - 16);
433 uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
434 uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
435 uint8x16_t classify = vandq_u8(lowpart, highpart);
436 if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
437 uint8x16_t is_zero = vceqq_u8(classify, zero);
438 uint16_t is_non_zero = ~to_bitmask(is_zero);
442 return size_t(view.size());
446 size_t location)
noexcept {
448 if (view.size() - location < 16) {
449 for (
size_t i = location; i < view.size(); i++) {
450 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'?' ||
455 return size_t(view.size());
459 const __m128i mask1 = _mm_set1_epi8(
':');
460 const __m128i mask2 = _mm_set1_epi8(
'/');
461 const __m128i mask4 = _mm_set1_epi8(
'?');
462 const __m128i mask5 = _mm_set1_epi8(
'[');
464 for (; i + 15 < view.size(); i += 16) {
465 __m128i word = _mm_loadu_si128((
const __m128i*)(view.data() + i));
466 __m128i m1 = _mm_cmpeq_epi8(word, mask1);
467 __m128i m2 = _mm_cmpeq_epi8(word, mask2);
468 __m128i m4 = _mm_cmpeq_epi8(word, mask4);
469 __m128i m5 = _mm_cmpeq_epi8(word, mask5);
470 __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5));
471 int mask = _mm_movemask_epi8(m);
476 if (i < view.size()) {
478 _mm_loadu_si128((
const __m128i*)(view.data() + view.length() - 16));
479 __m128i m1 = _mm_cmpeq_epi8(word, mask1);
480 __m128i m2 = _mm_cmpeq_epi8(word, mask2);
481 __m128i m4 = _mm_cmpeq_epi8(word, mask4);
482 __m128i m5 = _mm_cmpeq_epi8(word, mask5);
483 __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5));
484 int mask = _mm_movemask_epi8(m);
489 return size_t(view.length());
493 size_t location)
noexcept {
495 if (view.size() - location < 16) {
496 for (
size_t i = location; i < view.size(); i++) {
497 if (view[i] ==
':' || view[i] ==
'/' || view[i] ==
'?' ||
502 return size_t(view.size());
506 const __m128i mask1 = __lsx_vrepli_b(
':');
507 const __m128i mask2 = __lsx_vrepli_b(
'/');
508 const __m128i mask4 = __lsx_vrepli_b(
'?');
509 const __m128i mask5 = __lsx_vrepli_b(
'[');
511 for (; i + 15 < view.size(); i += 16) {
512 __m128i word = __lsx_vld((
const __m128i*)(view.data() + i), 0);
513 __m128i m1 = __lsx_vseq_b(word, mask1);
514 __m128i m2 = __lsx_vseq_b(word, mask2);
515 __m128i m4 = __lsx_vseq_b(word, mask4);
516 __m128i m5 = __lsx_vseq_b(word, mask5);
517 __m128i m = __lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m4, m5));
518 int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
523 if (i < view.size()) {
525 __lsx_vld((
const __m128i*)(view.data() + view.length() - 16), 0);
526 __m128i m1 = __lsx_vseq_b(word, mask1);
527 __m128i m2 = __lsx_vseq_b(word, mask2);
528 __m128i m4 = __lsx_vseq_b(word, mask4);
529 __m128i m5 = __lsx_vseq_b(word, mask5);
530 __m128i m = __lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m4, m5));
531 int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
536 return size_t(view.length());
541 std::array<uint8_t, 256>
result{};
542 for (
int i : {
':',
'/',
'?',
'['}) {
549 size_t location)
noexcept {
550 auto const str = view.substr(location);
551 for (
auto pos = str.begin(); pos != str.end(); ++pos) {
553 return pos - str.begin() + location;
556 return size_t(view.size());
561 const bool is_special, std::string_view& view)
noexcept {
570 const size_t view_size = view.size();
572 bool found_colon =
false;
597 for (; location < view_size;
599 if (view[location] ==
'[') {
600 location = view.find(
']', location);
601 if (location == std::string_view::npos) {
605 location = view_size;
609 found_colon = view[location] ==
':';
618 for (; location < view_size;
620 if (view[location] ==
'[') {
621 location = view.find(
']', location);
622 if (location == std::string_view::npos) {
626 location = view_size;
630 found_colon = view[location] ==
':';
636 view.remove_suffix(view_size - location);
637 return {location, found_colon};
// Narrows `input` in place: characters are dropped from the front, and then
// from the back, for as long as ada::unicode::is_c0_control_or_space()
// accepts them. Only the view's bounds change; the underlying bytes are
// neither copied nor modified.
640void trim_c0_whitespace(std::string_view& input)
noexcept {
// Leading run: advance the start of the view one character at a time.
641 while (!input.empty() &&
642 ada::unicode::is_c0_control_or_space(input.front())) {
643 input.remove_prefix(1);
// Trailing run: the emptiness check also ends this loop immediately when the
// first pass has already consumed the whole view.
645 while (!input.empty() && ada::unicode::is_c0_control_or_space(input.back())) {
646 input.remove_suffix(1);
653 ada_log(
"parse_prepared_path ", input);
654 uint8_t accumulator = checkers::path_signature(input);
659 constexpr uint8_t need_encoding = 1;
660 constexpr uint8_t backslash_char = 2;
661 constexpr uint8_t dot_char = 4;
662 constexpr uint8_t percent_char = 8;
667 (special ? (accumulator == 0)
668 : ((accumulator & (need_encoding | dot_char | percent_char)) ==
670 (!may_need_slow_file_handling);
671 if (accumulator == dot_char && !may_need_slow_file_handling) {
679 if (input[0] !=
'.') {
681 bool dot_is_file =
true;
683 slashdot = input.find(
"/.", slashdot);
684 if (slashdot == std::string_view::npos) {
689 dot_is_file &= !(slashdot == input.size() || input[slashdot] ==
'.' ||
690 input[slashdot] ==
'/');
693 trivial_path = dot_is_file;
697 ada_log(
"parse_path trivial");
708 (accumulator & (need_encoding | backslash_char | percent_char)) == 0) &&
711 ada_log(
"parse_prepared_path fast");
716 size_t previous_location = 0;
718 size_t new_location = input.find(
'/', previous_location);
721 if (new_location == std::string_view::npos) {
722 std::string_view path_view = input.substr(previous_location);
723 if (path_view ==
"..") {
730 if (path.back() ==
'/') {
735 path.resize(path.rfind(
'/') + 1);
739 if (path_view !=
".") {
740 path.append(path_view);
745 std::string_view path_view =
746 input.substr(previous_location, new_location - previous_location);
747 previous_location = new_location + 1;
748 if (path_view ==
"..") {
749 size_t last_delimiter = path.rfind(
'/');
750 if (last_delimiter != std::string::npos) {
751 path.erase(last_delimiter);
753 }
else if (path_view !=
".") {
755 path.append(path_view);
760 ada_log(
"parse_path slow");
762 bool needs_percent_encoding = (accumulator & 1);
763 std::string path_buffer_tmp;
765 size_t location = (special && (accumulator & 2))
766 ? input.find_first_of(
"/\\")
768 std::string_view path_view = input;
769 if (location != std::string_view::npos) {
770 path_view.remove_suffix(path_view.size() - location);
771 input.remove_prefix(location + 1);
775 std::string_view path_buffer =
776 (needs_percent_encoding &&
777 ada::unicode::percent_encode<false>(
781 if (unicode::is_double_dot_path_segment(path_buffer)) {
782 helpers::shorten_path(path, type);
783 if (location == std::string_view::npos) {
786 }
else if (unicode::is_single_dot_path_segment(path_buffer) &&
787 (location == std::string_view::npos)) {
791 else if (!unicode::is_single_dot_path_segment(path_buffer)) {
798 path += path_buffer[0];
800 path_buffer.remove_prefix(2);
801 path.append(path_buffer);
805 path.append(path_buffer);
808 if (location == std::string_view::npos) {
// Returns true when both arguments are non-empty and the first byte of
// `input1` is located inside the character buffer of `input2`, i.e. in
// [input2.data(), input2.data() + input2.size()).
// Only the start address of `input1` is range-checked; where `input1` ends is
// not compared against `input2`.
815bool overlaps(std::string_view input1,
const std::string& input2)
noexcept {
// Diagnostic trace of both operands and their sizes.
816 ada_log(
"helpers::overlaps check if string_view '", input1,
"' [",
817 input1.size(),
" bytes] is part of string '", input2,
"' [",
818 input2.size(),
" bytes]");
// The emptiness tests come first, so the address comparison is skipped
// whenever either argument is empty.
819 return !input1.empty() && !input2.empty() && input1.data() >= input2.data() &&
820 input1.data() < input2.data() + input2.size();
// Removes trailing ' ' characters from the pathname of `url`, but only when
// `url.has_opaque_path` is set and the URL has neither a hash nor a search
// component; in every other case the URL is left untouched.
// `url_type` must provide has_opaque_path, has_hash(), has_search(),
// get_pathname() and update_base_pathname(), which are the members used below.
// NOTE(review): the line carrying the function name is not present in this
// listing; the log message below indicates it is
// strip_trailing_spaces_from_opaque_path - confirm against the header.
823template <
class url_type>
825 url_type& url)
noexcept {
826 ada_log(
"helpers::strip_trailing_spaces_from_opaque_path");
// Guard clauses: bail out unless the path is opaque and nothing follows it.
827 if (!url.has_opaque_path)
return;
828 if (url.has_hash())
return;
829 if (url.has_search())
return;
// Work on an owned copy of the pathname so it can be shortened.
831 auto path = std::string(url.get_pathname());
// Drop one trailing space per iteration until the last character is not ' '
// or the copy is empty.
832 while (!path.empty() && path.back() ==
' ') {
833 path.resize(path.size() - 1);
// Write the (possibly shortened) pathname back; this call is made even when
// no space was removed.
835 url.update_base_pathname(path);
841 std::array<uint8_t, 256>
result{};
842 for (uint8_t i : {
'@',
'/',
'\\',
'?'}) {
849find_authority_delimiter_special(std::string_view view)
noexcept {
852 for (
auto pos = view.begin(); pos != view.end(); ++pos) {
854 return pos - view.begin();
857 return size_t(view.size());
862 std::array<uint8_t, 256>
result{};
863 for (uint8_t i : {
'@',
'/',
'?'}) {
870find_authority_delimiter(std::string_view view)
noexcept {
873 for (
auto pos = view.begin(); pos != view.end(); ++pos) {
875 return pos - view.begin();
878 return size_t(view.size());
887#undef ada_make_uint8x16_t
Definitions for URL-specific checkers used within Ada.
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_really_inline
constexpr uint8_t PATH_PERCENT_ENCODE[32]
constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept
constexpr bool is_windows_drive_letter(std::string_view input) noexcept
Includes the definitions for helper functions.
ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept
static constexpr std::array< uint8_t, 256 > authority_delimiter_special
static constexpr std::array< uint8_t, 256 > host_delimiters
ada_really_inline size_t find_next_host_delimiter_special(std::string_view view, size_t location) noexcept
ada_unused std::string get_state(ada::state s)
static constexpr std::array< uint8_t, 256 > authority_delimiter
static constexpr std::array< uint8_t, 256 > special_host_delimiters
ada_really_inline int trailing_zeroes(uint32_t input_num) noexcept
@ SPECIAL_RELATIVE_OR_AUTHORITY
@ SPECIAL_AUTHORITY_SLASHES
@ SPECIAL_AUTHORITY_IGNORE_SLASHES
ada_warn_unused std::string_view to_string(encoding_type type)
tl::expected< result_type, ada::errors > result
Declarations for the URL scheme.