Bitcoin Core  31.0.0
P2P Digital Currency
asmap.cpp
Go to the documentation of this file.
1 // Copyright (c) 2019-present The Bitcoin Core developers
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 
5 #include <util/asmap.h>
6 
7 #include <clientversion.h>
8 #include <hash.h>
9 #include <serialize.h>
10 #include <streams.h>
11 #include <uint256.h>
12 #include <util/fs.h>
13 #include <util/log.h>
14 
15 #include <algorithm>
16 #include <bit>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdio>
20 #include <span>
21 #include <utility>
22 #include <vector>
23 
24 /*
25  * ASMap (Autonomous System Map) Implementation
26  *
27  * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
28  * Uses a binary trie structure encoded as bytecode instructions that are interpreted
29  * at runtime to find the ASN for a given IP address.
30  *
31  * The format of the asmap data is a bit-packed binary format where the entire mapping
32  * is treated as a continuous sequence of bits. Instructions and their arguments are
33  * encoded using variable numbers of bits and concatenated together without regard for
34  * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
35  *
36  * The data structure internally represents the mapping as a binary trie where:
37  * - Unassigned subnets (no ASN mapping present) map to 0
38  * - Subnets mapped entirely to one ASN become leaf nodes
39  * - Subnets whose lower and upper halves have different mappings branch into subtrees
40  *
41  * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
42  * MATCH, DEFAULT) to efficiently represent the trie.
43  */
44 
45 namespace {
46 
47 // Indicates decoding errors or invalid data
48 constexpr uint32_t INVALID = 0xFFFFFFFF;
49 
54 inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
55 {
56  const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
57  ++bitpos;
58  return bit;
59 }
60 
65 inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
66 {
67  const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
68  ++bitpos;
69  return bit;
70 }
71 
87 uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
88 {
89  uint32_t val = minval; // Start with minimum encodable value
90  bool bit;
91  for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
92  // Read continuation bit to determine if we're in this class
93  if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class
94  if (bitpos >= data.size() * 8) break;
95  bit = ConsumeBitLE(bitpos, data);
96  } else {
97  bit = 0; // Last class has no continuation bit
98  }
99  if (bit) {
100  // If the value will not fit in this class, subtract its range from val,
101  // emit a "1" bit and continue with the next class
102  val += (1 << *bit_sizes_it); // Add size of this class
103  } else {
104  // Decode the position within this class in big endian
105  for (int b = 0; b < *bit_sizes_it; b++) {
106  if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
107  bit = ConsumeBitLE(bitpos, data);
108  val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
109  }
110  return val;
111  }
112  }
113  return INVALID; // Reached EOF in exponent
114 }
115 
123 enum class Instruction : uint32_t
124 {
125  // A return instruction, encoded as [0], returns a constant ASN.
126  // It is followed by an integer using the ASN encoding.
127  RETURN = 0,
128  // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
129  // and either continues execution (if 0), or skips a specified number of bits (if 1).
130  // It is followed by an integer using jump encoding.
131  JUMP = 1,
132  // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
133  // in the input. If they all match, execution continues. If not, the default ASN is returned
134  // (or 0 if unset). The match value encodes both the pattern and its length.
135  MATCH = 2,
136  // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
137  // and continues execution. It is followed by an integer in ASN encoding.
138  DEFAULT = 3,
139 };
140 
141 // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
142 constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
143 Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
144 {
145  return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
146 }
147 
148 // ASN encoding: Can encode ASNs from 1 to ~16.7 million.
149 // Uses variable-length encoding optimized for real-world ASN distribution.
150 // ASN 0 is reserved and used if there isn't a match.
151 constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
152 uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
153 {
154  return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
155 }
156 
157 // MATCH argument: Values in [2, 511]. The highest set bit determines the match length
158 // n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
159 constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
160 uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
161 {
162  return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
163 }
164 
165 // JUMP offset: Minimum value 17. Variable-length coded and may be large
166 // for skipping big subtrees.
167 constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
168 uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
169 {
170  return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
171 }
172 
173 } // anonymous namespace
174 
182 uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
183 {
184  size_t pos{0};
185  const size_t endpos{asmap.size() * 8};
186  uint8_t ip_bit{0};
187  const uint8_t ip_bits_end = ip.size() * 8;
188  uint32_t default_asn = 0;
189  while (pos < endpos) {
190  Instruction opcode = DecodeType(pos, asmap);
191  if (opcode == Instruction::RETURN) {
192  // Found leaf node - return the ASN
193  uint32_t asn = DecodeASN(pos, asmap);
194  if (asn == INVALID) break; // ASN straddles EOF
195  return asn;
196  } else if (opcode == Instruction::JUMP) {
197  // Binary branch: if IP bit is 1, jump forward; else continue
198  uint32_t jump = DecodeJump(pos, asmap);
199  if (jump == INVALID) break; // Jump offset straddles EOF
200  if (ip_bit == ip_bits_end) break; // No input bits left
201  if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
202  if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian)
203  pos += jump; // Bit = 1: skip to right subtree
204  }
205  // Bit = 0: fall through to left subtree
206  } else if (opcode == Instruction::MATCH) {
207  // Compare multiple IP bits against a pattern
208  // The match value encodes both length and pattern:
209  // - highest set bit position determines length (bit_width - 1)
210  // - lower bits contain the pattern to compare
211  uint32_t match = DecodeMatch(pos, asmap);
212  if (match == INVALID) break; // Match bits straddle EOF
213  int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits
214  if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
215  for (int bit = 0; bit < matchlen; bit++) {
216  if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
217  return default_asn; // Pattern mismatch - use default
218  }
219  }
220  // Pattern matched - continue execution
221  } else if (opcode == Instruction::DEFAULT) {
222  // Update the default ASN for subsequent MATCH failures
223  default_asn = DecodeASN(pos, asmap);
224  if (default_asn == INVALID) break; // ASN straddles EOF
225  } else {
226  break; // Instruction straddles EOF
227  }
228  }
229  // Reached EOF without RETURN, or aborted (see any of the breaks above)
230  // - should have been caught by SanityCheckAsmap below
231  assert(false);
232  return 0; // 0 is not a valid ASN
233 }
234 
239 bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
240 {
241  size_t pos{0};
242  const size_t endpos{asmap.size() * 8};
243  std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
244  jumps.reserve(bits);
245  Instruction prevopcode = Instruction::JUMP;
246  bool had_incomplete_match = false; // Track <8 bit matches for efficiency check
247 
248  while (pos != endpos) {
249  // There was a jump into the middle of the previous instruction
250  if (!jumps.empty() && pos >= jumps.back().first) return false;
251 
252  Instruction opcode = DecodeType(pos, asmap);
253  if (opcode == Instruction::RETURN) {
254  // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
255  if (prevopcode == Instruction::DEFAULT) return false;
256  uint32_t asn = DecodeASN(pos, asmap);
257  if (asn == INVALID) return false; // ASN straddles EOF
258  if (jumps.empty()) {
259  // Nothing to execute anymore
260  if (endpos - pos > 7) return false; // Excessive padding
261  while (pos != endpos) {
262  if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
263  }
264  return true; // Sanely reached EOF
265  } else {
266  // Continue by pretending we jumped to the next instruction
267  if (pos != jumps.back().first) return false; // Unreachable code
268  bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
269  jumps.pop_back();
270  prevopcode = Instruction::JUMP;
271  }
272  } else if (opcode == Instruction::JUMP) {
273  uint32_t jump = DecodeJump(pos, asmap);
274  if (jump == INVALID) return false; // Jump offset straddles EOF
275  if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
276  if (bits == 0) return false; // Consuming bits past the end of the input
277  --bits;
278  uint32_t jump_offset = pos + jump;
279  if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
280  jumps.emplace_back(jump_offset, bits); // Queue jump target for validation
281  prevopcode = Instruction::JUMP;
282  } else if (opcode == Instruction::MATCH) {
283  uint32_t match = DecodeMatch(pos, asmap);
284  if (match == INVALID) return false; // Match bits straddle EOF
285  int matchlen = std::bit_width(match) - 1;
286  if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
287  // Within a sequence of matches only at most one should be incomplete
288  if (matchlen < 8 && had_incomplete_match) return false;
289  had_incomplete_match = (matchlen < 8);
290  if (bits < matchlen) return false; // Consuming bits past the end of the input
291  bits -= matchlen;
292  prevopcode = Instruction::MATCH;
293  } else if (opcode == Instruction::DEFAULT) {
294  // There should not be two successive DEFAULTs (they could be combined into one)
295  if (prevopcode == Instruction::DEFAULT) return false;
296  uint32_t asn = DecodeASN(pos, asmap);
297  if (asn == INVALID) return false; // ASN straddles EOF
298  prevopcode = Instruction::DEFAULT;
299  } else {
300  return false; // Instruction straddles EOF
301  }
302  }
303  return false; // Reached EOF without RETURN instruction
304 }
305 
310 bool CheckStandardAsmap(const std::span<const std::byte> data)
311 {
312  if (!SanityCheckAsmap(data, 128)) {
313  LogWarning("Sanity check of asmap data failed\n");
314  return false;
315  }
316  return true;
317 }
318 
322 std::vector<std::byte> DecodeAsmap(fs::path path)
323 {
324  FILE *filestr = fsbridge::fopen(path, "rb");
325  AutoFile file{filestr};
326  if (file.IsNull()) {
327  LogWarning("Failed to open asmap file from disk");
328  return {};
329  }
330  int64_t length{file.size()};
331  LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
332 
333  // Read entire file into memory
334  std::vector<std::byte> buffer(length);
335  file.read(buffer);
336 
337  if (!CheckStandardAsmap(buffer)) {
338  LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
339  return {};
340  }
341 
342  return buffer;
343 }
344 
348 uint256 AsmapVersion(const std::span<const std::byte> data)
349 {
350  if (data.empty()) return {};
351 
352  HashWriter asmap_hasher;
353  asmap_hasher << data;
354  return asmap_hasher.GetHash();
355 }
bool SanityCheckAsmap(const std::span< const std::byte > asmap, int bits)
Validates ASMap structure by simulating all possible execution paths.
Definition: asmap.cpp:239
uint256 GetHash()
Compute the double-SHA256 hash of all data written to this object.
Definition: hash.h:115
Instruction
Instruction Set.
Definition: asmap.cpp:123
assert(!tx.IsCoinBase())
FILE * fopen(const fs::path &p, const char *mode)
Definition: fs.cpp:25
#define LogWarning(...)
Definition: log.h:96
Non-refcounted RAII wrapper for FILE*.
Definition: streams.h:372
std::vector< std::byte > DecodeAsmap(fs::path path)
Loads an ASMap file from disk and validates it.
Definition: asmap.cpp:322
#define LogInfo(...)
Definition: log.h:95
A writer stream (for serialization) that computes a 256-bit hash.
Definition: hash.h:100
static CService ip(uint32_t i)
uint32_t Interpret(const std::span< const std::byte > asmap, const std::span< const std::byte > ip)
Execute the ASMap bytecode to find the ASN for an IP.
Definition: asmap.cpp:182
256-bit opaque blob.
Definition: uint256.h:195
static auto quoted(const std::string &s)
Definition: fs.h:101
uint256 AsmapVersion(const std::span< const std::byte > data)
Computes SHA256 hash of ASMap data for versioning and consistency checks.
Definition: asmap.cpp:348
static std::string PathToString(const path &path)
Convert path object to a byte string.
Definition: fs.h:157
Path class wrapper to block calls to the fs::path(std::string) implicit constructor and the fs::path:...
Definition: fs.h:33
static const auto INVALID
A stack representing the lack of any (dis)satisfactions.
Definition: miniscript.h:351
bool CheckStandardAsmap(const std::span< const std::byte > data)
Provides a safe interface for validating ASMap data before use.
Definition: asmap.cpp:310