sumolib.xml.parsing
# -*- coding: utf-8 -*-
# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
# Copyright (C) 2011-2026 German Aerospace Center (DLR) and others.
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0/
# This Source Code may also be made available under the following Secondary
# Licenses when the conditions for such availability set forth in the Eclipse
# Public License 2.0 are satisfied: GNU General Public License, version 2
# or later which is available at
# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later

# @file    parsing.py
# @author  Michael Behrisch
# @author  Jakob Erdmann
# @author  Mirko Barthauer
# @date    2011-06-23

from __future__ import print_function
from __future__ import absolute_import
import os
import sys
import re
import gzip
import io
try:
    import xml.etree.cElementTree as ET
except ImportError as e:
    print("recovering from ImportError '%s'" % e)
    import xml.etree.ElementTree as ET
from collections import defaultdict, namedtuple, OrderedDict
from keyword import iskeyword
from functools import reduce
import xml.sax
from xml.sax import saxutils

from . import xsd
from .. import miscutils


def _parse_shape(coords):
    """Convert a shape string ("x,y x,y ...") into a list of float lists.

    Lists are returned (instead of nested ``map`` objects) so that the result
    can be indexed, printed and iterated more than once on Python 3.
    """
    return [[float(v) for v in xy.split(',')] for xy in coords.split()]


# attribute name -> callable converting the raw attribute string
DEFAULT_ATTR_CONVERSIONS = {
    # shape-like
    'shape': _parse_shape,
    # float
    'speed': float,
    'length': float,
    'width': float,
    'angle': float,
    'endOffset': float,
    'radius': float,
    'contPos': float,
    'visibility': float,
    'startPos': float,
    'endPos': float,
    'position': float,
    'x': float,
    'y': float,
    'lon': float,
    'lat': float,
    'freq': float,
    # int
    'priority': int,
    'numLanes': int,
    'index': int,
    'linkIndex': int,
    'linkIndex2': int,
    'fromLane': int,
    'toLane': int,
}


class NestingHandler(xml.sax.handler.ContentHandler):

    """A handler which knows the current nesting of tags"""

    def __init__(self):
        # initialise the SAX base class as well (it sets up the locator slot)
        xml.sax.handler.ContentHandler.__init__(self)
        self.tagstack = []

    def startElement(self, name, attrs):
        """Push the opened tag onto the stack."""
        self.tagstack.append(name)

    def endElement(self, name):
        """Pop the most recently opened tag from the stack."""
        self.tagstack.pop()

    def depth(self):
        """Return the nesting depth of the current tag (root element is 0)."""
        # do not count the root element
        return len(self.tagstack) - 1


class AttrFinder(NestingHandler):

    """Collects, per tag, the attributes that occur in a document.

    The information is taken from the given xsd schema if there is one,
    otherwise the source document is scanned with a SAX parser.
    """

    def __init__(self, xsdFile, source, split, keepAttrs=None):
        """
        @param xsdFile: schema file; if it is false-ish, source is parsed instead
        @param source: xml source handed to xml.sax.parse (only without xsdFile)
        @param split: if set, the children of the document root are used as roots
        @param keepAttrs: optional container of attribute names; when parsing
            the source, attributes not contained in it are skipped
        """
        NestingHandler.__init__(self)
        self.tagDepths = {}  # tag -> depth of appearance
        self.tagAttrs = defaultdict(OrderedDict)  # tag -> set of attrs
        self.renamedAttrs = {}  # (name, attr) -> renamedAttr
        self.attrs = {}
        self.depthTags = {}  # child of root: depth of appearance -> tag list
        self.rootDepth = 1 if split else 0
        self.keepAttrs = keepAttrs
        if xsdFile:
            self.xsdStruc = xsd.XsdStructure(xsdFile)
            if split:
                for ele in self.xsdStruc.root.children:
                    self.attrs[ele.name] = []
                    self.depthTags[ele.name] = [[]]
                    self.recursiveAttrFind(ele, ele, 1)
            else:
                self.attrs[self.xsdStruc.root.name] = []
                self.depthTags[self.xsdStruc.root.name] = []
                self.recursiveAttrFind(
                    self.xsdStruc.root, self.xsdStruc.root, 0)
        else:
            self.xsdStruc = None
            xml.sax.parse(source, self)

    def addElement(self, root, name, depth):
        """Register tag name at the given depth below root.

        Returns True if the tag was added to the depth table, a false value
        if it was already registered (a message is printed to stderr when it
        was registered at another depth).
        """
        # print("adding", root, name, depth)
        if len(self.depthTags[root]) == depth:
            # first tag seen at this depth
            self.tagDepths[name] = depth
            self.depthTags[root].append([name])
            return True
        if name not in self.tagDepths:
            self.depthTags[root][depth].append(name)
            return True
        if name not in self.depthTags[root][depth]:
            print("Ignoring tag %s at depth %s" %
                  (name, depth), file=sys.stderr)
        return False

    def recursiveAttrFind(self, root, currEle, depth):
        """Collect the attributes of the schema element currEle and its children."""
        if not self.addElement(root.name, currEle.name, depth):
            return
        for a in currEle.attributes:
            if ":" not in a.name:  # no namespace support yet
                self.tagAttrs[currEle.name][a.name] = a
                anew = "%s_%s" % (currEle.name, a.name)
                self.renamedAttrs[(currEle.name, a.name)] = anew
                attrList = self.attrs[root.name]
                # keep only the latest position of a repeated name
                if anew in attrList:
                    del attrList[attrList.index(anew)]
                attrList.append(anew)
        for ele in currEle.children:
            # print("attr", root.name, ele.name, depth)
            self.recursiveAttrFind(root, ele, depth + 1)

    def startElement(self, name, attrs):
        """SAX callback: register the tag and collect its (new) attributes."""
        NestingHandler.startElement(self, name, attrs)
        if self.depth() >= self.rootDepth:
            root = self.tagstack[self.rootDepth]
            if self.depth() == self.rootDepth and root not in self.attrs:
                self.attrs[root] = []
                self.depthTags[root] = [[]] * self.rootDepth
            if not self.addElement(root, name, self.depth()):
                return
            # collect attributes
            for a in sorted(list(attrs.keys())):
                if self.keepAttrs is not None and a not in self.keepAttrs:
                    continue
                if a not in self.tagAttrs[name] and ":" not in a:
                    self.tagAttrs[name][a] = xsd.XmlAttribute(a)
                    if not (name, a) in self.renamedAttrs:
                        anew = "%s_%s" % (name, a)
                        self.renamedAttrs[(name, a)] = anew
                        self.attrs[root].append(anew)


def xmlescape(value):
    """Return str(value) escaped for use inside a double-quoted xml attribute."""
    # the double quote has to become an entity, otherwise a value containing
    # '"' terminates the attribute written by toXML prematurely
    return saxutils.escape(str(value), {'"': '&quot;'})


def supports_comments():
    """Return whether the running python is at least version 3.8."""
    # tuple comparison also gives the right answer for major versions > 3
    return sys.version_info >= (3, 8)


def _prefix_keyword(name, warn=False):
    """Return a variant of the xml attribute name usable as python attribute.

    Characters other than alphanumerics and '_' are removed, the reserved
    field "name" and python keywords get the prefix "attr_".
    If warn is set, every renaming is reported on stderr.
    """
    # create a legal identifier (xml allows '-', ':' and '.' ...)
    result = ''.join([c for c in name if c.isalnum() or c == '_'])
    if result != name:
        if result == '':
            result = 'attr_'
        if warn:
            print("Warning: Renaming attribute '%s' to '%s' because it contains illegal characters" % (
                name, result), file=sys.stderr)
    if name == "name":
        result = 'attr_name'
        if warn:
            print("Warning: Renaming attribute '%s' to '%s' because it conflicts with a reserved field" % (
                name, result), file=sys.stderr)

    if iskeyword(name):
        result = 'attr_' + name
        if warn:
            print("Warning: Renaming attribute '%s' to '%s' because it conflicts with a python keyword" % (
                name, result), file=sys.stderr)
    return result


def compound_object(element_name, attrnames, warn=False, sort=False):
    """return a class which delegates bracket access to an internal dict.
       Missing attributes are delegated to the child dict for convenience.
       @note: Care must be taken when child nodes and attributes have the same names"""
    class CompoundObject():
        # xml attribute names and the corresponding python attribute names
        # NOTE(review): _fields is a class level list which setAttribute and
        # __delattr__ modify in place, so it is shared by all instances of
        # this class - confirm that this is intended
        _original_fields = sorted(attrnames) if sort else tuple(attrnames)
        _fields = [_prefix_keyword(a, warn) for a in _original_fields]

        def __init__(self, values, child_dict=None, text=None, child_list=None):
            """values are assigned to the fields in the order of _fields."""
            for name, val in zip(self._fields, values):
                self.__dict__[name] = val
            self._child_dict = child_dict if child_dict else {}
            self.name = element_name
            self._text = text
            self._child_list = child_list if child_list else []
            self._commented = False

        def getAttributes(self):
            """Return a list of (python attribute name, value) pairs."""
            return [(k, getattr(self, k)) for k in self._fields]

        def hasAttribute(self, name):
            return name in self._fields

        def getAttribute(self, name):
            """Return the value of the attribute or raise AttributeError."""
            if self.hasAttribute(name):
                return self.__dict__[name]
            raise AttributeError(name)

        def getAttributeSecure(self, name, default=None):
            """Return the value of the attribute or default if it is unknown."""
            if self.hasAttribute(name):
                return self.__dict__[name]
            return default

        def setAttribute(self, name, value):
            """Set the attribute, registering it as new field if necessary."""
            if name in self._fields:
                self.__dict__[name] = value
            else:
                if name not in self._original_fields:
                    if isinstance(self._original_fields, tuple):
                        tempList = list(self._original_fields)
                        tempList.append(name)
                        self._original_fields = tuple(tempList)
                    else:
                        self._original_fields.append(name)
                    self._fields.append(_prefix_keyword(name, warn))
                self.__dict__[_prefix_keyword(name, warn)] = value

        def hasChild(self, name):
            return name in self._child_dict

        def getChild(self, name):
            """Return the list of children with the given element name."""
            return self._child_dict[name]

        def addChild(self, name, attrs=None, sortAttrs=True):
            """Create a child element with the given attributes, append and return it."""
            if attrs is None:
                attrs = {}
            clazz = compound_object(name, attrs.keys(), sort=sortAttrs)
            child = clazz([attrs.get(a) for a in (sorted(attrs.keys()) if sortAttrs else attrs.keys())])
            self._child_dict.setdefault(name, []).append(child)
            self._child_list.append(child)
            return child

        def removeChild(self, child):
            self._child_dict[child.name].remove(child)
            self._child_list.remove(child)

        def setChildList(self, childs):
            """Replace all children by the given list."""
            for c in self._child_list:
                self._child_dict[c.name].remove(c)
            for c in childs:
                self._child_dict.setdefault(c.name, []).append(c)
            self._child_list = childs

        def getChildList(self, withComments=False):
            """Return the children in document order, optionally with comment nodes."""
            if withComments:
                return self._child_list
            else:
                return [c for c in self._child_list if not c.isComment()]

        def getText(self):
            return self._text

        def setText(self, text):
            self._text = text

        def isComment(self):
            # comment nodes carry the ElementTree Comment function as name
            return "function Comment" in str(self.name)

        def getComments(self):
            """Return the texts of the comment children (empty list if there are none)."""
            if not supports_comments():
                sys.stderr.write("Comment parsing is only supported with version 3.8 or higher by sumolib.xml\n")
            for name, children in self._child_dict.items():
                if "function Comment" in str(name):
                    return [c.getText() for c in children]
            return []

        def setCommented(self, commented=True, recurse=False):
            """Mark this element as (un)commented for toXML.

            When commenting (or when called recursively) the flag of all
            descendants is reset.
            """
            self._commented = commented
            if commented or recurse:
                for c in self._child_list:
                    c.setCommented(False, True)

        def isCommented(self):
            return self._commented

        def __getattr__(self, name):
            # only called for names not found otherwise: look for children
            if name[:2] != "__":
                return self._child_dict.get(name, None)
            raise AttributeError(name)

        def __setattr__(self, name, value):
            if name != "_child_dict" and name in self._child_dict:
                # this could be optimized by using the child_list only if there are different children
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                self._child_dict[name] = value
                for c in value:
                    self._child_list.append(c)
            else:
                self.__dict__[name] = value

        def __delattr__(self, name):
            if name in self._child_dict:
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                del self._child_dict[name]
            else:
                if name in self.__dict__:
                    del self.__dict__[name]
                # NOTE(review): fails if _original_fields is a tuple (sort=False) - confirm
                self._original_fields.remove(name)
                self._fields.remove(_prefix_keyword(name, False))

        def __getitem__(self, name):
            return self._child_dict[name]

        def __str__(self):
            nodeText = '' if self._text is None else ",text=%s" % self._text
            return "<%s,child_dict=%s%s>" % (self.getAttributes(), dict(self._child_dict), nodeText)

        def toXML(self, initialIndent="", indent="    ", withComments=False):
            """Return the xml representation of this element and its children.

            @param initialIndent: prefix of the lines of this element
            @param indent: additional prefix for every nesting level
            @param withComments: if true comment nodes are written, the value
                "inline" additionally appends them to the preceding line
            """
            fields = [' %s="%s"' % (self._original_fields[i], xmlescape(getattr(self, k)))
                      for i, k in enumerate(self._fields) if getattr(self, k) is not None and
                      # see #3454
                      '{' not in self._original_fields[i]]
            if self.isComment():
                if withComments:
                    return initialIndent + "<!-- %s -->\n" % self._text
                else:
                    return ""
            commentStart = ""
            commentEnd = ""
            if self._commented:
                commentStart = "!--"
                commentEnd = "--"
            if not self._child_dict and self._text is None:
                return initialIndent + "<%s%s%s/%s>\n" % (commentStart, self.name, "".join(fields), commentEnd)
            else:
                s = initialIndent + "<%s%s%s>\n" % (commentStart, self.name, "".join(fields))
                for i, c in enumerate(self._child_list):
                    if i > 0 and c.isComment() and withComments == "inline":
                        # drop the line break to keep the comment on the same line
                        s = s[:-1]
                    s += c.toXML(initialIndent + indent, indent=indent, withComments=withComments)
                if self._text is not None and self._text.strip():
                    s += self._text.strip(" ")
                return s + "%s</%s%s>\n" % (initialIndent, self.name, commentEnd)

        def __repr__(self):
            return str(self)

        def __lt__(self, other):
            return str(self) < str(other)

    return CompoundObject


def parselines(xmlline, element_name, element_attrs=None, attr_conversions=None,
               heterogeneous=True, warn=False, addRoot="dummy"):
    """Parse the elements element_name from the xml text xmlline (see parse).

    Nothing is yielded if the text does not contain the start of such an
    element. Unless addRoot is None the text is wrapped into a root element
    of that name before parsing.
    """
    tagStart1 = "<%s>" % element_name
    tagStart2 = "<%s " % element_name
    if tagStart1 in xmlline or tagStart2 in xmlline:
        if addRoot is not None:
            xmlline = "<%s>\n%s</%s>\n" % (addRoot, xmlline, addRoot)
        xmlfile = io.StringIO(xmlline)
        for x in parse(xmlfile, element_name, element_attrs, attr_conversions,
                       heterogeneous, warn):
            yield x


def _handle_namespace(tag, ignoreXmlns):
    """Return tag without its "{namespace}" prefix if ignoreXmlns is set."""
    # comment nodes have a function instead of a string as tag, these must
    # not be searched for a namespace
    if ignoreXmlns and not callable(tag) and "}" in tag:
        # see https://bugs.python.org/issue18304
        return tag.split("}")[1]
    return tag


def _check_file_like(xmlfile):
    """Return (file object, needs closing) for a file name or a file object."""
    if not hasattr(xmlfile, "read"):
        return miscutils.openz(xmlfile), True
    return xmlfile, False


def parse(xmlfile, element_names=None, element_attrs=None, attr_conversions=None,
          heterogeneous=True, warn=False, ignoreXmlns=False, outputLevel=1):
    """
    Parses the given element_names from xmlfile and yield compound objects for
    their xml subtrees (no extra objects are returned if element_names appear in
    the subtree) The compound objects provide all element attributes of
    the root of the subtree as attributes unless element_attrs are supplied. In this
    case element_attrs maps element names to a list of attributes which are
    supplied. If attr_conversions is not empty it must map attribute names to
    callables which will be called upon the attribute value before storing under
    the attribute name.
    If element_names is None, all elements at nesting level outputLevel are
    yielded (the document root is level 0).
    The compound objects gives dictionary style access to list of compound
    objects o for any children with the given element name
    o['child_element_name'] = [osub0, osub1, ...]
    As a shorthand, attribute style access to the list of child elements is
    provided unless an attribute with the same name as the child elements
    exists (i.e. o.child_element_name = [osub0, osub1, ...])
    @Note: With heterogeneous=False all elements with the same name must have
           the same attributes regardless of the subtree in which they occur
           (heterogeneous cases are handled by the default heterogeneous=True
           with reduced parsing speed)
    @Note: Attribute names may be modified to avoid name clashes
           with python keywords. (set warn=True to receive renaming warnings)
    @Note: The element_names may be either a single string or a list of strings.
    @Example: parse('plain.edg.xml', ['edge'])
    """
    if isinstance(element_names, str):
        element_names = [element_names]
    if element_attrs is None:
        element_attrs = {}
    if attr_conversions is None:
        attr_conversions = {}
    element_types = {}
    kwargs = {'parser': ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))} if supports_comments() else {}
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        level = -1
        for event, parsenode in ET.iterparse(xmlfile, events=('start', 'end'), **kwargs):
            if event == 'start':
                level += 1
            else:
                tag = _handle_namespace(parsenode.tag, ignoreXmlns)
                if (element_names is None and level == outputLevel) or (element_names and tag in element_names):
                    yield _get_compound_object(parsenode, element_types,
                                               tag, element_attrs,
                                               attr_conversions, heterogeneous, warn,
                                               ignoreXmlns)
                    # release the memory of the subtree which was handed out
                    parsenode.clear()
                level -= 1
    finally:
        if close_source:
            xmlfile.close()


def _IDENTITY(x):
    return x


def _get_compound_object(node, element_types, element_name, element_attrs, attr_conversions,
                         heterogeneous, warn, ignoreXmlns):
    """Recursively convert the ElementTree node into a compound object.

    element_types caches the generated class per element name; it is only
    reused if heterogeneous is false.
    """
    if element_name not in element_types or heterogeneous:
        # initialized the compound_object type from the first encountered #
        # element
        attrnames = element_attrs.get(element_name, node.keys())
        if len(attrnames) != len(set(attrnames)):
            raise Exception(
                "non-unique attributes %s for element '%s'" % (attrnames, element_name))
        element_types[element_name] = compound_object(
            element_name, attrnames, warn)
    # prepare children
    child_dict = {}
    child_list = []
    if len(node) > 0:
        for c in node:
            tag = _handle_namespace(c.tag, ignoreXmlns)
            child = _get_compound_object(c, element_types, tag, element_attrs, attr_conversions,
                                         heterogeneous, warn, ignoreXmlns)
            child_dict.setdefault(tag, []).append(child)
            child_list.append(child)
    attrnames = element_types[element_name]._original_fields
    return element_types[element_name](
        [attr_conversions.get(a, _IDENTITY)(node.get(a)) for a in attrnames],
        child_dict, node.text, child_list)


def create_document(root_element_name, attrs=None, schema=None):
    """Return a compound object for a new root element with the given attributes.

    If schema is None, the xsi namespace and a schema location derived from
    root_element_name are added to the attributes.
    """
    # work on a copy, the dictionary of the caller must not be modified
    attrs = {} if attrs is None else dict(attrs)
    if schema is None:
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        attrs["xsi:noNamespaceSchemaLocation"] = "http://sumo.dlr.de/xsd/" + root_element_name + "_file.xsd"
    clazz = compound_object(root_element_name, sorted(attrs.keys()))
    return clazz([attrs.get(a) for a in sorted(attrs.keys())], OrderedDict())


def sum(elements, attrname):
    """Return the sum of attrname over the elements (as returned by parse).

    attrname must be the name of a numerical attribute,
    elements must not be empty.
    """
    return reduce(lambda x, y: x + y, [float(getattr(e, attrname)) for e in elements])


def average(elements, attrname):
    """Return the average of attrname over the elements (as returned by parse).

    attrname must be the name of a numerical attribute.
    Raises an Exception if there are no elements.
    """
    if elements:
        return sum(elements, attrname) / len(elements)
    else:
        raise Exception("average of 0 elements is not defined")


def _createRecordAndPattern(element_name, attrnames, warn, optional, extra=None):
    """Return the namedtuple type and the compiled line pattern for an element.

    The pattern matches the start of element_name with the attributes in the
    given order (each of them optional if optional is set). The names in
    extra become additional fields of the record which are not matched.
    """
    if isinstance(attrnames, str):
        attrnames = [attrnames]
    prefixedAttrnames = [_prefix_keyword(a, warn) for a in attrnames]
    if optional:
        pattern = ''.join(['<%s' % element_name] +
                          ['(\\s+%s="(?P<%s>[^"]*?)")?' % a for a in zip(attrnames, prefixedAttrnames)])
    else:
        pattern = '(|\\s+.*)'.join(['<%s' % element_name] +
                                   ['\\s+%s="(?P<%s>[^"]*?)"' % a for a in zip(attrnames, prefixedAttrnames)])
    if extra is not None:
        prefixedAttrnames += [_prefix_keyword(a, warn) for a in extra]
    Record = namedtuple(_prefix_keyword(element_name, warn), prefixedAttrnames)
    reprog = re.compile(pattern)
    return Record, reprog


def _open(xmlfile, encoding="utf8"):
    """Open xmlfile if it is a file name, anything else is returned unchanged.

    Names ending in ".gz" are opened with gzip (in text mode unless encoding
    is None). Other names are only opened if encoding is not None.
    """
    if isinstance(xmlfile, str):
        if xmlfile.endswith(".gz"):
            if encoding is None:
                return gzip.open(xmlfile, "r")
            return gzip.open(xmlfile, "rt")
        if encoding is not None:
            return io.open(xmlfile, encoding=encoding)
    return xmlfile


def _comment_filter(stream):
    """
    Filters given stream for comments. Is used by parse_fast and parse_fast_nested
    """
    in_comment = False
    for line in stream:
        if "<!--" in line or in_comment:
            if "-->" in line:
                # the comment ends here, keep what follows (and precedes) it
                yield re.sub(".*-->" if in_comment else "<!--.*-->", "", line)
                in_comment = False
            elif not in_comment:
                # the comment starts here, keep what precedes it
                yield re.sub("<!--.*", "", line)
                in_comment = True
        else:
            yield line


def _attrs_from_xsd_url(schema_url, element_name):
    """Given a noNamespaceSchemaLocation URL and element name, return XSD attribute names or None."""
    xsd_filename = schema_url.split('/')[-1]
    sumo_home = os.environ.get('SUMO_HOME', os.path.normpath(
        os.path.join(os.path.dirname(__file__), '..', '..', '..')))
    xsd_path = os.path.join(sumo_home, 'data', 'xsd', xsd_filename)
    if not os.path.isfile(xsd_path):
        return None
    xsd_struc = xsd.XsdStructure(xsd_path)
    element = xsd_struc._namedElements.get(element_name)
    if element is None:
        return None
    return [a.name for a in element.attributes]


def parse_fast(xmlfile, element_name, attrnames=None, warn=False, optional=False, encoding="utf8", line_filter=None):
    """
    Parses the given attrnames from all elements with element_name
    @Note: The element must be on its own line and the attributes must appear in
    the given order. If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames is None, the attribute list is read from the XSD schema referenced in the
    XML file header (detected while reading); in this case optional is forced to True.
    If line_filter is given, matching lines for which it returns a true value are skipped.
    @Note: The encoding argument is currently not used by this function.
    @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed'])
    @Example: parse_fast('fcd.xml', 'vehicle')
    """
    reprog = None
    if attrnames is not None:
        Record, reprog = _createRecordAndPattern(element_name, attrnames, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if reprog is None:
                # still waiting for the schema reference to learn the attributes
                m_schema = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if m_schema:
                    detected = _attrs_from_xsd_url(m_schema.group(1), element_name)
                    if detected is not None:
                        Record, reprog = _createRecordAndPattern(element_name, detected, warn, True)
                if reprog is None:
                    continue
            m = reprog.search(line)
            if m:
                if line_filter is not None and line_filter(line):
                    continue
                yield Record(**m.groupdict())
    finally:
        if close_source:
            xmlfile.close()


def parse_fast_nested(xmlfile, element_name, attrnames=None, element_name2=None, attrnames2=None,
                      warn=False, optional=False, encoding="utf8"):
    """
    Parses the given attrnames from all elements with element_name
    And attrnames2 from element_name2 where element_name2 is a child element of element_name
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or attrnames2 is None, the attribute list is read from the XSD schema
    referenced in the XML file header (detected while reading); in this case optional is forced to True.
    @Note: The element must be on its own line and the attributes must appear in
    the given order.
    @Note: The encoding argument is currently not used by this function.
    @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane']):
    @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None):
    """
    reprog = reprog2 = record = None
    if attrnames is not None:
        Record, reprog = _createRecordAndPattern(element_name, attrnames, warn, optional)
    if attrnames2 is not None:
        Record2, reprog2 = _createRecordAndPattern(element_name2, attrnames2, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if reprog is None or reprog2 is None:
                # still waiting for the schema reference to learn the attributes
                m_schema = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if m_schema:
                    if reprog is None:
                        detected = _attrs_from_xsd_url(m_schema.group(1), element_name)
                        if detected is not None:
                            Record, reprog = _createRecordAndPattern(element_name, detected, warn, True)
                    if reprog2 is None:
                        detected2 = _attrs_from_xsd_url(m_schema.group(1), element_name2)
                        if detected2 is not None:
                            Record2, reprog2 = _createRecordAndPattern(element_name2, detected2, warn, True)
                if reprog is None or reprog2 is None:
                    continue
            m2 = reprog2.search(line)
            if record and m2:
                yield record, Record2(**m2.groupdict())
            else:
                m = reprog.search(line)
                if m:
                    record = Record(**m.groupdict())
                elif element_name in line:
                    # most likely the closing tag of the outer element
                    record = None
    finally:
        if close_source:
            xmlfile.close()


def parse_fast_structured(xmlfile, element_name, attrnames=None, nested=None,
                          warn=False, optional=False, encoding="utf8"):
    """
    Parses the given attrnames from all elements with element_name and nested elements of level 1.
    Unlike parse_fast_nested this function can handle multiple different child elements and
    returns objects where the child elements can be accessed by name (e.g. timestep.vehicle[0])
    as with the parse method. The returned object is not modifiable though.
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or any value in nested is None, the attribute list is read from the XSD schema
    referenced in the XML file header (detected while reading); in this case optional is forced to True.
    @Note: Every element must be on its own line and the attributes must appear in the given order.
    @Note: The encoding argument is currently not used by this function.
    @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'],
                                    {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}):
    @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):
    """
    if nested is None:
        nested = {}
    reprog = record = None
    if attrnames is not None and all(v is not None for v in nested.values()):
        Record, reprog = _createRecordAndPattern(element_name, attrnames, warn, optional, nested.keys())
        re2 = [(elem,) + _createRecordAndPattern(elem, attr, warn, optional) for elem, attr in nested.items()]
    finalizer = "</%s>" % element_name
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if reprog is None:
                # still waiting for the schema reference to learn the attributes
                m_schema = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if m_schema:
                    resolved = attrnames if attrnames is not None else _attrs_from_xsd_url(
                        m_schema.group(1), element_name)
                    resolved_nested = {name: (attrs if attrs is not None
                                              else _attrs_from_xsd_url(m_schema.group(1), name))
                                       for name, attrs in nested.items()}
                    if resolved is not None and all(v is not None for v in resolved_nested.values()):
                        opt_parent = optional if attrnames is not None else True
                        Record, reprog = _createRecordAndPattern(
                            element_name, resolved, warn, opt_parent, resolved_nested.keys())
                        re2 = [(elem,) + _createRecordAndPattern(elem, attr, warn, optional if nested[elem] is not None else True)  # noqa
                               for elem, attr in resolved_nested.items()]
                if reprog is None:
                    continue
            if record:
                for name2, Record2, reprog2 in re2:
                    m2 = reprog2.search(line)
                    if m2:
                        inner = Record2(**m2.groupdict())
                        getattr(record, name2).append(inner)
                        break
                else:
                    # no child element on this line, check for the end of the parent
                    if finalizer in line:
                        yield record
                        record = None
            else:
                m = reprog.search(line)
                if m:
                    args = dict(m.groupdict())
                    for name, _, __ in re2:
                        args[name] = []
                    record = Record(**args)
    finally:
        if close_source:
            xmlfile.close()


def quoteattr(val, ensureUnicode=False):
    """Return val quoted as xml attribute value, always using double quotes.

    If ensureUnicode is set, a bytes value is decoded as utf-8 first.
    """
    # saxutils sometimes uses single quotes around the attribute
    # we can prevent this by adding an artificial single quote to the value and removing it again
    if ensureUnicode and isinstance(val, bytes):
        val = val.decode("utf-8")
    return '"' + saxutils.quoteattr("'" + val)[2:]


def contextualRename(xmlTree, prefixes, attribute='id', ids=None):
    """
    Renames the given attribute in a specified set of child elements within
    xmlTree and also replaces all attribute values that referred to such an id
    with the new value.
    The mapping from old to new values is stored in ids (if given).
    Example:
    Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}
    - all road ids will be renamed to rN where N is a running integer
    - all junction ids will be renamed to jN where N is also a running integer
    - all attributes that referred to roads or junctions will now refer to
      their new ids
    """
    if ids is None:
        ids = {}
    newIds = set()
    attribute = _prefix_keyword(attribute)

    def rename(obj, index=0):
        if obj.name in prefixes:
            if obj.hasAttribute(attribute):
                oldID = obj.getAttribute(attribute)
                if oldID not in ids:
                    newID = prefixes[obj.name] + str(len(ids))
                    while newID in newIds:
                        index += 1
                        newID = prefixes[obj.name] + str(index)
                    newIds.add(newID)
                    ids[oldID] = newID
                    # keep id on second pass
                    ids[newID] = newID
                obj.setAttribute(attribute, ids[oldID])
        for a, v in obj.getAttributes():
            if v in ids:
                obj.setAttribute(a, ids[v])
        for child in obj.getChildList():
            index = rename(child, index)
        return index

    index = rename(xmlTree)
    rename(xmlTree, index)  # call again in case usage came before definition
class NestingHandler(xml.sax.handler.ContentHandler):

    """A handler which knows the current nesting of tags"""

    def __init__(self):
        # initialise the SAX base class as well, otherwise its locator slot
        # stays undefined until a parser sets it
        xml.sax.handler.ContentHandler.__init__(self)
        self.tagstack = []

    def startElement(self, name, attrs):
        """Push the opened tag onto the stack."""
        self.tagstack.append(name)

    def endElement(self, name):
        """Pop the most recently opened tag from the stack."""
        self.tagstack.pop()

    def depth(self):
        """Return the nesting depth of the current tag (root element is 0)."""
        # do not count the root element
        return len(self.tagstack) - 1
A handler which knows the current nesting of tags
Signals the start of an element in non-namespace mode.
The name parameter contains the raw XML 1.0 name of the element type as a string and the attrs parameter holds an instance of the Attributes class containing the attributes of the element.
class AttrFinder(NestingHandler):

    """Collects, per tag, the attributes that occur in a document.

    The information is taken from the given xsd schema if there is one,
    otherwise the source document is scanned with a SAX parser.
    """

    def __init__(self, xsdFile, source, split, keepAttrs=None):
        """Set up the lookup tables and fill them from the schema or the source."""
        NestingHandler.__init__(self)
        self.tagDepths = {}  # tag -> depth of appearance
        self.tagAttrs = defaultdict(OrderedDict)  # tag -> set of attrs
        self.renamedAttrs = {}  # (name, attr) -> renamedAttr
        self.attrs = {}
        self.depthTags = {}  # child of root: depth of appearance -> tag list
        self.rootDepth = 1 if split else 0
        self.keepAttrs = keepAttrs
        if not xsdFile:
            # without a schema the document itself has to be scanned
            self.xsdStruc = None
            xml.sax.parse(source, self)
            return
        self.xsdStruc = xsd.XsdStructure(xsdFile)
        schemaRoot = self.xsdStruc.root
        if split:
            # every child of the schema root becomes a root of its own
            for top in schemaRoot.children:
                self.attrs[top.name] = []
                self.depthTags[top.name] = [[]]
                self.recursiveAttrFind(top, top, 1)
        else:
            self.attrs[schemaRoot.name] = []
            self.depthTags[schemaRoot.name] = []
            self.recursiveAttrFind(schemaRoot, schemaRoot, 0)

    def addElement(self, root, name, depth):
        """Register tag name at the given depth below root.

        Returns True if the tag was added to the depth table, a false value
        if it was already registered (a message is printed to stderr when it
        was registered at another depth).
        """
        levels = self.depthTags[root]
        if depth == len(levels):
            # first tag seen at this depth
            self.tagDepths[name] = depth
            levels.append([name])
            return True
        if name not in self.tagDepths:
            levels[depth].append(name)
            return True
        if name not in levels[depth]:
            print("Ignoring tag %s at depth %s" %
                  (name, depth), file=sys.stderr)
        return False

    def recursiveAttrFind(self, root, currEle, depth):
        """Collect the attributes of the schema element currEle and its children."""
        if not self.addElement(root.name, currEle.name, depth):
            return
        tag = currEle.name
        for attr in currEle.attributes:
            if ":" in attr.name:  # no namespace support yet
                continue
            self.tagAttrs[tag][attr.name] = attr
            renamed = "%s_%s" % (tag, attr.name)
            self.renamedAttrs[(tag, attr.name)] = renamed
            collected = self.attrs[root.name]
            # keep only the latest position of a repeated name
            if renamed in collected:
                collected.remove(renamed)
            collected.append(renamed)
        for child in currEle.children:
            self.recursiveAttrFind(root, child, depth + 1)

    def startElement(self, name, attrs):
        """SAX callback: register the tag and collect its (new) attributes."""
        NestingHandler.startElement(self, name, attrs)
        level = self.depth()
        if level < self.rootDepth:
            return
        root = self.tagstack[self.rootDepth]
        if level == self.rootDepth and root not in self.attrs:
            self.attrs[root] = []
            self.depthTags[root] = [[]] * self.rootDepth
        if not self.addElement(root, name, level):
            return
        # collect attributes
        for a in sorted(list(attrs.keys())):
            if self.keepAttrs is not None and a not in self.keepAttrs:
                continue
            if a in self.tagAttrs[name] or ":" in a:
                continue
            self.tagAttrs[name][a] = xsd.XmlAttribute(a)
            if (name, a) not in self.renamedAttrs:
                renamed = "%s_%s" % (name, a)
                self.renamedAttrs[(name, a)] = renamed
                self.attrs[root].append(renamed)
A handler which knows the current nesting of tags
def __init__(self, xsdFile, source, split, keepAttrs=None):
    """Set up the lookup tables and fill them from the schema or from the source document.

    xsdFile   - schema to read the structure from; if false, source is SAX-parsed instead
    source    - document handed to xml.sax.parse when no schema is given
    split     - if true, the children of the document root are treated as roots
    keepAttrs - optional collection limiting the attributes recorded while parsing
    """
    NestingHandler.__init__(self)
    self.tagDepths = {}  # tag -> depth of appearance
    self.tagAttrs = defaultdict(OrderedDict)  # tag -> ordered attrs
    self.renamedAttrs = {}  # (name, attr) -> renamedAttr
    self.attrs = {}
    self.depthTags = {}  # child of root: depth of appearance -> tag list
    self.rootDepth = 1 if split else 0
    self.keepAttrs = keepAttrs
    if not xsdFile:
        self.xsdStruc = None
        xml.sax.parse(source, self)
        return
    self.xsdStruc = xsd.XsdStructure(xsdFile)
    if split:
        for topLevel in self.xsdStruc.root.children:
            self.attrs[topLevel.name] = []
            self.depthTags[topLevel.name] = [[]]
            self.recursiveAttrFind(topLevel, topLevel, 1)
    else:
        self.attrs[self.xsdStruc.root.name] = []
        self.depthTags[self.xsdStruc.root.name] = []
        self.recursiveAttrFind(self.xsdStruc.root, self.xsdStruc.root, 0)
def addElement(self, root, name, depth):
    """Register tag name below root at the given depth.

    Returns a true value if the tag was newly registered. A tag that is
    already known yields a false value; if it is known from another depth
    a warning is written to stderr.
    """
    levels = self.depthTags[root]
    if len(levels) == depth:
        # first tag seen at this depth: open a new level
        self.tagDepths[name] = depth
        levels.append([name])
        return True
    if name not in self.tagDepths:
        levels[depth].append(name)
        return True
    if name not in levels[depth]:
        print("Ignoring tag %s at depth %s" %
              (name, depth), file=sys.stderr)
    return False
132 def recursiveAttrFind(self, root, currEle, depth): 133 if not self.addElement(root.name, currEle.name, depth): 134 return 135 for a in currEle.attributes: 136 if ":" not in a.name: # no namespace support yet 137 self.tagAttrs[currEle.name][a.name] = a 138 anew = "%s_%s" % (currEle.name, a.name) 139 self.renamedAttrs[(currEle.name, a.name)] = anew 140 attrList = self.attrs[root.name] 141 if anew in attrList: 142 del attrList[attrList.index(anew)] 143 attrList.append(anew) 144 for ele in currEle.children: 145 # print("attr", root.name, ele.name, depth) 146 self.recursiveAttrFind(root, ele, depth + 1)
def startElement(self, name, attrs):
    """SAX callback: register the opened tag and record attributes not seen before.

    Elements above rootDepth and elements rejected by addElement are ignored.
    If keepAttrs is set, only the attributes listed there are recorded;
    attributes whose name contains a colon are never recorded.
    """
    NestingHandler.startElement(self, name, attrs)
    level = self.depth()
    if level < self.rootDepth:
        return
    root = self.tagstack[self.rootDepth]
    if level == self.rootDepth and root not in self.attrs:
        self.attrs[root] = []
        self.depthTags[root] = [[]] * self.rootDepth
    if not self.addElement(root, name, level):
        return
    # collect attributes
    for a in sorted(attrs.keys()):
        if self.keepAttrs is not None and a not in self.keepAttrs:
            continue
        if a in self.tagAttrs[name] or ":" in a:
            continue
        self.tagAttrs[name][a] = xsd.XmlAttribute(a)
        if (name, a) not in self.renamedAttrs:
            renamed = "%s_%s" % (name, a)
            self.renamedAttrs[(name, a)] = renamed
            self.attrs[root].append(renamed)
Signals the start of an element in non-namespace mode.
The name parameter contains the raw XML 1.0 name of the element type as a string and the attrs parameter holds an instance of the Attributes class containing the attributes of the element.
Inherited Members
def compound_object(element_name, attrnames, warn=False, sort=False):
    """Return a class whose instances represent one XML element named element_name.

    The class delegates bracket access to an internal dict of children.
    Missing attributes are delegated to the child dict for convenience.

    element_name - value stored as the 'name' of every instance
    attrnames    - names of the XML attributes (the constructor expects the
                   values in the same order, or in sorted order if sort is set)
    warn         - passed on to _prefix_keyword when attribute names are converted
    sort         - whether to sort the attribute names
    @note: Care must be taken when child nodes and attributes have the same names
    """
    class CompoundObject():
        # attribute names as given (class-wide defaults shared by all instances)
        _original_fields = sorted(attrnames) if sort else tuple(attrnames)
        # python attribute names, index-aligned with _original_fields
        _fields = [_prefix_keyword(a, warn) for a in _original_fields]

        def __init__(self, values, child_dict=None, text=None, child_list=None):
            # The state is written straight into __dict__: going through
            # __setattr__ would mistake e.g. "name" for a list of children
            # whenever a child element carries that name.
            state = self.__dict__
            for name, val in zip(self._fields, values):
                state[name] = val
            state["_child_dict"] = child_dict if child_dict else {}
            state["name"] = element_name
            state["_text"] = text
            state["_child_list"] = child_list if child_list else []
            state["_commented"] = False

        def _privateFields(self):
            # Copy-on-write: the class-level field lists are shared by all
            # instances, so an instance gets its own (list) copies before
            # its set of attributes is changed.
            if "_fields" not in self.__dict__:
                self.__dict__["_original_fields"] = list(self._original_fields)
                self.__dict__["_fields"] = list(self._fields)

        def getAttributes(self):
            return [(k, getattr(self, k)) for k in self._fields]

        def hasAttribute(self, name):
            return name in self._fields

        def getAttribute(self, name):
            if self.hasAttribute(name):
                return self.__dict__[name]
            raise AttributeError(name)

        def getAttributeSecure(self, name, default=None):
            if self.hasAttribute(name):
                return self.__dict__[name]
            return default

        def setAttribute(self, name, value):
            if name in self._fields:
                self.__dict__[name] = value
                return
            key = _prefix_keyword(name, warn)
            if name not in self._original_fields:
                # new attribute: extend the field lists of this instance only
                self._privateFields()
                self._original_fields.append(name)
                self._fields.append(key)
            self.__dict__[key] = value

        def hasChild(self, name):
            return name in self._child_dict

        def getChild(self, name):
            return self._child_dict[name]

        def addChild(self, name, attrs=None, sortAttrs=True):
            if attrs is None:
                attrs = {}
            clazz = compound_object(name, attrs.keys(), sort=sortAttrs)
            child = clazz([attrs.get(a) for a in (sorted(attrs.keys()) if sortAttrs else attrs.keys())])
            self._child_dict.setdefault(name, []).append(child)
            self._child_list.append(child)
            return child

        def removeChild(self, child):
            self._child_dict[child.name].remove(child)
            self._child_list.remove(child)

        def setChildList(self, childs):
            for c in self._child_list:
                self._child_dict[c.name].remove(c)
            for c in childs:
                self._child_dict.setdefault(c.name, []).append(c)
            self._child_list = childs

        def getChildList(self, withComments=False):
            if withComments:
                return self._child_list
            else:
                return [c for c in self._child_list if not c.isComment()]

        def getText(self):
            return self._text

        def setText(self, text):
            self._text = text

        def isComment(self):
            # comment nodes are recognised by the string form of their name
            return "function Comment" in str(self.name)

        def getComments(self):
            # supports_comments is a function and has to be called; testing
            # the function object itself would never trigger the warning
            if not supports_comments():
                sys.stderr.write("Comment parsing is only supported with version 3.8 or higher by sumolib.xml\n")
            for name, children in self._child_dict.items():
                if "function Comment" in str(name):
                    return [c.getText() for c in children]
            return []

        def setCommented(self, commented=True, recurse=False):
            self._commented = commented
            if commented or recurse:
                # the children are covered by the comment of the parent
                for c in self._child_list:
                    c.setCommented(False, True)

        def isCommented(self):
            return self._commented

        def __getattr__(self, name):
            # only called for names not found the regular way: look for children.
            # "_child_dict" itself is excluded to avoid endless recursion on
            # instances whose state has not been set up.
            if name[:2] == "__" or name == "_child_dict":
                raise AttributeError(name)
            return self._child_dict.get(name, None)

        def __setattr__(self, name, value):
            if name != "_child_dict" and name in self._child_dict:
                # this could be optimized by using the child_list only if there are different children
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                self._child_dict[name] = value
                for c in value:
                    self._child_list.append(c)
            else:
                self.__dict__[name] = value

        def __delattr__(self, name):
            if name in self._child_dict:
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                del self._child_dict[name]
                return
            present = name in self.__dict__
            if present:
                del self.__dict__[name]
            # accept the python attribute name as well as the original name
            if name in self._fields:
                idx = self._fields.index(name)
            elif name in self._original_fields:
                idx = list(self._original_fields).index(name)
            elif present:
                return
            else:
                raise AttributeError(name)
            # the class-level lists may be tuples and are shared between
            # instances, so the entry is removed from private copies
            self._privateFields()
            self.__dict__.pop(self._fields[idx], None)
            del self._original_fields[idx]
            del self._fields[idx]

        def __getitem__(self, name):
            return self._child_dict[name]

        def __str__(self):
            nodeText = '' if self._text is None else ",text=%s" % self._text
            return "<%s,child_dict=%s%s>" % (self.getAttributes(), dict(self._child_dict), nodeText)

        # NOTE(review): the default of indent is reproduced as it appears in the
        # available listing, where runs of whitespace may have been collapsed;
        # confirm the literal against the original file
        def toXML(self, initialIndent="", indent=" ", withComments=False):
            fields = [' %s="%s"' % (self._original_fields[i], xmlescape(getattr(self, k)))
                      for i, k in enumerate(self._fields) if getattr(self, k) is not None and
                      # see #3454
                      '{' not in self._original_fields[i]]
            if self.isComment():
                if withComments:
                    return initialIndent + "<!-- %s -->\n" % self._text
                else:
                    return ""
            commentStart = ""
            commentEnd = ""
            if self._commented:
                commentStart = "!--"
                commentEnd = "--"
            if not self._child_dict and self._text is None:
                return initialIndent + "<%s%s%s/%s>\n" % (commentStart, self.name, "".join(fields), commentEnd)
            else:
                s = initialIndent + "<%s%s%s>\n" % (commentStart, self.name, "".join(fields))
                for i, c in enumerate(self._child_list):
                    if i > 0 and c.isComment() and withComments == "inline":
                        # keep the comment on the line of the preceding element
                        s = s[:-1]
                    s += c.toXML(initialIndent + indent, indent=indent, withComments=withComments)
                if self._text is not None and self._text.strip():
                    s += self._text.strip(" ")
                return s + "%s</%s%s>\n" % (initialIndent, self.name, commentEnd)

        def __repr__(self):
            return str(self)

        def __lt__(self, other):
            return str(self) < str(other)

    return CompoundObject
return a class which delegates bracket access to an internal dict. Missing attributes are delegated to the child dict for convenience. @note: Care must be taken when child nodes and attributes have the same names
def parselines(xmlline, element_name, element_attrs=None, attr_conversions=None,
               heterogeneous=True, warn=False, addRoot="dummy"):
    """Parse the elements named element_name from the text xmlline.

    Nothing is yielded unless the text contains an opening tag of the
    element ("<name>" or "<name "). If addRoot is not None the text is
    wrapped into an element of that name before it is handed to parse.
    """
    openers = ("<%s>" % element_name, "<%s " % element_name)
    if not any(opener in xmlline for opener in openers):
        return
    if addRoot is not None:
        xmlline = "<%s>\n%s</%s>\n" % (addRoot, xmlline, addRoot)
    parsed = parse(io.StringIO(xmlline), element_name, element_attrs, attr_conversions,
                   heterogeneous, warn)
    for obj in parsed:
        yield obj
def parse(xmlfile, element_names=None, element_attrs=None, attr_conversions=None,
          heterogeneous=True, warn=False, ignoreXmlns=False, outputLevel=1):
    """
    Parses the given element_names from xmlfile and yields compound objects for
    their xml subtrees. If element_names is None, the elements found at nesting
    level outputLevel are yielded instead.
    element_attrs and attr_conversions are handed to the object construction;
    if they are None, empty dicts are used. attr_conversions is meant to map
    attribute names to callables which are applied to the attribute value
    before it is stored.
    A compound object o gives dictionary style access to the list of compound
    objects for any children with the given element name
    o['child_element_name'] = [osub0, osub1, ...]
    As a shorthand, attribute style access to the list of child elements is
    provided unless an attribute with the same name as the child elements
    exists (i.e. o.child_element_name = [osub0, osub1, ...])
    @Note: The element_names may be either a single string or a list of strings.
    @Note: The parsed node is cleared after the object for it has been yielded.
    @Example: parse('plain.edg.xml', ['edge'])
    """
    if isinstance(element_names, str):
        element_names = [element_names]
    element_attrs = {} if element_attrs is None else element_attrs
    attr_conversions = {} if attr_conversions is None else attr_conversions
    element_types = {}
    if supports_comments():
        # keep comments in the tree
        kwargs = {'parser': ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))}
    else:
        kwargs = {}
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        level = -1
        for event, parsenode in ET.iterparse(xmlfile, events=('start', 'end'), **kwargs):
            if event == 'start':
                level += 1
                continue
            tag = _handle_namespace(parsenode.tag, ignoreXmlns)
            if element_names is None:
                wanted = level == outputLevel
            else:
                wanted = element_names and tag in element_names
            if wanted:
                yield _get_compound_object(parsenode, element_types,
                                           tag, element_attrs,
                                           attr_conversions, heterogeneous, warn,
                                           ignoreXmlns)
                parsenode.clear()
            level -= 1
    finally:
        if close_source:
            xmlfile.close()
Parses the given element_names from xmlfile and yield compound objects for their xml subtrees (no extra objects are returned if element_names appear in the subtree). The compound objects provide all element attributes of the root of the subtree as attributes unless element_attrs are supplied. In this case element_attrs maps element names to a list of attributes which are supplied. If attr_conversions is not empty it must map attribute names to callables which will be called upon the attribute value before storing under the attribute name. A compound object o gives dictionary style access to the list of compound objects for any children with the given element name: o['child_element_name'] = [osub0, osub1, ...] As a shorthand, attribute style access to the list of child elements is provided unless an attribute with the same name as the child elements exists (i.e. o.child_element_name = [osub0, osub1, ...]) @Note: All elements with the same name must have the same type regardless of the subtree in which they occur (heterogeneous cases may be handled by setting heterogeneous=True, with reduced parsing speed) @Note: Attribute names may be modified to avoid name clashes with python keywords. (set warn=True to receive renaming warnings) @Note: The element_names may be either a single string or a list of strings. @Example: parse('plain.edg.xml', ['edge'])
def create_document(root_element_name, attrs=None, schema=None):
    """Return a compound object for a new document with the given root element.

    root_element_name - name of the root element
    attrs             - optional mapping of attribute name -> value for the root;
                        the mapping is copied and never modified
    schema            - if None, the attributes "xmlns:xsi" and
                        "xsi:noNamespaceSchemaLocation" (pointing at
                        http://sumo.dlr.de/xsd/<root_element_name>_file.xsd)
                        are added; any other value adds no schema attributes
    The attributes of the returned object are sorted by name.
    """
    # work on a copy so that the schema attributes do not leak into the
    # dict of the caller
    attrs = dict(attrs) if attrs else {}
    if schema is None:
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        attrs["xsi:noNamespaceSchemaLocation"] = "http://sumo.dlr.de/xsd/" + root_element_name + "_file.xsd"
    names = sorted(attrs.keys())
    clazz = compound_object(root_element_name, names)
    return clazz([attrs[a] for a in names], OrderedDict())
def average(elements, attrname):
    """Return the average of attribute attrname over the given elements.

    elements may be any iterable (e.g. the generator returned by parse);
    it is consumed completely. attrname must be the name of a numerical
    attribute.
    Raises an Exception if there are no elements.
    """
    # generators are always true and have no len(), so collect the elements first
    elements = [] if elements is None else list(elements)
    if not elements:
        raise Exception("average of 0 elements is not defined")
    # NOTE(review): sum is called with (elements, attrname) as before; this
    # presumably resolves to a module-level helper shadowing the builtin - confirm
    return sum(elements, attrname) / len(elements)
def parse_fast(xmlfile, element_name, attrnames=None, warn=False, optional=False, encoding="utf8", line_filter=None):
    """
    Yields one record with the given attrnames for every line on which an
    element with element_name is found.
    @Note: The element must be on its own line and the attributes must appear in
    the given order. If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames is None, the attribute list is read from the XSD schema referenced by
    noNamespaceSchemaLocation in the XML file (detected while reading); in this case
    optional is forced to True and all lines before the schema reference are skipped.
    If line_filter is given, matching lines for which it returns a true value are skipped.
    @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed'])
    @Example: parse_fast('fcd.xml', 'vehicle')
    """
    pattern = None
    if attrnames is not None:
        Record, pattern = _createRecordAndPattern(element_name, attrnames, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if pattern is None:
                # still waiting for the schema reference
                schemaRef = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schemaRef:
                    detected = _attrs_from_xsd_url(schemaRef.group(1), element_name)
                    if detected is not None:
                        Record, pattern = _createRecordAndPattern(element_name, detected, warn, True)
                if pattern is None:
                    continue
            found = pattern.search(line)
            if not found:
                continue
            if line_filter is not None and line_filter(line):
                continue
            yield Record(**found.groupdict())
    finally:
        if close_source:
            xmlfile.close()
Parses the given attrnames from all elements with element_name @Note: The element must be on its own line and the attributes must appear in the given order. If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed']) @Example: parse_fast('fcd.xml', 'vehicle')
def parse_fast_nested(xmlfile, element_name, attrnames=None, element_name2=None, attrnames2=None,
                      warn=False, optional=False, encoding="utf8"):
    """
    Yields pairs (outer record, inner record) with attrnames of element_name
    and attrnames2 of element_name2 where element_name2 is a child element of element_name.
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or attrnames2 is None, the attribute list is read from the XSD schema
    referenced in the XML file (detected while reading); in this case optional is forced to True.
    @Note: The element must be on its own line and the attributes must appear in
    the given order.
    @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane']):
    @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None):
    """
    outerPattern = innerPattern = current = None
    if attrnames is not None:
        Outer, outerPattern = _createRecordAndPattern(element_name, attrnames, warn, optional)
    if attrnames2 is not None:
        Inner, innerPattern = _createRecordAndPattern(element_name2, attrnames2, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if outerPattern is None or innerPattern is None:
                # at least one attribute list has to come from the schema
                schemaRef = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schemaRef:
                    if outerPattern is None:
                        detected = _attrs_from_xsd_url(schemaRef.group(1), element_name)
                        if detected is not None:
                            Outer, outerPattern = _createRecordAndPattern(element_name, detected, warn, True)
                    if innerPattern is None:
                        detected2 = _attrs_from_xsd_url(schemaRef.group(1), element_name2)
                        if detected2 is not None:
                            Inner, innerPattern = _createRecordAndPattern(element_name2, detected2, warn, True)
                if outerPattern is None or innerPattern is None:
                    continue
            innerMatch = innerPattern.search(line)
            if current and innerMatch:
                yield current, Inner(**innerMatch.groupdict())
                continue
            outerMatch = outerPattern.search(line)
            if outerMatch:
                current = Outer(**outerMatch.groupdict())
            elif element_name in line:
                # any other mention of the outer element ends the current record
                current = None
    finally:
        if close_source:
            xmlfile.close()
Parses the given attrnames from all elements with element_name And attrnames2 from element_name2 where element_name2 is a child element of element_name If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames or attrnames2 is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Note: The element must be on its own line and the attributes must appear in the given order. @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane']): @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None):
def parse_fast_structured(xmlfile, element_name, attrnames=None, nested=None,
                          warn=False, optional=False, encoding="utf8"):
    """
    Yields records with the given attrnames for all elements with element_name, including
    their nested elements of level 1.
    Unlike parse_fast_nested this function can handle multiple different child elements and
    returns objects where the child elements can be accessed by name (e.g. timestep.vehicle[0]).
    A record is yielded when the line with the closing tag of element_name is read.
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or any value in nested is None, the attribute list is read from the XSD schema
    referenced in the XML file (detected while reading); in this case optional is forced to True
    for the elements concerned.
    @Note: Every element must be on its own line and the attributes must appear in the given order.
    @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'],
                                    {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}):
    @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):
    """
    if nested is None:
        nested = {}
    parentPattern = current = None
    if attrnames is not None and all(v is not None for v in nested.values()):
        # everything is given explicitly, no schema lookup needed
        Parent, parentPattern = _createRecordAndPattern(element_name, attrnames, warn, optional, nested.keys())
        childSpecs = [(elem,) + _createRecordAndPattern(elem, attr, warn, optional)
                      for elem, attr in nested.items()]
    closingTag = "</%s>" % element_name
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if parentPattern is None:
                schemaRef = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schemaRef:
                    url = schemaRef.group(1)
                    if attrnames is not None:
                        resolved = attrnames
                    else:
                        resolved = _attrs_from_xsd_url(url, element_name)
                    resolved_nested = {}
                    for childName, childAttrs in nested.items():
                        if childAttrs is None:
                            childAttrs = _attrs_from_xsd_url(url, childName)
                        resolved_nested[childName] = childAttrs
                    if resolved is not None and all(v is not None for v in resolved_nested.values()):
                        # attribute lists taken from the schema are always optional
                        opt_parent = optional if attrnames is not None else True
                        Parent, parentPattern = _createRecordAndPattern(
                            element_name, resolved, warn, opt_parent, resolved_nested.keys())
                        childSpecs = []
                        for elem, attr in resolved_nested.items():
                            opt_child = optional if nested[elem] is not None else True
                            childSpecs.append((elem,) + _createRecordAndPattern(elem, attr, warn, opt_child))
                if parentPattern is None:
                    continue
            if current:
                # inside a parent element: look for a child or the closing tag
                for childName, Child, childPattern in childSpecs:
                    childMatch = childPattern.search(line)
                    if childMatch:
                        getattr(current, childName).append(Child(**childMatch.groupdict()))
                        break
                else:
                    if closingTag in line:
                        yield current
                        current = None
            else:
                parentMatch = parentPattern.search(line)
                if parentMatch:
                    args = dict(parentMatch.groupdict())
                    for childName, _, __ in childSpecs:
                        args[childName] = []
                    current = Parent(**args)
    finally:
        if close_source:
            xmlfile.close()
Parses the given attrnames from all elements with element_name and nested elements of level 1. Unlike parse_fast_nested this function can handle multiple different child elements and returns objects where the child elements can be accessed by name (e.g. timestep.vehicle[0]) as with the parse method. The returned object is not modifiable though. If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames or any value in nested is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Note: Every element must be on its own line and the attributes must appear in the given order. @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'], {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}): @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):
def quoteattr(val, ensureUnicode=False):
    """Return val escaped and enclosed in double quotes for use as an XML attribute value.

    If ensureUnicode is set, a bytes value is decoded as utf-8 first.
    """
    if ensureUnicode and type(val) is bytes:
        val = val.decode("utf-8")
    # saxutils switches to single quotes for values which contain a double
    # quote only; a leading artificial single quote rules this out
    quoted = saxutils.quoteattr("'" + val)
    # drop the opening quote together with the artificial single quote
    return '"' + quoted[2:]
def contextualRename(xmlTree, prefixes, attribute='id', ids=None):
    """
    Renames the given attribute in the elements of xmlTree whose name is a key
    of prefixes and also replaces all attribute values that are equal to a
    renamed id with the new value.
    A new id consists of the prefix for the element name and a number: the
    current size of the id mapping, or a running counter if that id is taken.
    The mapping old id -> new id is kept in ids (a fresh dict if None is given).
    The tree is processed twice so that references which occur before the
    definition of an id are replaced as well.
    Example:
    Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}
    - all road ids will be renamed to rN where N is an integer
    - all junction ids will be renamed to jN where N is also an integer
    - all attributes that referred to roads or junctions will now refer to
      their new ids
    """
    if ids is None:
        ids = {}
    assigned = set()
    attribute = _prefix_keyword(attribute)

    def visit(node, counter=0):
        if node.name in prefixes and node.hasAttribute(attribute):
            previous = node.getAttribute(attribute)
            if previous not in ids:
                prefix = prefixes[node.name]
                candidate = prefix + str(len(ids))
                while candidate in assigned:
                    counter += 1
                    candidate = prefix + str(counter)
                assigned.add(candidate)
                ids[previous] = candidate
                # keep id on second pass
                ids[candidate] = candidate
            node.setAttribute(attribute, ids[previous])
        for attrName, attrValue in node.getAttributes():
            if attrValue in ids:
                node.setAttribute(attrName, ids[attrValue])
        for child in node.getChildList():
            counter = visit(child, counter)
        return counter

    reached = visit(xmlTree)
    visit(xmlTree, reached)  # call again in case usage came before definition
Renames the given attribute in a specified set of child elements within xmlTree and also replaces all attribute values that referred to such an id with the new value. Example: Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}
- all road ids will be renamed to rN where N is a running integer
- all junction ids will be renamed to jN where N is also a running integer
- all attributes that referred to roads or junctions will now refer to their new ids