sumolib.xml.parsing

  1# -*- coding: utf-8 -*-
  2# Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
  3# Copyright (C) 2011-2026 German Aerospace Center (DLR) and others.
  4# This program and the accompanying materials are made available under the
  5# terms of the Eclipse Public License 2.0 which is available at
  6# https://www.eclipse.org/legal/epl-2.0/
  7# This Source Code may also be made available under the following Secondary
  8# Licenses when the conditions for such availability set forth in the Eclipse
  9# Public License 2.0 are satisfied: GNU General Public License, version 2
 10# or later which is available at
 11# https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
 12# SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
 13
 14# @file    parsing.py
 15# @author  Michael Behrisch
 16# @author  Jakob Erdmann
 17# @author  Mirko Barthauer
 18# @date    2011-06-23
 19
 20from __future__ import print_function
 21from __future__ import absolute_import
 22import os
 23import sys
 24import re
 25import gzip
 26import io
 27try:
 28    import xml.etree.cElementTree as ET
 29except ImportError as e:
 30    print("recovering from ImportError '%s'" % e)
 31    import xml.etree.ElementTree as ET
 32from collections import defaultdict, namedtuple, OrderedDict
 33from keyword import iskeyword
 34from functools import reduce
 35import xml.sax
 36from xml.sax import saxutils
 37
 38from . import xsd
 39from .. import miscutils
 40
 41DEFAULT_ATTR_CONVERSIONS = {
 42    # shape-like
 43    'shape': lambda coords: map(lambda xy: map(float, xy.split(',')), coords.split()),
 44    # float
 45    'speed': float,
 46    'length': float,
 47    'width': float,
 48    'angle': float,
 49    'endOffset': float,
 50    'radius': float,
 51    'contPos': float,
 52    'visibility': float,
 53    'startPos': float,
 54    'endPos': float,
 55    'position': float,
 56    'x': float,
 57    'y': float,
 58    'lon': float,
 59    'lat': float,
 60    'freq': float,
 61    # int
 62    'priority': int,
 63    'numLanes': int,
 64    'index': int,
 65    'linkIndex': int,
 66    'linkIndex2': int,
 67    'fromLane': int,
 68    'toLane': int,
 69}
 70
 71
 72class NestingHandler(xml.sax.handler.ContentHandler):
 73
 74    """A handler which knows the current nesting of tags"""
 75
 76    def __init__(self):
 77        self.tagstack = []
 78
 79    def startElement(self, name, attrs):
 80        self.tagstack.append(name)
 81
 82    def endElement(self, name):
 83        self.tagstack.pop()
 84
 85    def depth(self):
 86        # do not count the root element
 87        return len(self.tagstack) - 1
 88
 89
class AttrFinder(NestingHandler):

    """Collects element names and their attribute names, either from an XSD
    structure (if xsdFile is given) or by SAX-parsing the given source.

    Results are stored in the instance fields initialized in __init__
    (tagDepths, tagAttrs, renamedAttrs, attrs, depthTags).
    """

    def __init__(self, xsdFile, source, split, keepAttrs=None):
        """
        @param xsdFile: if set, the structure is read via xsd.XsdStructure and source is not parsed
        @param source: passed to xml.sax.parse when no xsdFile is given
        @param split: if True, the children of the document root are treated as
            separate roots (rootDepth is 1), otherwise the document root itself (rootDepth is 0)
        @param keepAttrs: optional container of attribute names; when parsing the
            source, attributes not contained in it are skipped
        """
        NestingHandler.__init__(self)
        self.tagDepths = {}  # tag -> depth of appearance
        self.tagAttrs = defaultdict(OrderedDict)  # tag -> set of attrs
        self.renamedAttrs = {}  # (name, attr) -> renamedAttr
        self.attrs = {}  # root tag -> list of renamed attributes ("tag_attr")
        self.depthTags = {}  # child of root: depth of appearance -> tag list
        self.rootDepth = 1 if split else 0
        self.keepAttrs = keepAttrs
        if xsdFile:
            self.xsdStruc = xsd.XsdStructure(xsdFile)
            if split:
                # every child of the schema root becomes its own root entry
                for ele in self.xsdStruc.root.children:
                    self.attrs[ele.name] = []
                    self.depthTags[ele.name] = [[]]
                    self.recursiveAttrFind(ele, ele, 1)
            else:
                self.attrs[self.xsdStruc.root.name] = []
                self.depthTags[self.xsdStruc.root.name] = []
                self.recursiveAttrFind(
                    self.xsdStruc.root, self.xsdStruc.root, 0)
        else:
            # no schema: discover the structure from the document itself
            self.xsdStruc = None
            xml.sax.parse(source, self)

    def addElement(self, root, name, depth):
        """Register tag name at the given depth below root.

        Returns True if the tag was registered by this call and False if the tag
        is already contained in tagDepths. In the latter case a message is
        printed to stderr if the tag is not listed at this depth.
        """
        # print("adding", root, name, depth)
        if len(self.depthTags[root]) == depth:
            # first tag seen at this depth: open a new level
            self.tagDepths[name] = depth
            self.depthTags[root].append([name])
            return True
        if name not in self.tagDepths:
            # NOTE(review): tagDepths is not updated in this branch, so the same
            # tag passes through here again on its next occurrence - confirm intended
            self.depthTags[root][depth].append(name)
            return True
        if name not in self.depthTags[root][depth]:
            print("Ignoring tag %s at depth %s" %
                  (name, depth), file=sys.stderr)
        return False

    def recursiveAttrFind(self, root, currEle, depth):
        """Register currEle and (recursively) its children from the XSD structure
        and collect their attributes under the given root element."""
        if not self.addElement(root.name, currEle.name, depth):
            return
        for a in currEle.attributes:
            if ":" not in a.name:  # no namespace support yet
                self.tagAttrs[currEle.name][a.name] = a
                anew = "%s_%s" % (currEle.name, a.name)
                self.renamedAttrs[(currEle.name, a.name)] = anew
                attrList = self.attrs[root.name]
                # an already known renamed attribute is moved to the end of the list
                if anew in attrList:
                    del attrList[attrList.index(anew)]
                attrList.append(anew)
        for ele in currEle.children:
            # print("attr", root.name, ele.name, depth)
            self.recursiveAttrFind(root, ele, depth + 1)

    def startElement(self, name, attrs):
        """SAX callback: register the element and collect its attribute names."""
        NestingHandler.startElement(self, name, attrs)
        if self.depth() >= self.rootDepth:
            # the element on the stack at rootDepth is the root this element belongs to
            root = self.tagstack[self.rootDepth]
            if self.depth() == self.rootDepth and root not in self.attrs:
                self.attrs[root] = []
                # rootDepth is 0 or 1, so this is either [] or [[]]
                self.depthTags[root] = [[]] * self.rootDepth
            if not self.addElement(root, name, self.depth()):
                return
            # collect attributes
            for a in sorted(list(attrs.keys())):
                if self.keepAttrs is not None and a not in self.keepAttrs:
                    continue
                if a not in self.tagAttrs[name] and ":" not in a:
                    self.tagAttrs[name][a] = xsd.XmlAttribute(a)
                    if not (name, a) in self.renamedAttrs:
                        anew = "%s_%s" % (name, a)
                        self.renamedAttrs[(name, a)] = anew
                        self.attrs[root].append(anew)
164                        self.renamedAttrs[(name, a)] = anew
165                        self.attrs[root].append(anew)
166
167
def xmlescape(value):
    """Return str(value) escaped for use inside a double-quoted XML attribute.

    saxutils.escape takes care of '&', '<' and '>'; the additional entity map
    replaces the double quote by '&quot;' so that the value cannot terminate
    the surrounding attribute quotes.
    """
    return saxutils.escape(str(value), {'"': '&quot;'})
170
171
def supports_comments():
    """Return True if the running interpreter is Python 3.8 or newer.

    The version tuple is compared as a whole; comparing major and minor
    separately would wrongly reject versions such as 4.0.
    """
    return sys.version_info >= (3, 8)
174
175
176def _prefix_keyword(name, warn=False):
177    result = name
178    # create a legal identifier (xml allows '-', ':' and '.' ...)
179    result = ''.join([c for c in name if c.isalnum() or c == '_'])
180    if result != name:
181        if result == '':
182            result = 'attr_'
183        if warn:
184            print("Warning: Renaming attribute '%s' to '%s' because it contains illegal characters" % (
185                name, result), file=sys.stderr)
186    if name == "name":
187        result = 'attr_name'
188        if warn:
189            print("Warning: Renaming attribute '%s' to '%s' because it conflicts with a reserved field" % (
190                name, result), file=sys.stderr)
191
192    if iskeyword(name):
193        result = 'attr_' + name
194        if warn:
195            print("Warning: Renaming attribute '%s' to '%s' because it conflicts with a python keyword" % (
196                name, result), file=sys.stderr)
197    return result
198
199
def compound_object(element_name, attrnames, warn=False, sort=False):
    """Return a class which delegates bracket access to an internal dict.

    Missing attributes are delegated to the child dict for convenience.

    @param element_name: stored in the 'name' field of every instance
    @param attrnames: iterable of XML attribute names; every name is mapped to a
        python-friendly field name via _prefix_keyword
    @param warn: forwarded to _prefix_keyword to report renamed attributes
    @param sort: if True the attribute names are kept as a sorted list, otherwise
        as a tuple in the given order
    @note: Care must be taken when child nodes and attributes have the same names
    """
    class CompoundObject():
        # attribute names as they appear in the XML
        _original_fields = sorted(attrnames) if sort else tuple(attrnames)
        # python-friendly names, index-aligned with _original_fields
        _fields = [_prefix_keyword(a, warn) for a in _original_fields]

        def __init__(self, values, child_dict=None, text=None, child_list=None):
            # values are matched with the field names by position
            for name, val in zip(self._fields, values):
                self.__dict__[name] = val
            self._child_dict = child_dict if child_dict else {}
            self.name = element_name
            self._text = text
            self._child_list = child_list if child_list else []
            self._commented = False

        def getAttributes(self):
            """Return a list of (field name, value) pairs for all fields."""
            return [(k, getattr(self, k)) for k in self._fields]

        def hasAttribute(self, name):
            """Return whether name is one of the (python-friendly) field names."""
            return name in self._fields

        def getAttribute(self, name):
            """Return the value of the field or raise AttributeError if it is unknown."""
            if self.hasAttribute(name):
                return self.__dict__[name]
            raise AttributeError(name)

        def getAttributeSecure(self, name, default=None):
            """Return the value of the field or default if it is unknown."""
            if self.hasAttribute(name):
                return self.__dict__[name]
            return default

        def setAttribute(self, name, value):
            """Set the value of an existing field or add a new field."""
            if name in self._fields:
                self.__dict__[name] = value
            else:
                if name not in self._original_fields:
                    if isinstance(self._original_fields, tuple):
                        # tuples are immutable, so a new one is stored
                        tempList = list(self._original_fields)
                        tempList.append(name)
                        self._original_fields = tuple(tempList)
                    else:
                        self._original_fields.append(name)
                    self._fields.append(_prefix_keyword(name, warn))
                self.__dict__[_prefix_keyword(name, warn)] = value

        def hasChild(self, name):
            """Return whether there is an entry for child elements with the given name."""
            return name in self._child_dict

        def getChild(self, name):
            """Return the list of child elements with the given name (KeyError if unknown)."""
            return self._child_dict[name]

        def addChild(self, name, attrs=None, sortAttrs=True):
            """Create a child element from name and the attrs dict, append and return it."""
            if attrs is None:
                attrs = {}
            clazz = compound_object(name, attrs.keys(), sort=sortAttrs)
            child = clazz([attrs.get(a) for a in (sorted(attrs.keys()) if sortAttrs else attrs.keys())])
            self._child_dict.setdefault(name, []).append(child)
            self._child_list.append(child)
            return child

        def removeChild(self, child):
            """Remove the given child from the by-name dict and from the ordered list."""
            self._child_dict[child.name].remove(child)
            self._child_list.remove(child)

        def setChildList(self, childs):
            """Replace all current children by the given list."""
            for c in self._child_list:
                self._child_dict[c.name].remove(c)
            for c in childs:
                self._child_dict.setdefault(c.name, []).append(c)
            self._child_list = childs

        def getChildList(self, withComments=False):
            """Return the children in document order, optionally including comment nodes."""
            if withComments:
                return self._child_list
            else:
                return [c for c in self._child_list if not c.isComment()]

        def getText(self):
            """Return the text stored for this element."""
            return self._text

        def setText(self, text):
            """Set the text stored for this element."""
            self._text = text

        def isComment(self):
            """Return whether the string form of the element name contains 'function Comment'."""
            return "function Comment" in str(self.name)

        def getComments(self):
            """Return the texts of all comment children (empty list if there are none)."""
            # supports_comments must be called; the function object itself is always true
            if not supports_comments():
                sys.stderr.write("Comment parsing is only supported with version 3.8 or higher by sumolib.xml\n")
            for name, children in self._child_dict.items():
                if "function Comment" in str(name):
                    return [c.getText() for c in children]
            return []

        def setCommented(self, commented=True, recurse=False):
            """Mark this element as commented out (see toXML).

            When commenting (or when called recursively) the mark is removed
            from all descendants.
            """
            self._commented = commented
            if commented or recurse:
                for c in self._child_list:
                    c.setCommented(False, True)

        def isCommented(self):
            """Return whether this element is marked as commented out."""
            return self._commented

        def __getattr__(self, name):
            # only called when normal lookup fails: fall back to the children
            # with that name (None if there are none)
            if name[:2] != "__":
                return self._child_dict.get(name, None)
            raise AttributeError

        def __setattr__(self, name, value):
            if name != "_child_dict" and name in self._child_dict:
                # this could be optimized by using the child_list only if there are different children
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                self._child_dict[name] = value
                for c in value:
                    self._child_list.append(c)
            else:
                self.__dict__[name] = value

        def __delattr__(self, name):
            if name in self._child_dict:
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                del self._child_dict[name]
            else:
                if name in self.__dict__:
                    del self.__dict__[name]
                if isinstance(self._original_fields, tuple):
                    # tuples have no remove(): store a new tuple without the name
                    # (mirrors the tuple handling in setAttribute)
                    remaining = list(self._original_fields)
                    remaining.remove(name)
                    self._original_fields = tuple(remaining)
                else:
                    self._original_fields.remove(name)
                self._fields.remove(_prefix_keyword(name, False))

        def __getitem__(self, name):
            return self._child_dict[name]

        def __str__(self):
            nodeText = '' if self._text is None else ",text=%s" % self._text
            return "<%s,child_dict=%s%s>" % (self.getAttributes(), dict(self._child_dict), nodeText)

        def toXML(self, initialIndent="", indent="    ", withComments=False):
            """Return the XML representation of this element and its children.

            @param initialIndent: prefix for the line(s) of this element
            @param indent: additional prefix per nesting level
            @param withComments: if true, comment children are written as well;
                the value "inline" additionally appends a comment child (other than
                the first child) to the preceding line
            """
            fields = [' %s="%s"' % (self._original_fields[i], xmlescape(getattr(self, k)))
                      for i, k in enumerate(self._fields) if getattr(self, k) is not None and
                      # see #3454
                      '{' not in self._original_fields[i]]
            if self.isComment():
                if withComments:
                    return initialIndent + "<!-- %s -->\n" % self._text
                else:
                    return ""
            commentStart = ""
            commentEnd = ""
            if self._commented:
                commentStart = "!--"
                commentEnd = "--"
            if not self._child_dict and self._text is None:
                return initialIndent + "<%s%s%s/%s>\n" % (commentStart, self.name, "".join(fields), commentEnd)
            else:
                s = initialIndent + "<%s%s%s>\n" % (commentStart, self.name, "".join(fields))
                for i, c in enumerate(self._child_list):
                    if i > 0 and c.isComment() and withComments == "inline":
                        # drop the trailing newline so the comment follows on the same line
                        s = s[:-1]
                    s += c.toXML(initialIndent + indent, indent=indent, withComments=withComments)
                if self._text is not None and self._text.strip():
                    s += self._text.strip(" ")
                return s + "%s</%s%s>\n" % (initialIndent, self.name, commentEnd)

        def __repr__(self):
            return str(self)

        def __lt__(self, other):
            return str(self) < str(other)

    return CompoundObject
373
374
def parselines(xmlline, element_name, element_attrs=None, attr_conversions=None,
               heterogeneous=True, warn=False, addRoot="dummy"):
    """Yield the compound objects for element_name found in the given xml text.

    Nothing is yielded unless the text contains the opening tag of the element
    (either "<name>" or "<name "). Unless addRoot is None, the text is wrapped
    into an element of that name before it is handed to parse().
    """
    openers = ("<%s>" % element_name, "<%s " % element_name)
    if not any(opener in xmlline for opener in openers):
        return
    if addRoot is not None:
        xmlline = "<%s>\n%s</%s>\n" % (addRoot, xmlline, addRoot)
    for obj in parse(io.StringIO(xmlline), element_name, element_attrs, attr_conversions,
                     heterogeneous, warn):
        yield obj
386
387
388def _handle_namespace(tag, ignoreXmlns):
389    if ignoreXmlns and "}" in tag:
390        # see https://bugs.python.org/issue18304
391        return tag.split("}")[1]
392    return tag
393
394
395def _check_file_like(xmlfile):
396    if not hasattr(xmlfile, "read"):
397        return miscutils.openz(xmlfile), True
398    return xmlfile, False
399
400
def parse(xmlfile, element_names=None, element_attrs=None, attr_conversions=None,
          heterogeneous=True, warn=False, ignoreXmlns=False, outputLevel=1):
    """
    Parses the given element_names from xmlfile and yields compound objects for
    their xml subtrees (no extra objects are returned if element_names appear in
    the subtree). If element_names is None, all elements at nesting level
    outputLevel are yielded instead (the document root has level 0).
    The compound objects provide all element attributes of the root of the
    subtree as attributes unless element_attrs is supplied. In this case
    element_attrs maps element names to a list of attributes which are
    supplied. If attr_conversions is not empty it must map attribute names to
    callables which will be called upon the attribute value before storing under
    the attribute name.
    The compound objects give dictionary style access to the list of compound
    objects o for any children with the given element name
    o['child_element_name'] = [osub0, osub1, ...]
    As a shorthand, attribute style access to the list of child elements is
    provided unless an attribute with the same name as the child elements
    exists (i.e. o.child_element_name = [osub0, osub1, ...])
    @Note: With heterogeneous=False all elements with the same name must have
    the same type regardless of the subtree in which they occur. The default
    heterogeneous=True lifts this restriction (with reduced parsing speed)
    @Note: Attribute names may be modified to avoid name clashes
    with python keywords. (set warn=True to receive renaming warnings)
    @Note: The element_names may be either a single string or a list of strings.
    @Note: xmlfile may be a file name or a file-like object; only sources opened
    here are closed again.
    @Example: parse('plain.edg.xml', ['edge'])
    """
    if isinstance(element_names, str):
        element_names = [element_names]
    element_attrs = {} if element_attrs is None else element_attrs
    attr_conversions = {} if attr_conversions is None else attr_conversions
    element_types = {}
    if supports_comments():
        # keep comments in the tree so that they end up as children of the compound objects
        parser_kwargs = {'parser': ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))}
    else:
        parser_kwargs = {}
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        depth = -1
        for event, parsenode in ET.iterparse(xmlfile, events=('start', 'end'), **parser_kwargs):
            if event == 'start':
                depth += 1
                continue
            # 'end' event: the subtree of parsenode is complete
            tag = _handle_namespace(parsenode.tag, ignoreXmlns)
            if element_names is None:
                wanted = depth == outputLevel
            else:
                wanted = element_names and tag in element_names
            if wanted:
                yield _get_compound_object(parsenode, element_types,
                                           tag, element_attrs,
                                           attr_conversions, heterogeneous, warn,
                                           ignoreXmlns)
                parsenode.clear()
            depth -= 1
    finally:
        if close_source:
            xmlfile.close()
452
453
def _IDENTITY(x):
    """Return x unchanged."""
    return x
456
457
def _get_compound_object(node, element_types, element_name, element_attrs, attr_conversions,
                         heterogeneous, warn, ignoreXmlns):
    """Recursively convert the ElementTree node into a compound object.

    element_types caches the generated class per element name; the cached class
    is only reused if heterogeneous is False. Attribute values are passed
    through the matching entry of attr_conversions (if any).
    Raises an Exception if the attribute names for the element are not unique.
    """
    if heterogeneous or element_name not in element_types:
        # initialize the compound_object type from the encountered element
        # (or from the explicitly requested attribute names)
        attrnames = element_attrs.get(element_name, node.keys())
        if len(set(attrnames)) != len(attrnames):
            raise Exception(
                "non-unique attributes %s for element '%s'" % (attrnames, element_name))
        element_types[element_name] = compound_object(element_name, attrnames, warn)
    # prepare children (iterating over a node without children is a no-op)
    child_dict = {}
    child_list = []
    for sub in node:
        sub_tag = _handle_namespace(sub.tag, ignoreXmlns)
        sub_obj = _get_compound_object(sub, element_types, sub_tag, element_attrs, attr_conversions,
                                       heterogeneous, warn, ignoreXmlns)
        child_dict.setdefault(sub_tag, []).append(sub_obj)
        child_list.append(sub_obj)
    # the class is looked up after the children have been processed
    clazz = element_types[element_name]
    values = [attr_conversions.get(a, _IDENTITY)(node.get(a)) for a in clazz._original_fields]
    return clazz(values, child_dict, node.text, child_list)
483
484
def create_document(root_element_name, attrs=None, schema=None):
    """Return a compound object (without children) for a new document root.

    @param root_element_name: name of the root element
    @param attrs: optional dict of attributes for the root element; the dict
        itself is not modified
    @param schema: if None, the xsi namespace declaration and a schema location
        derived from root_element_name are added to the attributes; any other
        value suppresses this
    """
    # work on a copy so that the default schema attributes do not leak into the caller's dict
    attrs = dict(attrs) if attrs else {}
    if schema is None:
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        attrs["xsi:noNamespaceSchemaLocation"] = "http://sumo.dlr.de/xsd/" + root_element_name + "_file.xsd"
    names = sorted(attrs.keys())
    clazz = compound_object(root_element_name, names)
    return clazz([attrs.get(a) for a in names], OrderedDict())
493
494
def sum(elements, attrname):
    """Return the sum of the attribute attrname over the given elements.

    @param elements: iterable of objects (as returned by method parse)
    @param attrname: name of a numerical attribute; each value is converted with float()
    @return: the sum as float (0.0 if there are no elements)
    """
    # the initial value makes the reduction well defined for empty input
    return reduce(lambda x, y: x + y, (float(getattr(e, attrname)) for e in elements), 0.0)
499
500
def average(elements, attrname):
    """Return the average of the attribute attrname over the given elements.

    elements are objects as returned by method parse and attrname must be the
    name of a numerical attribute. An Exception is raised if elements is empty.
    """
    if not elements:
        raise Exception("average of 0 elements is not defined")
    return sum(elements, attrname) / len(elements)
508
509
def _createRecordAndPattern(element_name, attrnames, warn, optional, extra=None):
    """Return (Record, reprog) for line-based matching of an element.

    Record is a namedtuple type with one field per attribute (plus one per name
    in extra) and reprog a compiled regular expression with one named group per
    attribute. With optional=True every attribute may be missing in the line,
    otherwise all attributes must occur in the given order (other attributes may
    appear in between).
    """
    if isinstance(attrnames, str):
        attrnames = [attrnames]
    prefixedAttrnames = [_prefix_keyword(a, warn) for a in attrnames]
    # (xml name, group name) for every attribute
    pairs = list(zip(attrnames, prefixedAttrnames))
    head = '<%s' % element_name
    if optional:
        pattern = head + ''.join(['(\\s+%s="(?P<%s>[^"]*?)")?' % pair for pair in pairs])
    else:
        required = ['\\s+%s="(?P<%s>[^"]*?)"' % pair for pair in pairs]
        pattern = '(|\\s+.*)'.join([head] + required)
    if extra is not None:
        # extra fields are part of the record but not of the pattern
        prefixedAttrnames.extend([_prefix_keyword(a, warn) for a in extra])
    Record = namedtuple(_prefix_keyword(element_name, warn), prefixedAttrnames)
    return Record, re.compile(pattern)
525
526
527def _open(xmlfile, encoding="utf8"):
528    if isinstance(xmlfile, str):
529        if xmlfile.endswith(".gz"):
530            if encoding is None:
531                return gzip.open(xmlfile, "r")
532            return gzip.open(xmlfile, "rt")
533        if encoding is not None:
534            return io.open(xmlfile, encoding=encoding)
535    return xmlfile
536
537
538def _comment_filter(stream):
539    """
540    Filters given stream for comments. Is used by parse_fast and parse_fast_nested
541    """
542    in_comment = False
543    for line in stream:
544        if "<!--" in line or in_comment:
545            if "-->" in line:
546                yield re.sub(".*-->" if in_comment else "<!--.*-->", "", line)
547                in_comment = False
548            elif not in_comment:
549                yield re.sub("<!--.*", "", line)
550                in_comment = True
551        else:
552            yield line
553
554
555def _attrs_from_xsd_url(schema_url, element_name):
556    """Given a noNamespaceSchemaLocation URL and element name, return XSD attribute names or None."""
557    xsd_filename = schema_url.split('/')[-1]
558    sumo_home = os.environ.get('SUMO_HOME', os.path.normpath(
559        os.path.join(os.path.dirname(__file__), '..', '..', '..')))
560    xsd_path = os.path.join(sumo_home, 'data', 'xsd', xsd_filename)
561    if not os.path.isfile(xsd_path):
562        return None
563    xsd_struc = xsd.XsdStructure(xsd_path)
564    element = xsd_struc._namedElements.get(element_name)
565    if element is None:
566        return None
567    return [a.name for a in element.attributes]
568
569
def parse_fast(xmlfile, element_name, attrnames=None, warn=False, optional=False, encoding="utf8", line_filter=None):
    """
    Yields one record (namedtuple) with the given attrnames for every line that
    contains an element with element_name.
    @Note: The element must be on its own line and the attributes must appear in
    the given order. If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames is None, the attribute list is read from the XSD schema referenced in the
    XML file header (detected while reading); in this case optional is forced to True and
    all lines before the schema reference are skipped.
    If line_filter is given, matching lines for which line_filter(line) is true are skipped.
    @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed'])
    @Example: parse_fast('fcd.xml', 'vehicle')
    """
    matcher = None
    if attrnames is not None:
        Record, matcher = _createRecordAndPattern(element_name, attrnames, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if matcher is None:
                # still waiting for the schema reference to learn the attributes
                schema_hit = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schema_hit:
                    xsd_attrs = _attrs_from_xsd_url(schema_hit.group(1), element_name)
                    if xsd_attrs is not None:
                        Record, matcher = _createRecordAndPattern(element_name, xsd_attrs, warn, True)
                if matcher is None:
                    continue
            found = matcher.search(line)
            if not found:
                continue
            if line_filter is not None and line_filter(line):
                continue
            yield Record(**found.groupdict())
    finally:
        if close_source:
            xmlfile.close()
604
605
def parse_fast_nested(xmlfile, element_name, attrnames=None, element_name2=None, attrnames2=None,
                      warn=False, optional=False, encoding="utf8"):
    """
    Yields pairs (outer record, inner record) with the given attrnames of
    element_name and attrnames2 of element_name2 where element_name2 is a child
    element of element_name.
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or attrnames2 is None, the attribute list is read from the XSD schema
    referenced in the XML file header (detected while reading); in this case optional is forced to True.
    @Note: The element must be on its own line and the attributes must appear in
    the given order.
    @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane']):
    @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None):
    """
    outer_re = inner_re = record = None
    if attrnames is not None:
        Record, outer_re = _createRecordAndPattern(element_name, attrnames, warn, optional)
    if attrnames2 is not None:
        Record2, inner_re = _createRecordAndPattern(element_name2, attrnames2, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if outer_re is None or inner_re is None:
                # at least one attribute list must still be taken from the schema
                schema_hit = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schema_hit:
                    if outer_re is None:
                        detected = _attrs_from_xsd_url(schema_hit.group(1), element_name)
                        if detected is not None:
                            Record, outer_re = _createRecordAndPattern(element_name, detected, warn, True)
                    if inner_re is None:
                        detected2 = _attrs_from_xsd_url(schema_hit.group(1), element_name2)
                        if detected2 is not None:
                            Record2, inner_re = _createRecordAndPattern(element_name2, detected2, warn, True)
                if outer_re is None or inner_re is None:
                    continue
            inner_match = inner_re.search(line)
            if record and inner_match:
                yield record, Record2(**inner_match.groupdict())
                continue
            outer_match = outer_re.search(line)
            if outer_match:
                record = Record(**outer_match.groupdict())
            elif element_name in line:
                # any other mention of the outer element ends the current record
                record = None
    finally:
        if close_source:
            xmlfile.close()
654
655
def parse_fast_structured(xmlfile, element_name, attrnames=None, nested=None,
                          warn=False, optional=False, encoding="utf8"):
    """
    Yields one record for every element with element_name holding the given
    attrnames and, for every key of nested, the list of records of the child
    elements with that name (nesting level 1).
    Unlike parse_fast_nested this function can handle multiple different child elements and
    returns objects where the child elements can be accessed by name (e.g. timestep.vehicle[0])
    as with the parse method. The returned object is not modifiable though.
    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or any value in nested is None, the attribute list is read from the XSD schema
    referenced in the XML file header (detected while reading); in this case optional is forced to True.
    @Note: Every element must be on its own line and the attributes must appear in the given order.
    @Note: A record is yielded when the line with the closing tag of element_name is read.
    @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'],
                                    {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}):
    @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):
    """
    if nested is None:
        nested = {}
    reprog = record = None
    if attrnames is not None and not any(v is None for v in nested.values()):
        # everything is known up front, no schema lookup needed
        Record, reprog = _createRecordAndPattern(element_name, attrnames, warn, optional, nested.keys())
        re2 = [(elem,) + _createRecordAndPattern(elem, attr, warn, optional) for elem, attr in nested.items()]
    finalizer = "</%s>" % element_name
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if reprog is None:
                schema_hit = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schema_hit:
                    if attrnames is not None:
                        resolved = attrnames
                    else:
                        resolved = _attrs_from_xsd_url(schema_hit.group(1), element_name)
                    resolved_nested = {name: (attrs if attrs is not None
                                              else _attrs_from_xsd_url(schema_hit.group(1), name))
                                       for name, attrs in nested.items()}
                    if resolved is not None and not any(v is None for v in resolved_nested.values()):
                        # attribute lists taken from the schema are always optional
                        opt_parent = True if attrnames is None else optional
                        Record, reprog = _createRecordAndPattern(
                            element_name, resolved, warn, opt_parent, resolved_nested.keys())
                        re2 = [(elem,) + _createRecordAndPattern(elem, attr, warn, True if nested[elem] is None else optional)  # noqa
                               for elem, attr in resolved_nested.items()]
                if reprog is None:
                    continue
            if record:
                # inside an open element: look for children or the closing tag
                matched_child = False
                for child_name, ChildRecord, child_re in re2:
                    child_match = child_re.search(line)
                    if child_match:
                        inner = ChildRecord(**child_match.groupdict())
                        getattr(record, child_name).append(inner)
                        matched_child = True
                        break
                if not matched_child and finalizer in line:
                    yield record
                    record = None
            else:
                outer_match = reprog.search(line)
                if outer_match:
                    args = dict(outer_match.groupdict())
                    for child_name, _, __ in re2:
                        args[child_name] = []
                    record = Record(**args)
    finally:
        if close_source:
            xmlfile.close()
720
721
def quoteattr(val, ensureUnicode=False):
    """Return val as XML attribute value which is always enclosed in double quotes.

    If ensureUnicode is set, a bytes value is decoded as utf-8 first.
    """
    if type(val) is bytes and ensureUnicode:
        val = val.decode("utf-8")
    # saxutils sometimes uses single quotes around the attribute
    # we can prevent this by adding an artificial single quote to the value and removing it again
    quoted = saxutils.quoteattr("'" + val)
    return '"' + quoted[2:]
728
729
def contextualRename(xmlTree, prefixes, attribute='id', ids=None):
    """
    Renames the given attribute in a specified set of child elements within
    xmlTree and also replaces all attribute values that referred to such an id
    with the new value.
    Example:
    Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}
      - all road ids will be renamed to rN where N is a running integer
      - all junction ids will be renamed to jN where N is also a running integer
      - all attributes that referred to roads or junctions will now refer to
        their new ids
    The tree is modified in place and nothing is returned. If a dict is passed
    as ids it receives the mapping from old to new ids (and from every new id
    to itself).
    """
    if ids is None:
        ids = {}
    newIds = set()  # all new ids handed out so far (to avoid duplicates)
    attribute = _prefix_keyword(attribute)

    def rename(obj, index=0):
        # renames obj and its descendants; index is the running number used to
        # resolve collisions and is returned so that it carries over between calls
        if obj.name in prefixes:
            if obj.hasAttribute(attribute):
                oldID = obj.getAttribute(attribute)
                if oldID not in ids:
                    # first candidate is derived from the size of the mapping
                    newID = prefixes[obj.name] + str(len(ids))
                    while newID in newIds:
                        index += 1
                        newID = prefixes[obj.name] + str(index)
                    newIds.add(newID)
                    ids[oldID] = newID
                    # keep id on second pass
                    ids[newID] = newID
                obj.setAttribute(attribute, ids[oldID])
        # replace every attribute value that is a known id
        for a, v in obj.getAttributes():
            if v in ids:
                obj.setAttribute(a, ids[v])
        for child in obj.getChildList():
            index = rename(child, index)
        return index

    index = rename(xmlTree)
    rename(xmlTree, index)  # call again in case usage came before definition
DEFAULT_ATTR_CONVERSIONS = {'shape': <function <lambda>>, 'speed': <class 'float'>, 'length': <class 'float'>, 'width': <class 'float'>, 'angle': <class 'float'>, 'endOffset': <class 'float'>, 'radius': <class 'float'>, 'contPos': <class 'float'>, 'visibility': <class 'float'>, 'startPos': <class 'float'>, 'endPos': <class 'float'>, 'position': <class 'float'>, 'x': <class 'float'>, 'y': <class 'float'>, 'lon': <class 'float'>, 'lat': <class 'float'>, 'freq': <class 'float'>, 'priority': <class 'int'>, 'numLanes': <class 'int'>, 'index': <class 'int'>, 'linkIndex': <class 'int'>, 'linkIndex2': <class 'int'>, 'fromLane': <class 'int'>, 'toLane': <class 'int'>}
class NestingHandler(xml.sax.handler.ContentHandler):
class NestingHandler(xml.sax.handler.ContentHandler):

    """SAX content handler that keeps track of the currently open tags"""

    def __init__(self):
        # names of all elements which were opened but not closed yet,
        # the outermost element comes first
        self.tagstack = list()

    def startElement(self, name, attrs):
        """Remember the tag which was just opened as the innermost one"""
        self.tagstack += [name]

    def endElement(self, name):
        """Forget the innermost open tag"""
        del self.tagstack[-1]

    def depth(self):
        """Return the current nesting level where the root element has level 0"""
        # do not count the root element
        open_tags = len(self.tagstack)
        return open_tags - 1

A handler which knows the current nesting of tags

tagstack
def startElement(self, name, attrs):
80    def startElement(self, name, attrs):
81        self.tagstack.append(name)

Signals the start of an element in non-namespace mode.

The name parameter contains the raw XML 1.0 name of the element type as a string and the attrs parameter holds an instance of the Attributes class containing the attributes of the element.

def endElement(self, name):
83    def endElement(self, name):
84        self.tagstack.pop()

Signals the end of an element in non-namespace mode.

The name parameter contains the name of the element type, just as with the startElement event.

def depth(self):
86    def depth(self):
87        # do not count the root element
88        return len(self.tagstack) - 1
class AttrFinder(NestingHandler):
 91class AttrFinder(NestingHandler):
 92
 93    def __init__(self, xsdFile, source, split, keepAttrs=None):
 94        NestingHandler.__init__(self)
 95        self.tagDepths = {}  # tag -> depth of appearance
 96        self.tagAttrs = defaultdict(OrderedDict)  # tag -> set of attrs
 97        self.renamedAttrs = {}  # (name, attr) -> renamedAttr
 98        self.attrs = {}
 99        self.depthTags = {}  # child of root: depth of appearance -> tag list
100        self.rootDepth = 1 if split else 0
101        self.keepAttrs = keepAttrs
102        if xsdFile:
103            self.xsdStruc = xsd.XsdStructure(xsdFile)
104            if split:
105                for ele in self.xsdStruc.root.children:
106                    self.attrs[ele.name] = []
107                    self.depthTags[ele.name] = [[]]
108                    self.recursiveAttrFind(ele, ele, 1)
109            else:
110                self.attrs[self.xsdStruc.root.name] = []
111                self.depthTags[self.xsdStruc.root.name] = []
112                self.recursiveAttrFind(
113                    self.xsdStruc.root, self.xsdStruc.root, 0)
114        else:
115            self.xsdStruc = None
116            xml.sax.parse(source, self)
117
118    def addElement(self, root, name, depth):
119        # print("adding", root, name, depth)
120        if len(self.depthTags[root]) == depth:
121            self.tagDepths[name] = depth
122            self.depthTags[root].append([name])
123            return True
124        if name not in self.tagDepths:
125            self.depthTags[root][depth].append(name)
126            return True
127        if name not in self.depthTags[root][depth]:
128            print("Ignoring tag %s at depth %s" %
129                  (name, depth), file=sys.stderr)
130        return False
131
132    def recursiveAttrFind(self, root, currEle, depth):
133        if not self.addElement(root.name, currEle.name, depth):
134            return
135        for a in currEle.attributes:
136            if ":" not in a.name:  # no namespace support yet
137                self.tagAttrs[currEle.name][a.name] = a
138                anew = "%s_%s" % (currEle.name, a.name)
139                self.renamedAttrs[(currEle.name, a.name)] = anew
140                attrList = self.attrs[root.name]
141                if anew in attrList:
142                    del attrList[attrList.index(anew)]
143                attrList.append(anew)
144        for ele in currEle.children:
145            # print("attr", root.name, ele.name, depth)
146            self.recursiveAttrFind(root, ele, depth + 1)
147
148    def startElement(self, name, attrs):
149        NestingHandler.startElement(self, name, attrs)
150        if self.depth() >= self.rootDepth:
151            root = self.tagstack[self.rootDepth]
152            if self.depth() == self.rootDepth and root not in self.attrs:
153                self.attrs[root] = []
154                self.depthTags[root] = [[]] * self.rootDepth
155            if not self.addElement(root, name, self.depth()):
156                return
157            # collect attributes
158            for a in sorted(list(attrs.keys())):
159                if self.keepAttrs is not None and a not in self.keepAttrs:
160                    continue
161                if a not in self.tagAttrs[name] and ":" not in a:
162                    self.tagAttrs[name][a] = xsd.XmlAttribute(a)
163                    if not (name, a) in self.renamedAttrs:
164                        anew = "%s_%s" % (name, a)
165                        self.renamedAttrs[(name, a)] = anew
166                        self.attrs[root].append(anew)

A handler which knows the current nesting of tags

AttrFinder(xsdFile, source, split, keepAttrs=None)
 93    def __init__(self, xsdFile, source, split, keepAttrs=None):
 94        NestingHandler.__init__(self)
 95        self.tagDepths = {}  # tag -> depth of appearance
 96        self.tagAttrs = defaultdict(OrderedDict)  # tag -> set of attrs
 97        self.renamedAttrs = {}  # (name, attr) -> renamedAttr
 98        self.attrs = {}
 99        self.depthTags = {}  # child of root: depth of appearance -> tag list
100        self.rootDepth = 1 if split else 0
101        self.keepAttrs = keepAttrs
102        if xsdFile:
103            self.xsdStruc = xsd.XsdStructure(xsdFile)
104            if split:
105                for ele in self.xsdStruc.root.children:
106                    self.attrs[ele.name] = []
107                    self.depthTags[ele.name] = [[]]
108                    self.recursiveAttrFind(ele, ele, 1)
109            else:
110                self.attrs[self.xsdStruc.root.name] = []
111                self.depthTags[self.xsdStruc.root.name] = []
112                self.recursiveAttrFind(
113                    self.xsdStruc.root, self.xsdStruc.root, 0)
114        else:
115            self.xsdStruc = None
116            xml.sax.parse(source, self)
tagDepths
tagAttrs
renamedAttrs
attrs
depthTags
rootDepth
keepAttrs
def addElement(self, root, name, depth):
118    def addElement(self, root, name, depth):
119        # print("adding", root, name, depth)
120        if len(self.depthTags[root]) == depth:
121            self.tagDepths[name] = depth
122            self.depthTags[root].append([name])
123            return True
124        if name not in self.tagDepths:
125            self.depthTags[root][depth].append(name)
126            return True
127        if name not in self.depthTags[root][depth]:
128            print("Ignoring tag %s at depth %s" %
129                  (name, depth), file=sys.stderr)
130        return False
def recursiveAttrFind(self, root, currEle, depth):
132    def recursiveAttrFind(self, root, currEle, depth):
133        if not self.addElement(root.name, currEle.name, depth):
134            return
135        for a in currEle.attributes:
136            if ":" not in a.name:  # no namespace support yet
137                self.tagAttrs[currEle.name][a.name] = a
138                anew = "%s_%s" % (currEle.name, a.name)
139                self.renamedAttrs[(currEle.name, a.name)] = anew
140                attrList = self.attrs[root.name]
141                if anew in attrList:
142                    del attrList[attrList.index(anew)]
143                attrList.append(anew)
144        for ele in currEle.children:
145            # print("attr", root.name, ele.name, depth)
146            self.recursiveAttrFind(root, ele, depth + 1)
def startElement(self, name, attrs):
148    def startElement(self, name, attrs):
149        NestingHandler.startElement(self, name, attrs)
150        if self.depth() >= self.rootDepth:
151            root = self.tagstack[self.rootDepth]
152            if self.depth() == self.rootDepth and root not in self.attrs:
153                self.attrs[root] = []
154                self.depthTags[root] = [[]] * self.rootDepth
155            if not self.addElement(root, name, self.depth()):
156                return
157            # collect attributes
158            for a in sorted(list(attrs.keys())):
159                if self.keepAttrs is not None and a not in self.keepAttrs:
160                    continue
161                if a not in self.tagAttrs[name] and ":" not in a:
162                    self.tagAttrs[name][a] = xsd.XmlAttribute(a)
163                    if not (name, a) in self.renamedAttrs:
164                        anew = "%s_%s" % (name, a)
165                        self.renamedAttrs[(name, a)] = anew
166                        self.attrs[root].append(anew)

Signals the start of an element in non-namespace mode.

The name parameter contains the raw XML 1.0 name of the element type as a string and the attrs parameter holds an instance of the Attributes class containing the attributes of the element.

def xmlescape(value):
def xmlescape(value):
    """Return str(value) with &, <, > and the double quote replaced by XML entities"""
    text = str(value)
    # saxutils.escape handles &, < and > itself, the quote is added here
    return saxutils.escape(text, entities={'"': '&quot;'})
def supports_comments():
def supports_comments():
    """Return True if the running interpreter is Python 3.8 or newer.

    The result is used to decide whether the ElementTree builder may be
    created with insert_comments=True.
    """
    # compare the version as a tuple: checking major and minor separately
    # would wrongly reject any future major version with a minor below 8
    return sys.version_info >= (3, 8)
def compound_object(element_name, attrnames, warn=False, sort=False):
def compound_object(element_name, attrnames, warn=False, sort=False):
    """return a class which delegates bracket access to an internal dict.
       Missing attributes are delegated to the child dict for convenience.
       @note: Care must be taken when child nodes and attributes have the same names

       @param element_name: stored as .name on every instance and used as tag by toXML
       @param attrnames: the attribute names of the element
       @param warn: passed on to _prefix_keyword when attribute names are converted
       @param sort: if set, the attribute names are kept as a sorted list,
                    otherwise as a tuple in the given order
    """
    class CompoundObject():
        # attribute names as given by the caller (sorted list or tuple, see sort)
        _original_fields = sorted(attrnames) if sort else tuple(attrnames)
        # the names under which the values are stored on the instance (same order)
        _fields = [_prefix_keyword(a, warn) for a in _original_fields]

        def __init__(self, values, child_dict=None, text=None, child_list=None):
            # values are matched to the fields by position
            for name, val in zip(self._fields, values):
                self.__dict__[name] = val
            self._child_dict = child_dict if child_dict else {}
            self.name = element_name
            self._text = text
            self._child_list = child_list if child_list else []
            self._commented = False

        def getAttributes(self):
            """Return a list of (field name, value) pairs"""
            return [(k, getattr(self, k)) for k in self._fields]

        def hasAttribute(self, name):
            return name in self._fields

        def getAttribute(self, name):
            """Return the value of the attribute, raise AttributeError if it is unknown"""
            if self.hasAttribute(name):
                return self.__dict__[name]
            raise AttributeError

        def getAttributeSecure(self, name, default=None):
            """Return the value of the attribute or default if it is unknown"""
            if self.hasAttribute(name):
                return self.__dict__[name]
            return default

        def setAttribute(self, name, value):
            """Set the value of an attribute, registering the attribute if it is new"""
            if name in self._fields:
                self.__dict__[name] = value
            else:
                if name not in self._original_fields:
                    if isinstance(self._original_fields, tuple):
                        # tuples cannot be extended in place
                        tempList = list(self._original_fields)
                        tempList.append(name)
                        self._original_fields = tuple(tempList)
                    else:
                        self._original_fields.append(name)
                    self._fields.append(_prefix_keyword(name, warn))
                self.__dict__[_prefix_keyword(name, warn)] = value

        def hasChild(self, name):
            return name in self._child_dict

        def getChild(self, name):
            """Return the list of children with the given element name (KeyError if there are none)"""
            return self._child_dict[name]

        def addChild(self, name, attrs=None, sortAttrs=True):
            """Create a child element with the given attributes, append and return it"""
            if attrs is None:
                attrs = {}
            clazz = compound_object(name, attrs.keys(), sort=sortAttrs)
            child = clazz([attrs.get(a) for a in (sorted(attrs.keys()) if sortAttrs else attrs.keys())])
            self._child_dict.setdefault(name, []).append(child)
            self._child_list.append(child)
            return child

        def removeChild(self, child):
            self._child_dict[child.name].remove(child)
            self._child_list.remove(child)

        def setChildList(self, childs):
            """Replace all children by the given list"""
            for c in self._child_list:
                self._child_dict[c.name].remove(c)
            for c in childs:
                self._child_dict.setdefault(c.name, []).append(c)
            self._child_list = childs

        def getChildList(self, withComments=False):
            """Return the children in document order, comment nodes only on request"""
            if withComments:
                return self._child_list
            else:
                return [c for c in self._child_list if not c.isComment()]

        def getText(self):
            return self._text

        def setText(self, text):
            self._text = text

        def isComment(self):
            # comment nodes are recognized by the text representation of their name
            return "function Comment" in str(self.name)

        def getComments(self):
            """Return the texts of the comment children (an empty list if there are none)"""
            # supports_comments must be called, the bare function object is always true
            if not supports_comments():
                sys.stderr.write("Comment parsing is only supported with version 3.8 or higher by sumolib.xml\n")
            for name, children in self._child_dict.items():
                if "function Comment" in str(name):
                    return [c.getText() for c in children]
            return []

        def setCommented(self, commented=True, recurse=False):
            """Mark this element to be written as a comment by toXML"""
            self._commented = commented
            if commented or recurse:
                # the flag is reset on all descendants
                for c in self._child_list:
                    c.setCommented(False, True)

        def isCommented(self):
            return self._commented

        def __getattr__(self, name):
            # only called for names which are not found the regular way:
            # hand out the children of that name (or None)
            if name[:2] != "__":
                return self._child_dict.get(name, None)
            raise AttributeError

        def __setattr__(self, name, value):
            if name != "_child_dict" and name in self._child_dict:
                # this could be optimized by using the child_list only if there are different children
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                self._child_dict[name] = value
                for c in value:
                    self._child_list.append(c)
            else:
                self.__dict__[name] = value

        def __delattr__(self, name):
            if name in self._child_dict:
                for c in self._child_dict[name]:
                    self._child_list.remove(c)
                del self._child_dict[name]
            else:
                if name in self.__dict__:
                    del self.__dict__[name]
                if isinstance(self._original_fields, tuple):
                    # tuples have no remove(), rebuild them like setAttribute does
                    remaining = list(self._original_fields)
                    remaining.remove(name)
                    self._original_fields = tuple(remaining)
                else:
                    self._original_fields.remove(name)
                self._fields.remove(_prefix_keyword(name, False))

        def __getitem__(self, name):
            return self._child_dict[name]

        def __str__(self):
            nodeText = '' if self._text is None else ",text=%s" % self._text
            return "<%s,child_dict=%s%s>" % (self.getAttributes(), dict(self._child_dict), nodeText)

        def toXML(self, initialIndent="", indent="    ", withComments=False):
            """Return the XML text for this element and its children.
            Attributes with value None are left out. Comment nodes are only
            written if withComments is set (the value "inline" keeps a comment
            on the line of the preceding child)"""
            fields = [' %s="%s"' % (self._original_fields[i], xmlescape(getattr(self, k)))
                      for i, k in enumerate(self._fields) if getattr(self, k) is not None and
                      # see #3454
                      '{' not in self._original_fields[i]]
            if self.isComment():
                if withComments:
                    return initialIndent + "<!-- %s -->\n" % self._text
                else:
                    return ""
            commentStart = ""
            commentEnd = ""
            if self._commented:
                commentStart = "!--"
                commentEnd = "--"
            if not self._child_dict and self._text is None:
                return initialIndent + "<%s%s%s/%s>\n" % (commentStart, self.name, "".join(fields), commentEnd)
            else:
                s = initialIndent + "<%s%s%s>\n" % (commentStart, self.name, "".join(fields))
                for i, c in enumerate(self._child_list):
                    if i > 0 and c.isComment() and withComments == "inline":
                        # drop the line break so the comment follows on the same line
                        s = s[:-1]
                    s += c.toXML(initialIndent + indent, indent=indent, withComments=withComments)
                if self._text is not None and self._text.strip():
                    s += self._text.strip(" ")
                return s + "%s</%s%s>\n" % (initialIndent, self.name, commentEnd)

        def __repr__(self):
            return str(self)

        def __lt__(self, other):
            return str(self) < str(other)

    return CompoundObject

return a class which delegates bracket access to an internal dict. Missing attributes are delegated to the child dict for convenience. @note: Care must be taken when child nodes and attributes have the same names

def parselines( xmlline, element_name, element_attrs=None, attr_conversions=None, heterogeneous=True, warn=False, addRoot='dummy'):
def parselines(xmlline, element_name, element_attrs=None, attr_conversions=None,
               heterogeneous=True, warn=False, addRoot="dummy"):
    """
    Parses the elements with element_name from the given text and yields the
    objects created by parse. Nothing is yielded if the text contains neither
    "<element_name>" nor "<element_name ".
    Unless addRoot is None, the text is wrapped into an element of that name
    before parsing.
    """
    openings = ("<%s>" % element_name, "<%s " % element_name)
    if not any(opening in xmlline for opening in openings):
        return
    if addRoot is not None:
        xmlline = "<%s>\n%s</%s>\n" % (addRoot, xmlline, addRoot)
    for item in parse(io.StringIO(xmlline), element_name, element_attrs, attr_conversions,
                      heterogeneous, warn):
        yield item
def parse( xmlfile, element_names=None, element_attrs=None, attr_conversions=None, heterogeneous=True, warn=False, ignoreXmlns=False, outputLevel=1):
def parse(xmlfile, element_names=None, element_attrs=None, attr_conversions=None,
          heterogeneous=True, warn=False, ignoreXmlns=False, outputLevel=1):
    """
    Parses the given element_names from xmlfile and yields compound objects for
    their xml subtrees (no extra objects are returned if element_names appear in
    the subtree). If element_names is None, all elements at nesting level
    outputLevel are yielded instead (the root element has level 0).
    The compound objects provide all element attributes of the root of the
    subtree as attributes unless element_attrs are supplied. In this case
    element_attrs maps element names to a list of attributes which are
    supplied. If attr_conversions is not empty it must map attribute names to
    callables which will be called upon the attribute value before storing under
    the attribute name.
    The compound objects give dictionary style access to the list of compound
    objects o for any children with the given element name
    o['child_element_name'] = [osub0, osub1, ...]
    As a shorthand, attribute style access to the list of child elements is
    provided unless an attribute with the same name as the child elements
    exists (i.e. o.child_element_name = [osub0, osub1, ...])
    @Note: All elements with the same name must have the same type regardless of
    the subtree in which they occur (heterogeneous cases may be handled by
    setting heterogeneous=True (with reduced parsing speed))
    @Note: Attribute names may be modified to avoid name clashes
    with python keywords. (set warn=True to receive renaming warnings)
    @Note: The element_names may be either a single string or a list of strings.
    @Example: parse('plain.edg.xml', ['edge'])
    """
    if isinstance(element_names, str):
        element_names = [element_names]
    element_attrs = {} if element_attrs is None else element_attrs
    attr_conversions = {} if attr_conversions is None else attr_conversions
    element_types = {}
    if supports_comments():
        # keep comments as nodes of the tree
        parser_args = {'parser': ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))}
    else:
        parser_args = {}
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        depth = -1
        for event, node in ET.iterparse(xmlfile, events=('start', 'end'), **parser_args):
            if event == 'start':
                depth += 1
                continue
            # an element was closed, its subtree is complete now
            tag = _handle_namespace(node.tag, ignoreXmlns)
            if element_names is None:
                selected = depth == outputLevel
            else:
                selected = element_names and tag in element_names
            if selected:
                yield _get_compound_object(node, element_types,
                                           tag, element_attrs,
                                           attr_conversions, heterogeneous, warn,
                                           ignoreXmlns)
                # release the memory of the subtree which was handed out
                node.clear()
            depth -= 1
    finally:
        if close_source:
            xmlfile.close()

Parses the given element_names from xmlfile and yields compound objects for their xml subtrees (no extra objects are returned if element_names appear in the subtree). The compound objects provide all element attributes of the root of the subtree as attributes unless element_attrs are supplied. In this case element_attrs maps element names to a list of attributes which are supplied. If attr_conversions is not empty it must map attribute names to callables which will be called upon the attribute value before storing under the attribute name. The compound objects give dictionary style access to the list of compound objects o for any children with the given element name o['child_element_name'] = [osub0, osub1, ...] As a shorthand, attribute style access to the list of child elements is provided unless an attribute with the same name as the child elements exists (i.e. o.child_element_name = [osub0, osub1, ...]) @Note: All elements with the same name must have the same type regardless of the subtree in which they occur (heterogeneous cases may be handled by setting heterogeneous=True (with reduced parsing speed)) @Note: Attribute names may be modified to avoid name clashes with python keywords. (set warn=True to receive renaming warnings) @Note: The element_names may be either a single string or a list of strings. @Example: parse('plain.edg.xml', ['edge'])

def create_document(root_element_name, attrs=None, schema=None):
def create_document(root_element_name, attrs=None, schema=None):
    """
    Creates a compound object for a new root element.
    @param root_element_name: the tag name of the root element
    @param attrs: optional mapping of attribute names to values (it is not modified)
    @param schema: if None, the attributes "xmlns:xsi" and
        "xsi:noNamespaceSchemaLocation" (pointing to
        http://sumo.dlr.de/xsd/<root_element_name>_file.xsd) are added;
        any other value suppresses them (the value itself is not used here)
    @return: the root object with the attributes in sorted order and no children
    """
    # work on a copy so that the dictionary of the caller is never changed
    attrs = {} if attrs is None else dict(attrs)
    if schema is None:
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        attrs["xsi:noNamespaceSchemaLocation"] = "http://sumo.dlr.de/xsd/" + root_element_name + "_file.xsd"
    names = sorted(attrs.keys())
    clazz = compound_object(root_element_name, names)
    return clazz([attrs.get(a) for a in names], OrderedDict())
def sum(elements, attrname):
def sum(elements, attrname):
    """
    For the given elements (as returned by method parse) compute the sum for attrname.
    @param elements: any iterable of objects providing the attribute
    @param attrname: must be the name of a numerical attribute (converted with float)
    @return: the sum as float, 0.0 if there are no elements
    """
    values = [float(getattr(e, attrname)) for e in elements]
    if not values:
        # reduce without an initial value raises a TypeError on empty input
        return 0.0
    return reduce(lambda x, y: x + y, values)
def average(elements, attrname):
def average(elements, attrname):
    """
    For the given elements (as returned by method parse) compute the average for attrname.
    @param elements: any iterable of objects providing the attribute, generators are
        accepted as well (the values are collected once)
    @param attrname: must be the name of a numerical attribute (converted with float)
    @return: the average as float
    @raise Exception: if there are no elements
    """
    # collect the values first: a generator is always true and has no len()
    values = [float(getattr(e, attrname)) for e in (elements if elements is not None else ())]
    if not values:
        raise Exception("average of 0 elements is not defined")
    return reduce(lambda x, y: x + y, values) / len(values)
def parse_fast( xmlfile, element_name, attrnames=None, warn=False, optional=False, encoding='utf8', line_filter=None):
def parse_fast(xmlfile, element_name, attrnames=None, warn=False, optional=False, encoding="utf8", line_filter=None):
    """
    Yields one record per line which contains an element with element_name,
    holding the values of the given attrnames.
    @Note: The element must be on its own line and the attributes must appear in
    the given order. With "optional=True", missing attributes are set to None.
    In this case all (potential) attributes must be listed (even the ones you
    are not interested in), only attributes at the end may be left out.
    If attrnames is None, the attribute list is read from the XSD schema referenced in the
    XML file header (detected while reading); in this case optional is forced to True.
    Matching lines for which line_filter returns a true value are skipped.
    @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed'])
    @Example: parse_fast('fcd.xml', 'vehicle')
    """
    pattern = None
    if attrnames is not None:
        Record, pattern = _createRecordAndPattern(element_name, attrnames, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if pattern is None:
                # no attributes known yet: wait for the schema reference
                schema_match = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schema_match:
                    detected = _attrs_from_xsd_url(schema_match.group(1), element_name)
                    if detected is not None:
                        Record, pattern = _createRecordAndPattern(element_name, detected, warn, True)
                if pattern is None:
                    continue
            match = pattern.search(line)
            if not match:
                continue
            if line_filter is not None and line_filter(line):
                continue
            yield Record(**match.groupdict())
    finally:
        if close_source:
            xmlfile.close()

Parses the given attrnames from all elements with element_name @Note: The element must be on its own line and the attributes must appear in the given order. If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Example: parse_fast('plain.edg.xml', 'edge', ['id', 'speed']) @Example: parse_fast('fcd.xml', 'vehicle')

def parse_fast_nested( xmlfile, element_name, attrnames=None, element_name2=None, attrnames2=None, warn=False, optional=False, encoding='utf8'):
def parse_fast_nested(xmlfile, element_name, attrnames=None, element_name2=None, attrnames2=None,
                      warn=False, optional=False, encoding="utf8"):
    """
    Yields pairs (outer record, inner record) where the outer record holds the
    attrnames of the most recent element with element_name and the inner record
    the attrnames2 of an element with element_name2 which follows it.
    With "optional=True", missing attributes are set to None.
    In this case all (potential) attributes must be listed (even the ones you
    are not interested in), only attributes at the end may be left out.
    If attrnames or attrnames2 is None, the attribute list is read from the XSD schema
    referenced in the XML file header (detected while reading); in this case optional is forced to True.
    @Note: The element must be on its own line and the attributes must appear in
    the given order.
    @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane'])
    @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None)
    """
    outer_re = inner_re = current = None
    if attrnames is not None:
        Record, outer_re = _createRecordAndPattern(element_name, attrnames, warn, optional)
    if attrnames2 is not None:
        Record2, inner_re = _createRecordAndPattern(element_name2, attrnames2, warn, optional)
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if outer_re is None or inner_re is None:
                # some attributes are still unknown: wait for the schema reference
                schema_match = re.search(r'noNamespaceSchemaLocation="([^"]*)"', line)
                if schema_match:
                    if outer_re is None:
                        detected = _attrs_from_xsd_url(schema_match.group(1), element_name)
                        if detected is not None:
                            Record, outer_re = _createRecordAndPattern(element_name, detected, warn, True)
                    if inner_re is None:
                        detected2 = _attrs_from_xsd_url(schema_match.group(1), element_name2)
                        if detected2 is not None:
                            Record2, inner_re = _createRecordAndPattern(element_name2, detected2, warn, True)
                if outer_re is None or inner_re is None:
                    continue
            inner_match = inner_re.search(line)
            if current and inner_match:
                yield current, Record2(**inner_match.groupdict())
                continue
            outer_match = outer_re.search(line)
            if outer_match:
                current = Record(**outer_match.groupdict())
            elif element_name in line:
                # the element name occurs without matching the pattern: forget the outer record
                current = None
    finally:
        if close_source:
            xmlfile.close()

Parses the given attrnames from all elements with element_name And attrnames2 from element_name2 where element_name2 is a child element of element_name If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames or attrnames2 is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Note: The element must be on its own line and the attributes must appear in the given order. @Example: parse_fast_nested('fcd.xml', 'timestep', ['time'], 'vehicle', ['id', 'speed', 'lane']): @Example: parse_fast_nested('fcd.xml', 'timestep', None, 'vehicle', None):

def parse_fast_structured( xmlfile, element_name, attrnames=None, nested=None, warn=False, optional=False, encoding='utf8'):
def parse_fast_structured(xmlfile, element_name, attrnames=None, nested=None,
                          warn=False, optional=False, encoding="utf8"):
    """
    Line-based generator that yields one record per element_name element, with its
    direct (level 1) child elements collected in per-name lists.
    In contrast to parse_fast_nested, several different child element types are supported
    and the children are reachable by name on the yielded object (e.g. timestep.vehicle[0]),
    similar to the parse method. The returned object is not modifiable though.

    xmlfile: file name or file-like object (handed to _check_file_like)
    element_name: name of the parent element
    attrnames: attribute names of the parent element, or None to read them from the schema
    nested: dict mapping child element name to its attribute name list (or None per child
            to read that list from the schema); defaults to no children
    warn, optional: forwarded to _createRecordAndPattern
    encoding: accepted for interface compatibility; not referenced in this function body

    If you set "optional=True", missing attributes will be set to None.
    Make sure that you list all (potential) attributes (even the ones you are not interested in)
    in this case. You can only leave out attributes at the end.
    If attrnames or any value in nested is None, the attribute lists are taken from the XSD
    schema named by the noNamespaceSchemaLocation attribute found while reading; optional is
    then forced to True for every element whose list came from the schema. Lines are skipped
    until all attribute lists could be resolved.
    A record is yielded when a line containing the closing tag of element_name is reached.
    @Note: Every element must be on its own line and the attributes must appear in the given order.
    @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'],
                                    {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}):
    @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):
    """
    if nested is None:
        nested = {}
    parent_pattern = None
    current = None
    if attrnames is not None and all(attrs is not None for attrs in nested.values()):
        # everything was given explicitly, so the patterns can be built right away
        ParentRecord, parent_pattern = _createRecordAndPattern(
            element_name, attrnames, warn, optional, nested.keys())
        children = [(child,) + _createRecordAndPattern(child, attrs, warn, optional)
                    for child, attrs in nested.items()]
    closing_tag = "</%s>" % element_name
    schema_re = re.compile(r'noNamespaceSchemaLocation="([^"]*)"')
    xmlfile, close_source = _check_file_like(xmlfile)
    try:
        for line in _comment_filter(xmlfile):
            if parent_pattern is None:
                # still waiting for the schema reference to resolve the missing attribute lists
                schema_match = schema_re.search(line)
                if schema_match is not None:
                    schema_url = schema_match.group(1)
                    if attrnames is None:
                        parent_attrs = _attrs_from_xsd_url(schema_url, element_name)
                    else:
                        parent_attrs = attrnames
                    child_attrs = {}
                    for child, attrs in nested.items():
                        if attrs is None:
                            attrs = _attrs_from_xsd_url(schema_url, child)
                        child_attrs[child] = attrs
                    if parent_attrs is not None and all(a is not None for a in child_attrs.values()):
                        # schema-derived attribute lists are always treated as optional
                        parent_optional = True if attrnames is None else optional
                        ParentRecord, parent_pattern = _createRecordAndPattern(
                            element_name, parent_attrs, warn, parent_optional, child_attrs.keys())
                        children = []
                        for child, attrs in child_attrs.items():
                            child_optional = True if nested[child] is None else optional
                            children.append(
                                (child,) + _createRecordAndPattern(child, attrs, warn, child_optional))
                if parent_pattern is None:
                    continue
            if not current:
                # outside of a parent element: look for the next opening line
                parent_match = parent_pattern.search(line)
                if parent_match:
                    fields = dict(parent_match.groupdict())
                    fields.update((child, []) for child, _, __ in children)
                    current = ParentRecord(**fields)
                continue
            # inside a parent element: the first matching child pattern wins
            for child, ChildRecord, child_pattern in children:
                child_match = child_pattern.search(line)
                if child_match:
                    entry = ChildRecord(**child_match.groupdict())
                    getattr(current, child).append(entry)
                    break
            else:
                if closing_tag in line:
                    yield current
                    current = None
    finally:
        if close_source:
            xmlfile.close()

Parses the given attrnames from all elements with element_name and nested elements of level 1. Unlike parse_fast_nested this function can handle multiple different child elements and returns objects where the child elements can be accessed by name (e.g. timestep.vehicle[0]) as with the parse method. The returned object is not modifiable though. If you set "optional=True", missing attributes will be set to None. Make sure that you list all (potential) attributes (even the ones you are not interested in) in this case. You can only leave out attributes at the end. If attrnames or any value in nested is None, the attribute list is read from the XSD schema referenced in the XML file header (detected while reading); in this case optional is forced to True. @Note: Every element must be on its own line and the attributes must appear in the given order. @Example: parse_fast_structured('fcd.xml', 'timestep', ['time'], {'vehicle': ['id', 'speed', 'lane'], 'person': ['id', 'speed', 'edge']}): @Example: parse_fast_structured('fcd.xml', 'timestep', None, {'vehicle': None, 'person': None}):

def quoteattr(val, ensureUnicode=False):
def quoteattr(val, ensureUnicode=False):
    """
    Returns val escaped for use as an XML attribute value and always wrapped in double quotes.

    val: the text to quote; it is concatenated with a str, so it must be a text string
         (or bytes when ensureUnicode is set)
    ensureUnicode: if True, bytes input (including bytes subclasses) is decoded as UTF-8 first
    """
    # isinstance (instead of an exact type comparison) also decodes subclasses of bytes
    if ensureUnicode and isinstance(val, bytes):
        val = val.decode("utf-8")
    # saxutils sometimes uses single quotes around the attribute
    # we can prevent this by adding an artificial single quote to the value and removing it again
    # (the first two characters of the quoted result are the opening '"' and the artificial "'")
    return '"' + saxutils.quoteattr("'" + val)[2:]
def contextualRename(xmlTree, prefixes, attribute='id', ids=None):
def contextualRename(xmlTree, prefixes, attribute='id', ids=None):
    """
    Replaces the value of the given attribute on every element of xmlTree whose
    name is a key of prefixes, and rewrites all other attribute values that
    are equal to a replaced value accordingly.

    xmlTree: root object; it and its descendants are accessed via name, hasAttribute,
             getAttribute, setAttribute, getAttributes and getChildList
    prefixes: dict mapping element name to the prefix of its new ids
    attribute: name of the attribute to rename (passed through _prefix_keyword)
    ids: optional dict of old value -> new value; it is extended in place, each new
         value is also mapped to itself

    A new value is the prefix followed by the current number of entries in ids;
    if that value was already handed out, a running counter is used instead.
    The tree is modified in place and nothing is returned.
    Example:
    Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}
      - all road ids will be renamed to rN where N is an integer
      - all junction ids will be renamed to jN where N is also an integer
      - all attributes that referred to roads or junctions will now refer to
        their new ids
    """
    if ids is None:
        ids = {}
    assigned = set()
    attribute = _prefix_keyword(attribute)

    def rename(node, counter=0):
        # returns the (possibly advanced) collision counter so it can be threaded through the tree
        if node.name in prefixes and node.hasAttribute(attribute):
            previous = node.getAttribute(attribute)
            if previous not in ids:
                prefix = prefixes[node.name]
                candidate = prefix + str(len(ids))
                while candidate in assigned:
                    counter += 1
                    candidate = prefix + str(counter)
                assigned.add(candidate)
                ids[previous] = candidate
                # keep id on second pass
                ids[candidate] = candidate
            node.setAttribute(attribute, ids[previous])
        for key, value in node.getAttributes():
            if value in ids:
                node.setAttribute(key, ids[value])
        for child in node.getChildList():
            counter = rename(child, counter)
        return counter

    first_pass_counter = rename(xmlTree)
    # call again in case usage came before definition
    rename(xmlTree, first_pass_counter)

Renames the given attribute in a specified set of child elements within xmlTree and also replaces all attribute values that referred to such an id with the new value. Example: Given an opendrive file, when called with prefixes={'road': 'r', 'junction': 'j'}

  • all road ids will be renamed to rN where N is a running integer
  • all junction ids will be renamed to jN where N is also a running integer
  • all attributes that referred to roads or junctions will now refer to their new ids