| Module | RSS::ListenerMixin |
| In: |
lib/rss/atom.rb
lib/rss/0.9.rb lib/rss/1.0.rb lib/rss/parser.rb |
| CONTENT_PATTERN | = | /\s*([^=]+)=(["'])([^\2]+?)\2/ |
| NAMESPLIT | = | /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/ |
| do_validate | [RW] | |
| ignore_unknown_element | [RW] | |
| rss | [R] |
# File lib/rss/parser.rb, line 269
# Set up the listener's state before any parse event arrives.
#
# No feed object exists yet (@rss is nil); validation and ignoring of unknown
# elements both default to true. The namespace, tag, and text stacks are each
# seeded with one entry (a map binding the "xml" prefix to :xml, an empty
# child-tag list, and an empty text buffer); the callback stack starts empty.
# The remaining variables hold XML-declaration values, collected stylesheets,
# and the state used while capturing literal XML content.
269: def initialize
270: @rss = nil
271: @ignore_unknown_element = true
272: @do_validate = true
273: @ns_stack = [{"xml" => :xml}]
274: @tag_stack = [[]]
275: @text_stack = ['']
276: @proc_stack = []
277: @last_element = nil
278: @version = @encoding = @standalone = nil
279: @xml_stylesheets = []
280: @xml_child_mode = false
281: @xml_element = nil
282: @last_xml_element = nil
283: end
# File lib/rss/parser.rb, line 290
# Handle a processing instruction.
#
# name    - the instruction target.
# content - the instruction's content string, parsed with parse_pi_content.
#
# Only "xml-stylesheet" is acted on: when its parsed parameters include an
# "href" key, an XMLStyleSheet built from them is appended to
# @xml_stylesheets. Every other instruction is ignored.
290: def instruction(name, content)
291: if name == "xml-stylesheet"
292: params = parse_pi_content(content)
293: if params.has_key?("href")
294: @xml_stylesheets << XMLStyleSheet.new(params)
295: end
296: end
297: end
# File lib/rss/parser.rb, line 352
# Handle the end of an element by unwinding the per-element stacks.
#
# name - the closing tag's name; used only for the DEBUG trace.
#
# Pops the element's accumulated text, its list of child tags, and its
# callback, then invokes the callback with (text, tags) unless it is nil.
# The namespace scope is popped last, so it is still on @ns_stack while the
# callback runs.
352: def tag_end(name)
353: if DEBUG
354: p "end tag #{name}"
355: p @tag_stack
356: end
357: text = @text_stack.pop
358: tags = @tag_stack.pop
359: pr = @proc_stack.pop
360: pr.call(text, tags) unless pr.nil?
361: @ns_stack.pop
362: end
# File lib/rss/parser.rb, line 299
# Handle the start of an element: update the namespace, tag, and text stacks,
# then either extend the XML subtree being captured or dispatch to a start
# handler.
#
# name       - element name as written; split_name divides it into prefix and
#              local part.
# attributes - the element's attributes, iterated as |name, value| pairs.
#
# Attributes named "xmlns" or starting with "xmlns:" are treated as namespace
# declarations and stored in a copy of the enclosing scope's map, keyed by the
# text following the match ("" for a bare "xmlns"); all other attributes go
# into attrs. $POSTMATCH is the English-library alias for $'.
#
# Outside XML-child mode, dispatch prefers initial_start_<local> while @rss is
# still nil, then start_<local>, then start_else_element; each receives its
# own copy of the namespace map (ns.dup).
#
# NOTE(review): _ns is defined outside this listing; it appears to resolve a
# prefix to a namespace URI using the given map -- confirm.
299: def tag_start(name, attributes)
300: @text_stack.push('')
301:
302: ns = @ns_stack.last.dup
303: attrs = {}
304: attributes.each do |n, v|
305: if /\Axmlns(?:\z|:)/ =~ n
306: ns[$POSTMATCH] = v
307: else
308: attrs[n] = v
309: end
310: end
311: @ns_stack.push(ns)
312:
313: prefix, local = split_name(name)
# Record this element in its parent's child list, then open its own list.
314: @tag_stack.last.push([_ns(ns, prefix), local])
315: @tag_stack.push([])
# XML-child mode: build an XML::Element tree instead of dispatching.
316: if @xml_child_mode
317: previous = @last_xml_element
318: element_attrs = attributes.dup
# Outermost captured element only: add an xmlns declaration for every
# in-scope prefix except "xml"; ||= leaves attributes already present alone.
319: unless previous
320: ns.each do |ns_prefix, value|
321: next if ns_prefix == "xml"
322: key = ns_prefix.empty? ? "xmlns" : "xmlns:#{ns_prefix}"
323: element_attrs[key] ||= value
324: end
325: end
326: next_element = XML::Element.new(local,
327: prefix.empty? ? nil : prefix,
328: _ns(ns, prefix),
329: element_attrs)
330: previous << next_element if previous
331: @last_xml_element = next_element
# Invoked by tag_end: step back to the parent element, or, for the outermost
# element, store the finished tree in @xml_element and clear the cursor.
332: pr = Proc.new do |text, tags|
333: if previous
334: @last_xml_element = previous
335: else
336: @xml_element = @last_xml_element
337: @last_xml_element = nil
338: end
339: end
340: @proc_stack.push(pr)
341: else
342: if @rss.nil? and respond_to?("initial_start_#{local}", true)
343: __send__("initial_start_#{local}", local, prefix, attrs, ns.dup)
344: elsif respond_to?("start_#{local}", true)
345: __send__("start_#{local}", local, prefix, attrs, ns.dup)
346: else
347: start_else_element(local, prefix, attrs, ns.dup)
348: end
349: end
350: end
# File lib/rss/parser.rb, line 364
# Handle character data.
#
# data - the text to record.
#
# In XML-child mode the data is appended to the XML element currently being
# built (and dropped when there is none); otherwise it is appended to the
# text buffer on top of @text_stack.
364: def text(data)
365: if @xml_child_mode
366: @last_xml_element << data if @last_xml_element
367: else
368: @text_stack.last << data
369: end
370: end
Sets the instance variables for version, encoding, and standalone.
# File lib/rss/parser.rb, line 286
# Record the values from the XML declaration.
#
# version, encoding, standalone - stored unchanged in @version, @encoding,
# and @standalone; the initial_start_* handlers later pass them to the feed
# object they create.
286: def xmldecl(version, encoding, standalone)
287: @version, @encoding, @standalone = version, encoding, standalone
288: end
# File lib/rss/parser.rb, line 416
# Check that an element's prefix resolves to the namespace URI it requires.
#
# tag_name    - element name, used in the error.
# prefix      - the element's namespace prefix.
# ns          - namespace map used for the lookup via _ns.
# require_uri - the namespace URI the element must be bound to.
#
# Raises NSError when the URIs differ and @do_validate is set. When
# validation is off, the prefix is instead bound to require_uri in the scope
# on top of @ns_stack; the ns argument itself is not modified here.
#
# NOTE(review): _ns is defined outside this listing -- confirm its lookup
# semantics.
416: def check_ns(tag_name, prefix, ns, require_uri)
417: unless _ns(ns, prefix) == require_uri
418: if @do_validate
419: raise NSError.new(tag_name, prefix, require_uri)
420: else
421: # Force bind required URI with prefix
422: @ns_stack.last[prefix] = require_uri
423: end
424: end
425: end
# File lib/rss/parser.rb, line 454
# Build a Hash of the attribute values that klass declares, read from attrs.
#
# tag_name - element name, used in the error.
# prefix   - not read before being overwritten (see note below).
# attrs    - attribute name => value, as collected by tag_start.
# ns       - namespace map of prefix => URI.
# klass    - class whose get_attributes yields
#            [a_name, a_uri, required, element_name] entries.
#
# For each declared attribute, a_uri is normalised to a list of acceptable
# namespace URIs. A namespaced lookup ("<prefix>:<a_name>") is tried for each
# prefix in ns bound to one of those URIs, stopping at the first value found;
# if nothing was found and "" is acceptable, the bare name is tried.
#
# Returns a Hash of a_name => value (value is nil when absent).
# Raises MissingAttributeError when validating and a required attribute has
# no value.
#
# NOTE(review): `for prefix, uri in ns` assigns to the method's own `prefix`
# parameter, because a for loop does not open a new variable scope.
# NOTE(review): the error-path loop has no break, so when several prefixes in
# ns map to an acceptable URI, each one is prepended to a_name in turn.
# NOTE(review): element_name is yielded but never used in this method.
454: def collect_attributes(tag_name, prefix, attrs, ns, klass)
455: attributes = {}
456: klass.get_attributes.each do |a_name, a_uri, required, element_name|
# Normalise a single URI (or any non-collection) to a one-element list.
457: if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)
458: a_uri = [a_uri]
459: end
460: unless a_uri == [""]
461: for prefix, uri in ns
462: if a_uri.include?(uri)
463: val = attrs["#{prefix}:#{a_name}"]
464: break if val
465: end
466: end
467: end
468: if val.nil? and a_uri.include?("")
469: val = attrs[a_name]
470: end
471:
472: if @do_validate and required and val.nil?
# Prefix the name reported in the error when the attribute must be namespaced.
473: unless a_uri.include?("")
474: for prefix, uri in ns
475: if a_uri.include?(uri)
476: a_name = "#{prefix}:#{a_name}"
477: end
478: end
479: end
480: raise MissingAttributeError.new(tag_name, a_name)
481: end
482:
483: attributes[a_name] = val
484: end
485: attributes
486: end
# File lib/rss/1.0.rb, line 438
# Start handler for an "RDF" element seen while no feed object exists yet
# (tag_start dispatches to initial_start_<local> only while @rss is nil).
#
# tag_name, prefix, ns - passed to check_ns, which requires RDF::URI.
# attrs                - not used here.
#
# Creates the RDF object from the stored XML-declaration values, copies the
# validation flag and collected stylesheets onto it, makes it the current
# element, and pushes a callback that validates it at the end tag when
# @do_validate is set.
438: def initial_start_RDF(tag_name, prefix, attrs, ns)
439: check_ns(tag_name, prefix, ns, RDF::URI)
440:
441: @rss = RDF.new(@version, @encoding, @standalone)
442: @rss.do_validate = @do_validate
443: @rss.xml_stylesheets = @xml_stylesheets
444: @last_element = @rss
445: pr = Proc.new do |text, tags|
446: @rss.validate_for_stream(tags, @ignore_unknown_element) if @do_validate
447: end
448: @proc_stack.push(pr)
449: end
# File lib/rss/atom.rb, line 734
# Start handler for an "entry" element seen while no feed object exists yet
# (tag_start dispatches to initial_start_<local> only while @rss is nil).
#
# tag_name, prefix, ns - passed to check_ns, which requires Atom::URI.
# attrs                - supplies the "xml:lang" and "xml:base" values.
#
# Creates the Atom::Entry from the stored XML-declaration values, copies the
# validation flag, stylesheets, language, and base onto it, makes it the
# current element, and pushes a callback that validates it at the end tag
# when @do_validate is set. Unlike the RDF and rss handlers, the validation
# call here is not passed @ignore_unknown_element.
734: def initial_start_entry(tag_name, prefix, attrs, ns)
735: check_ns(tag_name, prefix, ns, Atom::URI)
736:
737: @rss = Atom::Entry.new(@version, @encoding, @standalone)
738: @rss.do_validate = @do_validate
739: @rss.xml_stylesheets = @xml_stylesheets
740: @rss.lang = attrs["xml:lang"]
741: @rss.base = attrs["xml:base"]
742: @last_element = @rss
743: pr = Proc.new do |text, tags|
744: @rss.validate_for_stream(tags) if @do_validate
745: end
746: @proc_stack.push(pr)
747: end
# File lib/rss/atom.rb, line 719
# Start handler for a "feed" element seen while no feed object exists yet
# (tag_start dispatches to initial_start_<local> only while @rss is nil).
#
# tag_name, prefix, ns - passed to check_ns, which requires Atom::URI.
# attrs                - supplies the "xml:lang" and "xml:base" values.
#
# Creates the Atom::Feed from the stored XML-declaration values, copies the
# validation flag, stylesheets, language, and base onto it, makes it the
# current element, and pushes a callback that validates it at the end tag
# when @do_validate is set. Unlike the RDF and rss handlers, the validation
# call here is not passed @ignore_unknown_element.
719: def initial_start_feed(tag_name, prefix, attrs, ns)
720: check_ns(tag_name, prefix, ns, Atom::URI)
721:
722: @rss = Atom::Feed.new(@version, @encoding, @standalone)
723: @rss.do_validate = @do_validate
724: @rss.xml_stylesheets = @xml_stylesheets
725: @rss.lang = attrs["xml:lang"]
726: @rss.base = attrs["xml:base"]
727: @last_element = @rss
728: pr = Proc.new do |text, tags|
729: @rss.validate_for_stream(tags) if @do_validate
730: end
731: @proc_stack.push(pr)
732: end
# File lib/rss/0.9.rb, line 413
# Start handler for an "rss" element seen while no feed object exists yet
# (tag_start dispatches to initial_start_<local> only while @rss is nil).
#
# tag_name, prefix, ns - passed to check_ns, which requires the empty
#                        namespace URI "".
# attrs                - supplies the 'version' attribute given to Rss.new.
#
# Creates the Rss object from that version plus the stored XML-declaration
# values, copies the validation flag and collected stylesheets onto it, makes
# it the current element, and pushes a callback that validates it at the end
# tag when @do_validate is set.
413: def initial_start_rss(tag_name, prefix, attrs, ns)
414: check_ns(tag_name, prefix, ns, "")
415:
416: @rss = Rss.new(attrs['version'], @version, @encoding, @standalone)
417: @rss.do_validate = @do_validate
418: @rss.xml_stylesheets = @xml_stylesheets
419: @last_element = @rss
420: pr = Proc.new do |text, tags|
421: @rss.validate_for_stream(tags, @ignore_unknown_element) if @do_validate
422: end
423: @proc_stack.push(pr)
424: end
Extracts every name="value" pair from content. Values may be enclosed in either single or double quotes, as matched by the constant CONTENT_PATTERN. Returns a Hash mapping each name to its value.
# File lib/rss/parser.rb, line 381
# Parse the content of a processing instruction into a Hash.
#
# content - the instruction's content string.
#
# Scans content for every match of CONTENT_PATTERN and stores the captured
# value under the captured name; the captured quote character is discarded.
# A name that occurs more than once keeps its last value.
#
# Returns the Hash (empty when nothing matches).
381: def parse_pi_content(content)
382: params = {}
383: content.scan(CONTENT_PATTERN) do |name, quote, value|
384: params[name] = value
385: end
386: params
387: end
# File lib/rss/parser.rb, line 488
# Create the model object for a child element, attach it to the current
# element, and return the callback to run at the element's end tag.
#
# tag_name   - element name, passed to the parent's set_next_element.
# klass      - class instantiated as klass.new(@do_validate, attributes).
# attributes - attribute Hash for the new object.
#
# The new object becomes @last_element; its parent is set when
# klass.need_parent? is true, and XML-child mode is switched on when the
# object reports have_xml_content?.
#
# Returns a Proc taking (text, tags). It is not pushed onto @proc_stack here;
# that is left to the caller.
488: def setup_next_element(tag_name, klass, attributes)
489: previous = @last_element
490: next_element = klass.new(@do_validate, attributes)
491: previous.set_next_element(tag_name, next_element)
492: @last_element = next_element
493: @last_element.parent = previous if klass.need_parent?
494: @xml_child_mode = @last_element.have_xml_content?
495:
496: Proc.new do |text, tags|
497: p(@last_element.class) if DEBUG
# XML content: store the captured tree both as a string and through the
# class's xml_setter, then leave XML-child mode.
498: if @xml_child_mode
499: @last_element.content = @xml_element.to_s
500: xml_setter = @last_element.class.xml_setter
501: @last_element.__send__(xml_setter, @xml_element)
502: @xml_element = nil
503: @xml_child_mode = false
504: else
# Text content: Base64-decode first when the element asks for it.
505: if klass.have_content?
506: if @last_element.need_base64_encode?
507: text = Base64.decode64(text.lstrip)
508: end
509: @last_element.content = text
510: end
511: end
512: if @do_validate
513: @last_element.validate_for_stream(tags, @ignore_unknown_element)
514: end
# Make the parent the current element again.
515: @last_element = previous
516: end
517: end
# File lib/rss/parser.rb, line 411
# Split a qualified element name into its prefix and local part.
#
# name - the element name, matched against NAMESPLIT.
#
# Returns a two-element Array [prefix, local]; prefix is '' when the first
# capture group did not participate in the match.
# NOTE(review): if name does not match NAMESPLIT at all, local is nil.
411: def split_name(name)
412: name =~ NAMESPLIT
413: [$1 || '', $2]
414: end
# File lib/rss/parser.rb, line 389
# Fallback start handler, used by tag_start when no start_<local> method
# exists.
#
# local  - the element's local name.
# prefix - the element's namespace prefix.
# attrs  - the element's non-namespace attributes.
# ns     - namespace map in scope for the element.
#
# Asks self.class.class_name for a class name matching the element's
# namespace and local name. If the current element's class has a constant of
# that name, the element is handled by start_have_something_element with that
# class. Otherwise the element is unknown: a nil callback is pushed when
# validation is off or unknown elements are ignored, and NotExpectedTagError
# is raised in every other case, naming the parent's tag_name or
# "ROOT ELEMENT???" when the parent class has none.
#
# NOTE(review): _ns and self.class.class_name are defined outside this
# listing -- confirm their semantics.
389: def start_else_element(local, prefix, attrs, ns)
390: class_name = self.class.class_name(_ns(ns, prefix), local)
391: current_class = @last_element.class
392: if class_name and
393: (current_class.const_defined?(class_name) or
394: current_class.constants.include?(class_name))
395: next_class = current_class.const_get(class_name)
396: start_have_something_element(local, prefix, attrs, ns, next_class)
397: else
398: if !@do_validate or @ignore_unknown_element
# Keep @proc_stack balanced: tag_end pops one entry per element.
399: @proc_stack.push(nil)
400: else
401: parent = "ROOT ELEMENT???"
402: if current_class.tag_name
403: parent = current_class.tag_name
404: end
405: raise NotExpectedTagError.new(local, _ns(ns, prefix), parent)
406: end
407: end
408: end
# File lib/rss/parser.rb, line 427
# Start handler for an element whose text is stored on the current element
# through a setter.
#
# tag_name     - element name; used to look up the setter and getter.
# prefix, ns   - used only to resolve the namespace reported in
#                NotExpectedTagError.
# required_uri - namespace URI passed to self.class.setter / getter.
#
# Nothing is checked at the start tag; all work happens in the pushed
# callback, which runs at the end tag. If the current element responds to the
# setter, the element's text (text.to_s) is assigned through it; when
# validating, a truthy getter result beforehand raises TooMuchTagError. If
# the element has no such setter, NotExpectedTagError is raised only when
# validating and unknown elements are not ignored.
#
# NOTE(review): self.class.setter / getter and _ns are defined outside this
# listing -- confirm their semantics.
427: def start_get_text_element(tag_name, prefix, ns, required_uri)
428: pr = Proc.new do |text, tags|
429: setter = self.class.setter(required_uri, tag_name)
430: if @last_element.respond_to?(setter)
431: if @do_validate
432: getter = self.class.getter(required_uri, tag_name)
433: if @last_element.__send__(getter)
434: raise TooMuchTagError.new(tag_name, @last_element.tag_name)
435: end
436: end
437: @last_element.__send__(setter, text.to_s)
438: else
439: if @do_validate and !@ignore_unknown_element
440: raise NotExpectedTagError.new(tag_name, _ns(ns, prefix),
441: @last_element.tag_name)
442: end
443: end
444: end
445: @proc_stack.push(pr)
446: end
# File lib/rss/parser.rb, line 448
# Start handler for an element that is represented by its own class.
#
# tag_name, prefix, ns - checked against klass.required_uri via check_ns.
# attrs                - raw attributes, filtered through collect_attributes.
# klass                - the class to instantiate for the element.
#
# Pushes the end-tag callback returned by setup_next_element onto
# @proc_stack. Errors raised by check_ns or collect_attributes propagate.
448: def start_have_something_element(tag_name, prefix, attrs, ns, klass)
449: check_ns(tag_name, prefix, ns, klass.required_uri)
450: attributes = collect_attributes(tag_name, prefix, attrs, ns, klass)
451: @proc_stack.push(setup_next_element(tag_name, klass, attributes))
452: end