| Class | REXML::Text |
| In: |
lib/rexml/text.rb
|
| Parent: | Child |
Represents text nodes in an XML document
| SPECIALS | = | [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] | The order in which the substitutions occur | |
| SUBSTITUTES | = | ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'] | ||
| SLAICEPS | = | [ '<', '>', '"', "'", '&' ] | Characters which are substituted in written strings | |
| SETUTITSBUS | = | [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ] | ||
| ILLEGAL | = | /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um | ||
| NUMERICENTITY | = | /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ | ||
| REFERENCE | = | /#{Entity::REFERENCE}/ | ||
| EREFERENCE | = | /&(?!#{Entity::NAME};)/ |
Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.
respect_whitespace (boolean, false) if true, whitespace is respected
parent (nil) if this is a Parent object, the parent will be set to this.
raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false. Use this field if you have entities defined for some text, and you don't want REXML to escape that text in output.
Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;" Text.new( "<&", false, nil, true ) #-> Parse exception Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;" # Assume that the entity "s" is defined to be "sean" # and that the entity "r" is defined to be "russell" Text.new( "sean russell" ) #-> "&s; &r;" Text.new( "sean russell", false, nil, true ) #-> "sean russell"
entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.
Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
In the last example, the entity_filter argument is ignored.
pattern INTERNAL USE ONLY
# File lib/rexml/text.rb, line 60
# Builds a text node from a String or from another Text.
#
# arg::                a String holding the content, or a Text whose string
#                      value and raw flag are copied
# respect_whitespace:: when false, runs of a repeated space, newline or tab
#                      in a String argument are squeezed to a single character
# parent::             when given, it is passed to super and its raw flag
#                      becomes the default raw flag of this node
# raw::                true or false overrides the inherited raw flag; nil
#                      leaves it untouched
# entity_filter::      stored unchanged in @entity_filter
# illegal::            pattern matched against the content of a raw node
#
# Raises RuntimeError when +arg+ is neither a String nor a Text, or when the
# node is raw and its content matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup (not clone) so a frozen argument can still be cleaned up in place.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # Copy the buffer: it is modified in place below and by later appends,
    # which must not leak into the node being copied.
    @string = arg.to_s.dup
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Normalize line endings to a single newline.
  @string.gsub!( /\r\n?/, "\n" )

  # Raw content is kept as given, so it must not contain illegal characters.
  if @raw and @string =~ illegal
    raise "Illegal character '#{$1}' in raw string \"#{@string}\""
  end
end
Escapes all possible entities
# File lib/rexml/text.rb, line 288
# Escapes +input+ so that it can be written as XML text.
#
# input::         the object to escape; its to_s value is used
# doctype::       when given, the values of its entities are replaced by
#                 references to those entities; otherwise
#                 DocType::DEFAULT_ENTITIES is used
# entity_filter:: optional collection of entities to leave alone; only
#                 consulted when +doctype+ is given
#
# Returns a new String; +input+ is not modified.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  copy = input.to_s
  # Escape every ampersand first, so the ampersands of the entity references
  # inserted below are not escaped a second time.
  copy = copy.gsub( "&", "&amp;" )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter is documented as holding entity names; Entity objects are
      # still accepted for callers relying on the previous comparison.
      next if entity_filter and
        ( entity_filter.include?(entity.name) or entity_filter.include?(entity) )
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub(entity.value, "&#{entity.name};" )
    end
  end
  copy
end
Reads text, substituting entities
# File lib/rexml/text.rb, line 262
# Reads text, replacing entity and character references by the characters
# they stand for.
#
# input::   the String to read; it is copied, never modified
# illegal:: optional pattern; when the text matches it a ParseException is
#           raised
#
# Returns the substituted copy.
def Text::read_with_substitution( input, illegal=nil )
  # dup (not clone) so a frozen input can still be processed.
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? "&"
    # One explicit call per pattern, in SETUTITSBUS order, instead of a loop.
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # Numeric character references: decimal (&#65;) or hexadecimal (&#x41;),
    # hex digits in either case, leading zeros ignored.
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
      code = $1
      # Integer() needs the "0x" prefix to read hexadecimal digits.
      code = "0#{code}" if code[0, 1] == "x"
      [Integer(code)].pack('U*')
    }
  end
  copy
end
Unescapes all possible entities
# File lib/rexml/text.rb, line 310
# Unescapes +string+, replacing references by the text they stand for.
#
# string::  the String to unescape; it is copied, never modified
# doctype:: when given, entity values are looked up with doctype.entity;
#           otherwise DocType::DEFAULT_ENTITIES is used
# filter::  optional collection of entity names that are left untouched
# illegal:: accepted for interface compatibility; not used by this method
#
# Returns the unescaped copy.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup (not clone) so a frozen input can still be processed.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0

  rv.gsub!( NUMERICENTITY ) {
    code = $1
    # Integer() needs the "0x" prefix to read hexadecimal digits.
    code = "0#{code}" if code[0, 1] == "x"
    [Integer(code)].pack('U*')
  }

  # Each distinct name only needs to be substituted once.
  names = matches.collect {|x| x[0] }.compact.uniq
  unless names.empty?
    names.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      entity_value = if doctype
        doctype.entity( entity_reference )
      else
        default = DocType::DEFAULT_ENTITIES[ entity_reference ]
        default && default.value
      end
      # String pattern and block replacement: neither the entity name nor
      # its value is interpreted as a regexp or a replacement template.
      rv.gsub!( "&#{entity_reference};" ) { entity_value } if entity_value
    end
    # Ampersands last, so the text they produce is not unescaped again.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
Appends text to this text node. The text is appended in the raw mode of this text node.
# File lib/rexml/text.rb, line 112
# Appends +to_append+ to the content of this node after converting its
# line endings to "\n".
#
# The cached escaped and unescaped values are discarded, because they no
# longer describe the content once it has grown.
#
# Returns the updated content string.
def <<( to_append )
  @normalized = @unnormalized = nil
  @string << to_append.gsub( /\r\n?/, "\n" )
end
# File lib/rexml/text.rb, line 202
# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string::          the text to indent
# level::           number of repetitions of +style+; a negative level
#                   returns +string+ unchanged
# style::           the indentation unit
# indentfirstline:: when false, leading and trailing whitespace is stripped
#                   from the result, which removes the first line's indent
#
# Trailing whitespace of each line, including its line terminator, is
# removed before the line is appended to the result.
#
# Returns the indented text as a new String.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  new_string = ''.dup
  # String#each no longer exists; each_line is the per-line iterator.
  string.each_line do |line|
    new_string << (indent_string + line).sub(/[\s]+$/,'')
  end
  new_string.strip! unless indentfirstline
  new_string
end
Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.to_s #-> "&lt; &amp; &s; russell" t = Text.new( "< & &s; russell", false, nil, false ) t.to_s #-> "&lt; &amp; &s; russell" u = Text.new( "sean russell", false, nil, true ) u.to_s #-> "sean russell"
# File lib/rexml/text.rb, line 137
# Returns the string value of this node as it is written to XML.
#
# A raw node returns its content unchanged. Otherwise the content is passed
# through Text::normalize, using the doctype of the parent's document when
# there is one, and the result is cached in @normalized.
def to_s
  if @raw
    @string
  elsif @normalized
    @normalized
  else
    document = @parent ? @parent.document : nil
    doctype = document ? document.doctype : nil
    @normalized = Text::normalize( @string, doctype, @entity_filter )
  end
end
Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.value #-> "< & sean russell" t = Text.new( "&lt; &amp; &s; russell", false, nil, false ) t.value #-> "< & sean russell" u = Text.new( "sean russell", false, nil, true ) u.value #-> "sean russell"
# File lib/rexml/text.rb, line 167
# Returns the content of this node with its entities replaced.
#
# The content is passed through Text::unnormalize, using the doctype of the
# parent's document when there is one. The result is cached in
# @unnormalized and returned directly on later calls.
def value
  # The cached value has to be returned explicitly; as a bare expression it
  # was evaluated and thrown away, so the text was unescaped on every call.
  return @unnormalized if @unnormalized

  document = @parent ? @parent.document : nil
  doctype = document ? document.doctype : nil
  @unnormalized = Text::unnormalize( @string, doctype )
end
Sets the contents of this text node. This expects the text to be unnormalized. It returns self.
e = Element.new( "a" ) e.add_text( "foo" ) # <a>foo</a> e[0].value = "bar" # <a>bar</a> e[0].value = "<a>" # <a>&lt;a&gt;</a>
# File lib/rexml/text.rb, line 184
# Replaces the content of this node with +val+, converting its line endings
# to "\n". Both cached string forms are discarded and the node is marked as
# not raw.
def value=( val )
  @string = val.gsub( /\r\n?/, "\n" )
  @normalized = @unnormalized = nil
  @raw = false
end
# File lib/rexml/text.rb, line 191
# Breaks +string+ into lines of at most +width+ characters by replacing
# spaces with newlines.
#
# string::     the text to wrap
# width::      the maximum line length
# addnewline:: when true and at least one break was made, the result also
#              starts with a newline
#
# Each break is made at the last space at or before index +width+. When no
# such space exists the rest of the text is left unbroken (previously this
# raised a TypeError).
#
# Returns the wrapped text; a string no longer than +width+ is returned
# as is.
def wrap(string, width, addnewline=false)
  pieces = []
  remaining = string
  while remaining.length > width and (place = remaining.rindex(' ', width))
    pieces << remaining[0, place]
    remaining = remaining[place + 1..-1]
  end
  return remaining if pieces.empty?

  pieces << remaining
  wrapped = pieces.join("\n")
  addnewline ? "\n" + wrapped : wrapped
end
# File lib/rexml/text.rb, line 217
# Deprecated: writes this node to +writer+ through a formatter and emits a
# deprecation warning.
#
# writer::     the output passed on to the formatter
# indent::     a value above -1 selects REXML::Formatters::Pretty with that
#              indentation; otherwise REXML::Formatters::Default is used
# transitive:: not used by this method
# ie_hack::    not used by this method
#
# Returns whatever the formatter's write returns.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  formatter = indent > -1 ? REXML::Formatters::Pretty.new( indent ) : REXML::Formatters::Default.new
  formatter.write( self, writer )
end
Writes out text, substituting special characters beforehand. out A String, IO, or any other object supporting <<( String ) input the text to substitute and the write out
z=utf8.unpack("U*")
ascOut=""
z.each{|r|
if r < 0x100
ascOut.concat(r.chr)
else
ascOut.concat(sprintf("&#x%x;", r))
end
}
puts ascOut
# File lib/rexml/text.rb, line 249
# Escapes the special characters of +input+ and appends the result to +out+.
#
# out::   any object supporting <<( String ), such as a String or an IO
# input:: the text to escape; it is copied, never modified
#
# Returns whatever <tt>out << escaped</tt> returns.
def write_with_substitution(out, input)
  escaped = input.clone
  # One explicit call per pattern, applied in SPECIALS order; kept unrolled
  # rather than looped, as a deliberate speed choice.
  escaped.gsub!(SPECIALS[0], SUBSTITUTES[0])
  escaped.gsub!(SPECIALS[1], SUBSTITUTES[1])
  escaped.gsub!(SPECIALS[2], SUBSTITUTES[2])
  escaped.gsub!(SPECIALS[3], SUBSTITUTES[3])
  escaped.gsub!(SPECIALS[4], SUBSTITUTES[4])
  escaped.gsub!(SPECIALS[5], SUBSTITUTES[5])
  out << escaped
end