| Class | RDoc::C_Parser |
| In: |
lib/rdoc/parsers/parse_c.rb
|
| Parent: | Object |
We attempt to parse C extension files. Basically we look for the standard patterns that you find in extensions: rb_define_class, rb_define_method and so on. We also try to find the corresponding C source for the methods and extract comments, but if we fail we don't worry too much.
The comments associated with a Ruby method are extracted from the C comment block associated with the routine that implements that method, that is to say the method whose name is given in the rb_define_method call. For example, you might write:
/*
* Returns a new array that is a one-dimensional flattening of this
* array (recursively). That is, for every element that is an array,
* extract its elements into the new array.
*
* s = [ 1, 2, 3 ] #=> [1, 2, 3]
* t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
* a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
* a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
*/
static VALUE
rb_ary_flatten(ary)
VALUE ary;
{
ary = rb_obj_dup(ary);
rb_ary_flatten_bang(ary);
return ary;
}
...
void
Init_Array()
{
...
rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
Here RDoc will determine from the rb_define_method line that there's a method called "flatten" in class Array, and will look for the implementation in the method rb_ary_flatten. It will then use the comment from that method in the HTML output. This method must be in the same source file as the rb_define_method.
C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc integrates C and Ruby source into one tree
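The comment blocks may include special directives, such as Document-class, Document-method and call-seq (each is illustrated in the example further below).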
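In addition, RDoc assumes by default that the C method implementing a Ruby function is in the same source file as the rb_define_method call. If this isn't the case, add a trailing comment that names the file containing the implementation (the word "in", whitespace, then a .c or .y file name):
rb_define_method(....); // in filename.c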
As an example, we might have an extension that defines multiple classes in its Init_xxx method. We could document them using
/*
* Document-class: MyClass
*
* Encapsulate the writing and reading of the configuration
* file. ...
*/
/*
* Document-method: read_value
*
* call-seq:
* cfg.read_value(key) -> value
* cfg.read_value(key) { |key| } -> value
*
* Return the value corresponding to +key+ from the configuration.
* In the second form, if the key isn't found, invoke the
* block and return its value.
*/
| progress | [RW] |
prepare to parse a C file
# File lib/rdoc/parsers/parse_c.rb, line 177
# Prepare to parse a C file.
#
# top_level:: object that receives the classes and modules that are found
# file_name:: path of the C file; only its directory is remembered, so that
#             implementations living in sibling files can be located
# body::      text of the C file; HAVE_PROTOTYPES ifdefs are stripped and
#             tabs are expanded before it is stored
# options::   run options; +quiet+ decides whether progress is reported
# stats::     counters updated while scanning
def initialize(top_level, file_name, body, options, stats)
  @known_classes = KNOWN_CLASSES.dup

  without_ifdefs = handle_ifdefs_in(body)
  @body          = handle_tab_width(without_ifdefs)

  @options   = options
  @stats     = stats
  @top_level = top_level
  @classes   = {}
  @file_dir  = File.dirname(file_name)

  # The progress stream is left unset when running quietly.
  @progress = $stderr unless options.quiet
end
Extract the classes/modules and methods from a C file and return the corresponding top-level object
# File lib/rdoc/parsers/parse_c.rb, line 190
# Extract the classes/modules and methods from a C file and return the
# corresponding top-level object.
#
# Commented-out definitions are neutralised first; the remaining passes
# then run in a fixed order (classes, constants, methods, includes,
# aliases).
def scan
  remove_commented_out_lines

  passes = %w[do_classes do_constants do_methods do_includes do_aliases]
  passes.each { |pass| send(pass) }

  @top_level
end
# File lib/rdoc/parsers/parse_c.rb, line 455
# Look for aliases of the form:
#
#   rb_define_alias(rb_cArray, "new_name", "old_name");
#
# Each one is counted as a method and registered as an alias on the class
# found (or created) for the C variable.
def do_aliases
  alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

  @body.scan(alias_call) do |c_var, alias_name, target_name|
    @stats.num_methods += 1

    # Fall back to the raw C variable name when the class is not known.
    owner = find_class(c_var, @known_classes[c_var] || c_var)
    owner.add_alias(Alias.new("", target_name, alias_name, ""))
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 318
# Scans the C source for class and module definitions and passes each one
# to handle_class_module. Five forms are recognised, in this order:
# rb_define_module, rb_define_class, boot_defclass,
# rb_define_module_under and rb_define_class_under. In every case the
# C variable on the left of the assignment is captured as +var_name+.
318: def do_classes
# Top-level modules: var = rb_define_module("Name")
319: @body.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do
320: |var_name, class_name|
321: handle_class_module(var_name, "module", class_name, nil, nil)
322: end
323:
# Top-level classes: var = rb_define_class("Name", parent)
324: # The '.' lets us handle SWIG-generated files
325: @body.scan(/([\w\.]+)\s* = \s*rb_define_class\s*
326: \(
327: \s*"(\w+)",
328: \s*(\w+)\s*
329: \)/mx) do
330:
331: |var_name, class_name, parent|
332: handle_class_module(var_name, "class", class_name, parent, nil)
333: end
334:
# boot_defclass("Name", parent): a parent of "0" means no parent at all
335: @body.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do
336: |var_name, class_name, parent|
337: parent = nil if parent == "0"
338: handle_class_module(var_name, "class", class_name, parent, nil)
339: end
340:
# Nested modules: var = rb_define_module_under(enclosing, "Name")
341: @body.scan(/(\w+)\s* = \s*rb_define_module_under\s*
342: \(
343: \s*(\w+),
344: \s*"(\w+)"
345: \s*\)/mx) do
346:
347: |var_name, in_module, class_name|
348: handle_class_module(var_name, "module", class_name, nil, in_module)
349: end
350:
# Nested classes: var = rb_define_class_under(enclosing, "Name", parent)
351: @body.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s*
352: \(
353: \s*(\w+),
354: \s*"(\w+)",
355: \s*(\w+)\s*
356: \s*\)/mx) do
357:
358: |var_name, in_module, class_name, parent|
359: handle_class_module(var_name, "class", class_name, parent, in_module)
360: end
361:
362: end
# File lib/rdoc/parsers/parse_c.rb, line 366
# Scans the C source for constant and variable definitions of the form
#
#   rb_define_const(cFoo, "NAME", value);
#   rb_define_global_const("NAME", value);
#   rb_define_variable(...) / rb_define_readonly_variable(...)
#
# and passes each one to handle_constants. When no owner variable is given,
# or the owner is rb_mKernel, the constant is attributed to rb_cObject.
#
# The alternation deliberately has no trailing empty branch, so a bare
# "rb_define_(" is not mistaken for a definition with an empty type.
def do_constants
  @body.scan(%r{\Wrb_define_
                 (
                    variable          |
                    readonly_variable |
                    const             |
                    global_const
                 )
             \s*\(
               (?:\s*(\w+),)?
               \s*"(\w+)",
               \s*(.*?)\s*\)\s*;
               }xm) do |type, var_name, const_name, definition|
    var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel"
    handle_constants(type, var_name, const_name, definition)
  end
end
Look for includes of the form:
rb_include_module(rb_cArray, rb_mEnumerable);
# File lib/rdoc/parsers/parse_c.rb, line 721
# Look for includes of the form:
#
#   rb_include_module(rb_cArray, rb_mEnumerable);
#
# Includes into classes that this parser has not recorded are ignored. The
# module is recorded under its known Ruby name when there is one, otherwise
# under the raw C variable name.
def do_includes
  @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |class_var, module_var|
    target = @classes[class_var]
    next unless target

    module_name = @known_classes[module_var] || module_var
    target.add_include(Include.new(module_name, ""))
  end
end
# File lib/rdoc/parsers/parse_c.rb, line 388
# Scans the C source for method-like definitions and dispatches each one.
# Four families are recognised, in this order:
#
# * rb_define_{singleton_method,method,module_function,private_method}
# * rb_define_attr
# * rb_define_global_function
# * define_filetest_function
#
# The implementing function may be wrapped in RUBY_METHOD_FUNC(...) or
# VALUEFUNC(...). For the first and third family an optional trailing
# comment of the form "// in file.c" or "/* in file.y" is captured as
# +source_file+ (the pattern requires whitespace, not a colon, after "in").
388: def do_methods
389:
# Ordinary, singleton, module-function and private method definitions
390: @body.scan(%r{rb_define_
391: (
392: singleton_method |
393: method |
394: module_function |
395: private_method
396: )
397: \s*\(\s*([\w\.]+),
398: \s*"([^"]+)",
399: \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
400: \s*(-?\w+)\s*\)
401: (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
402: }xm) do
403: |type, var_name, meth_name, meth_body, param_count, source_file|
404: #"
405:
406: # Ignore top-object and weird struct.c dynamic stuff
407: next if var_name == "ruby_top_self"
408: next if var_name == "nstr"
409: next if var_name == "envtbl"
410: next if var_name == "argf" # it'd be nice to handle this one
411:
# Methods defined on rb_mKernel are attributed to rb_cObject
412: var_name = "rb_cObject" if var_name == "rb_mKernel"
413: handle_method(type, var_name, meth_name,
414: meth_body, param_count, source_file)
415: end
416:
# Attributes: the two numeric flags say whether a reader / writer exists
417: @body.scan(%r{rb_define_attr\(
418: \s*([\w\.]+),
419: \s*"([^"]+)",
420: \s*(\d+),
421: \s*(\d+)\s*\);
422: }xm) do #"
423: |var_name, attr_name, attr_reader, attr_writer|
424:
425: #var_name = "rb_cObject" if var_name == "rb_mKernel"
426: handle_attr(var_name, attr_name,
427: attr_reader.to_i != 0,
428: attr_writer.to_i != 0)
429: end
430:
# Global functions are handed over as plain methods of rb_mKernel
431: @body.scan(%r{rb_define_global_function\s*\(
432: \s*"([^"]+)",
433: \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
434: \s*(-?\w+)\s*\)
435: (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
436: }xm) do #"
437: |meth_name, meth_body, param_count, source_file|
438: handle_method("method", "rb_mKernel", meth_name,
439: meth_body, param_count, source_file)
440: end
441:
# File-test functions are registered twice: as a method of rb_mFileTest
# and as a singleton method of rb_cFile
442: @body.scan(/define_filetest_function\s*\(
443: \s*"([^"]+)",
444: \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
445: \s*(-?\w+)\s*\)/xm) do #"
446: |meth_name, meth_body, param_count|
447:
448: handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count)
449: handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count)
450: end
451: end
# File lib/rdoc/parsers/parse_c.rb, line 566
# Finds the comment documenting the attribute +attr_name+.
#
# Two places are searched, in order:
#
# 1. a C comment sitting directly above the rb_define_attr call that
#    defines +attr_name+ (the whole comment, including its delimiters and
#    trailing whitespace, is returned);
# 2. a "Document-attr: attr_name" section (the text following that line,
#    up to and including the closing "*/", is returned).
#
# Returns an empty string when neither is present.
#
# The name is escaped before being interpolated so that characters with a
# special meaning in a regexp (or in extended-mode syntax) are matched
# literally, in the same way find_override_comment treats method names.
def find_attr_comment(attr_name)
  name = Regexp.escape(attr_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 rb_define_attr\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-attr:\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end
Find the C code corresponding to a Ruby method
# File lib/rdoc/parsers/parse_c.rb, line 626
# Find the C code corresponding to a Ruby method and record it, together
# with its comment, on +meth_obj+.
#
# meth_name:: name of the C function (or macro) implementing the method
# meth_obj::  method object that receives the source text and the comment
# body::      C source text to search
# quiet::     when true, no "No definition" warning is printed
#
# Three shapes are recognised in +body+, tried in this order:
#
# 1. a comment followed by a "[static] VALUE meth_name(...)" definition;
# 2. a comment followed by "#define meth_name other_name", in which case
#    other_name is looked up recursively and the comment found here is
#    prepended to the one found there;
# 3. a bare "#define meth_name other_name" without a comment.
#
# When none of them matches, a "Document-method" override comment is still
# accepted on its own. Returns true when something was found, false
# otherwise.
626: def find_body(meth_name, meth_obj, body, quiet = false)
627: case body
628: when %r{((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
629: \s*(\(.*?\)).*?^}xm
630: comment, params = $1, $2
631: body_text = $&
632:
633: remove_private_comments(comment) if comment
634:
635: # see if we can find the whole body
636:
# Extends the match found so far up to the first "}" at the start of a line
637: re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
638: if Regexp.new(re, Regexp::MULTILINE).match(body)
639: body_text = $&
640: end
641:
642: # The comment block may have been overridden with a
643: # 'Document-method' block. This happens in the interpreter
644: # when multiple methods are vectored through to the same
645: # C method but those methods are logically distinct (for
646: # example Kernel.hash and Kernel.object_id share the same
647: # implementation
648:
649: override_comment = find_override_comment(meth_obj.name)
650: comment = override_comment if override_comment
651:
652: find_modifiers(comment, meth_obj) if comment
653:
654: # meth_obj.params = params
655: meth_obj.start_collecting_tokens
656: meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text))
657: meth_obj.comment = mangle_comment(comment)
658: when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
659: comment = $1
# NOTE(review): the result of this recursive lookup is ignored, and line 662
# assumes meth_obj.comment is a String afterwards -- confirm that holds when
# the target of the #define cannot be found.
660: find_body($2, meth_obj, body, true)
661: find_modifiers(comment, meth_obj)
662: meth_obj.comment = mangle_comment(comment) + meth_obj.comment
663: when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
664: unless find_body($1, meth_obj, body, true)
665: warn "No definition for #{meth_name}" unless quiet
666: return false
667: end
668: else
669:
670: # No body, but might still have an override comment
671: comment = find_override_comment(meth_obj.name)
672:
673: if comment
674: find_modifiers(comment, meth_obj)
675: meth_obj.comment = mangle_comment(comment)
676: else
677: warn "No definition for #{meth_name}" unless quiet
678: return false
679: end
680: end
681: true
682: end
# File lib/rdoc/parsers/parse_c.rb, line 740
# Returns the class or module object registered for the C variable
# +raw_name+, creating it at the top level on first use.
#
# Variables whose name starts with "rb_m" are created as modules;
# everything else becomes a class without a parent. +name+ is the Ruby
# name given to a newly created object.
def find_class(raw_name, name)
  @classes[raw_name] ||=
    if raw_name =~ /^rb_m/
      @top_level.add_module(NormalModule, name)
    else
      @top_level.add_class(NormalClass, name, nil)
    end
end
Look for class or module documentation above Init_+class_name+(void), in a Document-class class_name (or module) comment or above an rb_define_class (or module). If a comment is supplied above a matching Init_ and a rb_define_class the Init_ comment is used.
/*
* This is a comment for Foo
*/
Init_Foo(void) {
VALUE cFoo = rb_define_class("Foo", rb_cObject);
}
/*
* Document-class: Foo
* This is a comment for Foo
*/
Init_foo(void) {
VALUE cFoo = rb_define_class("Foo", rb_cObject);
}
/*
* This is a comment for Foo
*/
VALUE cFoo = rb_define_class("Foo", rb_cObject);
# File lib/rdoc/parsers/parse_c.rb, line 293
# Looks for the documentation of +class_name+ and, when found, stores it
# (with the C comment markers blanked out) in +class_meth.comment+.
#
# The places searched, in order of precedence, are:
#
# 1. the comment above "void Init_<class_name>(void)";
# 2. a "Document-class: <class_name>" (or Document-module) section;
# 3. the comment immediately preceding the rb_define_class /
#    rb_define_module call that names the class (only the last component
#    of a "A::B" name is looked for).
#
# +class_name+ is escaped before being interpolated into the patterns so
# that it is matched literally. In case 3 a definition that appears before
# any comment yields no documentation, rather than having the code chunk
# itself used as the comment.
def find_class_comment(class_name, class_meth)
  comment = nil
  name = Regexp.escape(class_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 (static\s+)?void\s+Init_#{name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi
    comment = $1
  elsif @body =~ %r{Document-(class|module):\s#{name}\s*?\n((?>.*?\*/))}m
    comment = $2
  elsif @body =~ /rb_define_(class|module)/m
    short_name = Regexp.escape(class_name.split("::").last.to_s)

    # Splitting on comments yields alternating code and comment chunks, so
    # the chunk before a matching piece of code is the comment above it.
    chunks = @body.split(/(\/\*.*?\*\/)\s*?\n/m)
    chunks.each_with_index do |chunk, index|
      next unless chunk =~ /rb_define_(class|module).*?"(#{short_name})"/m
      comment = chunks[index - 1] if index > 0
      break
    end
  end

  class_meth.comment = mangle_comment(comment) if comment
end
Finds a comment matching type and const_name, either directly above the rb_define_ call that defines the constant or in the matching Document- section.
# File lib/rdoc/parsers/parse_c.rb, line 523
# Finds the comment documenting the constant +const_name+ defined through
# rb_define_<type>.
#
# Two places are searched, in order:
#
# 1. a C comment starting on its own line directly above the
#    rb_define_<type> call for +const_name+ (the whole comment, including
#    delimiters and trailing whitespace, is returned);
# 2. a "Document-const:", "Document-global:" or "Document-variable:"
#    section for +const_name+ (the text after that line, up to and
#    including the closing "*/", is returned).
#
# Returns an empty string when neither is present.
#
# Both +type+ and +const_name+ are escaped before being interpolated, so
# they are always matched literally.
def find_const_comment(type, const_name)
  kind = Regexp.escape(type)
  name = Regexp.escape(const_name)

  if @body =~ %r{((?>^\s*/\*.*?\*/\s+))
                 rb_define_#{kind}\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-(?:const|global|variable):\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end
If the comment block contains a section that looks like:
call-seq:
Array.new
Array.new(10)
use it for the parameters. The section ends at the first empty comment line.
# File lib/rdoc/parsers/parse_c.rb, line 694
# Extracts the :nodoc: and call-seq: modifiers from +comment+ (which is
# modified in place) and applies them to +meth_obj+.
#
# * ":nodoc:" switches off documentation for the method.
# * "call-seq:" text, up to the next empty comment line (or up to the end
#   of a one-line comment), replaces the method's calling sequence; leading
#   comment asterisks are stripped from it.
def find_modifiers(comment, meth_obj)
  nodoc_found = comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
                comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
  meth_obj.document_self = false if nodoc_found

  seq_found = comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
              comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
  return unless seq_found

  # $1 still refers to whichever of the two substitutions succeeded.
  call_seq = $1
  call_seq.gsub!(/^\s*\*\s*/, '')
  meth_obj.call_seq = call_seq
end
# File lib/rdoc/parsers/parse_c.rb, line 709
# Returns the text of the "Document-method: meth_name" section found in the
# C source, from the line after the directive up to and including the
# closing "*/", or nil when there is no such section.
def find_override_comment(meth_name)
  escaped = Regexp.escape(meth_name)
  section = %r{Document-method:\s#{escaped}\s*?\n((?>.*?\*/))}m.match(@body)
  section && section[1]
end
# File lib/rdoc/parsers/parse_c.rb, line 536
# Registers the attribute +attr_name+ on the class held in the C variable
# +var_name+.
#
# reader:: whether the attribute can be read  (adds "R" to the access flags)
# writer:: whether the attribute can be written (adds "W")
#
# Nothing happens when +var_name+ is not a known class. The attribute's
# comment, when one is found, has its C comment markers blanked out.
def handle_attr(var_name, attr_name, reader, writer)
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  access = (reader ? 'R' : '') + (writer ? 'W' : '')

  comment = find_attr_comment(attr_name)
  comment = mangle_comment(comment) unless comment.empty?

  class_obj.add_attribute(Attr.new('', attr_name, access, comment))
end
# File lib/rdoc/parsers/parse_c.rb, line 230
# Records a class or module definition found in the C source.
#
# var_name::   C variable the definition is assigned to
# class_mod::  "class" or "module"
# class_name:: Ruby name of the class or module
# parent::     C variable of the superclass (classes only), or nil
# in_module::  C variable of the enclosing class/module, or nil when the
#              definition lives at the top level
#
# An enclosing class/module that has not been seen yet, but whose C
# variable is one of the known classes, is created on the fly. When the
# enclosure cannot be resolved a warning is printed and the definition is
# skipped.
def handle_class_module(var_name, class_mod, class_name, parent, in_module)
  progress(class_mod[0, 1])

  parent_name = @known_classes[parent] || parent

  if in_module
    enclosure = @classes[in_module]

    if !enclosure && (known_name = @known_classes[in_module])
      kind = /^rb_m/ =~ in_module ? "module" : "class"
      handle_class_module(in_module, kind, known_name, nil, nil)
      enclosure = @classes[in_module]
    end

    unless enclosure
      warn("Enclosing class/module '#{in_module}' for " +
           "#{class_mod} #{class_name} not known")
      return
    end
  else
    enclosure = @top_level
  end

  case class_mod
  when "class"
    cm = enclosure.add_class(NormalClass, class_name, parent_name)
    @stats.num_classes += 1
  else
    cm = enclosure.add_module(NormalModule, class_name)
    @stats.num_modules += 1
  end
  cm.record_location(enclosure.toplevel)

  find_class_comment(cm.full_name, cm)

  # Remember the object and its full name under the C variable so that
  # later definitions can refer to it.
  @classes[var_name] = cm
  @known_classes[var_name] = cm.full_name
end
Adds constant comments. By providing some_value: at the start of the comment you can override the C value of the constant to give a friendly definition.
/* 300: The perfect score in bowling */ rb_define_const(cFoo, "PERFECT", INT2FIX(300));
Will override +INT2FIX(300)+ with the value +300+ in the output RDoc. Values may include quotes and escaped colons (\:).
# File lib/rdoc/parsers/parse_c.rb, line 477
# Adds the constant +const_name+ to the class held in the C variable
# +var_name+, together with its comment.
#
# type::       the rb_define_ suffix the constant was defined with
# definition:: the C expression given as the constant's value
#
# For rb_define_const the comment may take the form
# "/* definition: comment */": everything before the last ':' replaces the
# C expression as the displayed value, and the rest is the comment. Inside
# that value a literal ':' or '\' can be written with a leading backslash;
# the backslash is removed here. Without a ':' the C expression is kept.
#
# Nothing happens when +var_name+ is not a known class.
def handle_constants(type, var_name, const_name, definition)
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)

  unless class_obj
    warn("Enclosing class/module '#{var_name}' for constant " +
         "#{const_name} not known")
    return
  end

  comment = find_const_comment(type, const_name)

  if type.downcase == 'const' then
    elements = mangle_comment(comment).split(':')
    if elements.nil? or elements.empty? then
      con = Constant.new(const_name, definition, mangle_comment(comment))
    else
      new_definition = elements[0..-2].join(':')
      if new_definition.empty? then # Default to literal C definition
        new_definition = definition
      else
        # Drop the backslash from "\:" and "\\" in a single pass, so that an
        # escaped backslash is never mistaken for the start of another escape.
        new_definition = new_definition.gsub(/\\([:\\])/) { $1 }
      end

      # Leading whitespace of the value is carried over to the comment so
      # that the comment keeps its original indentation.
      new_definition.sub!(/\A(\s+)/, '')
      new_comment = $1.nil? ? elements.last : "#{$1}#{elements.last.lstrip}"
      con = Constant.new(const_name, new_definition,
                         mangle_comment(new_comment))
    end
  else
    con = Constant.new(const_name, definition, mangle_comment(comment))
  end

  class_obj.add_constant(con)
end
Removes ifdefs that would otherwise confuse us
# File lib/rdoc/parsers/parse_c.rb, line 766
# Removes ifdefs that would otherwise confuse us.
#
# Every "#ifdef HAVE_PROTOTYPES ... #else ... #endif" group in +body+ is
# replaced by the text of its #else branch. A new string is returned.
def handle_ifdefs_in(body)
  prototype_guard = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m
  body.gsub(prototype_guard) { $1 }
end
# File lib/rdoc/parsers/parse_c.rb, line 579
# Records a method definition found in the C source.
#
# type::        the rb_define_ suffix ("method", "singleton_method", ...)
# var_name::    C variable of the class or module owning the method
# meth_name::   Ruby name of the method
# meth_body::   name of the C function implementing it
# param_count:: arity as written in the C source
# source_file:: file (relative to the directory of the file being parsed)
#               holding the implementation, or nil for the current file
#
# The method is counted in the statistics even when its owner is unknown,
# in which case nothing else happens. "initialize" is documented as the
# singleton method "new". A negative or non-numeric arity is shown as
# "(...)". The method is only added when its implementation (or an
# override comment) is found and it has not been marked :nodoc:.
def handle_method(type, var_name, meth_name,
                  meth_body, param_count, source_file = nil)
  progress(".")

  @stats.num_methods += 1
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  if meth_name == "initialize"
    meth_name = "new"
    type = "singleton_method"
  end

  meth_obj = AnyMethod.new("", meth_name)
  meth_obj.singleton = %w{singleton_method module_function}.include?(type)

  arity = (Integer(param_count) rescue -1)
  meth_obj.params =
    if arity < 0
      "(...)"
    elsif arity == 0
      "()"
    else
      "(" + (1..arity).map { |i| "p#{i}" }.join(", ") + ")"
    end

  if source_file
    # Bodies of other files are read once and then reused.
    file_name = File.join(@file_dir, source_file)
    body = (@@known_bodies[source_file] ||= File.read(file_name))
  else
    body = @body
  end

  found = find_body(meth_body, meth_obj, body)
  class_obj.add_method(meth_obj) if found and meth_obj.document_self
end
# File lib/rdoc/parsers/parse_c.rb, line 751
# Expands the tabs in +body+ into spaces, using the configured tab width.
#
# A body without tabs is returned as is. Otherwise the text is processed
# line by line: each run of tabs is replaced by enough spaces to reach the
# tab stop implied by the text that precedes it, and the lines are joined
# again with "\n".
def handle_tab_width(body)
  return body unless /\t/ =~ body

  tab_width = Options.instance.tab_width

  expanded = body.split(/\n/).map do |line|
    1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width) } && $~
    line
  end

  expanded.join("\n")
end
Remove the /* and */ delimiters and the leading asterisks from C comments (they are replaced by spaces)
# File lib/rdoc/parsers/parse_c.rb, line 733
# Blanks out the C comment markers in +comment+: the first opening "/*",
# the first closing "*/" and the asterisk that starts each line are all
# overwritten with the same number of spaces, so the remaining text keeps
# its columns.
#
# +comment+ is modified in place and also returned.
def mangle_comment(comment)
  blanks = lambda { |marker| " " * marker.length }

  comment.sub!(%r{/\*+}, &blanks)
  comment.sub!(%r{\*+/}, &blanks)
  comment.gsub!(/^[ \t]*\*/m, &blanks)

  comment
end
# File lib/rdoc/parsers/parse_c.rb, line 204
# Writes +char+ to the progress stream and flushes it straight away.
# Nothing is written when the options ask for quiet operation.
def progress(char)
  return if @options.quiet

  @progress.print(char)
  @progress.flush
end
Neutralizes rb_define_ calls that have been commented out with //, which might otherwise get picked up when scanning for classes and methods.
# File lib/rdoc/parsers/parse_c.rb, line 226
# Neutralises definitions that were commented out with "//": on every line,
# the text from the "//" up to and including the last "rb_define_" is
# reduced to a bare "//", so the scanners no longer recognise the call.
#
# The source text is modified in place.
def remove_commented_out_lines
  @body.gsub!(%r{//.*rb_define_}) { '//' }
end